perf: don't load entire files into memory

This commit is contained in:
daimond113 2024-08-08 20:37:51 +02:00
parent a8a8ffcbe2
commit d0aecbdabc
No known key found for this signature in database
GPG key ID: 3A8ECE51328B513C
4 changed files with 49 additions and 16 deletions

View file

@ -1,11 +1,15 @@
use crate::util::hash;
use relative_path::RelativePathBuf;
use serde::{Deserialize, Serialize};
use std::{ use std::{
collections::BTreeMap, collections::BTreeMap,
io::{BufWriter, Read, Write},
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use relative_path::RelativePathBuf;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::util::hash;
/// A file system entry /// A file system entry
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FSEntry { pub enum FSEntry {
@ -40,6 +44,43 @@ pub(crate) fn store_in_cas<P: AsRef<Path>>(
Ok((hash, cas_path)) Ok((hash, cas_path))
} }
/// Streams `contents` into the content-addressable store rooted at `cas_dir`.
///
/// The input is consumed in fixed-size chunks, so it is never held in memory as a whole.
/// Every chunk is fed to a SHA-256 hasher and appended to a temporary file created inside
/// `<cas_dir>/.tmp`. Once the reader is exhausted, the temporary file is moved to
/// `<cas_dir>/<first two hex digits of the digest>/<remaining hex digits>`.
///
/// Returns the SHA-256 digest of the data as a lowercase hexadecimal string.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directories, reading from `contents`,
/// writing the temporary file, or persisting it. An `AlreadyExists` error from the persist
/// step is deliberately ignored: the destination name is derived from the digest, so an
/// existing entry is expected to hold the same content.
pub(crate) fn store_reader_in_cas<P: AsRef<Path>>(
    cas_dir: P,
    contents: &mut dyn Read,
) -> std::io::Result<String> {
    let cas_dir = cas_dir.as_ref();

    let tmp_dir = cas_dir.join(".tmp");
    std::fs::create_dir_all(&tmp_dir)?;

    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8 * 1024];
    let mut file_writer = BufWriter::new(tempfile::NamedTempFile::new_in(&tmp_dir)?);

    loop {
        let bytes_read = match contents.read(&mut buf) {
            // A zero-length read signals end of input.
            Ok(0) => break,
            Ok(n) => n,
            // `Read::read` documents `Interrupted` as non-fatal: nothing was consumed, so
            // the read is simply retried instead of aborting the whole store operation.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let bytes = &buf[..bytes_read];
        hasher.update(bytes);
        file_writer.write_all(bytes)?;
    }

    let hash = format!("{:x}", hasher.finalize());
    // Shard entries by the first two hex digits of the digest.
    let (prefix, rest) = hash.split_at(2);

    let folder = cas_dir.join(prefix);
    std::fs::create_dir_all(&folder)?;

    let cas_path = folder.join(rest);
    // `into_inner` flushes the buffered writer, so a failed flush surfaces here via `?`.
    match file_writer.into_inner()?.persist_noclobber(cas_path) {
        Ok(_) => {}
        // An entry with this digest is already stored; keep it as-is.
        Err(e) if e.error.kind() == std::io::ErrorKind::AlreadyExists => {}
        Err(e) => return Err(e.error),
    };

    Ok(hash)
}
impl PackageFS { impl PackageFS {
/// Write the package to the given destination /// Write the package to the given destination
pub fn write_to<P: AsRef<Path>, Q: AsRef<Path>>( pub fn write_to<P: AsRef<Path>, Q: AsRef<Path>>(

View file

@ -5,7 +5,7 @@ use crate::{
}, },
names::{PackageName, PackageNames}, names::{PackageName, PackageNames},
source::{ source::{
fs::{store_in_cas, FSEntry, PackageFS}, fs::{store_reader_in_cas, FSEntry, PackageFS},
git_index::GitBasedSource, git_index::GitBasedSource,
DependencySpecifiers, PackageSource, ResolveResult, VersionId, DependencySpecifiers, PackageSource, ResolveResult, VersionId,
}, },
@ -22,7 +22,6 @@ use std::{
collections::{BTreeMap, BTreeSet}, collections::{BTreeMap, BTreeSet},
fmt::Debug, fmt::Debug,
hash::Hash, hash::Hash,
io::Read,
path::PathBuf, path::PathBuf,
}; };
@ -296,10 +295,7 @@ impl PackageSource for PesdePackageSource {
continue; continue;
} }
let mut contents = vec![]; let hash = store_reader_in_cas(project.cas_dir(), &mut entry)?;
entry.read_to_end(&mut contents)?;
let hash = store_in_cas(&project.cas_dir, &contents)?.0;
entries.insert(path, FSEntry::File(hash)); entries.insert(path, FSEntry::File(hash));
} }

View file

@ -1,6 +1,5 @@
use std::{ use std::{
collections::{BTreeMap, VecDeque}, collections::{BTreeMap, VecDeque},
io::Read,
path::PathBuf, path::PathBuf,
}; };
@ -13,7 +12,7 @@ use crate::{
manifest::target::{Target, TargetKind}, manifest::target::{Target, TargetKind},
names::PackageNames, names::PackageNames,
source::{ source::{
fs::{store_in_cas, FSEntry, PackageFS}, fs::{store_reader_in_cas, FSEntry, PackageFS},
git_index::GitBasedSource, git_index::GitBasedSource,
traits::PackageSource, traits::PackageSource,
version_id::VersionId, version_id::VersionId,
@ -213,10 +212,7 @@ impl PackageSource for WallyPackageSource {
} }
let mut file = std::fs::File::open(entry.path())?; let mut file = std::fs::File::open(entry.path())?;
let mut contents = vec![]; let hash = store_reader_in_cas(project.cas_dir(), &mut file)?;
file.read_to_end(&mut contents)?;
let hash = store_in_cas(&project.cas_dir, &contents)?.0;
entries.insert(path, FSEntry::File(hash)); entries.insert(path, FSEntry::File(hash));
} }