perf: don't load entire files into memory

This commit is contained in:
daimond113 2024-08-08 20:37:51 +02:00
parent a8a8ffcbe2
commit d0aecbdabc
No known key found for this signature in database
GPG key ID: 3A8ECE51328B513C
4 changed files with 49 additions and 16 deletions

View file

@ -1,11 +1,15 @@
use crate::util::hash;
use relative_path::RelativePathBuf;
use serde::{Deserialize, Serialize};
use std::{
collections::BTreeMap,
io::{BufWriter, Read, Write},
path::{Path, PathBuf},
};
use relative_path::RelativePathBuf;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use crate::util::hash;
/// A file system entry
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum FSEntry {
@ -40,6 +44,43 @@ pub(crate) fn store_in_cas<P: AsRef<Path>>(
Ok((hash, cas_path))
}
/// Streams `contents` into the content-addressable store rooted at `cas_dir`.
///
/// The reader is consumed in 8 KiB chunks. Each chunk is fed to a SHA-256 hasher and
/// appended to a temporary file created in `<cas_dir>/.tmp`, so the input is never
/// buffered in memory as a whole. Once the reader is exhausted, the temporary file is
/// moved to `<cas_dir>/<first two hex characters of the hash>/<remaining characters>`.
///
/// Returns the lowercase hexadecimal SHA-256 digest of the data that was read.
///
/// # Errors
///
/// Returns any I/O error raised while creating the directories, reading from
/// `contents`, writing or flushing the temporary file, or moving it into place.
/// Interrupted reads are retried, and an `AlreadyExists` error from the final move is
/// treated as success.
pub(crate) fn store_reader_in_cas<P: AsRef<Path>>(
    cas_dir: P,
    contents: &mut dyn Read,
) -> std::io::Result<String> {
    let cas_dir = cas_dir.as_ref();

    let tmp_dir = cas_dir.join(".tmp");
    std::fs::create_dir_all(&tmp_dir)?;

    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8 * 1024];
    let mut file_writer = BufWriter::new(tempfile::NamedTempFile::new_in(&tmp_dir)?);

    loop {
        let bytes_read = match contents.read(&mut buf) {
            // A zero-length read signals end of input.
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read transferred no data and is safe to retry; aborting
            // here would turn a transient signal into a failed store.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };

        let bytes = &buf[..bytes_read];
        hasher.update(bytes);
        file_writer.write_all(bytes)?;
    }

    let hash = format!("{:x}", hasher.finalize());
    // The first two hex characters name the sub-directory, the rest name the file.
    let (prefix, rest) = hash.split_at(2);

    let folder = cas_dir.join(prefix);
    std::fs::create_dir_all(&folder)?;

    let cas_path = folder.join(rest);
    // `into_inner` flushes the buffered bytes before handing back the temporary file.
    match file_writer.into_inner()?.persist_noclobber(cas_path) {
        Ok(_) => {}
        // A file already exists at the hash-derived path; keep it and report success.
        Err(e) if e.error.kind() == std::io::ErrorKind::AlreadyExists => {}
        Err(e) => return Err(e.error),
    };

    Ok(hash)
}
impl PackageFS {
/// Write the package to the given destination
pub fn write_to<P: AsRef<Path>, Q: AsRef<Path>>(

View file

@ -5,7 +5,7 @@ use crate::{
},
names::{PackageName, PackageNames},
source::{
fs::{store_in_cas, FSEntry, PackageFS},
fs::{store_reader_in_cas, FSEntry, PackageFS},
git_index::GitBasedSource,
DependencySpecifiers, PackageSource, ResolveResult, VersionId,
},
@ -22,7 +22,6 @@ use std::{
collections::{BTreeMap, BTreeSet},
fmt::Debug,
hash::Hash,
io::Read,
path::PathBuf,
};
@ -296,10 +295,7 @@ impl PackageSource for PesdePackageSource {
continue;
}
let mut contents = vec![];
entry.read_to_end(&mut contents)?;
let hash = store_in_cas(&project.cas_dir, &contents)?.0;
let hash = store_reader_in_cas(project.cas_dir(), &mut entry)?;
entries.insert(path, FSEntry::File(hash));
}

View file

@ -1,6 +1,5 @@
use std::{
collections::{BTreeMap, VecDeque},
io::Read,
path::PathBuf,
};
@ -13,7 +12,7 @@ use crate::{
manifest::target::{Target, TargetKind},
names::PackageNames,
source::{
fs::{store_in_cas, FSEntry, PackageFS},
fs::{store_reader_in_cas, FSEntry, PackageFS},
git_index::GitBasedSource,
traits::PackageSource,
version_id::VersionId,
@ -213,10 +212,7 @@ impl PackageSource for WallyPackageSource {
}
let mut file = std::fs::File::open(entry.path())?;
let mut contents = vec![];
file.read_to_end(&mut contents)?;
let hash = store_in_cas(&project.cas_dir, &contents)?.0;
let hash = store_reader_in_cas(project.cas_dir(), &mut file)?;
entries.insert(path, FSEntry::File(hash));
}