From d0aecbdabcda58bfbb96e314417041b3bec2e567 Mon Sep 17 00:00:00 2001
From: daimond113 <72147841+daimond113@users.noreply.github.com>
Date: Thu, 8 Aug 2024 20:37:51 +0200
Subject: [PATCH] perf: don't load entire files into memory

---
Review notes (placed between `---` and the diffstat, so they are not part of
the commit message):

* This copy of the patch had been collapsed onto three lines and its generic
  parameter lists had been stripped (`store_in_cas>(`, `store_reader_in_cas>(`,
  `std::io::Result {`, `write_to, Q: AsRef>(`). Line breaks are restored, and
  the missing fragments are filled in as `<P: AsRef<Path>>`, `<String>` (the
  function returns the `String` built by `format!`) and, presumably,
  `<P: AsRef<Path>, Q: AsRef<Path>>` for `write_to` -- confirm against the
  repository.
* Leading whitespace inside the hunks was lost in the collapsed copy and is
  re-created here following rustfmt conventions; regenerate the patch from
  commit d0aecbd before applying it.
* `store_reader_in_cas` reads `contents` in 8 KiB chunks, feeds every chunk to
  a SHA-256 hasher and to a temporary file created under `<cas_dir>/.tmp`, then
  moves that file to `<cas_dir>/<first 2 hex chars>/<remaining hex chars>`
  unless a file already exists there, and returns the hex digest.
* Follow-up: `contents.read(&mut buf)?` propagates
  `std::io::ErrorKind::Interrupted` instead of retrying the read.

 src/resolver.rs         |  2 +-
 src/source/fs.rs        | 47 ++++++++++++++++++++++++++++++++++++++---
 src/source/pesde/mod.rs |  8 ++------
 src/source/wally/mod.rs |  8 ++------
 4 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/src/resolver.rs b/src/resolver.rs
index 089a330..a5352b0 100644
--- a/src/resolver.rs
+++ b/src/resolver.rs
@@ -173,7 +173,7 @@ impl Project {
                             // specifiers in indices store the index url in this field
                             .unwrap()
                     };
-                    
+
                     PackageSources::Wally(crate::source::wally::WallyPackageSource::new(index_url))
                 }
             };
diff --git a/src/source/fs.rs b/src/source/fs.rs
index e0e1f58..d252c6b 100644
--- a/src/source/fs.rs
+++ b/src/source/fs.rs
@@ -1,11 +1,15 @@
-use crate::util::hash;
-use relative_path::RelativePathBuf;
-use serde::{Deserialize, Serialize};
 use std::{
     collections::BTreeMap,
+    io::{BufWriter, Read, Write},
     path::{Path, PathBuf},
 };
 
+use relative_path::RelativePathBuf;
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+
+use crate::util::hash;
+
 /// A file system entry
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum FSEntry {
@@ -40,6 +44,43 @@ pub(crate) fn store_in_cas<P: AsRef<Path>>(
     Ok((hash, cas_path))
 }
 
+pub(crate) fn store_reader_in_cas<P: AsRef<Path>>(
+    cas_dir: P,
+    contents: &mut dyn Read,
+) -> std::io::Result<String> {
+    let tmp_dir = cas_dir.as_ref().join(".tmp");
+    std::fs::create_dir_all(&tmp_dir)?;
+    let mut hasher = Sha256::new();
+    let mut buf = [0; 8 * 1024];
+    let mut file_writer = BufWriter::new(tempfile::NamedTempFile::new_in(&tmp_dir)?);
+
+    loop {
+        let bytes_read = contents.read(&mut buf)?;
+        if bytes_read == 0 {
+            break;
+        }
+
+        let bytes = &buf[..bytes_read];
+        hasher.update(bytes);
+        file_writer.write_all(bytes)?;
+    }
+
+    let hash = format!("{:x}", hasher.finalize());
+    let (prefix, rest) = hash.split_at(2);
+
+    let folder = cas_dir.as_ref().join(prefix);
+    std::fs::create_dir_all(&folder)?;
+
+    let cas_path = folder.join(rest);
+    match file_writer.into_inner()?.persist_noclobber(cas_path) {
+        Ok(_) => {}
+        Err(e) if e.error.kind() == std::io::ErrorKind::AlreadyExists => {}
+        Err(e) => return Err(e.error),
+    };
+
+    Ok(hash)
+}
+
 impl PackageFS {
     /// Write the package to the given destination
     pub fn write_to<P: AsRef<Path>, Q: AsRef<Path>>(
diff --git a/src/source/pesde/mod.rs b/src/source/pesde/mod.rs
index 1d84d7b..418d467 100644
--- a/src/source/pesde/mod.rs
+++ b/src/source/pesde/mod.rs
@@ -5,7 +5,7 @@ use crate::{
     },
     names::{PackageName, PackageNames},
     source::{
-        fs::{store_in_cas, FSEntry, PackageFS},
+        fs::{store_reader_in_cas, FSEntry, PackageFS},
         git_index::GitBasedSource,
         DependencySpecifiers, PackageSource, ResolveResult, VersionId,
     },
@@ -22,7 +22,6 @@ use std::{
     collections::{BTreeMap, BTreeSet},
     fmt::Debug,
     hash::Hash,
-    io::Read,
     path::PathBuf,
 };
 
@@ -296,10 +295,7 @@ impl PackageSource for PesdePackageSource {
                 continue;
             }
 
-            let mut contents = vec![];
-            entry.read_to_end(&mut contents)?;
-
-            let hash = store_in_cas(&project.cas_dir, &contents)?.0;
+            let hash = store_reader_in_cas(project.cas_dir(), &mut entry)?;
             entries.insert(path, FSEntry::File(hash));
         }
 
diff --git a/src/source/wally/mod.rs b/src/source/wally/mod.rs
index bce3eae..68086c8 100644
--- a/src/source/wally/mod.rs
+++ b/src/source/wally/mod.rs
@@ -1,6 +1,5 @@
 use std::{
     collections::{BTreeMap, VecDeque},
-    io::Read,
     path::PathBuf,
 };
 
@@ -13,7 +12,7 @@ use crate::{
     manifest::target::{Target, TargetKind},
     names::PackageNames,
     source::{
-        fs::{store_in_cas, FSEntry, PackageFS},
+        fs::{store_reader_in_cas, FSEntry, PackageFS},
         git_index::GitBasedSource,
         traits::PackageSource,
         version_id::VersionId,
@@ -213,10 +212,7 @@ impl PackageSource for WallyPackageSource {
                     }
 
                     let mut file = std::fs::File::open(entry.path())?;
-                    let mut contents = vec![];
-                    file.read_to_end(&mut contents)?;
-
-                    let hash = store_in_cas(&project.cas_dir, &contents)?.0;
+                    let hash = store_reader_in_cas(project.cas_dir(), &mut file)?;
                     entries.insert(path, FSEntry::File(hash));
                 }
