From 42a954206db8add37df6d400cbcb563a74bfdff9 Mon Sep 17 00:00:00 2001 From: Adrian Date: Wed, 10 Apr 2024 09:40:19 +0200 Subject: [PATCH 01/28] add append example --- README.md | 1 + examples/append.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 examples/append.rs diff --git a/README.md b/README.md index f06cdbb5..a45234ac 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ See the [examples directory](examples) for: * How to extract a zip file. * How to extract a single file from a zip. * How to read a zip from the standard input. + * How to append a directory to an existing archive Fuzzing ------- diff --git a/examples/append.rs b/examples/append.rs new file mode 100644 index 00000000..eb79e111 --- /dev/null +++ b/examples/append.rs @@ -0,0 +1,68 @@ +use std::{ + fs::{File, OpenOptions}, + io::{Read, Write}, + path::{Path, PathBuf}, + str::FromStr, +}; +use zip; + +fn gather_files<'a, T: Into<&'a Path>>(path: T, files: &mut Vec) { + let path: &Path = path.into(); + + for entry in path.read_dir().unwrap() { + match entry { + Ok(e) => { + if e.path().is_dir() { + gather_files(e.path().as_ref(), files); + } else if e.path().is_file() { + files.push(e.path()); + } + } + Err(_) => todo!(), + } + } +} + +fn real_main() -> i32 { + let args: Vec<_> = std::env::args().collect(); + if args.len() < 3 { + println!("Usage: {} ", args[0]); + return 1; + } + + let existing_archive_path = &*args[1]; + let append_dir_path = &*args[2]; + let archive = PathBuf::from_str(existing_archive_path).unwrap(); + let to_append = PathBuf::from_str(append_dir_path).unwrap(); + + let existing_zip = OpenOptions::new() + .read(true) + .write(true) + .open(&archive) + .unwrap(); + let mut append_zip = zip::ZipWriter::new_append(existing_zip).unwrap(); + + let mut files: Vec = vec![]; + gather_files(to_append.as_ref(), &mut files); + + let mut buf: Vec = vec![]; + for file in files { + append_zip + .start_file(file.to_string_lossy(), Default::default()) + .unwrap(); + + let mut f = File::open(file).unwrap(); + f.read_to_end(&mut buf).unwrap(); + + append_zip.write_all(&buf).unwrap(); + buf.clear(); + } + + append_zip.finish().unwrap(); + + 0 +} + +fn main() { + std::process::exit(real_main()); +} From f6a34093fcd2c32a976890fc98e795f75b64eaee Mon Sep 17 00:00:00 2001 From: Adrian Date: Mon, 15 Apr 2024 10:34:49 +0200 Subject: [PATCH 02/28] use io::copy instead of read_to_end --- examples/append.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/examples/append.rs b/examples/append.rs index eb79e111..560136d3 100644 --- a/examples/append.rs +++ b/examples/append.rs @@ -45,17 +45,13 @@ fn real_main() -> i32 { let mut files: Vec = vec![]; gather_files(to_append.as_ref(), &mut files); - let mut buf: Vec = vec![]; for file in files { append_zip .start_file(file.to_string_lossy(), Default::default()) .unwrap(); let mut f = File::open(file).unwrap(); - f.read_to_end(&mut buf).unwrap(); - - append_zip.write_all(&buf).unwrap(); - buf.clear(); + let _ = std::io::copy(&mut f, &mut append_zip); } append_zip.finish().unwrap(); From b944c3ad8600b978871e53764fcabe20e1ec7e65 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Wed, 1 May 2024 09:55:07 -0700 Subject: [PATCH 03/28] Fix failing build Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- examples/append.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/append.rs b/examples/append.rs index 560136d3..8bb1e4d8 100644 --- a/examples/append.rs +++ b/examples/append.rs @@ -1,6 +1,5 @@ use std::{ fs::{File, OpenOptions}, - io::{Read, Write}, path::{Path, PathBuf}, str::FromStr, }; @@ -47,7 +46,7 @@ fn real_main() -> i32 { for file in files { append_zip - .start_file(file.to_string_lossy(), Default::default()) + .start_file_from_path(file, Default::default()) .unwrap(); let mut f = File::open(file).unwrap(); From 68f7f5d4521dc7fa7202a42766e2114f6cffaed8 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 7 Sep 2023 01:06:34 -0400 Subject: [PATCH 04/28] add finish_into_readable() --- src/read.rs | 49 ++++++++++++++++++++++++++++++++++++++++--------- src/write.rs | 44 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 79 insertions(+), 14 deletions(-) diff --git a/src/read.rs b/src/read.rs index 0a39faef..f94ebaee 100644 --- a/src/read.rs +++ b/src/read.rs @@ -51,7 +51,6 @@ pub(crate) mod zip_archive { pub(crate) names_map: super::HashMap, usize>, pub(super) offset: u64, pub(super) dir_start: u64, - pub(super) dir_end: u64, } /// ZIP archive reader @@ -331,6 +330,39 @@ pub(crate) struct CentralDirectoryInfo { pub(crate) disk_with_central_directory: u32, } +impl ZipArchive { + pub(crate) fn from_finalized_writer( + files: Vec, + comment: Vec, + reader: R, + central_start: u64, + ) -> ZipResult { + if files.is_empty() { + return Err(ZipError::InvalidArchive( + "attempt to finalize empty zip writer into readable", + )); + } + /* This is where the whole file starts. */ + let initial_offset = files.first().unwrap().header_start; + let names_map: HashMap, usize> = files + .iter() + .enumerate() + .map(|(i, d)| (d.file_name.clone(), i)) + .collect(); + let shared = Arc::new(zip_archive::Shared { + files: files.into_boxed_slice(), + names_map, + offset: initial_offset, + dir_start: central_start, + }); + Ok(Self { + reader, + shared, + comment: comment.into_boxed_slice().into(), + }) + } +} + impl ZipArchive { fn get_directory_info_zip32( footer: &spec::CentralDirectoryEnd, @@ -482,11 +514,12 @@ impl ZipArchive { result.and_then(|dir_info| { // If the parsed number of files is greater than the offset then // something fishy is going on and we shouldn't trust number_of_files. - let file_capacity = if dir_info.number_of_files > cde_start_pos as usize { - 0 - } else { - dir_info.number_of_files - }; + let file_capacity = + if dir_info.number_of_files > dir_info.directory_start as usize { + 0 + } else { + dir_info.number_of_files + }; let mut files = Vec::with_capacity(file_capacity); let mut names_map = HashMap::with_capacity(file_capacity); reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; @@ -495,7 +528,6 @@ impl ZipArchive { names_map.insert(file.file_name.clone(), files.len()); files.push(file); } - let dir_end = reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; if dir_info.disk_number != dir_info.disk_with_central_directory { unsupported_zip_error("Support for multi-disk files is not implemented") } else { @@ -504,7 +536,6 @@ impl ZipArchive { names_map, offset: dir_info.archive_offset, dir_start: dir_info.directory_start, - dir_end, }) } }) @@ -524,7 +555,7 @@ impl ZipArchive { } let shared = ok_results .into_iter() - .max_by_key(|shared| shared.dir_end) + .max_by_key(|shared| shared.dir_start) .unwrap(); reader.seek(io::SeekFrom::Start(shared.dir_start))?; Ok(shared) diff --git a/src/write.rs b/src/write.rs index 0051f253..35f9bf46 100644 --- a/src/write.rs +++ b/src/write.rs @@ -597,6 +597,39 @@ impl ZipWriter { ) -> ZipResult<()> { self.deep_copy_file(&path_to_string(src_path), &path_to_string(dest_path)) } + + /// Write the zip file into the backing stream, then produce a readable archive of that data. + /// + /// This method avoids parsing the central directory records at the end of the stream for + /// a slight performance improvement over running [`ZipArchive::new()`] on the output of + /// [`Self::finish()`]. + /// + ///``` + /// # fn main() -> Result<(), zip::result::ZipError> { + /// use std::io::{Cursor, prelude::*}; + /// use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions}; + /// + /// let buf = Cursor::new(Vec::new()); + /// let mut zip = ZipWriter::new(buf); + /// let options = SimpleFileOptions::default(); + /// zip.start_file("a.txt", options)?; + /// zip.write_all(b"hello\n")?; + /// + /// let mut zip = zip.finish_into_readable()?; + /// let mut s: String = String::new(); + /// zip.by_name("a.txt")?.read_to_string(&mut s)?; + /// assert_eq!(s, "hello\n"); + /// # Ok(()) + /// # } + ///``` + pub fn finish_into_readable(mut self) -> ZipResult> { + let central_start = self.finalize()?; + let inner = mem::replace(&mut self.inner, Closed).unwrap(); + let comment = mem::take(&mut self.comment); + let files = mem::take(&mut self.files); + let archive = ZipArchive::from_finalized_writer(files, comment, inner, central_start)?; + Ok(archive) + } } impl ZipWriter { @@ -1100,7 +1133,7 @@ impl ZipWriter { /// This will return the writer, but one should normally not append any data to the end of the file. /// Note that the zipfile will also be finished on drop. pub fn finish(&mut self) -> ZipResult { - self.finalize()?; + let _central_start = self.finalize()?; let inner = mem::replace(&mut self.inner, Closed); Ok(inner.unwrap()) } @@ -1160,10 +1193,10 @@ impl ZipWriter { self.add_symlink(path_to_string(path), path_to_string(target), options) } - fn finalize(&mut self) -> ZipResult<()> { + fn finalize(&mut self) -> ZipResult { self.finish_file()?; - { + let central_start = { let central_start = self.write_central_and_footer()?; let writer = self.inner.get_plain(); let footer_end = writer.stream_position()?; @@ -1175,9 +1208,10 @@ impl ZipWriter { writer.seek(SeekFrom::End(-(central_and_footer_size as i64)))?; self.write_central_and_footer()?; } - } + central_start + }; - Ok(()) + Ok(central_start) } fn write_central_and_footer(&mut self) -> Result { From aad5d988d66a33dcdd3b0d64ad2baa88386e6871 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 7 Sep 2023 02:10:24 -0400 Subject: [PATCH 05/28] add ZipWriter::merge_archive() method --- src/read.rs | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/write.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/src/read.rs b/src/read.rs index 0a39faef..589baec2 100644 --- a/src/read.rs +++ b/src/read.rs @@ -332,6 +332,67 @@ pub(crate) struct CentralDirectoryInfo { } impl ZipArchive { + pub(crate) fn merge_contents( + &mut self, + mut w: W, + ) -> ZipResult> { + let mut new_files = self.shared.files.clone(); + if new_files.is_empty() { + return Ok(vec![]); + } + /* The first file header will probably start at the beginning of the file, but zip doesn't + * enforce that, and executable zips like PEX files will have a shebang line so will + * definitely be greater than 0. + * + * assert_eq!(0, new_files[0].header_start); // Avoid this. + */ + + let new_initial_header_start = w.stream_position()?; + /* Push back file header starts for all entries in the covered files. */ + new_files.iter_mut().try_for_each(|f| { + /* This is probably the only really important thing to change. */ + f.header_start = f.header_start.checked_add(new_initial_header_start).ok_or( + ZipError::InvalidArchive("new header start from merge would have been too large"), + )?; + /* This is only ever used internally to cache metadata lookups (it's not part of the + * zip spec), and 0 is the sentinel value. */ + f.central_header_start = 0; + /* This is an atomic variable so it can be updated from another thread in the + * implementation (which is good!). */ + if let Some(old_data_start) = f.data_start.take() { + let new_data_start = old_data_start.checked_add(new_initial_header_start).ok_or( + ZipError::InvalidArchive("new data start from merge would have been too large"), + )?; + f.data_start.get_or_init(|| new_data_start); + } + Ok::<_, ZipError>(()) + })?; + + /* Rewind to the beginning of the file. + * + * NB: we *could* decide to start copying from new_files[0].header_start instead, which + * would avoid copying over e.g. any pex shebangs or other file contents that start before + * the first zip file entry. However, zip files actually shouldn't care about garbage data + * in *between* real entries, since the central directory header records the correct start + * location of each, and keeping track of that math is more complicated logic that will only + * rarely be used, since most zips that get merged together are likely to be produced + * specifically for that purpose (and therefore are unlikely to have a shebang or other + * preface). Finally, this preserves any data that might actually be useful. + */ + self.reader.rewind()?; + /* Find the end of the file data. */ + let length_to_read = self.shared.dir_start; + /* Produce a Read that reads bytes up until the start of the central directory header. + * This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying + * handle, which it really shouldn't need to anyway. */ + let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read); + /* Copy over file data from source archive directly. */ + io::copy(&mut limited_raw, &mut w)?; + + /* Return the files we've just written to the data stream. */ + Ok(new_files.into_vec()) + } + fn get_directory_info_zip32( footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, diff --git a/src/write.rs b/src/write.rs index 0051f253..db326a74 100644 --- a/src/write.rs +++ b/src/write.rs @@ -934,6 +934,68 @@ impl ZipWriter { Ok(()) } + /* TODO: link to/use Self::finish_into_readable() from https://github.com/zip-rs/zip/pull/400 in + * this docstring. */ + /// Copy over the entire contents of another archive verbatim. + /// + /// This method extracts file metadata from the `source` archive, then simply performs a single + /// big [`io::copy()`](io::copy) to transfer all the actual file contents without any + /// decompression or decryption. This is more performant than the equivalent operation of + /// calling [`Self::raw_copy_file()`] for each entry from the `source` archive in sequence. + /// + ///``` + /// # fn main() -> Result<(), zip::result::ZipError> { + /// use std::io::{Cursor, prelude::*}; + /// use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions}; + /// + /// let buf = Cursor::new(Vec::new()); + /// let mut zip = ZipWriter::new(buf); + /// zip.start_file("a.txt", SimpleFileOptions::default())?; + /// zip.write_all(b"hello\n")?; + /// let src = ZipArchive::new(zip.finish()?)?; + /// + /// let buf = Cursor::new(Vec::new()); + /// let mut zip = ZipWriter::new(buf); + /// zip.start_file("b.txt", SimpleFileOptions::default())?; + /// zip.write_all(b"hey\n")?; + /// let src2 = ZipArchive::new(zip.finish()?)?; + /// + /// let buf = Cursor::new(Vec::new()); + /// let mut zip = ZipWriter::new(buf); + /// zip.merge_archive(src)?; + /// zip.merge_archive(src2)?; + /// let mut result = ZipArchive::new(zip.finish()?)?; + /// + /// let mut s: String = String::new(); + /// result.by_name("a.txt")?.read_to_string(&mut s)?; + /// assert_eq!(s, "hello\n"); + /// s.clear(); + /// result.by_name("b.txt")?.read_to_string(&mut s)?; + /// assert_eq!(s, "hey\n"); + /// # Ok(()) + /// # } + ///``` + pub fn merge_archive(&mut self, mut source: ZipArchive) -> ZipResult<()> + where + R: Read + io::Seek, + { + self.finish_file()?; + + /* Ensure we accept the file contents on faith (and avoid overwriting the data). + * See raw_copy_file_rename(). */ + self.writing_to_file = true; + self.writing_raw = true; + + let writer = self.inner.get_plain(); + /* Get the file entries from the source archive. */ + let new_files = source.merge_contents(writer)?; + + /* These file entries are now ours! */ + self.files.extend(new_files); + + Ok(()) + } + fn normalize_options(options: &mut FileOptions) { if options.permissions.is_none() { options.permissions = Some(0o644); From e42ff64449e43463bb0a670db765c226c4f22691 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 7 Sep 2023 02:58:24 -0400 Subject: [PATCH 06/28] add merge_archive benchmarks --- Cargo.toml | 4 ++ benches/merge_archive.rs | 120 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) create mode 100644 benches/merge_archive.rs diff --git a/Cargo.toml b/Cargo.toml index fbf4c7ae..b68b9555 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -87,3 +87,7 @@ harness = false [[bench]] name = "read_metadata" harness = false + +[[bench]] +name = "merge_archive" +harness = false diff --git a/benches/merge_archive.rs b/benches/merge_archive.rs new file mode 100644 index 00000000..c5cb26c5 --- /dev/null +++ b/benches/merge_archive.rs @@ -0,0 +1,120 @@ +use bencher::{benchmark_group, benchmark_main}; + +use std::io::{Cursor, Read, Seek, Write}; + +use bencher::Bencher; +use getrandom::getrandom; +use zip::{result::ZipResult, write::SimpleFileOptions, ZipArchive, ZipWriter}; + +fn generate_random_archive( + num_entries: usize, + entry_size: usize, + options: SimpleFileOptions, +) -> ZipResult<(usize, ZipArchive>>)> { + let buf = Cursor::new(Vec::new()); + let mut zip = ZipWriter::new(buf); + + let mut bytes = vec![0u8; entry_size]; + for i in 0..num_entries { + let name = format!("random{}.dat", i); + zip.start_file(name, options)?; + getrandom(&mut bytes).unwrap(); + zip.write_all(&bytes)?; + } + + let buf = zip.finish()?.into_inner(); + let len = buf.len(); + + Ok((len, ZipArchive::new(Cursor::new(buf))?)) +} + +fn perform_merge( + src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + target.merge_archive(src)?; + Ok(target) +} + +fn perform_raw_copy_file( + mut src: ZipArchive, + mut target: ZipWriter, +) -> ZipResult> { + for i in 0..src.len() { + let entry = src.by_index(i)?; + target.raw_copy_file(entry)?; + } + Ok(target) +} + +const NUM_ENTRIES: usize = 100; +const ENTRY_SIZE: usize = 1024; + +fn merge_archive_stored(bench: &mut Bencher) { + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + bench.bytes = len as u64; + + bench.iter(|| { + let buf = Cursor::new(Vec::new()); + let zip = ZipWriter::new(buf); + let mut zip = perform_merge(src.clone(), zip).unwrap(); + let buf = zip.finish().unwrap().into_inner(); + assert_eq!(buf.len(), len); + }); +} + +fn merge_archive_compressed(bench: &mut Bencher) { + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated); + let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + bench.bytes = len as u64; + + bench.iter(|| { + let buf = Cursor::new(Vec::new()); + let zip = ZipWriter::new(buf); + let mut zip = perform_merge(src.clone(), zip).unwrap(); + let buf = zip.finish().unwrap().into_inner(); + assert_eq!(buf.len(), len); + }); +} + +fn merge_archive_raw_copy_file_stored(bench: &mut Bencher) { + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored); + let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + bench.bytes = len as u64; + + bench.iter(|| { + let buf = Cursor::new(Vec::new()); + let zip = ZipWriter::new(buf); + let mut zip = perform_raw_copy_file(src.clone(), zip).unwrap(); + let buf = zip.finish().unwrap().into_inner(); + assert_eq!(buf.len(), len); + }); +} + +fn merge_archive_raw_copy_file_compressed(bench: &mut Bencher) { + let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated); + let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap(); + + bench.bytes = len as u64; + + bench.iter(|| { + let buf = Cursor::new(Vec::new()); + let zip = ZipWriter::new(buf); + let mut zip = perform_raw_copy_file(src.clone(), zip).unwrap(); + let buf = zip.finish().unwrap().into_inner(); + assert_eq!(buf.len(), len); + }); +} + +benchmark_group!( + benches, + merge_archive_stored, + merge_archive_compressed, + merge_archive_raw_copy_file_stored, + merge_archive_raw_copy_file_compressed, +); +benchmark_main!(benches); From 3cfbdfca83cc3a3ca32845460b6f224c6ce272e2 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 12 Jul 2023 00:01:23 -0400 Subject: [PATCH 07/28] use num_enum to clean up the System type --- Cargo.toml | 1 + src/read.rs | 4 ++-- src/types.rs | 29 ++++++++++++----------------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fbf4c7ae..719f3f78 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ constant_time_eq = { version = "0.3.0", optional = true } crc32fast = "1.4.0" flate2 = { version = "1.0.28", default-features = false, optional = true } hmac = { version = "0.12.1", optional = true, features = ["reset"] } +num_enum = "0.6.1" pbkdf2 = { version = "0.12.2", optional = true } sha1 = { version = "0.10.6", optional = true } time = { workspace = true, optional = true, features = [ diff --git a/src/read.rs b/src/read.rs index 0a39faef..e0615fca 100644 --- a/src/read.rs +++ b/src/read.rs @@ -812,7 +812,7 @@ fn central_header_to_zip_file_inner( // Construct the result let mut result = ZipFileData { - system: System::from_u8((version_made_by >> 8) as u8), + system: System::from((version_made_by >> 8) as u8), version_made_by: version_made_by as u8, encrypted, using_data_descriptor, @@ -1178,7 +1178,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult> 8) as u8), + system: System::from((version_made_by >> 8) as u8), version_made_by: version_made_by as u8, encrypted, using_data_descriptor, diff --git a/src/types.rs b/src/types.rs index b4079f6c..7ac8a971 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,5 +1,6 @@ //! Types that specify what is contained in a ZIP. use path::{Component, Path, PathBuf}; +use num_enum::{FromPrimitive, IntoPrimitive}; use std::path; use std::sync::{Arc, OnceLock}; @@ -49,25 +50,15 @@ use crate::result::DateTimeRangeError; #[cfg(feature = "time")] use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time}; -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, FromPrimitive, IntoPrimitive)] +#[repr(u8)] pub enum System { Dos = 0, Unix = 3, + #[num_enum(default)] Unknown, } -impl System { - pub const fn from_u8(system: u8) -> System { - use self::System::*; - - match system { - 0 => Dos, - 3 => Unix, - _ => Unknown, - } - } -} - /// Representation of a moment in time. /// /// Zip files use an old format from DOS to store timestamps, @@ -512,10 +503,14 @@ mod test { #[test] fn system() { use super::System; - assert_eq!(System::Dos as u16, 0u16); - assert_eq!(System::Unix as u16, 3u16); - assert_eq!(System::from_u8(0), System::Dos); - assert_eq!(System::from_u8(3), System::Unix); + assert_eq!(u8::from(System::Dos), 0u8); + assert_eq!(System::Dos as u8, 0u8); + assert_eq!(System::Unix as u8, 3u8); + assert_eq!(u8::from(System::Unix), 3u8); + assert_eq!(System::from(0), System::Dos); + assert_eq!(System::from(3), System::Unix); + assert_eq!(u8::from(System::Unknown), 4u8); + assert_eq!(System::Unknown as u8, 4u8); } #[test] From a16a34f1a5a4cbe9ceb30fb984c2ebccf930d109 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 7 Sep 2023 00:21:31 -0400 Subject: [PATCH 08/28] use displaydoc and thiserror to remove some boilerplate --- Cargo.toml | 2 ++ src/result.rs | 51 +++++++++++---------------------------------------- src/write.rs | 5 ++++- 3 files changed, 17 insertions(+), 41 deletions(-) mode change 100644 => 100755 src/result.rs diff --git a/Cargo.toml b/Cargo.toml index fbf4c7ae..26129c36 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,10 +28,12 @@ bzip2-rs = { version = "0.1.2", optional = true } chrono = { version = "0.4.38", optional = true } constant_time_eq = { version = "0.3.0", optional = true } crc32fast = "1.4.0" +displaydoc = { version = "0.2.4", default-features = false } flate2 = { version = "1.0.28", default-features = false, optional = true } hmac = { version = "0.12.1", optional = true, features = ["reset"] } pbkdf2 = { version = "0.12.2", optional = true } sha1 = { version = "0.10.6", optional = true } +thiserror = "1.0.48" time = { workspace = true, optional = true, features = [ "std", ] } diff --git a/src/result.rs b/src/result.rs old mode 100644 new mode 100755 index f2bb4609..2e231144 --- a/src/result.rs +++ b/src/result.rs @@ -1,67 +1,38 @@ +#![allow(unknown_lints)] // non_local_definitions isn't in Rust 1.70 +#![allow(non_local_definitions)] //! Error types that can be emitted from this library +use displaydoc::Display; +use thiserror::Error; + use std::error::Error; use std::fmt; use std::io; -use std::io::IntoInnerError; use std::num::TryFromIntError; /// Generic result type with ZipError as its error variant pub type ZipResult = Result; /// Error type for Zip -#[derive(Debug)] +#[derive(Debug, Display, Error)] #[non_exhaustive] pub enum ZipError { - /// An Error caused by I/O - Io(io::Error), + /// i/o error: {0} + Io(#[from] io::Error), - /// This file is probably not a zip archive + /// invalid Zip archive: {0} InvalidArchive(&'static str), - /// This archive is not supported + /// unsupported Zip archive: {0} UnsupportedArchive(&'static str), - /// The requested file could not be found in the archive + /// specified file not found in archive FileNotFound, /// The password provided is incorrect InvalidPassword, } -impl From for ZipError { - fn from(err: io::Error) -> ZipError { - ZipError::Io(err) - } -} - -impl From> for ZipError { - fn from(value: IntoInnerError) -> Self { - ZipError::Io(value.into_error()) - } -} - -impl fmt::Display for ZipError { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match self { - ZipError::Io(err) => write!(fmt, "{err}"), - ZipError::InvalidArchive(err) => write!(fmt, "invalid Zip archive: {err}"), - ZipError::UnsupportedArchive(err) => write!(fmt, "unsupported Zip archive: {err}"), - ZipError::FileNotFound => write!(fmt, "specified file not found in archive"), - ZipError::InvalidPassword => write!(fmt, "incorrect password for encrypted file"), - } - } -} - -impl Error for ZipError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - ZipError::Io(err) => Some(err), - _ => None, - } - } -} - impl ZipError { /// The text used as an error when a password is required and not supplied /// diff --git a/src/write.rs b/src/write.rs index 0051f253..379a7bd8 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1418,7 +1418,10 @@ impl GenericZipWriter { #[cfg(feature = "deflate-zopfli")] GenericZipWriter::ZopfliDeflater(w) => w.finish()?, #[cfg(feature = "deflate-zopfli")] - GenericZipWriter::BufferedZopfliDeflater(w) => w.into_inner()?.finish()?, + GenericZipWriter::BufferedZopfliDeflater(w) => w + .into_inner() + .map_err(|e| ZipError::Io(e.into_error()))? + .finish()?, #[cfg(feature = "bzip2")] GenericZipWriter::Bzip2(w) => w.finish()?, #[cfg(feature = "zstd")] From 0321c0555732cd8ecdac123b52c396112814395b Mon Sep 17 00:00:00 2001 From: Jan Starke Date: Sat, 2 Mar 2024 19:49:39 +0100 Subject: [PATCH 09/28] fix some clippy warnings fix another clippy complaint ad support for extended timestamp support missing timestamps in the extended timestamps field handle inconsistencies between flags and len handle len_left add getter Update README.md to state that the crate has moved ad support for extended timestamp handle inconsistencies between flags and len handle len_left add getter Update README.md to state that the crate has moved --- README.md | 97 +------------------------- src/cp437.rs | 1 + src/extra_fields/extended_timestamp.rs | 88 +++++++++++++++++++++++ src/extra_fields/mod.rs | 29 ++++++++ src/lib.rs | 2 + src/read.rs | 24 ++++++- src/read/stream.rs | 3 +- src/types.rs | 5 ++ src/write.rs | 3 +- 9 files changed, 152 insertions(+), 100 deletions(-) create mode 100644 src/extra_fields/extended_timestamp.rs create mode 100644 src/extra_fields/mod.rs diff --git a/README.md b/README.md index f06cdbb5..3c1f9f5a 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,4 @@ -zip-rs -====== +zip has moved +============= -[![Build Status](https://img.shields.io/github/workflow/status/zip-rs/zip/CI)](https://github.com/zip-rs/zip/actions?query=branch%3Amaster+workflow%3ACI) -[![Crates.io version](https://img.shields.io/crates/v/zip.svg)](https://crates.io/crates/zip) -[![Discord](https://badgen.net/badge/icon/discord?icon=discord&label)](https://discord.gg/rQ7H9cSsF4) - -[Documentation](https://docs.rs/zip/0.6.3/zip/) - -Info ----- - - -A zip library for rust which supports reading and writing of simple ZIP files. - -Supported compression formats: - -* stored (i.e. none) -* deflate -* bzip2 -* zstd - -Currently unsupported zip extensions: - -* Encryption -* Multi-disk - -Usage ------ - -With all default features: - -```toml -[dependencies] -zip = "0.6" -``` - -Without the default features: - -```toml -[dependencies] -zip = { version = "0.6.6", default-features = false } -``` - -The features available are: - -* `aes-crypto`: Enables decryption of files which were encrypted with AES. Supports AE-1 and AE-2 methods. -* `deflate`: Enables the deflate compression algorithm, which is the default for zip files. -* `bzip2`: Enables the BZip2 compression algorithm. -* `time`: Enables features using the [time](https://github.com/rust-lang-deprecated/time) crate. -* `zstd`: Enables the Zstandard compression algorithm. - -All of these are enabled by default. - -MSRV ----- - -Our current Minimum Supported Rust Version is **1.59.0**. When adding features, -we will follow these guidelines: - -- We will always support the latest four minor Rust versions. This gives you a 6 - month window to upgrade your compiler. -- Any change to the MSRV will be accompanied with a **minor** version bump - - While the crate is pre-1.0, this will be a change to the PATCH version. - -Examples --------- - -See the [examples directory](examples) for: - * How to write a file to a zip. - * How to write a directory of files to a zip (using [walkdir](https://github.com/BurntSushi/walkdir)). - * How to extract a zip file. - * How to extract a single file from a zip. - * How to read a zip from the standard input. - -Fuzzing -------- - -Fuzzing support is through [cargo fuzz](https://github.com/rust-fuzz/cargo-fuzz). To install cargo fuzz: - -```bash -cargo install cargo-fuzz -``` - -To list fuzz targets: - -```bash -cargo +nightly fuzz list -``` - -To start fuzzing zip extraction: - -```bash -cargo +nightly fuzz run fuzz_read -``` +This repository was formerly the source of the [zip](https://crates.io/crates/zip) Rust crate for compressing and decompressing ZIP files, but that has moved to https://github.com/Pr0methean/zip. Please submit all issues and pull requests there, and close any existing copies here. Once the existing ones are closed, this repository will be archived. diff --git a/src/cp437.rs b/src/cp437.rs index 4dba9af1..696c0506 100644 --- a/src/cp437.rs +++ b/src/cp437.rs @@ -187,6 +187,7 @@ mod test { } #[test] + #[allow(invalid_from_utf8)] fn example_slice() { use super::FromCp437; let data = b"Cura\x87ao"; diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs new file mode 100644 index 00000000..26f2943c --- /dev/null +++ b/src/extra_fields/extended_timestamp.rs @@ -0,0 +1,88 @@ +use std::io::Read; + +use byteorder::LittleEndian; +use byteorder::ReadBytesExt; + +use crate::result::{ZipError, ZipResult}; + +/// extended timestamp, as described in + +#[derive(Debug, Clone)] +pub struct ExtendedTimestamp { + mod_time: Option, + ac_time: Option, + cr_time: Option, +} + +impl ExtendedTimestamp { + /// creates an extended timestamp struct by reading the required bytes from the reader. + /// + /// This method assumes that the length has already been read, therefore + /// it must be passed as an argument + pub fn try_from_reader(reader: &mut R, len: u16) -> ZipResult + where + R: Read, + { + let flags = reader.read_u8()?; + + // the `flags` field refers to the local headers and might not correspond + // to the len field. If the length field is 1+4, we assume that only + // the modification time has been set + + // > Those times that are present will appear in the order indicated, but + // > any combination of times may be omitted. (Creation time may be + // > present without access time, for example.) TSize should equal + // > (1 + 4*(number of set bits in Flags)), as the block is currently + // > defined. + if len != 5 && len as u32 != 1 + 4 * flags.count_ones() { + //panic!("found len {len} and flags {flags:08b}"); + return Err(ZipError::UnsupportedArchive( + "flags and len don't match in extended timestamp field", + )); + } + + if flags & 0b11111000 != 0 { + return Err(ZipError::UnsupportedArchive( + "found unsupported timestamps in the extended timestamp header", + )); + } + + let mod_time = if (flags & 0b00000001u8 == 0b00000001u8) || len == 5 { + Some(reader.read_u32::()?) + } else { + None + }; + + let ac_time = if flags & 0b00000010u8 == 0b00000010u8 && len > 5 { + Some(reader.read_u32::()?) + } else { + None + }; + + let cr_time = if flags & 0b00000100u8 == 0b00000100u8 && len > 5 { + Some(reader.read_u32::()?) + } else { + None + }; + Ok(Self { + mod_time, + ac_time, + cr_time, + }) + } + + /// returns the last modification timestamp + pub fn mod_time(&self) -> Option<&u32> { + self.mod_time.as_ref() + } + + /// returns the last access timestamp + pub fn ac_time(&self) -> Option<&u32> { + self.ac_time.as_ref() + } + + /// returns the creation timestamp + pub fn cr_time(&self) -> Option<&u32> { + self.cr_time.as_ref() + } +} diff --git a/src/extra_fields/mod.rs b/src/extra_fields/mod.rs new file mode 100644 index 00000000..9ccfdd6c --- /dev/null +++ b/src/extra_fields/mod.rs @@ -0,0 +1,29 @@ +//! types for extra fields + +/// marker trait to denote the place where this extra field has been stored +pub trait ExtraFieldVersion {} + +/// use this to mark extra fields specified in a local header + +#[derive(Debug, Clone)] +pub struct LocalHeaderVersion; + +/// use this to mark extra fields specified in the central header + +#[derive(Debug, Clone)] +pub struct CentralHeaderVersion; + +impl ExtraFieldVersion for LocalHeaderVersion {} +impl ExtraFieldVersion for CentralHeaderVersion {} + +mod extended_timestamp; + +pub use extended_timestamp::*; + +/// contains one extra field +#[derive(Debug, Clone)] +pub enum ExtraField { + + /// extended timestamp, as described in + ExtendedTimestamp(ExtendedTimestamp) +} diff --git a/src/lib.rs b/src/lib.rs index e2228e5b..45aeca60 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,6 +42,8 @@ mod spec; mod types; pub mod write; mod zipcrypto; +pub mod extra_fields; +pub use extra_fields::ExtraField; /// Unstable APIs /// diff --git a/src/read.rs b/src/read.rs index b702b4f2..8dedd1d1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -5,6 +5,7 @@ use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; +use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::result::{InvalidPassword, ZipError, ZipResult}; use crate::spec; use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData}; @@ -724,6 +725,7 @@ fn central_header_to_zip_file_inner( external_attributes: external_file_attributes, large_file: false, aes_mode: None, + extra_fields: Vec::new(), }; match parse_extra_field(&mut result) { @@ -803,6 +805,17 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { CompressionMethod::from_u16(compression_method) }; } + 0x5455 => { + // extended timestamp + // https://libzip.org/specifications/extrafld.txt + + file.extra_fields.push(ExtraField::ExtendedTimestamp( + ExtendedTimestamp::try_from_reader(&mut reader, len)?, + )); + + // the reader for ExtendedTimestamp consumes `len` bytes + len_left = 0; + } _ => { // Other fields are ignored } @@ -935,8 +948,7 @@ impl<'a> ZipFile<'a> { pub fn is_dir(&self) -> bool { self.name() .chars() - .rev() - .next() + .next_back() .map_or(false, |c| c == '/' || c == '\\') } @@ -973,6 +985,11 @@ impl<'a> ZipFile<'a> { pub fn central_header_start(&self) -> u64 { self.data.central_header_start } + + /// iterate through all extra fields + pub fn extra_data_fields(&self) -> impl Iterator { + self.data.extra_fields.iter() + } } impl<'a> Read for ZipFile<'a> { @@ -991,7 +1008,7 @@ impl<'a> Drop for ZipFile<'a> { // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader { ZipFileReader::NoReader => { - let innerreader = ::std::mem::replace(&mut self.crypto_reader, None); + let innerreader = self.crypto_reader.take(); innerreader.expect("Invalid reader state").into_inner() } reader => { @@ -1091,6 +1108,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>( external_attributes: 0, large_file: false, aes_mode: None, + extra_fields: Vec::new(), }; match parse_extra_field(&mut result) { diff --git a/src/read/stream.rs b/src/read/stream.rs index 5a01b23f..dc967ee8 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -184,8 +184,7 @@ impl ZipStreamFileMetadata { pub fn is_dir(&self) -> bool { self.name() .chars() - .rev() - .next() + .next_back() .map_or(false, |c| c == '/' || c == '\\') } diff --git a/src/types.rs b/src/types.rs index c3d0a45d..e496015a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -49,6 +49,7 @@ mod atomic { } } +use crate::extra_fields::ExtraField; #[cfg(feature = "time")] use crate::result::DateTimeRangeError; #[cfg(feature = "time")] @@ -350,6 +351,9 @@ pub struct ZipFileData { pub large_file: bool, /// AES mode if applicable pub aes_mode: Option<(AesMode, AesVendorVersion)>, + + /// extra fields, see + pub extra_fields: Vec, } impl ZipFileData { @@ -508,6 +512,7 @@ mod test { external_attributes: 0, large_file: false, aes_mode: None, + extra_fields: Vec::new(), }; assert_eq!( data.file_name_sanitized(), diff --git a/src/write.rs b/src/write.rs index 4cdc031b..4b1d2cbd 100644 --- a/src/write.rs +++ b/src/write.rs @@ -397,6 +397,7 @@ impl ZipWriter { external_attributes: permissions << 16, large_file: options.large_file, aes_mode: None, + extra_fields: Vec::new(), }; write_local_file_header(writer, &file)?; @@ -411,7 +412,7 @@ impl ZipWriter { } if let Some(keys) = options.encrypt_with { let mut zipwriter = crate::zipcrypto::ZipCryptoWriter { writer: core::mem::replace(&mut self.inner, GenericZipWriter::Closed).unwrap(), buffer: vec![], keys }; - let mut crypto_header = [0u8; 12]; + let crypto_header = [0u8; 12]; zipwriter.write_all(&crypto_header)?; self.inner = GenericZipWriter::Storer(MaybeEncrypted::Encrypted(zipwriter)); From 09331a935e592e0d3e5a15f9bfa814e667aae056 Mon Sep 17 00:00:00 2001 From: Jan Starke Date: Thu, 2 May 2024 09:24:05 +0200 Subject: [PATCH 10/28] add clippy exclusion --- src/read.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/read.rs b/src/read.rs index 8dedd1d1..3c5ba04a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1017,6 +1017,7 @@ impl<'a> Drop for ZipFile<'a> { } }; + #[allow(clippy::unused_io_amount)] loop { match reader.read(&mut buffer) { Ok(0) => break, From ccaba9df74f89baae82ffa5a861284e3f54dc11f Mon Sep 17 00:00:00 2001 From: Jan Starke Date: Thu, 2 May 2024 09:34:20 +0200 Subject: [PATCH 11/28] add test case for extended timestamp --- tests/data/extended_timestamp.zip | Bin 0 -> 297 bytes tests/zip_extended_timestamp.rs | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/data/extended_timestamp.zip create mode 100644 tests/zip_extended_timestamp.rs diff --git a/tests/data/extended_timestamp.zip b/tests/data/extended_timestamp.zip new file mode 100644 index 0000000000000000000000000000000000000000..aa93eb62afa3520fefa674de59f49693032803b1 GIT binary patch literal 297 zcmWIWW@Zs#fB;2?xMM~<>Oc+%a{zH}W^QUpWkG6UK|xMta$-qlex80=UW#6RVsU1% zUVcGpUP^v)X>Mv>iC#%+MM(hAFfOoJXT29ifEiGNgF%L&B()f*tfC||gp+|;(9t+e z9*9dTxEUB(UNAE-fQbNaMkYOG+zx`7xug-qf;kVQOO6?r%@Qz83|ks~foz03SRwAf Ta04qFNDC7XwgBmV5QhN(bWS#E literal 0 HcmV?d00001 diff --git a/tests/zip_extended_timestamp.rs b/tests/zip_extended_timestamp.rs new file mode 100644 index 00000000..9fecbacd --- /dev/null +++ b/tests/zip_extended_timestamp.rs @@ -0,0 +1,19 @@ +use std::io; +use zip::ZipArchive; + +#[test] +fn test_extended_timestamp() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/extended_timestamp.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + for field in archive.by_name("test.txt").unwrap().extra_data_fields() { + match field { + zip::ExtraField::ExtendedTimestamp(ts) => { + assert!(ts.ac_time().is_none()); + assert!(ts.cr_time().is_none()); + assert_eq!(*ts.mod_time().unwrap(), 1714635025); + }, + } + } +} From d80245352542f0a804826f4b3663cc0bc17d9f6a Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 09:35:46 -0700 Subject: [PATCH 12/28] style: rearrange imports to satisfy cargo fmt check --- src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index 7ac8a971..0a0b8948 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,6 +1,6 @@ //! Types that specify what is contained in a ZIP. -use path::{Component, Path, PathBuf}; use num_enum::{FromPrimitive, IntoPrimitive}; +use path::{Component, Path, PathBuf}; use std::path; use std::sync::{Arc, OnceLock}; From a994667db62c166fd5f8db31f32472d560358a11 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 09:39:50 -0700 Subject: [PATCH 13/28] style: remove extra spaces before comment --- src/result.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/result.rs b/src/result.rs index 2e231144..391a6a82 100755 --- a/src/result.rs +++ b/src/result.rs @@ -1,4 +1,4 @@ -#![allow(unknown_lints)] // non_local_definitions isn't in Rust 1.70 +#![allow(unknown_lints)] // non_local_definitions isn't in Rust 1.70 #![allow(non_local_definitions)] //! Error types that can be emitted from this library From 9af296d080a93668f71009c49f4f18cd7e6c60de Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 10:55:41 -0700 Subject: [PATCH 14/28] style: cargo fmt --all, fix bzip2 error --- src/extra_fields/extended_timestamp.rs | 2 +- src/extra_fields/mod.rs | 3 +-- src/lib.rs | 2 +- src/read.rs | 8 ++++---- tests/zip_extended_timestamp.rs | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs index 26f2943c..e5e3fb70 100644 --- a/src/extra_fields/extended_timestamp.rs +++ b/src/extra_fields/extended_timestamp.rs @@ -17,7 +17,7 @@ pub struct ExtendedTimestamp { impl ExtendedTimestamp { /// creates an extended timestamp struct by reading the required bytes from the reader. /// - /// This method assumes that the length has already been read, therefore + /// This method assumes that the length has already been read, therefore /// it must be passed as an argument pub fn try_from_reader(reader: &mut R, len: u16) -> ZipResult where diff --git a/src/extra_fields/mod.rs b/src/extra_fields/mod.rs index 9ccfdd6c..145cfade 100644 --- a/src/extra_fields/mod.rs +++ b/src/extra_fields/mod.rs @@ -23,7 +23,6 @@ pub use extended_timestamp::*; /// contains one extra field #[derive(Debug, Clone)] pub enum ExtraField { - /// extended timestamp, as described in - ExtendedTimestamp(ExtendedTimestamp) + ExtendedTimestamp(ExtendedTimestamp), } diff --git a/src/lib.rs b/src/lib.rs index b2ef9445..8ece3c20 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,13 +39,13 @@ mod aes_ctr; mod compression; mod cp437; mod crc32; +pub mod extra_fields; pub mod read; pub mod result; mod spec; mod types; pub mod write; mod zipcrypto; -pub mod extra_fields; pub use extra_fields::ExtraField; #[doc = "Unstable APIs\n\ diff --git a/src/read.rs b/src/read.rs index 3491b764..f76de446 100644 --- a/src/read.rs +++ b/src/read.rs @@ -30,7 +30,7 @@ use flate2::read::DeflateDecoder; use deflate64::Deflate64Decoder; #[cfg(feature = "bzip2")] -use bzip2::read::BzDecoder; +use bzip2_rs::decoder::DecoderReader; #[cfg(feature = "zstd")] use zstd::stream::read::Decoder as ZstdDecoder; @@ -146,7 +146,7 @@ pub(crate) enum ZipFileReader<'a> { #[cfg(feature = "deflate64")] Deflate64(Crc32Reader>>>), #[cfg(feature = "bzip2")] - Bzip2(Crc32Reader>>), + Bzip2(Crc32Reader>>), #[cfg(feature = "zstd")] Zstd(Crc32Reader>>>), #[cfg(feature = "lzma")] @@ -307,7 +307,7 @@ pub(crate) fn make_reader( } #[cfg(feature = "bzip2")] CompressionMethod::Bzip2 => { - let bzip2_reader = BzDecoder::new(reader); + let bzip2_reader = DecoderReader::new(reader); ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32, ae2_encrypted)) } #[cfg(feature = "zstd")] @@ -1102,7 +1102,7 @@ impl<'a> ZipFile<'a> { } /// iterate through all extra fields - pub fn extra_data_fields(&self) -> impl Iterator { + pub fn extra_data_fields(&self) -> impl Iterator { self.data.extra_fields.iter() } } diff --git a/tests/zip_extended_timestamp.rs b/tests/zip_extended_timestamp.rs index 9fecbacd..983e4fb5 100644 --- a/tests/zip_extended_timestamp.rs +++ b/tests/zip_extended_timestamp.rs @@ -13,7 +13,7 @@ fn test_extended_timestamp() { assert!(ts.ac_time().is_none()); assert!(ts.cr_time().is_none()); assert_eq!(*ts.mod_time().unwrap(), 1714635025); - }, + } } } } From 3d6621236698861d0382f2262fa62488484de077 Mon Sep 17 00:00:00 2001 From: Uli Schlachter Date: Thu, 2 May 2024 20:58:34 +0200 Subject: [PATCH 15/28] Remove unnecessary "mut" roundtrip() takes a &mut, but only uses this argument non-mutably. --- src/aes_ctr.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/aes_ctr.rs b/src/aes_ctr.rs index d9f04b64..85f3ab7d 100644 --- a/src/aes_ctr.rs +++ b/src/aes_ctr.rs @@ -154,7 +154,7 @@ mod tests { /// Checks whether `crypt_in_place` produces the correct plaintext after one use and yields the /// cipertext again after applying it again. - fn roundtrip(key: &[u8], ciphertext: &mut [u8], expected_plaintext: &[u8]) + fn roundtrip(key: &[u8], ciphertext: &[u8], expected_plaintext: &[u8]) where Aes: AesKind, Aes::Cipher: KeyInit + BlockEncrypt, @@ -182,7 +182,7 @@ mod tests { // `7z a -phelloworld -mem=AES256 -mx=0 aes256_40byte.zip 40byte_data.txt` #[test] fn crypt_aes_256_0_byte() { - let mut ciphertext = []; + let ciphertext = []; let expected_plaintext = &[]; let key = [ 0x0b, 0xec, 0x2e, 0xf2, 0x46, 0xf0, 0x7e, 0x35, 0x16, 0x54, 0xe0, 0x98, 0x10, 0xb3, @@ -190,36 +190,36 @@ mod tests { 0x5c, 0xd0, 0xc0, 0x54, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_128_5_byte() { - let mut ciphertext = [0x98, 0xa9, 0x8c, 0x26, 0x0e]; + let ciphertext = [0x98, 0xa9, 0x8c, 0x26, 0x0e]; let expected_plaintext = b"asdf\n"; let key = [ 0xe0, 0x25, 0x7b, 0x57, 0x97, 0x6a, 0xa4, 0x23, 0xab, 0x94, 0xaa, 0x44, 0xfd, 0x47, 0x4f, 0xa5, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_192_5_byte() { - let mut ciphertext = [0x36, 0x55, 0x5c, 0x61, 0x3c]; + let ciphertext = [0x36, 0x55, 0x5c, 0x61, 0x3c]; let expected_plaintext = b"asdf\n"; let key = [ 0xe4, 0x4a, 0x88, 0x52, 0x8f, 0xf7, 0x0b, 0x81, 0x7b, 0x75, 0xf1, 0x74, 0x21, 0x37, 0x8c, 0x90, 0xad, 0xbe, 0x4a, 0x65, 0xa8, 0x96, 0x0e, 0xcc, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_256_5_byte() { - let mut ciphertext = [0xc2, 0x47, 0xc0, 0xdc, 0x56]; + let ciphertext = [0xc2, 0x47, 0xc0, 0xdc, 0x56]; let expected_plaintext = b"asdf\n"; let key = [ 0x79, 0x5e, 0x17, 0xf2, 0xc6, 0x3d, 0x28, 0x9b, 0x4b, 0x4b, 0xbb, 0xa9, 0xba, 0xc9, @@ -227,12 +227,12 @@ mod tests { 0x15, 0xb2, 0x86, 0xab, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_128_40_byte() { - let mut ciphertext = [ + let ciphertext = [ 0xcf, 0x72, 0x6b, 0xa1, 0xb2, 0x0f, 0xdf, 0xaa, 0x10, 0xad, 0x9c, 0x7f, 0x6d, 0x1c, 0x8d, 0xb5, 0x16, 0x7e, 0xbb, 0x11, 0x69, 0x52, 0x8c, 0x89, 0x80, 0x32, 0xaa, 0x76, 0xa6, 0x18, 0x31, 0x98, 0xee, 0xdd, 0x22, 0x68, 0xb7, 0xe6, 0x77, 0xd2, @@ -243,12 +243,12 @@ mod tests { 0x81, 0xb6, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_192_40_byte() { - let mut ciphertext = [ + let ciphertext = [ 0xa6, 0xfc, 0x52, 0x79, 0x2c, 0x6c, 0xfe, 0x68, 0xb1, 0xa8, 0xb3, 0x07, 0x52, 0x8b, 0x82, 0xa6, 0x87, 0x9c, 0x72, 0x42, 0x3a, 0xf8, 0xc6, 0xa9, 0xc9, 0xfb, 0x61, 0x19, 0x37, 0xb9, 0x56, 0x62, 0xf4, 0xfc, 0x5e, 0x7a, 0xdd, 0x55, 0x0a, 0x48, @@ -259,12 +259,12 @@ mod tests { 0xfe, 0xae, 0x1b, 0xba, 0x01, 0x97, 0x97, 0x79, 0xbb, 0xa6, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } #[test] fn crypt_aes_256_40_byte() { - let mut ciphertext = [ + let ciphertext = [ 0xa9, 0x99, 0xbd, 0xea, 0x82, 0x9b, 0x8f, 0x2f, 0xb7, 0x52, 0x2f, 0x6b, 0xd8, 0xf6, 0xab, 0x0e, 0x24, 0x51, 0x9e, 0x18, 0x0f, 0xc0, 0x8f, 0x54, 0x15, 0x80, 0xae, 0xbc, 0xa0, 0x5c, 0x8a, 0x11, 0x8d, 0x14, 0x7e, 0xc5, 0xb4, 0xae, 0xd3, 0x37, @@ -276,6 +276,6 @@ mod tests { 0xc2, 0x07, 0x36, 0xb6, ]; - roundtrip::(&key, &mut ciphertext, expected_plaintext); + roundtrip::(&key, &ciphertext, expected_plaintext); } } From da3bbc87d7cde34bff29ca191d9fa43662b76c0a Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 12:58:31 -0700 Subject: [PATCH 16/28] doc(examples): Fix a bug where type SimpleFileOptions must be specified --- examples/append.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/append.rs b/examples/append.rs index 8bb1e4d8..6c5e41d9 100644 --- a/examples/append.rs +++ b/examples/append.rs @@ -1,9 +1,11 @@ use std::{ fs::{File, OpenOptions}, + io::{Read, Write}, path::{Path, PathBuf}, str::FromStr, }; use zip; +use zip::write::SimpleFileOptions; fn gather_files<'a, T: Into<&'a Path>>(path: T, files: &mut Vec) { let path: &Path = path.into(); @@ -46,7 +48,7 @@ fn real_main() -> i32 { for file in files { append_zip - .start_file_from_path(file, Default::default()) + .start_file(file.to_string_lossy(), SimpleFileOptions::default()) .unwrap(); let mut f = File::open(file).unwrap(); From 08d30e976aeee41ef501e2d4ae3a05488db54027 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 13:31:38 -0700 Subject: [PATCH 17/28] style: Fix Clippy warnings: unused imports --- examples/append.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/append.rs b/examples/append.rs index 6c5e41d9..88a5147e 100644 --- a/examples/append.rs +++ b/examples/append.rs @@ -1,10 +1,8 @@ use std::{ fs::{File, OpenOptions}, - io::{Read, Write}, path::{Path, PathBuf}, str::FromStr, }; -use zip; use zip::write::SimpleFileOptions; fn gather_files<'a, T: Into<&'a Path>>(path: T, files: &mut Vec) { @@ -39,7 +37,7 @@ fn real_main() -> i32 { let existing_zip = OpenOptions::new() .read(true) .write(true) - .open(&archive) + .open(archive) .unwrap(); let mut append_zip = zip::ZipWriter::new_append(existing_zip).unwrap(); From 84ae5fc157a2ac17847ec5608f50b213f2eca1ea Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 12:41:47 -0700 Subject: [PATCH 18/28] refactor: Remove byteorder dependency (#83) --- Cargo.toml | 1 - src/aes_ctr.rs | 4 +- src/read.rs | 84 +++++++++++++++++++------------------- src/read/stream.rs | 5 +-- src/spec.rs | 92 +++++++++++++++++++++--------------------- src/unstable.rs | 49 +++++++++++++++++++++++ src/write.rs | 98 ++++++++++++++++++++++----------------------- tests/end_to_end.rs | 5 +-- 8 files changed, 193 insertions(+), 145 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d2d4ab72..0094b84e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,6 @@ time = { version = "0.3.36", default-features = false } [dependencies] aes = { version = "0.8.4", optional = true } -byteorder = "1.5.0" bzip2 = { version = "0.4.4", optional = true } chrono = { version = "0.4.38", optional = true } constant_time_eq = { version = "0.3.0", optional = true } diff --git a/src/aes_ctr.rs b/src/aes_ctr.rs index d9f04b64..9bc3748e 100644 --- a/src/aes_ctr.rs +++ b/src/aes_ctr.rs @@ -4,9 +4,9 @@ //! different byte order (little endian) than NIST (big endian). //! See [AesCtrZipKeyStream] for more information. +use crate::unstable::LittleEndianWriteExt; use aes::cipher::generic_array::GenericArray; use aes::cipher::{BlockEncrypt, KeyInit}; -use byteorder::WriteBytesExt; use std::{any, fmt}; /// Internal block size of an AES cipher. @@ -112,7 +112,7 @@ where // Note: AES block size is always 16 bytes, same as u128. self.buffer .as_mut() - .write_u128::(self.counter) + .write_u128_le(self.counter) .expect("did not expect u128 le conversion to fail"); self.cipher .encrypt_block(GenericArray::from_mut_slice(&mut self.buffer)); diff --git a/src/read.rs b/src/read.rs index 25ff7f4d..a58537ad 100644 --- a/src/read.rs +++ b/src/read.rs @@ -11,7 +11,6 @@ use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; -use byteorder::{LittleEndian, ReadBytesExt}; use std::borrow::{Borrow, Cow}; use std::collections::HashMap; use std::io::{self, prelude::*}; @@ -87,6 +86,7 @@ pub(crate) mod zip_archive { use crate::read::lzma::LzmaDecoder; use crate::result::ZipError::InvalidPassword; use crate::spec::path_to_string; +use crate::unstable::LittleEndianReadExt; pub use zip_archive::ZipArchive; #[allow(clippy::large_enum_variant)] @@ -210,15 +210,15 @@ pub(crate) fn find_content<'a>( ) -> ZipResult> { // Parse local header reader.seek(io::SeekFrom::Start(data.header_start))?; - let signature = reader.read_u32::()?; + let signature = reader.read_u32_le()?; if signature != spec::LOCAL_FILE_HEADER_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid local file header")); } let data_start = match data.data_start.get() { None => { reader.seek(io::SeekFrom::Current(22))?; - let file_name_length = reader.read_u16::()? as u64; - let extra_field_length = reader.read_u16::()? as u64; + let file_name_length = reader.read_u16_le()? as u64; + let extra_field_length = reader.read_u16_le()? as u64; let magic_and_header = 4 + 22 + 2 + 2; let data_start = data.header_start + magic_and_header + file_name_length + extra_field_length; @@ -762,7 +762,7 @@ pub(crate) fn central_header_to_zip_file( let central_header_start = reader.stream_position()?; // Parse central header - let signature = reader.read_u32::()?; + let signature = reader.read_u32_le()?; if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { Err(ZipError::InvalidArchive("Invalid Central Directory header")) } else { @@ -776,25 +776,25 @@ fn central_header_to_zip_file_inner( archive_offset: u64, central_header_start: u64, ) -> ZipResult { - let version_made_by = reader.read_u16::()?; - let _version_to_extract = reader.read_u16::()?; - let flags = reader.read_u16::()?; + let version_made_by = reader.read_u16_le()?; + let _version_to_extract = reader.read_u16_le()?; + let flags = reader.read_u16_le()?; let encrypted = flags & 1 == 1; let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; - let compression_method = reader.read_u16::()?; - let last_mod_time = reader.read_u16::()?; - let last_mod_date = reader.read_u16::()?; - let crc32 = reader.read_u32::()?; - let compressed_size = reader.read_u32::()?; - let uncompressed_size = reader.read_u32::()?; - let file_name_length = reader.read_u16::()? as usize; - let extra_field_length = reader.read_u16::()? as usize; - let file_comment_length = reader.read_u16::()? as usize; - let _disk_number = reader.read_u16::()?; - let _internal_file_attributes = reader.read_u16::()?; - let external_file_attributes = reader.read_u32::()?; - let offset = reader.read_u32::()? as u64; + let compression_method = reader.read_u16_le()?; + let last_mod_time = reader.read_u16_le()?; + let last_mod_date = reader.read_u16_le()?; + let crc32 = reader.read_u32_le()?; + let compressed_size = reader.read_u32_le()?; + let uncompressed_size = reader.read_u32_le()?; + let file_name_length = reader.read_u16_le()? as usize; + let extra_field_length = reader.read_u16_le()? as usize; + let file_comment_length = reader.read_u16_le()? as usize; + let _disk_number = reader.read_u16_le()?; + let _internal_file_attributes = reader.read_u16_le()?; + let external_file_attributes = reader.read_u32_le()?; + let offset = reader.read_u32_le()? as u64; let mut file_name_raw = vec![0; file_name_length]; reader.read_exact(&mut file_name_raw)?; let mut extra_field = vec![0; extra_field_length]; @@ -868,24 +868,24 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { let mut reader = io::Cursor::new(extra_field.as_ref()); while (reader.position() as usize) < extra_field.len() { - let kind = reader.read_u16::()?; - let len = reader.read_u16::()?; + let kind = reader.read_u16_le()?; + let len = reader.read_u16_le()?; let mut len_left = len as i64; match kind { // Zip64 extended information extra field 0x0001 => { if file.uncompressed_size == spec::ZIP64_BYTES_THR { file.large_file = true; - file.uncompressed_size = reader.read_u64::()?; + file.uncompressed_size = reader.read_u64_le()?; len_left -= 8; } if file.compressed_size == spec::ZIP64_BYTES_THR { file.large_file = true; - file.compressed_size = reader.read_u64::()?; + file.compressed_size = reader.read_u64_le()?; len_left -= 8; } if file.header_start == spec::ZIP64_BYTES_THR { - file.header_start = reader.read_u64::()?; + file.header_start = reader.read_u64_le()?; len_left -= 8; } } @@ -896,10 +896,12 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { "AES extra data field has an unsupported length", )); } - let vendor_version = reader.read_u16::()?; - let vendor_id = reader.read_u16::()?; - let aes_mode = reader.read_u8()?; - let compression_method = reader.read_u16::()?; + let vendor_version = reader.read_u16_le()?; + let vendor_id = reader.read_u16_le()?; + let mut out = [0u8]; + reader.read_exact(&mut out)?; + let aes_mode = out[0]; + let compression_method = reader.read_u16_le()?; if vendor_id != 0x4541 { return Err(ZipError::InvalidArchive("Invalid AES vendor")); @@ -1163,7 +1165,7 @@ impl<'a> Drop for ZipFile<'a> { /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { - let signature = reader.read_u32::()?; + let signature = reader.read_u32_le()?; match signature { spec::LOCAL_FILE_HEADER_SIGNATURE => (), @@ -1171,20 +1173,20 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(ZipError::InvalidArchive("Invalid local file header")), } - let version_made_by = reader.read_u16::()?; - let flags = reader.read_u16::()?; + let version_made_by = reader.read_u16_le()?; + let flags = reader.read_u16_le()?; let encrypted = flags & 1 == 1; let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; #[allow(deprecated)] - let compression_method = CompressionMethod::from_u16(reader.read_u16::()?); - let last_mod_time = reader.read_u16::()?; - let last_mod_date = reader.read_u16::()?; - let crc32 = reader.read_u32::()?; - let compressed_size = reader.read_u32::()?; - let uncompressed_size = reader.read_u32::()?; - let file_name_length = reader.read_u16::()? as usize; - let extra_field_length = reader.read_u16::()? as usize; + let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?); + let last_mod_time = reader.read_u16_le()?; + let last_mod_date = reader.read_u16_le()?; + let crc32 = reader.read_u32_le()?; + let compressed_size = reader.read_u32_le()?; + let uncompressed_size = reader.read_u32_le()?; + let file_name_length = reader.read_u16_le()? as usize; + let extra_field_length = reader.read_u16_le()? as usize; let mut file_name_raw = vec![0; file_name_length]; reader.read_exact(&mut file_name_raw)?; diff --git a/src/read/stream.rs b/src/read/stream.rs index f99a0e5a..40cb9efc 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,3 +1,4 @@ +use crate::unstable::LittleEndianReadExt; use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; @@ -7,8 +8,6 @@ use super::{ ZipFileData, ZipResult, }; -use byteorder::{LittleEndian, ReadBytesExt}; - /// Stream decoder for zip. #[derive(Debug)] pub struct ZipStreamReader(R); @@ -28,7 +27,7 @@ impl ZipStreamReader { let central_header_start = 0; // Parse central header - let signature = self.0.read_u32::()?; + let signature = self.0.read_u32_le()?; if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { Ok(None) } else { diff --git a/src/spec.rs b/src/spec.rs index b620c01e..8c5da7d8 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -1,5 +1,5 @@ use crate::result::{ZipError, ZipResult}; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt}; use std::borrow::Cow; use std::io; use std::io::prelude::*; @@ -26,17 +26,17 @@ pub struct CentralDirectoryEnd { impl CentralDirectoryEnd { pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32::()?; + let magic = reader.read_u32_le()?; if magic != CENTRAL_DIRECTORY_END_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid digital signature header")); } - let disk_number = reader.read_u16::()?; - let disk_with_central_directory = reader.read_u16::()?; - let number_of_files_on_this_disk = reader.read_u16::()?; - let number_of_files = reader.read_u16::()?; - let central_directory_size = reader.read_u32::()?; - let central_directory_offset = reader.read_u32::()?; - let zip_file_comment_length = reader.read_u16::()? as usize; + let disk_number = reader.read_u16_le()?; + let disk_with_central_directory = reader.read_u16_le()?; + let number_of_files_on_this_disk = reader.read_u16_le()?; + let number_of_files = reader.read_u16_le()?; + let central_directory_size = reader.read_u32_le()?; + let central_directory_offset = reader.read_u32_le()?; + let zip_file_comment_length = reader.read_u16_le()? as usize; let mut zip_file_comment = vec![0; zip_file_comment_length]; reader.read_exact(&mut zip_file_comment)?; @@ -65,7 +65,7 @@ impl CentralDirectoryEnd { let mut pos = file_length - HEADER_SIZE; while pos >= search_upper_bound { reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32::()? == CENTRAL_DIRECTORY_END_SIGNATURE { + if reader.read_u32_le()? == CENTRAL_DIRECTORY_END_SIGNATURE { reader.seek(io::SeekFrom::Current( BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64, ))?; @@ -85,14 +85,14 @@ impl CentralDirectoryEnd { } pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32::(CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u16::(self.disk_number)?; - writer.write_u16::(self.disk_with_central_directory)?; - writer.write_u16::(self.number_of_files_on_this_disk)?; - writer.write_u16::(self.number_of_files)?; - writer.write_u32::(self.central_directory_size)?; - writer.write_u32::(self.central_directory_offset)?; - writer.write_u16::(self.zip_file_comment.len() as u16)?; + writer.write_u32_le(CENTRAL_DIRECTORY_END_SIGNATURE)?; + writer.write_u16_le(self.disk_number)?; + writer.write_u16_le(self.disk_with_central_directory)?; + writer.write_u16_le(self.number_of_files_on_this_disk)?; + writer.write_u16_le(self.number_of_files)?; + writer.write_u32_le(self.central_directory_size)?; + writer.write_u32_le(self.central_directory_offset)?; + writer.write_u16_le(self.zip_file_comment.len() as u16)?; writer.write_all(&self.zip_file_comment)?; Ok(()) } @@ -106,15 +106,15 @@ pub struct Zip64CentralDirectoryEndLocator { impl Zip64CentralDirectoryEndLocator { pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32::()?; + let magic = reader.read_u32_le()?; if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { return Err(ZipError::InvalidArchive( "Invalid zip64 locator digital signature header", )); } - let disk_with_central_directory = reader.read_u32::()?; - let end_of_central_directory_offset = reader.read_u64::()?; - let number_of_disks = reader.read_u32::()?; + let disk_with_central_directory = reader.read_u32_le()?; + let end_of_central_directory_offset = reader.read_u64_le()?; + let number_of_disks = reader.read_u32_le()?; Ok(Zip64CentralDirectoryEndLocator { disk_with_central_directory, @@ -124,10 +124,10 @@ impl Zip64CentralDirectoryEndLocator { } pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32::(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?; - writer.write_u32::(self.disk_with_central_directory)?; - writer.write_u64::(self.end_of_central_directory_offset)?; - writer.write_u32::(self.number_of_disks)?; + writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?; + writer.write_u32_le(self.disk_with_central_directory)?; + writer.write_u64_le(self.end_of_central_directory_offset)?; + writer.write_u32_le(self.number_of_disks)?; Ok(()) } } @@ -156,20 +156,20 @@ impl Zip64CentralDirectoryEnd { while pos >= nominal_offset { reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32::()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { + if reader.read_u32_le()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { let archive_offset = pos - nominal_offset; - let _record_size = reader.read_u64::()?; + let _record_size = reader.read_u64_le()?; // We would use this value if we did anything with the "zip64 extensible data sector". - let version_made_by = reader.read_u16::()?; - let version_needed_to_extract = reader.read_u16::()?; - let disk_number = reader.read_u32::()?; - let disk_with_central_directory = reader.read_u32::()?; - let number_of_files_on_this_disk = reader.read_u64::()?; - let number_of_files = reader.read_u64::()?; - let central_directory_size = reader.read_u64::()?; - let central_directory_offset = reader.read_u64::()?; + let version_made_by = reader.read_u16_le()?; + let version_needed_to_extract = reader.read_u16_le()?; + let disk_number = reader.read_u32_le()?; + let disk_with_central_directory = reader.read_u32_le()?; + let number_of_files_on_this_disk = reader.read_u64_le()?; + let number_of_files = reader.read_u64_le()?; + let central_directory_size = reader.read_u64_le()?; + let central_directory_offset = reader.read_u64_le()?; results.push(( Zip64CentralDirectoryEnd { @@ -201,16 +201,16 @@ impl Zip64CentralDirectoryEnd { } pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32::(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u64::(44)?; // record size - writer.write_u16::(self.version_made_by)?; - writer.write_u16::(self.version_needed_to_extract)?; - writer.write_u32::(self.disk_number)?; - writer.write_u32::(self.disk_with_central_directory)?; - writer.write_u64::(self.number_of_files_on_this_disk)?; - writer.write_u64::(self.number_of_files)?; - writer.write_u64::(self.central_directory_size)?; - writer.write_u64::(self.central_directory_offset)?; + writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?; + writer.write_u64_le(44)?; // record size + writer.write_u16_le(self.version_made_by)?; + writer.write_u16_le(self.version_needed_to_extract)?; + writer.write_u32_le(self.disk_number)?; + writer.write_u32_le(self.disk_with_central_directory)?; + writer.write_u64_le(self.number_of_files_on_this_disk)?; + writer.write_u64_le(self.number_of_files)?; + writer.write_u64_le(self.central_directory_size)?; + writer.write_u64_le(self.central_directory_offset)?; Ok(()) } } diff --git a/src/unstable.rs b/src/unstable.rs index beb35a41..f6ea888a 100644 --- a/src/unstable.rs +++ b/src/unstable.rs @@ -1,3 +1,8 @@ +#![allow(missing_docs)] + +use std::io; +use std::io::{Read, Write}; + /// Provides high level API for reading from a stream. pub mod stream { pub use crate::read::stream::*; @@ -18,3 +23,47 @@ pub mod write { } } } + +/// Helper methods for writing unsigned integers in little-endian form. +pub trait LittleEndianWriteExt: Write { + fn write_u16_le(&mut self, input: u16) -> io::Result<()> { + self.write_all(&input.to_le_bytes()) + } + + fn write_u32_le(&mut self, input: u32) -> io::Result<()> { + self.write_all(&input.to_le_bytes()) + } + + fn write_u64_le(&mut self, input: u64) -> io::Result<()> { + self.write_all(&input.to_le_bytes()) + } + + fn write_u128_le(&mut self, input: u128) -> io::Result<()> { + self.write_all(&input.to_le_bytes()) + } +} + +impl LittleEndianWriteExt for W {} + +/// Helper methods for reading unsigned integers in little-endian form. +pub trait LittleEndianReadExt: Read { + fn read_u16_le(&mut self) -> io::Result { + let mut out = [0u8; 2]; + self.read_exact(&mut out)?; + Ok(u16::from_le_bytes(out)) + } + + fn read_u32_le(&mut self) -> io::Result { + let mut out = [0u8; 4]; + self.read_exact(&mut out)?; + Ok(u32::from_le_bytes(out)) + } + + fn read_u64_le(&mut self) -> io::Result { + let mut out = [0u8; 8]; + self.read_exact(&mut out)?; + Ok(u64::from_le_bytes(out)) + } +} + +impl LittleEndianReadExt for R {} diff --git a/src/write.rs b/src/write.rs index 7179487f..2606fe97 100644 --- a/src/write.rs +++ b/src/write.rs @@ -5,7 +5,6 @@ use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{ffi, DateTime, System, ZipFileData, DEFAULT_VERSION}; -use byteorder::{LittleEndian, WriteBytesExt}; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; use crc32fast::Hasher; @@ -126,6 +125,7 @@ use crate::result::ZipError::InvalidArchive; #[cfg(feature = "lzma")] use crate::result::ZipError::UnsupportedArchive; use crate::spec::path_to_string; +use crate::unstable::LittleEndianWriteExt; use crate::write::GenericZipWriter::{Closed, Storer}; use crate::zipcrypto::ZipCryptoKeys; use crate::CompressionMethod::Stored; @@ -378,8 +378,8 @@ impl FileOptions { } }; vec.reserve_exact(data.len() + 4); - vec.write_u16::(header_id)?; - vec.write_u16::(data.len() as u16)?; + vec.write_u16_le(header_id)?; + vec.write_u16_le(data.len() as u16)?; vec.write_all(data)?; Ok(()) } @@ -701,34 +701,34 @@ impl ZipWriter { let index = self.insert_file_data(file)?; let file = &mut self.files[index]; let writer = self.inner.get_plain(); - writer.write_u32::(spec::LOCAL_FILE_HEADER_SIGNATURE)?; + writer.write_u32_le(spec::LOCAL_FILE_HEADER_SIGNATURE)?; // version needed to extract - writer.write_u16::(file.version_needed())?; + writer.write_u16_le(file.version_needed())?; // general purpose bit flag let flag = if !file.file_name.is_ascii() { 1u16 << 11 } else { 0 } | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16::(flag)?; + writer.write_u16_le(flag)?; // Compression method #[allow(deprecated)] - writer.write_u16::(file.compression_method.to_u16())?; + writer.write_u16_le(file.compression_method.to_u16())?; // last mod file time and last mod file date - writer.write_u16::(file.last_modified_time.timepart())?; - writer.write_u16::(file.last_modified_time.datepart())?; + writer.write_u16_le(file.last_modified_time.timepart())?; + writer.write_u16_le(file.last_modified_time.datepart())?; // crc-32 - writer.write_u32::(file.crc32)?; + writer.write_u32_le(file.crc32)?; // compressed size and uncompressed size if file.large_file { - writer.write_u32::(spec::ZIP64_BYTES_THR as u32)?; - writer.write_u32::(spec::ZIP64_BYTES_THR as u32)?; + writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; + writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; } else { - writer.write_u32::(file.compressed_size as u32)?; - writer.write_u32::(file.uncompressed_size as u32)?; + writer.write_u32_le(file.compressed_size as u32)?; + writer.write_u32_le(file.uncompressed_size as u32)?; } // file name length - writer.write_u16::(file.file_name.as_bytes().len() as u16)?; + writer.write_u16_le(file.file_name.as_bytes().len() as u16)?; // extra field length let mut extra_field_length = file.extra_field_len(); if file.large_file { @@ -739,7 +739,7 @@ impl ZipWriter { return Err(InvalidArchive("Extra data field is too large")); } let extra_field_length = extra_field_length as u16; - writer.write_u16::(extra_field_length)?; + writer.write_u16_le(extra_field_length)?; // file name writer.write_all(file.file_name.as_bytes())?; // zip64 extra field @@ -768,7 +768,7 @@ impl ZipWriter { let pad_body = vec![0; pad_length - 4]; writer.write_all(b"za").map_err(ZipError::from)?; // 0x617a writer - .write_u16::(pad_body.len() as u16) + .write_u16_le(pad_body.len() as u16) .map_err(ZipError::from)?; writer.write_all(&pad_body).map_err(ZipError::from)?; } else { @@ -781,7 +781,7 @@ impl ZipWriter { // Update extra field length in local file header. writer.seek(SeekFrom::Start(file.header_start + 28))?; - writer.write_u16::(new_extra_field_length)?; + writer.write_u16_le(new_extra_field_length)?; writer.seek(SeekFrom::Start(header_end))?; debug_assert_eq!(header_end % align, 0); } @@ -1522,7 +1522,7 @@ fn clamp_opt>( fn update_local_file_header(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { const CRC32_OFFSET: u64 = 14; writer.seek(SeekFrom::Start(file.header_start + CRC32_OFFSET))?; - writer.write_u32::(file.crc32)?; + writer.write_u32_le(file.crc32)?; if file.large_file { update_local_zip64_extra_field(writer, file)?; } else { @@ -1533,9 +1533,9 @@ fn update_local_file_header(writer: &mut T, file: &ZipFileData) "Large file option has not been set", ))); } - writer.write_u32::(file.compressed_size as u32)?; + writer.write_u32_le(file.compressed_size as u32)?; // uncompressed size is already checked on write to catch it as soon as possible - writer.write_u32::(file.uncompressed_size as u32)?; + writer.write_u32_le(file.uncompressed_size as u32)?; } Ok(()) } @@ -1547,49 +1547,49 @@ fn write_central_directory_header(writer: &mut T, file: &ZipFileData) write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?; // central file header signature - writer.write_u32::(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?; + writer.write_u32_le(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?; // version made by let version_made_by = (file.system as u16) << 8 | (file.version_made_by as u16); - writer.write_u16::(version_made_by)?; + writer.write_u16_le(version_made_by)?; // version needed to extract - writer.write_u16::(file.version_needed())?; + writer.write_u16_le(file.version_needed())?; // general purpose bit flag let flag = if !file.file_name.is_ascii() { 1u16 << 11 } else { 0 } | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16::(flag)?; + writer.write_u16_le(flag)?; // compression method #[allow(deprecated)] - writer.write_u16::(file.compression_method.to_u16())?; + writer.write_u16_le(file.compression_method.to_u16())?; // last mod file time + date - writer.write_u16::(file.last_modified_time.timepart())?; - writer.write_u16::(file.last_modified_time.datepart())?; + writer.write_u16_le(file.last_modified_time.timepart())?; + writer.write_u16_le(file.last_modified_time.datepart())?; // crc-32 - writer.write_u32::(file.crc32)?; + writer.write_u32_le(file.crc32)?; // compressed size - writer.write_u32::(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; + writer.write_u32_le(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; // uncompressed size - writer.write_u32::(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; + writer.write_u32_le(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; // file name length - writer.write_u16::(file.file_name.as_bytes().len() as u16)?; + writer.write_u16_le(file.file_name.as_bytes().len() as u16)?; // extra field length - writer.write_u16::( + writer.write_u16_le( zip64_extra_field_length + file.extra_field_len() as u16 + file.central_extra_field_len() as u16, )?; // file comment length - writer.write_u16::(0)?; + writer.write_u16_le(0)?; // disk number start - writer.write_u16::(0)?; + writer.write_u16_le(0)?; // internal file attributes - writer.write_u16::(0)?; + writer.write_u16_le(0)?; // external file attributes - writer.write_u32::(file.external_attributes)?; + writer.write_u32_le(file.external_attributes)?; // relative offset of local header - writer.write_u32::(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?; + writer.write_u32_le(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?; // file name writer.write_all(file.file_name.as_bytes())?; // zip64 extra field @@ -1643,10 +1643,10 @@ fn validate_extra_data(header_id: u16, data: &[u8]) -> ZipResult<()> { fn write_local_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { // This entry in the Local header MUST include BOTH original // and compressed file size fields. - writer.write_u16::(0x0001)?; - writer.write_u16::(16)?; - writer.write_u64::(file.uncompressed_size)?; - writer.write_u64::(file.compressed_size)?; + writer.write_u16_le(0x0001)?; + writer.write_u16_le(16)?; + writer.write_u64_le(file.uncompressed_size)?; + writer.write_u64_le(file.compressed_size)?; // Excluded fields: // u32: disk start number Ok(()) @@ -1658,8 +1658,8 @@ fn update_local_zip64_extra_field( ) -> ZipResult<()> { let zip64_extra_field = file.header_start + 30 + file.file_name.as_bytes().len() as u64; writer.seek(SeekFrom::Start(zip64_extra_field + 4))?; - writer.write_u64::(file.uncompressed_size)?; - writer.write_u64::(file.compressed_size)?; + writer.write_u64_le(file.uncompressed_size)?; + writer.write_u64_le(file.compressed_size)?; // Excluded fields: // u32: disk start number Ok(()) @@ -1684,18 +1684,18 @@ fn write_central_zip64_extra_field(writer: &mut T, file: &ZipFileData) size += 8; } if size > 0 { - writer.write_u16::(0x0001)?; - writer.write_u16::(size)?; + writer.write_u16_le(0x0001)?; + writer.write_u16_le(size)?; size += 4; if uncompressed_size { - writer.write_u64::(file.uncompressed_size)?; + writer.write_u64_le(file.uncompressed_size)?; } if compressed_size { - writer.write_u64::(file.compressed_size)?; + writer.write_u64_le(file.compressed_size)?; } if header_start { - writer.write_u64::(file.header_start)?; + writer.write_u64_le(file.header_start)?; } // Excluded fields: // u32: disk start number diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index faad769c..ee342382 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -1,4 +1,3 @@ -use byteorder::{LittleEndian, WriteBytesExt}; use std::collections::HashSet; use std::io::prelude::*; use std::io::Cursor; @@ -159,8 +158,8 @@ fn check_test_archive(zip_file: R) -> ZipResult(0xbeef)?; - extra_data.write_u16::(EXTRA_DATA.len() as u16)?; + extra_data.write_u16(0xbeef)?; + extra_data.write_u16(EXTRA_DATA.len() as u16)?; extra_data.write_all(EXTRA_DATA)?; assert_eq!( file_with_extra_data.extra_data(), From b520c7f5171bd4c9c4a05129ffa38188c83cc268 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 13:24:50 -0700 Subject: [PATCH 19/28] test: Fix end-to-end test --- tests/end_to_end.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index ee342382..fc2059c1 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -2,6 +2,7 @@ use std::collections::HashSet; use std::io::prelude::*; use std::io::Cursor; use zip::result::ZipResult; +use zip::unstable::LittleEndianWriteExt; use zip::write::ExtendedFileOptions; use zip::write::FileOptions; use zip::write::SimpleFileOptions; @@ -158,8 +159,8 @@ fn check_test_archive(zip_file: R) -> ZipResult Date: Thu, 2 May 2024 17:55:13 -0700 Subject: [PATCH 20/28] chore: Update due to merge of #82 --- src/extra_fields/extended_timestamp.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs index e5e3fb70..f173e944 100644 --- a/src/extra_fields/extended_timestamp.rs +++ b/src/extra_fields/extended_timestamp.rs @@ -1,9 +1,6 @@ -use std::io::Read; - -use byteorder::LittleEndian; -use byteorder::ReadBytesExt; - use crate::result::{ZipError, ZipResult}; +use crate::unstable::LittleEndianReadExt; +use std::io::Read; /// extended timestamp, as described in @@ -23,7 +20,9 @@ impl ExtendedTimestamp { where R: Read, { - let flags = reader.read_u8()?; + let mut flags = [0u8]; + reader.read_exact(&mut flags)?; + let flags = flags[0]; // the `flags` field refers to the local headers and might not correspond // to the len field. If the length field is 1+4, we assume that only @@ -48,19 +47,19 @@ impl ExtendedTimestamp { } let mod_time = if (flags & 0b00000001u8 == 0b00000001u8) || len == 5 { - Some(reader.read_u32::()?) + Some(reader.read_u32_le()?) } else { None }; let ac_time = if flags & 0b00000010u8 == 0b00000010u8 && len > 5 { - Some(reader.read_u32::()?) + Some(reader.read_u32_le()?) } else { None }; let cr_time = if flags & 0b00000100u8 == 0b00000100u8 && len > 5 { - Some(reader.read_u32::()?) + Some(reader.read_u32_le()?) } else { None }; From 5e7939002b72862a0947cbe00ce12ab8edad6b6f Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 19:42:49 -0700 Subject: [PATCH 21/28] ci: run checks on release branches, since they don't run on release PRs --- .github/workflows/ci.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 674a3b37..e50a61df 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -6,7 +6,6 @@ on: - 'master' push: branches-ignore: - - 'release-plz-**' - 'gh-readonly-queue/**' workflow_dispatch: merge_group: From b6caa1c377adc598c99dc148d706dff11b1c11a4 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 20:12:43 -0700 Subject: [PATCH 22/28] ci: Run unit tests on multiple feature sets --- .github/workflows/ci.yaml | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e50a61df..b441c409 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,6 +21,7 @@ jobs: matrix: os: [ubuntu-latest, macOS-latest, windows-latest] rustalias: [stable, nightly, msrv] + feature_flag: ["--all-features", "--no-default-features", ""] include: - rustalias: stable rust: stable @@ -28,7 +29,7 @@ jobs: rust: '1.70' - rustalias: nightly rust: nightly - name: 'Build and test: ${{ matrix.os }}, ${{ matrix.rustalias }}' + name: 'Build and test ${{ matrix.feature_flag }}: ${{ matrix.os }}, ${{ matrix.rustalias }}' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@master @@ -43,19 +44,13 @@ jobs: uses: actions-rs/cargo@v1 with: command: check - args: --all --bins --examples + args: --all ${{ matrix.feature_flag }} --bins --examples - name: Tests uses: actions-rs/cargo@v1 with: command: test - args: --all - - - name: Tests (no features) - uses: actions-rs/cargo@v1 - with: - command: test - args: --all --no-default-features + args: --all ${{ matrix.feature_flag }} style_and_docs: if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name From 6f2887831d4cae7ec77e38ccf4b6212399ae9d69 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 10:24:10 +0000 Subject: [PATCH 23/28] chore(deps): update num_enum requirement from 0.6.1 to 0.7.2 Updates the requirements on [num_enum](https://github.com/illicitonion/num_enum) to permit the latest version. - [Commits](https://github.com/illicitonion/num_enum/compare/0.6.1...0.7.2) --- updated-dependencies: - dependency-name: num_enum dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 172cd129..2d7b41e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ crc32fast = "1.4.0" displaydoc = { version = "0.2.4", default-features = false } flate2 = { version = "1.0.28", default-features = false, optional = true } hmac = { version = "0.12.1", optional = true, features = ["reset"] } -num_enum = "0.6.1" +num_enum = "0.7.2" pbkdf2 = { version = "0.12.2", optional = true } sha1 = { version = "0.10.6", optional = true } thiserror = "1.0.48" From d1695053f551e1ec4d8691b3416fdd989ac32658 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 11 Jul 2023 10:17:00 -0400 Subject: [PATCH 24/28] use indexmap --- Cargo.toml | 1 + src/read.rs | 69 ++++++++++++++++++++++------------------------------ src/write.rs | 38 +++++++++++++---------------- 3 files changed, 47 insertions(+), 61 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd944f5a..7ddafc08 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ constant_time_eq = { version = "0.3.0", optional = true } crc32fast = "1.4.0" displaydoc = { version = "0.2.4", default-features = false } flate2 = { version = "1.0.28", default-features = false, optional = true } +indexmap = "2" hmac = { version = "0.12.1", optional = true, features = ["reset"] } num_enum = "0.6.1" pbkdf2 = { version = "0.12.2", optional = true } diff --git a/src/read.rs b/src/read.rs index b352d57c..d2e8e114 100644 --- a/src/read.rs +++ b/src/read.rs @@ -11,8 +11,8 @@ use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; -use std::borrow::{Borrow, Cow}; -use std::collections::HashMap; +use indexmap::IndexMap; +use std::borrow::Cow; use std::io::{self, prelude::*}; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -47,8 +47,7 @@ pub(crate) mod zip_archive { /// Extract immutable data from `ZipArchive` to make it cheap to clone #[derive(Debug)] pub(crate) struct Shared { - pub(crate) files: Box<[super::ZipFileData]>, - pub(crate) names_map: super::HashMap, usize>, + pub(crate) files: super::IndexMap, super::ZipFileData>, pub(super) offset: u64, pub(super) dir_start: u64, } @@ -333,7 +332,7 @@ pub(crate) struct CentralDirectoryInfo { impl ZipArchive { pub(crate) fn from_finalized_writer( - files: Vec, + files: IndexMap, ZipFileData>, comment: Vec, reader: R, central_start: u64, @@ -344,15 +343,9 @@ impl ZipArchive { )); } /* This is where the whole file starts. */ - let initial_offset = files.first().unwrap().header_start; - let names_map: HashMap, usize> = files - .iter() - .enumerate() - .map(|(i, d)| (d.file_name.clone(), i)) - .collect(); + let initial_offset = files.first().unwrap().1.header_start; let shared = Arc::new(zip_archive::Shared { - files: files.into_boxed_slice(), - names_map, + files, offset: initial_offset, dir_start: central_start, }); @@ -368,10 +361,10 @@ impl ZipArchive { pub(crate) fn merge_contents( &mut self, mut w: W, - ) -> ZipResult> { + ) -> ZipResult, ZipFileData>> { let mut new_files = self.shared.files.clone(); if new_files.is_empty() { - return Ok(vec![]); + return Ok(IndexMap::new()); } /* The first file header will probably start at the beginning of the file, but zip doesn't * enforce that, and executable zips like PEX files will have a shebang line so will @@ -382,7 +375,7 @@ impl ZipArchive { let new_initial_header_start = w.stream_position()?; /* Push back file header starts for all entries in the covered files. */ - new_files.iter_mut().try_for_each(|f| { + new_files.values_mut().try_for_each(|f| { /* This is probably the only really important thing to change. */ f.header_start = f.header_start.checked_add(new_initial_header_start).ok_or( ZipError::InvalidArchive("new header start from merge would have been too large"), @@ -423,7 +416,7 @@ impl ZipArchive { io::copy(&mut limited_raw, &mut w)?; /* Return the files we've just written to the data stream. */ - Ok(new_files.into_vec()) + Ok(new_files) } fn get_directory_info_zip32( @@ -582,20 +575,17 @@ impl ZipArchive { } else { dir_info.number_of_files }; - let mut files = Vec::with_capacity(file_capacity); - let mut names_map = HashMap::with_capacity(file_capacity); + let mut files = IndexMap::with_capacity(file_capacity); reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; for _ in 0..dir_info.number_of_files { let file = central_header_to_zip_file(reader, dir_info.archive_offset)?; - names_map.insert(file.file_name.clone(), files.len()); - files.push(file); + files.insert(file.file_name.clone(), file); } if dir_info.disk_number != dir_info.disk_with_central_directory { unsupported_zip_error("Support for multi-disk files is not implemented") } else { Ok(Shared { - files: files.into(), - names_map, + files, offset: dir_info.archive_offset, dir_start: dir_info.directory_start, }) @@ -699,7 +689,7 @@ impl ZipArchive { /// Returns an iterator over all the file and directory names in this archive. pub fn file_names(&self) -> impl Iterator { - self.shared.names_map.keys().map(Box::borrow) + self.shared.files.keys().map(|s| s.as_ref()) } /// Search for a file entry by name, decrypt with given password @@ -727,7 +717,7 @@ impl ZipArchive { /// Get the index of a file entry by name, if it's present. #[inline(always)] pub fn index_for_name(&self, name: &str) -> Option { - self.shared.names_map.get(name).copied() + self.shared.files.get_index_of(&*name) } /// Get the index of a file entry by path, if it's present. @@ -741,8 +731,8 @@ impl ZipArchive { pub fn name_for_index(&self, index: usize) -> Option<&str> { self.shared .files - .get(index) - .map(|file_data| &*file_data.file_name) + .get_index(index) + .map(|(name, _)| name.as_ref()) } fn by_name_with_optional_password<'a>( @@ -750,7 +740,7 @@ impl ZipArchive { name: &str, password: Option<&[u8]>, ) -> ZipResult> { - let Some(index) = self.index_for_name(name) else { + let Some(index) = self.shared.files.get_index_of(name) else { return Err(ZipError::FileNotFound); }; self.by_index_with_optional_password(index, password) @@ -785,17 +775,16 @@ impl ZipArchive { /// Get a contained file by index without decompressing it pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { let reader = &mut self.reader; - self.shared + let (_, data) = self + .shared .files - .get(file_number) - .ok_or(ZipError::FileNotFound) - .and_then(move |data| { - Ok(ZipFile { - crypto_reader: None, - reader: ZipFileReader::Raw(find_content(data, reader)?), - data: Cow::Borrowed(data), - }) - }) + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + Ok(ZipFile { + crypto_reader: None, + reader: ZipFileReader::Raw(find_content(data, reader)?), + data: Cow::Borrowed(data), + }) } fn by_index_with_optional_password( @@ -803,10 +792,10 @@ impl ZipArchive { file_number: usize, mut password: Option<&[u8]>, ) -> ZipResult> { - let data = self + let (_, data) = self .shared .files - .get(file_number) + .get_index(file_number) .ok_or(ZipError::FileNotFound)?; match (password, data.encrypted) { diff --git a/src/write.rs b/src/write.rs index 0ae282e3..72907a11 100644 --- a/src/write.rs +++ b/src/write.rs @@ -8,7 +8,7 @@ use crate::types::{ffi, DateTime, System, ZipFileData, DEFAULT_VERSION}; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; use crc32fast::Hasher; -use std::collections::HashMap; +use indexmap::IndexMap; use std::default::Default; use std::io; use std::io::prelude::*; @@ -110,8 +110,7 @@ pub(crate) mod zip_writer { /// ``` pub struct ZipWriter { pub(super) inner: GenericZipWriter, - pub(super) files: Vec, - pub(super) files_by_name: HashMap, usize>, + pub(super) files: IndexMap, ZipFileData>, pub(super) stats: ZipWriterStats, pub(super) writing_to_file: bool, pub(super) writing_raw: bool, @@ -435,7 +434,7 @@ impl Write for ZipWriter { if let Ok(count) = write_result { self.stats.update(&buf[0..count]); if self.stats.bytes_written > spec::ZIP64_BYTES_THR - && !self.files.last_mut().unwrap().large_file + && !self.files.last_mut().unwrap().1.large_file { self.abort_file().unwrap(); return Err(io::Error::new( @@ -479,8 +478,7 @@ impl ZipWriter { Ok(ZipWriter { inner: Storer(MaybeEncrypted::Unencrypted(readwriter)), - files: metadata.files.into(), - files_by_name: metadata.names_map, + files: metadata.files, stats: Default::default(), writing_to_file: false, comment: footer.zip_file_comment, @@ -641,8 +639,7 @@ impl ZipWriter { pub fn new(inner: W) -> ZipWriter { ZipWriter { inner: Storer(MaybeEncrypted::Unencrypted(inner)), - files: Vec::new(), - files_by_name: HashMap::new(), + files: IndexMap::new(), stats: Default::default(), writing_to_file: false, writing_raw: false, @@ -842,15 +839,12 @@ impl ZipWriter { } fn insert_file_data(&mut self, file: ZipFileData) -> ZipResult { - let name = &file.file_name; - if self.files_by_name.contains_key(name) { + if self.files.contains_key(&file.file_name) { return Err(InvalidArchive("Duplicate filename")); } - let name = name.to_owned(); - self.files.push(file); - let index = self.files.len() - 1; - self.files_by_name.insert(name, index); - Ok(index) + let name = file.file_name.to_owned(); + self.files.insert(name.clone(), file); + Ok(self.files.get_index_of(&name).unwrap()) } fn finish_file(&mut self) -> ZipResult<()> { @@ -871,7 +865,7 @@ impl ZipWriter { if !self.writing_raw { let file = match self.files.last_mut() { None => return Ok(()), - Some(f) => f, + Some((_, f)) => f, }; file.crc32 = self.stats.hasher.clone().finalize(); file.uncompressed_size = self.stats.bytes_written; @@ -911,8 +905,7 @@ impl ZipWriter { /// Removes the file currently being written from the archive if there is one, or else removes /// the file most recently written. pub fn abort_file(&mut self) -> ZipResult<()> { - let last_file = self.files.pop().ok_or(ZipError::FileNotFound)?; - self.files_by_name.remove(&last_file.file_name); + let (_, last_file) = self.files.pop().ok_or(ZipError::FileNotFound)?; let make_plain_writer = self.inner.prepare_next_writer( Stored, None, @@ -925,7 +918,7 @@ impl ZipWriter { // overwrite a valid file and corrupt the archive let rewind_safe: bool = match last_file.data_start.get() { None => self.files.is_empty(), - Some(last_file_start) => self.files.iter().all(|file| { + Some(last_file_start) => self.files.values().all(|file| { file.data_start .get() .is_some_and(|start| start < last_file_start) @@ -1281,7 +1274,7 @@ impl ZipWriter { let writer = self.inner.get_plain(); let central_start = writer.stream_position()?; - for file in self.files.iter() { + for file in self.files.values() { write_central_directory_header(writer, file)?; } let central_size = writer.stream_position()? - central_start; @@ -1327,7 +1320,10 @@ impl ZipWriter { } fn index_by_name(&self, name: &str) -> ZipResult { - Ok(*self.files_by_name.get(name).ok_or(ZipError::FileNotFound)?) + Ok(self + .files + .get_index_of(name) + .ok_or(ZipError::FileNotFound)?) } /// Adds another entry to the central directory referring to the same content as an existing From d98772e6339606ab2adebcff15f392b992001f24 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 12:11:00 -0700 Subject: [PATCH 25/28] style: Fix a Clippy warning in read.rs --- src/read.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index d2e8e114..d05f61b1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -717,7 +717,7 @@ impl ZipArchive { /// Get the index of a file entry by name, if it's present. #[inline(always)] pub fn index_for_name(&self, name: &str) -> Option { - self.shared.files.get_index_of(&*name) + self.shared.files.get_index_of(name) } /// Get the index of a file entry by path, if it's present. From 875ee30f9128c89b665a8d8de600ef74e708136d Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 12:12:01 -0700 Subject: [PATCH 26/28] style: Fix a Clippy warning in write.rs --- src/write.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/write.rs b/src/write.rs index 72907a11..48699f0f 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1320,10 +1320,10 @@ impl ZipWriter { } fn index_by_name(&self, name: &str) -> ZipResult { - Ok(self + self .files .get_index_of(name) - .ok_or(ZipError::FileNotFound)?) + .ok_or(ZipError::FileNotFound) } /// Adds another entry to the central directory referring to the same content as an existing From 20bfcd960cce344fa1deb0f401c3db2775b7a403 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 13:55:32 -0700 Subject: [PATCH 27/28] style: fix a cargo fmt check --- src/write.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/write.rs b/src/write.rs index 48699f0f..0760365b 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1320,10 +1320,7 @@ impl ZipWriter { } fn index_by_name(&self, name: &str) -> ZipResult { - self - .files - .get_index_of(name) - .ok_or(ZipError::FileNotFound) + self.files.get_index_of(name).ok_or(ZipError::FileNotFound) } /// Adds another entry to the central directory referring to the same content as an existing From a86a72fdc611a52a8caca8b07ad8e4dc1d679654 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 2 May 2024 19:20:43 -0700 Subject: [PATCH 28/28] chore: Fix conflicts with other recently-merged PRs --- src/read.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index d05f61b1..10b9eaaf 100644 --- a/src/read.rs +++ b/src/read.rs @@ -343,7 +343,8 @@ impl ZipArchive { )); } /* This is where the whole file starts. */ - let initial_offset = files.first().unwrap().1.header_start; + let (_, first_header) = files.first().unwrap(); + let initial_offset = first_header.header_start; let shared = Arc::new(zip_archive::Shared { files, offset: initial_offset,