From 46c42c7f820ccd0b29fca8f2b52f0a1904ac8b4d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 02:04:07 -0400 Subject: [PATCH] review comments 1 --- benches/read_metadata.rs | 20 +++++++------- src/read.rs | 58 ++++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index f42793b5..f54f7c2d 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -41,7 +41,7 @@ fn read_metadata(bench: &mut Bencher) { const COMMENT_SIZE: usize = 50_000; -fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResult> { +fn generate_zip32_archive_with_random_comment(comment_length: usize) -> ZipResult> { let data = Vec::new(); let mut writer = ZipWriter::new(Cursor::new(data)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); @@ -56,19 +56,19 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul Ok(writer.finish()?.into_inner()) } -fn parse_comment(bench: &mut Bencher) { - let bytes = generate_random_zip32_archive_with_comment(COMMENT_SIZE).unwrap(); +fn parse_archive_with_comment(bench: &mut Bencher) { + let bytes = generate_zip32_archive_with_random_comment(COMMENT_SIZE).unwrap(); bench.iter(|| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.len() + archive.comment().len() }); bench.bytes = bytes.len() as u64; } const COMMENT_SIZE_64: usize = 500_000; -fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResult> { +fn generate_zip64_archive_with_random_comment(comment_length: usize) -> ZipResult> { let data = Vec::new(); let mut writer = ZipWriter::new(Cursor::new(data)); let options = SimpleFileOptions::default() @@ -85,12 +85,12 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul Ok(writer.finish()?.into_inner()) } -fn parse_zip64_comment(bench: &mut Bencher) { - let bytes = generate_random_zip64_archive_with_comment(COMMENT_SIZE_64).unwrap(); +fn parse_zip64_archive_with_comment(bench: &mut Bencher) { + let bytes = generate_zip64_archive_with_random_comment(COMMENT_SIZE_64).unwrap(); bench.iter(|| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.len() + archive.comment().len() }); bench.bytes = bytes.len() as u64; } @@ -121,8 +121,8 @@ fn parse_stream_archive(bench: &mut Bencher) { benchmark_group!( benches, read_metadata, - parse_comment, - parse_zip64_comment, + parse_archive_with_comment, + parse_zip64_archive_with_comment, /* FIXME: this currently errors */ /* parse_stream_archive */ ); diff --git a/src/read.rs b/src/read.rs index 584e021e..430475ff 100644 --- a/src/read.rs +++ b/src/read.rs @@ -486,6 +486,19 @@ impl ZipArchive { }) } + const fn zip64_cde_len() -> usize { + mem::size_of::() + + mem::size_of::() + } + + const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) { + if a > b { + (b, a) + } else { + (a, b) + } + } + fn get_directory_info_zip64( reader: &mut R, footer: &spec::Zip32CentralDirectoryEnd, @@ -495,6 +508,7 @@ impl ZipArchive { // have its signature 20 bytes in front of the standard footer. The // standard footer, in turn, is 22+N bytes large, where N is the // comment length. Therefore: + /* TODO: compute this from constant sizes and offsets! */ reader.seek(io::SeekFrom::End( -(20 + 22 + footer.zip_file_comment.len() as i64), ))?; @@ -510,22 +524,16 @@ impl ZipArchive { // ZIP comment data. let search_upper_bound = cde_start_pos - // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator - .checked_sub(60) + .checked_sub(Self::zip64_cde_len() as u64) .ok_or(ZipError::InvalidArchive( "File cannot contain ZIP64 central directory end", ))?; - let (lower, upper) = if locator64.end_of_central_directory_offset > search_upper_bound { - ( - search_upper_bound, - locator64.end_of_central_directory_offset, - ) - } else { - ( - locator64.end_of_central_directory_offset, - search_upper_bound, - ) - }; + + let (lower, upper) = Self::order_lower_upper_bounds( + locator64.end_of_central_directory_offset, + search_upper_bound, + ); + let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; let results: Vec> = search_results.into_iter().map(|(footer64, archive_offset)| { @@ -1012,6 +1020,13 @@ pub(crate) fn central_header_to_zip_file( central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block) } +#[inline] +fn read_variable_length_byte_field(reader: &mut R, len: usize) -> io::Result> { + let mut data = vec![0; len]; + reader.read_exact(&mut data)?; + Ok(data.into_boxed_slice()) +} + /// Parse a central directory entry to collect the information for the file. fn central_header_to_zip_file_inner( reader: &mut R, @@ -1044,20 +1059,17 @@ fn central_header_to_zip_file_inner( let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; - let mut file_name_raw = vec![0; file_name_length as usize]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length as usize]; - reader.read_exact(&mut extra_field)?; - let mut file_comment_raw = vec![0; file_comment_length as usize]; - reader.read_exact(&mut file_comment_raw)?; + let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?; + let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?; + let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?; let file_name: Box = match is_utf8 { true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.from_cp437().into(), + false => file_name_raw.clone().from_cp437(), }; let file_comment: Box = match is_utf8 { true => String::from_utf8_lossy(&file_comment_raw).into(), - false => file_comment_raw.from_cp437().into(), + false => file_comment_raw.from_cp437(), }; // Construct the result @@ -1077,8 +1089,8 @@ fn central_header_to_zip_file_inner( compressed_size: compressed_size as u64, uncompressed_size: uncompressed_size as u64, file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), + file_name_raw, + extra_field: Some(Arc::new(extra_field.to_vec())), central_extra_field: None, file_comment, header_start: offset.into(),