From c934c82405cce3a2a90fb03114be34d3987e9288 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 21 Jun 2024 20:22:15 -0700 Subject: [PATCH] fix: Some archives with over u16::MAX files were handled incorrectly or slowly (#189) --- src/read.rs | 175 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 119 insertions(+), 56 deletions(-) diff --git a/src/read.rs b/src/read.rs index 7f9f5881..b8ade849 100644 --- a/src/read.rs +++ b/src/read.rs @@ -69,7 +69,6 @@ pub(crate) mod zip_archive { pub(crate) files: Vec, pub(super) offset: u64, pub(super) dir_start: u64, - pub(super) dir_end: u64, // This isn't yet used anywhere, but it is here for use cases in the future. #[allow(dead_code)] pub(super) config: super::Config, @@ -405,9 +404,11 @@ pub(crate) fn make_reader( pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, pub(crate) directory_start: u64, + pub(crate) cde_position: u64, pub(crate) number_of_files: usize, pub(crate) disk_number: u32, pub(crate) disk_with_central_directory: u32, + pub(crate) is_zip64: bool, } impl ZipArchive { @@ -560,6 +561,8 @@ impl ZipArchive { number_of_files, disk_number: footer.disk_number as u32, disk_with_central_directory: footer.disk_with_central_directory as u32, + cde_position: cde_start_pos, + is_zip64: false }) } @@ -662,6 +665,8 @@ impl ZipArchive { number_of_files: footer64.number_of_files as usize, disk_number: footer64.disk_number, disk_with_central_directory: footer64.disk_with_central_directory, + cde_position: cde_start_pos, + is_zip64: true, }) } }).collect(); @@ -674,56 +679,93 @@ impl ZipArchive { config: Config, reader: &mut R, ) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> { - let mut invalid_errors = Vec::new(); - let mut unsupported_errors = Vec::new(); + let mut invalid_errors_32 = Vec::new(); + let mut unsupported_errors_32 = Vec::new(); + let mut invalid_errors_64 = Vec::new(); + let mut unsupported_errors_64 = Vec::new(); let mut ok_results = Vec::new(); let cde_locations = spec::Zip32CentralDirectoryEnd::find_and_parse(reader)?; - cde_locations - .into_vec() - .into_iter() - .for_each(|(footer, cde_start_pos)| { + cde_locations.into_vec().into_iter().for_each(|(footer, cde_start_pos)| { let zip32_result = Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos); Self::sort_result( - zip32_result - .and_then(|result| Self::read_central_header(result, config, reader)), - &mut invalid_errors, - &mut unsupported_errors, + zip32_result, + &mut invalid_errors_32, + &mut unsupported_errors_32, &mut ok_results, &footer, ); + let mut inner_results = Vec::with_capacity(1); // Check if file has a zip64 footer - if let Ok(zip64_footers) = - Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos) + let zip64_vec_result = + Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos); + Self::sort_result( + zip64_vec_result, + &mut invalid_errors_64, + &mut unsupported_errors_64, + &mut inner_results, + &(), + ); + inner_results.into_iter().for_each(|(_, results)| { + results.into_iter().for_each(|result| { + Self::sort_result( + result, + &mut invalid_errors_64, + &mut unsupported_errors_64, + &mut ok_results, + &footer, + ); + }); + }); + } + ); + ok_results.sort_by_key(|(_, result)| ( + !result.is_zip64, // try ZIP64 first + u64::MAX - result.cde_position, // try the last one first + )); + let mut best_result = None; + for (footer, result) in ok_results { + let mut inner_result = Vec::with_capacity(1); + let is_zip64 = result.is_zip64; + Self::sort_result( + Self::read_central_header(result, config, reader), + if is_zip64 { + &mut invalid_errors_64 + } else { + &mut invalid_errors_32 + }, + if is_zip64 { + &mut unsupported_errors_64 + } else { + &mut unsupported_errors_32 + }, + &mut inner_result, + &() + ); + if let Some((_, shared)) = inner_result.into_iter().next() { + if shared.files.len() == footer.number_of_files as usize + || (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16) { - zip64_footers - .into_iter() - .map(|result| { - result.and_then(|dir_info| { - Self::read_central_header(dir_info, config, reader) - }) - }) - .for_each(|result| { - Self::sort_result( - result, - &mut invalid_errors, - &mut unsupported_errors, - &mut ok_results, - &footer, - ) - }); + best_result = Some((footer, shared)); + break; + } else { + if is_zip64 { + &mut invalid_errors_64 + } else { + &mut invalid_errors_32 + }.push(InvalidArchive("wrong number of files")) } - }); - if ok_results.is_empty() { - return Err(unsupported_errors - .into_iter() - .next() - .unwrap_or_else(|| invalid_errors.into_iter().next().unwrap())); + } } - let (footer, shared) = ok_results - .into_iter() - .max_by_key(|(_, shared)| (shared.dir_end, u64::MAX - shared.dir_start)) - .unwrap(); + let Some((footer, shared)) = best_result else { + return Err(unsupported_errors_32.into_iter() + .chain(unsupported_errors_64.into_iter()) + .chain(invalid_errors_32.into_iter()) + .chain(invalid_errors_64.into_iter()) + + .next() + .unwrap()); + }; reader.seek(io::SeekFrom::Start(shared.dir_start))?; Ok((Rc::try_unwrap(footer).unwrap(), shared.build())) } @@ -749,37 +791,27 @@ impl ZipArchive { let file = central_header_to_zip_file(reader, dir_info.archive_offset)?; files.push(file); } - let dir_end = reader.stream_position()?; Ok(SharedBuilder { files, offset: dir_info.archive_offset, dir_start: dir_info.directory_start, - dir_end, config, }) } - fn sort_result( - result: Result, + fn sort_result( + result: ZipResult, invalid_errors: &mut Vec, unsupported_errors: &mut Vec, - ok_results: &mut Vec<(Rc, SharedBuilder)>, - footer: &Rc, + ok_results: &mut Vec<(U, T)>, + footer: &U, ) { match result { Err(ZipError::UnsupportedArchive(e)) => { unsupported_errors.push(ZipError::UnsupportedArchive(e)) } Err(e) => invalid_errors.push(e), - Ok(o) => { - if o.files.len() == footer.number_of_files as usize - || footer.number_of_files == ZIP64_ENTRY_THR as u16 - { - ok_results.push((footer.clone(), o)) - } else { - invalid_errors.push(InvalidArchive("wrong number of files")) - } - } + Ok(o) => ok_results.push((footer.clone(), o)), } } @@ -1659,9 +1691,12 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult ZipResult<()> { + let mut writer = ZipWriter::new(Cursor::new(Vec::new())); + let options = SimpleFileOptions {compression_method: Stored, ..Default::default()}; + for i in 0..=u16::MAX { + let file_name = format!("{i}.txt"); + writer.start_file(&*file_name, options)?; + writer.write_all(i.to_string().as_bytes())?; + } + + let mut reader = ZipArchive::new(writer.finish()?)?; + for i in 0..=u16::MAX { + let expected_name = format!("{i}.txt"); + let expected_contents = i.to_string(); + let expected_contents = expected_contents.as_bytes(); + let mut file = reader.by_name(&expected_name)?; + let mut contents = Vec::with_capacity(expected_contents.len()); + file.read_to_end(&mut contents)?; + assert_eq!(contents, expected_contents); + drop(file); + contents.clear(); + let mut file = reader.by_index(i as usize)?; + file.read_to_end(&mut contents)?; + assert_eq!(contents, expected_contents); + } + Ok(()) + } }