diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 53a111d3..337e3800 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -98,7 +98,7 @@ jobs: - name: run fuzz timeout-minutes: 330 run: | - cargo fuzz run fuzz_read -- -timeout=10s -fork=2 -runs=100000000 -max_len=2048 -dict=fuzz/fuzz.dict + cargo fuzz run fuzz_read -- fuzz/corpus/seed -timeout=10s -fork=2 -runs=100000000 -max_len=600 -len_control=500 -dict=fuzz/fuzz.dict - name: Upload any failure inputs if: always() uses: actions/upload-artifact@v4 @@ -125,13 +125,13 @@ jobs: - name: run fuzz timeout-minutes: 330 run: | - cargo fuzz run --no-default-features fuzz_read -- -timeout=10s -fork=2 -runs=100000000 -max_len=16384 -dict=fuzz/fuzz.dict + cargo fuzz run --no-default-features fuzz_read -- fuzz/corpus/seed -timeout=10s -fork=2 -runs=1000000000 -max_len=16384 -len_control=10000 -dict=fuzz/fuzz.dict - name: Upload any failure inputs if: always() uses: actions/upload-artifact@v4 with: - name: fuzz_read_bad_inputs - path: fuzz/artifacts/fuzz_read_no_features/crash-* + name: fuzz_read_no_features_bad_inputs + path: fuzz/artifacts/fuzz_read/crash-* if-no-files-found: ignore fuzz_write: @@ -152,7 +152,7 @@ jobs: - name: run fuzz timeout-minutes: 330 run: | - cargo fuzz run fuzz_write -- -timeout=10s -fork=2 -runs=5000000 -max_len=2000 -dict=fuzz/fuzz.dict + cargo fuzz run fuzz_write -- -timeout=10s -fork=2 -runs=10000000 -max_len=1100 -len_control=200 -dict=fuzz/fuzz.dict - name: Upload any failure inputs if: always() uses: actions/upload-artifact@v4 @@ -179,11 +179,11 @@ jobs: - name: run fuzz timeout-minutes: 330 run: | - cargo fuzz run --no-default-features fuzz_write -- -timeout=10s -fork=2 -runs=10000000 -max_len=10000 -dict=fuzz/fuzz.dict + cargo fuzz run --no-default-features fuzz_write -- -timeout=10s -fork=2 -runs=20000000 -max_len=10000 -len_control=200 -dict=fuzz/fuzz.dict - name: Upload any failure inputs if: always() uses: actions/upload-artifact@v4 with: - name: fuzz_write_bad_inputs - path: fuzz/artifacts/fuzz_write_no_features/crash-* + name: fuzz_write_no_features_bad_inputs + path: fuzz/artifacts/fuzz_write/crash-* if-no-files-found: ignore \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 9934555f..08d3c4e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ sha1 = {version = "0.10.6", optional = true } time = { version = "0.3.34", optional = true, default-features = false, features = ["std"] } zstd = { version = "0.13.0", optional = true, default-features = false } zopfli = { version = "0.8.0", optional = true } -deflate64 = { git = "https://github.com/Pr0methean/deflate64-rs.git", optional = true } +deflate64 = { git = "https://github.com/anatawa12/deflate64-rs.git", optional = true } [target.'cfg(any(all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", target_arch = "powerpc"))'.dependencies] crossbeam-utils = "0.8.19" diff --git a/fuzz/.gitignore b/fuzz/.gitignore index a0925114..8d76f8b6 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,3 +1,4 @@ target corpus +!corpus/seed artifacts diff --git a/fuzz/corpus/seed/aes_archive.zip b/fuzz/corpus/seed/aes_archive.zip new file mode 120000 index 00000000..03c268ca --- /dev/null +++ b/fuzz/corpus/seed/aes_archive.zip @@ -0,0 +1 @@ +../../../tests/data/aes_archive.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/comment_garbage.zip b/fuzz/corpus/seed/comment_garbage.zip new file mode 120000 index 00000000..5ed32871 --- /dev/null +++ b/fuzz/corpus/seed/comment_garbage.zip @@ -0,0 +1 @@ +../../../tests/data/comment_garbage.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/deflate64.zip b/fuzz/corpus/seed/deflate64.zip new file mode 120000 index 00000000..e3ac7b90 --- /dev/null +++ b/fuzz/corpus/seed/deflate64.zip @@ -0,0 +1 @@ +../../../tests/data/deflate64.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/deflate64_issue_25.zip b/fuzz/corpus/seed/deflate64_issue_25.zip new file mode 120000 index 00000000..6098769a --- /dev/null +++ b/fuzz/corpus/seed/deflate64_issue_25.zip @@ -0,0 +1 @@ +../../../tests/data/deflate64_issue_25.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/empty.zip b/fuzz/corpus/seed/empty.zip new file mode 100644 index 00000000..a3cdf74b Binary files /dev/null and b/fuzz/corpus/seed/empty.zip differ diff --git a/fuzz/corpus/seed/empty_Bzip2.zip b/fuzz/corpus/seed/empty_Bzip2.zip new file mode 100644 index 00000000..af7aa1ad Binary files /dev/null and b/fuzz/corpus/seed/empty_Bzip2.zip differ diff --git a/fuzz/corpus/seed/empty_Bzip2_largefile.zip b/fuzz/corpus/seed/empty_Bzip2_largefile.zip new file mode 100644 index 00000000..574a92f4 Binary files /dev/null and b/fuzz/corpus/seed/empty_Bzip2_largefile.zip differ diff --git a/fuzz/corpus/seed/empty_Bzip2_zipcrypto.zip b/fuzz/corpus/seed/empty_Bzip2_zipcrypto.zip new file mode 100644 index 00000000..1d8d624b Binary files /dev/null and b/fuzz/corpus/seed/empty_Bzip2_zipcrypto.zip differ diff --git a/fuzz/corpus/seed/empty_Deflated.zip b/fuzz/corpus/seed/empty_Deflated.zip new file mode 100644 index 00000000..34473f96 Binary files /dev/null and b/fuzz/corpus/seed/empty_Deflated.zip differ diff --git a/fuzz/corpus/seed/empty_Deflated_largefile.zip b/fuzz/corpus/seed/empty_Deflated_largefile.zip new file mode 100644 index 00000000..e8fa8c0f Binary files /dev/null and b/fuzz/corpus/seed/empty_Deflated_largefile.zip differ diff --git a/fuzz/corpus/seed/empty_Deflated_zipcrypto.zip b/fuzz/corpus/seed/empty_Deflated_zipcrypto.zip new file mode 100644 index 00000000..2813649f Binary files /dev/null and b/fuzz/corpus/seed/empty_Deflated_zipcrypto.zip differ diff --git a/fuzz/corpus/seed/empty_Stored.zip b/fuzz/corpus/seed/empty_Stored.zip new file mode 100644 index 00000000..0fc97bef Binary files /dev/null and b/fuzz/corpus/seed/empty_Stored.zip differ diff --git a/fuzz/corpus/seed/empty_Stored_largefile.zip b/fuzz/corpus/seed/empty_Stored_largefile.zip new file mode 100644 index 00000000..2a63ce03 Binary files /dev/null and b/fuzz/corpus/seed/empty_Stored_largefile.zip differ diff --git a/fuzz/corpus/seed/empty_Stored_zipcrypto.zip b/fuzz/corpus/seed/empty_Stored_zipcrypto.zip new file mode 100644 index 00000000..ee5a89ac Binary files /dev/null and b/fuzz/corpus/seed/empty_Stored_zipcrypto.zip differ diff --git a/fuzz/corpus/seed/empty_Zstd.zip b/fuzz/corpus/seed/empty_Zstd.zip new file mode 100644 index 00000000..a863e209 Binary files /dev/null and b/fuzz/corpus/seed/empty_Zstd.zip differ diff --git a/fuzz/corpus/seed/empty_Zstd_largefile.zip b/fuzz/corpus/seed/empty_Zstd_largefile.zip new file mode 100644 index 00000000..48eec018 Binary files /dev/null and b/fuzz/corpus/seed/empty_Zstd_largefile.zip differ diff --git a/fuzz/corpus/seed/empty_Zstd_zipcrypto.zip b/fuzz/corpus/seed/empty_Zstd_zipcrypto.zip new file mode 100644 index 00000000..e21b8be5 Binary files /dev/null and b/fuzz/corpus/seed/empty_Zstd_zipcrypto.zip differ diff --git a/fuzz/corpus/seed/files_and_dirs.zip b/fuzz/corpus/seed/files_and_dirs.zip new file mode 120000 index 00000000..b4d40f44 --- /dev/null +++ b/fuzz/corpus/seed/files_and_dirs.zip @@ -0,0 +1 @@ +../../../tests/data/files_and_dirs.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_greater_offset.zip b/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_greater_offset.zip new file mode 120000 index 00000000..2e11137c --- /dev/null +++ b/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_greater_offset.zip @@ -0,0 +1 @@ +../../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_smaller_offset.zip b/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_smaller_offset.zip new file mode 120000 index 00000000..01b527a3 --- /dev/null +++ b/fuzz/corpus/seed/invalid_cde_number_of_files_allocation_smaller_offset.zip @@ -0,0 +1 @@ +../../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/invalid_offset.zip b/fuzz/corpus/seed/invalid_offset.zip new file mode 120000 index 00000000..4f8c160b --- /dev/null +++ b/fuzz/corpus/seed/invalid_offset.zip @@ -0,0 +1 @@ +../../../tests/data/invalid_offset.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/invalid_offset2.zip b/fuzz/corpus/seed/invalid_offset2.zip new file mode 120000 index 00000000..6dbace8b --- /dev/null +++ b/fuzz/corpus/seed/invalid_offset2.zip @@ -0,0 +1 @@ +../../../tests/data/invalid_offset2.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/mimetype.zip b/fuzz/corpus/seed/mimetype.zip new file mode 120000 index 00000000..0d372380 --- /dev/null +++ b/fuzz/corpus/seed/mimetype.zip @@ -0,0 +1 @@ +../../../tests/data/mimetype.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/raw_deflate64_index_out_of_bounds.zip b/fuzz/corpus/seed/raw_deflate64_index_out_of_bounds.zip new file mode 120000 index 00000000..54aa7863 --- /dev/null +++ b/fuzz/corpus/seed/raw_deflate64_index_out_of_bounds.zip @@ -0,0 +1 @@ +../../../tests/data/raw_deflate64_index_out_of_bounds.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_demo.zip b/fuzz/corpus/seed/zip64_demo.zip new file mode 120000 index 00000000..01619790 --- /dev/null +++ b/fuzz/corpus/seed/zip64_demo.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_demo.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_magic_in_filename_1.zip b/fuzz/corpus/seed/zip64_magic_in_filename_1.zip new file mode 120000 index 00000000..f4e3d984 --- /dev/null +++ b/fuzz/corpus/seed/zip64_magic_in_filename_1.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_magic_in_filename_1.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_magic_in_filename_2.zip b/fuzz/corpus/seed/zip64_magic_in_filename_2.zip new file mode 120000 index 00000000..52a969aa --- /dev/null +++ b/fuzz/corpus/seed/zip64_magic_in_filename_2.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_magic_in_filename_2.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_magic_in_filename_3.zip b/fuzz/corpus/seed/zip64_magic_in_filename_3.zip new file mode 120000 index 00000000..11cc6a85 --- /dev/null +++ b/fuzz/corpus/seed/zip64_magic_in_filename_3.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_magic_in_filename_3.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_magic_in_filename_4.zip b/fuzz/corpus/seed/zip64_magic_in_filename_4.zip new file mode 120000 index 00000000..b1d99ed6 --- /dev/null +++ b/fuzz/corpus/seed/zip64_magic_in_filename_4.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_magic_in_filename_4.zip \ No newline at end of file diff --git a/fuzz/corpus/seed/zip64_magic_in_filename_5.zip b/fuzz/corpus/seed/zip64_magic_in_filename_5.zip new file mode 120000 index 00000000..611213e0 --- /dev/null +++ b/fuzz/corpus/seed/zip64_magic_in_filename_5.zip @@ -0,0 +1 @@ +../../../tests/data/zip64_magic_in_filename_5.zip \ No newline at end of file diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict index b33a23f6..c586acc4 100644 --- a/fuzz/fuzz.dict +++ b/fuzz/fuzz.dict @@ -16,4 +16,5 @@ compression_method_deflate64="\x09\x00" compression_method_bzip2="\x0E\x00" compression_method_zstd="]\x00" compression_method_aes="C\x00" +compression_method_unsupported="\xFF\x00" "\xFF\xFF" \ No newline at end of file diff --git a/src/read.rs b/src/read.rs index e1f5573e..1b39b14f 100644 --- a/src/read.rs +++ b/src/read.rs @@ -5,6 +5,7 @@ use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; +use crate::read::zip_archive::Shared; use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData}; @@ -38,13 +39,16 @@ pub(crate) mod stream; // Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely pub(crate) mod zip_archive { + use std::sync::Arc; + /// Extract immutable data from `ZipArchive` to make it cheap to clone #[derive(Debug)] pub(crate) struct Shared { - pub(super) files: Vec, - pub(super) names_map: super::HashMap, + pub(crate) files: Vec, + pub(crate) names_map: super::HashMap, pub(super) offset: u64, - pub(super) comment: Vec, + pub(super) dir_start: u64, + pub(super) dir_end: u64, } /// ZIP archive reader @@ -70,7 +74,8 @@ pub(crate) mod zip_archive { #[derive(Clone, Debug)] pub struct ZipArchive { pub(super) reader: R, - pub(super) shared: super::Arc, + pub(super) shared: Arc, + pub(super) comment: Arc>, } } @@ -312,7 +317,7 @@ pub(crate) fn make_reader( } } -pub(crate) struct DirectoryCounts { +pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, pub(crate) directory_start: u64, pub(crate) number_of_files: usize, @@ -321,10 +326,10 @@ pub(crate) struct DirectoryCounts { } impl ZipArchive { - fn get_directory_counts_zip32( + fn get_directory_info_zip32( footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, - ) -> ZipResult { + ) -> ZipResult { // Some zip files have data prepended to them, resulting in the // offsets all being too small. Get the amount of error by comparing // the actual file position we found the CDE at with the offset @@ -338,7 +343,7 @@ impl ZipArchive { let directory_start = footer.central_directory_offset as u64 + archive_offset; let number_of_files = footer.number_of_files_on_this_disk as usize; - Ok(DirectoryCounts { + Ok(CentralDirectoryInfo { archive_offset, directory_start, number_of_files, @@ -347,11 +352,11 @@ impl ZipArchive { }) } - fn get_directory_counts_zip64( + fn get_directory_info_zip64( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, - ) -> ZipResult { + ) -> ZipResult>> { // See if there's a ZIP64 footer. The ZIP64 locator if present will // have its signature 20 bytes in front of the standard footer. The // standard footer, in turn, is 22+N bytes large, where N is the @@ -367,99 +372,156 @@ impl ZipArchive { // actual offset in the file, since there may be junk at its // beginning. Therefore we need to perform another search, as in // read::CentralDirectoryEnd::find_and_parse, except now we search - // forward. + // forward. There may be multiple results because of Zip64 central-directory signatures in + // ZIP comment data. + + let mut results = Vec::new(); let search_upper_bound = cde_start_pos .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator .ok_or(ZipError::InvalidArchive( "File cannot contain ZIP64 central directory end", ))?; - let (footer64, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( + let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse( reader, locator64.end_of_central_directory_offset, search_upper_bound, )?; - - let directory_start = footer64 - .central_directory_offset - .checked_add(archive_offset) - .ok_or(ZipError::InvalidArchive( - "Invalid central directory size or offset", - ))?; - if directory_start > search_upper_bound { - return Err(ZipError::InvalidArchive( - "Invalid central directory size or offset", - )); - } - if footer64.number_of_files_on_this_disk > footer64.number_of_files { - return Err(ZipError::InvalidArchive( - "ZIP64 footer indicates more files on this disk than in the whole archive", - )); - } - if footer64.version_needed_to_extract > footer64.version_made_by { - return Err(ZipError::InvalidArchive( - "ZIP64 footer indicates a new version is needed to extract this archive than the \ - version that wrote it", - )); - } - - Ok(DirectoryCounts { - archive_offset, - directory_start, - number_of_files: footer64.number_of_files as usize, - disk_number: footer64.disk_number, - disk_with_central_directory: footer64.disk_with_central_directory, - }) + search_results.into_iter().for_each(|(footer64, archive_offset)| { + results.push({ + let directory_start_result = footer64 + .central_directory_offset + .checked_add(archive_offset) + .ok_or(ZipError::InvalidArchive( + "Invalid central directory size or offset", + )); + directory_start_result.and_then(|directory_start| { + if directory_start > search_upper_bound { + Err(ZipError::InvalidArchive( + "Invalid central directory size or offset", + )) + } else if footer64.number_of_files_on_this_disk > footer64.number_of_files { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates more files on this disk than in the whole archive", + )) + } else if footer64.version_needed_to_extract > footer64.version_made_by { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates a new version is needed to extract this archive than the \ + version that wrote it", + )) + } else { + Ok(CentralDirectoryInfo { + archive_offset, + directory_start, + number_of_files: footer64.number_of_files as usize, + disk_number: footer64.disk_number, + disk_with_central_directory: footer64.disk_with_central_directory, + }) + } + }) + }); + }); + Ok(results) } /// Get the directory start offset and number of files. This is done in a /// separate function to ease the control flow design. - pub(crate) fn get_directory_counts( + pub(crate) fn get_metadata( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, - ) -> ZipResult { + ) -> ZipResult { // Check if file has a zip64 footer - let counts_64 = Self::get_directory_counts_zip64(reader, footer, cde_start_pos); - let counts_32 = Self::get_directory_counts_zip32(footer, cde_start_pos); - match counts_64 { - Err(_) => match counts_32 { - Err(e) => Err(e), - Ok(counts) => { - if counts.disk_number != counts.disk_with_central_directory { - return unsupported_zip_error( - "Support for multi-disk files is not implemented", - ); + let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos) + .unwrap_or_else(|e| vec![Err(e)]); + let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos); + let mut invalid_errors = Vec::new(); + let mut unsupported_errors = Vec::new(); + let mut ok_results = Vec::new(); + results.iter_mut().for_each(|result| { + if let Ok(central_dir) = result { + if let Ok(zip32_central_dir) = &zip32_result { + // Both zip32 and zip64 footers exist, so check if the zip64 footer is valid; if not, try zip32 + if central_dir.number_of_files != zip32_central_dir.number_of_files + && zip32_central_dir.number_of_files != u16::MAX as usize + { + *result = Err(ZipError::InvalidArchive( + "ZIP32 and ZIP64 file counts don't match", + )); + return; } - Ok(counts) - } - }, - Ok(counts_64) => { - match counts_32 { - Err(_) => Ok(counts_64), - Ok(counts_32) => { - // Both zip32 and zip64 footers exist, so check if the zip64 footer is valid; if not, try zip32 - if counts_64.number_of_files != counts_32.number_of_files - && counts_32.number_of_files != u16::MAX as usize - { - return Ok(counts_32); - } - if counts_64.disk_number != counts_32.disk_number - && counts_32.disk_number != u16::MAX as u32 - { - return Ok(counts_32); - } - if counts_64.disk_with_central_directory - != counts_32.disk_with_central_directory - && counts_32.disk_with_central_directory != u16::MAX as u32 - { - return Ok(counts_32); - } - Ok(counts_64) + if central_dir.disk_number != zip32_central_dir.disk_number + && zip32_central_dir.disk_number != u16::MAX as u32 + { + *result = Err(ZipError::InvalidArchive( + "ZIP32 and ZIP64 disk numbers don't match", + )); + return; + } + if central_dir.disk_with_central_directory + != zip32_central_dir.disk_with_central_directory + && zip32_central_dir.disk_with_central_directory != u16::MAX as u32 + { + *result = Err(ZipError::InvalidArchive( + "ZIP32 and ZIP64 last-disk numbers don't match", + )); } } } + }); + results.push(zip32_result); + results + .into_iter() + .map(|result| { + result.and_then(|dir_info| { + // If the parsed number of files is greater than the offset then + // something fishy is going on and we shouldn't trust number_of_files. + let file_capacity = if dir_info.number_of_files > cde_start_pos as usize { + 0 + } else { + dir_info.number_of_files + }; + let mut files = Vec::with_capacity(file_capacity); + let mut names_map = HashMap::with_capacity(file_capacity); + reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; + for _ in 0..dir_info.number_of_files { + let file = central_header_to_zip_file(reader, dir_info.archive_offset)?; + names_map.insert(file.file_name.clone(), files.len()); + files.push(file); + } + let dir_end = reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; + if dir_info.disk_number != dir_info.disk_with_central_directory { + unsupported_zip_error("Support for multi-disk files is not implemented") + } else { + Ok(Shared { + files, + names_map, + offset: dir_info.archive_offset, + dir_start: dir_info.directory_start, + dir_end, + }) + } + }) + }) + .for_each(|result| match result { + Err(ZipError::UnsupportedArchive(e)) => { + unsupported_errors.push(ZipError::UnsupportedArchive(e)) + } + Err(e) => invalid_errors.push(e), + Ok(o) => ok_results.push(o), + }); + if ok_results.is_empty() { + return Err(unsupported_errors + .into_iter() + .next() + .unwrap_or_else(|| invalid_errors.into_iter().next().unwrap())); } + let shared = ok_results + .into_iter() + .max_by_key(|shared| shared.dir_end) + .unwrap(); + reader.seek(io::SeekFrom::Start(shared.dir_start))?; + Ok(shared) } /// Read a ZIP archive, collecting the files it contains @@ -467,47 +529,12 @@ impl ZipArchive { /// This uses the central directory record of the ZIP file, and ignores local file headers pub fn new(mut reader: R) -> ZipResult> { let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?; - - let counts = Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?; - - if counts.disk_number != counts.disk_with_central_directory { - return unsupported_zip_error("Support for multi-disk files is not implemented"); - } - - // If the parsed number of files is greater than the offset then - // something fishy is going on and we shouldn't trust number_of_files. - let file_capacity = if counts.number_of_files > cde_start_pos as usize { - 0 - } else { - counts.number_of_files - }; - - let mut files = Vec::with_capacity(file_capacity); - let mut names_map = HashMap::with_capacity(file_capacity); - - if reader - .seek(io::SeekFrom::Start(counts.directory_start)) - .is_err() - { - return Err(ZipError::InvalidArchive( - "Could not seek to start of central directory", - )); - } - - for _ in 0..counts.number_of_files { - let file = central_header_to_zip_file(&mut reader, counts.archive_offset)?; - names_map.insert(file.file_name.clone(), files.len()); - files.push(file); - } - - let shared = Arc::new(zip_archive::Shared { - files, - names_map, - offset: counts.archive_offset, - comment: footer.zip_file_comment, - }); - - Ok(ZipArchive { reader, shared }) + let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; + Ok(ZipArchive { + reader, + shared: shared.into(), + comment: footer.zip_file_comment.into(), + }) } /// Extract a Zip archive into a directory, overwriting files if they /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. @@ -568,7 +595,7 @@ impl ZipArchive { /// Get the comment of the zip archive. pub fn comment(&self) -> &[u8] { - &self.shared.comment + &self.comment } /// Returns an iterator over all the file and directory names in this archive. @@ -1364,4 +1391,12 @@ mod test { std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file"); Ok(()) } + + #[cfg(feature = "deflate64")] + #[test] + fn deflate64_not_enough_space() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/deflate64_issue_25.zip")); + ZipArchive::new(Cursor::new(v)).expect_err("Invalid file"); + } } diff --git a/src/spec.rs b/src/spec.rs index 691f2cab..d7cdd7c6 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -145,7 +145,8 @@ impl Zip64CentralDirectoryEnd { reader: &mut T, nominal_offset: u64, search_upper_bound: u64, - ) -> ZipResult<(Zip64CentralDirectoryEnd, u64)> { + ) -> ZipResult> { + let mut results = Vec::new(); let mut pos = search_upper_bound; while pos >= nominal_offset { @@ -166,7 +167,7 @@ impl Zip64CentralDirectoryEnd { let central_directory_size = reader.read_u64::()?; let central_directory_offset = reader.read_u64::()?; - return Ok(( + results.push(( Zip64CentralDirectoryEnd { version_made_by, version_needed_to_extract, @@ -186,10 +187,13 @@ impl Zip64CentralDirectoryEnd { break; } } - - Err(ZipError::InvalidArchive( - "Could not find ZIP64 central directory end", - )) + if results.is_empty() { + Err(ZipError::InvalidArchive( + "Could not find ZIP64 central directory end", + )) + } else { + Ok(results) + } } pub fn write(&self, writer: &mut T) -> ZipResult<()> { diff --git a/src/write.rs b/src/write.rs index 2abf7861..1cdf1c3d 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1,7 +1,7 @@ //! Types for creating ZIP archives use crate::compression::CompressionMethod; -use crate::read::{central_header_to_zip_file, find_content, ZipArchive, ZipFile, ZipFileReader}; +use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{ffi, AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION}; @@ -435,39 +435,12 @@ impl ZipWriter { /// Initializes the archive from an existing ZIP archive, making it ready for append. pub fn new_append(mut readwriter: A) -> ZipResult> { let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?; - - let counts = ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?; - - if counts.disk_number != counts.disk_with_central_directory { - return Err(ZipError::UnsupportedArchive( - "Support for multi-disk files is not implemented", - )); - } - - if readwriter - .seek(SeekFrom::Start(counts.directory_start)) - .is_err() - { - return Err(InvalidArchive( - "Could not seek to start of central directory", - )); - } - - let files = (0..counts.number_of_files) - .map(|_| central_header_to_zip_file(&mut readwriter, counts.archive_offset)) - .collect::, _>>()?; - - let mut files_by_name = HashMap::new(); - for (index, file) in files.iter().enumerate() { - files_by_name.insert(file.file_name.to_owned(), index); - } - - let _ = readwriter.seek(SeekFrom::Start(counts.directory_start)); // seek directory_start to overwrite it + let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; Ok(ZipWriter { inner: Storer(MaybeEncrypted::Unencrypted(readwriter)), - files, - files_by_name, + files: metadata.files, + files_by_name: metadata.names_map, stats: Default::default(), writing_to_file: false, comment: footer.zip_file_comment, @@ -2077,4 +2050,20 @@ mod test { writer.write_all(&[]).unwrap(); Ok(()) } + + #[test] + fn crash_with_no_features() -> ZipResult<()> { + const ORIGINAL_FILE_NAME: &str = "PK\u{6}\u{6}\0\0\0\0\0\0\0\0\0\u{2}g\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u{1}\0\0\0\0\0\0\0\0\0\0PK\u{6}\u{7}\0\0\0\0\0\0\0\0\0\0\0\0\u{7}\0\t'"; + let mut writer = ZipWriter::new(io::Cursor::new(Vec::new())); + let mut options = FileOptions::default(); + options = options + .with_alignment(3584) + .compression_method(CompressionMethod::Stored); + writer.start_file(ORIGINAL_FILE_NAME, options)?; + let archive = writer.finish()?; + let mut writer = ZipWriter::new_append(archive)?; + writer.shallow_copy_file(ORIGINAL_FILE_NAME, "\u{6}\\")?; + writer.finish()?; + Ok(()) + } } diff --git a/tests/data/deflate64_issue_25.zip b/tests/data/deflate64_issue_25.zip new file mode 100644 index 00000000..78139edd Binary files /dev/null and b/tests/data/deflate64_issue_25.zip differ