From 29a3f30a72300887f4309d647fce52808af93b38 Mon Sep 17 00:00:00 2001 From: Chris Hennick Date: Fri, 5 May 2023 09:11:49 -0700 Subject: [PATCH] Bug fix: try decoding file as ZIP32 if it's not valid as ZIP64 --- src/read.rs | 192 +++++++++++++++++++++++++--------------------------- 1 file changed, 91 insertions(+), 101 deletions(-) diff --git a/src/read.rs b/src/read.rs index e7400ef3..ebde0209 100644 --- a/src/read.rs +++ b/src/read.rs @@ -121,14 +121,14 @@ impl<'a> CryptoReader<'a> { pub(crate) enum ZipFileReader<'a> { NoReader, - Raw(io::Take<&'a mut dyn io::Read>), + Raw(io::Take<&'a mut dyn Read>), Stored(Crc32Reader>), #[cfg(any( feature = "deflate", feature = "deflate-miniz", feature = "deflate-zlib" ))] - Deflated(Crc32Reader>>), + Deflated(Crc32Reader>>), #[cfg(feature = "bzip2")] Bzip2(Crc32Reader>>), #[cfg(feature = "zstd")] @@ -207,11 +207,11 @@ pub(crate) fn find_content<'a>( #[allow(clippy::too_many_arguments)] pub(crate) fn make_crypto_reader<'a>( - compression_method: crate::compression::CompressionMethod, + compression_method: CompressionMethod, crc32: u32, last_modified_time: DateTime, using_data_descriptor: bool, - reader: io::Take<&'a mut dyn io::Read>, + reader: io::Take<&'a mut dyn Read>, password: Option<&[u8]>, aes_info: Option<(AesMode, AesVendorVersion)>, #[cfg(feature = "aes-crypto")] compressed_size: u64, @@ -291,10 +291,28 @@ pub(crate) fn make_reader( } } -impl ZipArchive { - /// Get the directory start offset and number of files. This is done in a - /// separate function to ease the control flow design. - pub(crate) fn get_directory_counts( +impl ZipArchive { + fn get_directory_counts_zip32( + footer: &spec::CentralDirectoryEnd, + cde_start_pos: u64, + ) -> ZipResult<(u64, u64, usize)> { + // Some zip files have data prepended to them, resulting in the + // offsets all being too small. Get the amount of error by comparing + // the actual file position we found the CDE at with the offset + // recorded in the CDE. + let archive_offset = cde_start_pos + .checked_sub(footer.central_directory_size as u64) + .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) + .ok_or(ZipError::InvalidArchive( + "Invalid central directory size or offset", + ))?; + + let directory_start = footer.central_directory_offset as u64 + archive_offset; + let number_of_files = footer.number_of_files_on_this_disk as usize; + Ok((archive_offset, directory_start, number_of_files)) + } + + fn get_directory_counts_zip64( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, @@ -303,96 +321,68 @@ impl ZipArchive { // have its signature 20 bytes in front of the standard footer. The // standard footer, in turn, is 22+N bytes large, where N is the // comment length. Therefore: - let zip64locator = if reader + let locator64 = reader .seek(io::SeekFrom::End( -(20 + 22 + footer.zip_file_comment.len() as i64), - )) - .is_ok() + ))?; + if !footer.record_too_small() + && footer.disk_number as u32 != locator64.disk_with_central_directory { - match spec::Zip64CentralDirectoryEndLocator::parse(reader) { - Ok(loc) => Some(loc), - Err(ZipError::InvalidArchive(_)) => { - // No ZIP64 header; that's actually fine. We're done here. - None - } - Err(e) => { - // Yikes, a real problem - return Err(e); - } - } - } else { - // Empty Zip files will have nothing else so this error might be fine. If - // not, we'll find out soon. - None - }; - - match zip64locator { - None => { - // Some zip files have data prepended to them, resulting in the - // offsets all being too small. Get the amount of error by comparing - // the actual file position we found the CDE at with the offset - // recorded in the CDE. - let archive_offset = cde_start_pos - .checked_sub(footer.central_directory_size as u64) - .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) - .ok_or(ZipError::InvalidArchive( - "Invalid central directory size or offset", - ))?; - - let directory_start = footer.central_directory_offset as u64 + archive_offset; - let number_of_files = footer.number_of_files_on_this_disk as usize; - Ok((archive_offset, directory_start, number_of_files)) - } - Some(locator64) => { - // If we got here, this is indeed a ZIP64 file. - - if !footer.record_too_small() - && footer.disk_number as u32 != locator64.disk_with_central_directory - { - return unsupported_zip_error( - "Support for multi-disk files is not implemented", - ); - } - - // We need to reassess `archive_offset`. We know where the ZIP64 - // central-directory-end structure *should* be, but unfortunately we - // don't know how to precisely relate that location to our current - // actual offset in the file, since there may be junk at its - // beginning. Therefore we need to perform another search, as in - // read::CentralDirectoryEnd::find_and_parse, except now we search - // forward. - - let search_upper_bound = cde_start_pos - .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator - .ok_or(ZipError::InvalidArchive( - "File cannot contain ZIP64 central directory end", - ))?; - let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( - reader, - locator64.end_of_central_directory_offset, - search_upper_bound, - )?; - - if footer.disk_number != footer.disk_with_central_directory { - return unsupported_zip_error( - "Support for multi-disk files is not implemented", - ); - } - - let directory_start = footer - .central_directory_offset - .checked_add(archive_offset) - .ok_or({ - ZipError::InvalidArchive("Invalid central directory size or offset") - })?; - - Ok(( - archive_offset, - directory_start, - footer.number_of_files as usize, - )) - } + return unsupported_zip_error( + "Support for multi-disk files is not implemented", + ); } + + // We need to reassess `archive_offset`. We know where the ZIP64 + // central-directory-end structure *should* be, but unfortunately we + // don't know how to precisely relate that location to our current + // actual offset in the file, since there may be junk at its + // beginning. Therefore we need to perform another search, as in + // read::CentralDirectoryEnd::find_and_parse, except now we search + // forward. + + let search_upper_bound = cde_start_pos + .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + .ok_or(ZipError::InvalidArchive( + "File cannot contain ZIP64 central directory end", + ))?; + let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( + reader, + locator64.end_of_central_directory_offset, + search_upper_bound, + )?; + + if footer.disk_number != footer.disk_with_central_directory { + return unsupported_zip_error( + "Support for multi-disk files is not implemented", + ); + } + + let directory_start = footer + .central_directory_offset + .checked_add(archive_offset) + .ok_or({ + ZipError::InvalidArchive("Invalid central directory size or offset") + })?; + + Ok(( + archive_offset, + directory_start, + footer.number_of_files as usize, + )) + } + + /// Get the directory start offset and number of files. This is done in a + /// separate function to ease the control flow design. + pub(crate) fn get_directory_counts( + reader: &mut R, + footer: &spec::CentralDirectoryEnd, + cde_start_pos: u64, + ) -> ZipResult<(u64, u64, usize)> { + // Check if file is valid as ZIP64 first; if not, try it as ZIP32 + Self::get_directory_counts_zip64(reader, footer, cde_start_pos).or_else( + || get_directory_counts_zip32(footer, cde_start_pos) + ) } /// Read a ZIP archive, collecting the files it contains @@ -643,7 +633,7 @@ fn unsupported_zip_error(detail: &'static str) -> ZipResult { } /// Parse a central directory entry to collect the information for the file. -pub(crate) fn central_header_to_zip_file( +pub(crate) fn central_header_to_zip_file( reader: &mut R, archive_offset: u64, ) -> ZipResult { @@ -873,7 +863,7 @@ impl<'a> ZipFile<'a> { note = "by stripping `..`s from the path, the meaning of paths can change. `mangled_name` can be used if this behaviour is desirable" )] - pub fn sanitized_name(&self) -> ::std::path::PathBuf { + pub fn sanitized_name(&self) -> std::path::PathBuf { self.mangled_name() } @@ -889,7 +879,7 @@ impl<'a> ZipFile<'a> { /// [`ZipFile::enclosed_name`] is the better option in most scenarios. /// /// [`ParentDir`]: `Component::ParentDir` - pub fn mangled_name(&self) -> ::std::path::PathBuf { + pub fn mangled_name(&self) -> std::path::PathBuf { self.data.file_name_sanitized() } @@ -989,13 +979,13 @@ impl<'a> Drop for ZipFile<'a> { let mut buffer = [0; 1 << 16]; // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. - let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader { + let mut reader: io::Take<&mut dyn Read> = match &mut self.reader { ZipFileReader::NoReader => { let innerreader = self.crypto_reader.take(); innerreader.expect("Invalid reader state").into_inner() } reader => { - let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader); + let innerreader = std::mem::replace(reader, ZipFileReader::NoReader); innerreader.into_inner() } }; @@ -1029,7 +1019,7 @@ impl<'a> Drop for ZipFile<'a> { /// * `comment`: set to an empty string /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream<'a, R: io::Read>( +pub fn read_zipfile_from_stream<'a, R: Read>( reader: &'a mut R, ) -> ZipResult>> { let signature = reader.read_u32::()?; @@ -1105,7 +1095,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>( return unsupported_zip_error("The file length is not available in the local header"); } - let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size); + let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); let result_crc32 = result.crc32; let result_compression_method = result.compression_method;