Bug fix: try decoding file as ZIP32 if it's not valid as ZIP64

This commit is contained in:
Chris Hennick 2023-05-05 09:11:49 -07:00
parent 6e7ff280e9
commit 29a3f30a72
No known key found for this signature in database
GPG key ID: 25653935CC8B6C74

View file

@ -121,14 +121,14 @@ impl<'a> CryptoReader<'a> {
pub(crate) enum ZipFileReader<'a> { pub(crate) enum ZipFileReader<'a> {
NoReader, NoReader,
Raw(io::Take<&'a mut dyn io::Read>), Raw(io::Take<&'a mut dyn Read>),
Stored(Crc32Reader<CryptoReader<'a>>), Stored(Crc32Reader<CryptoReader<'a>>),
#[cfg(any( #[cfg(any(
feature = "deflate", feature = "deflate",
feature = "deflate-miniz", feature = "deflate-miniz",
feature = "deflate-zlib" feature = "deflate-zlib"
))] ))]
Deflated(Crc32Reader<flate2::read::DeflateDecoder<CryptoReader<'a>>>), Deflated(Crc32Reader<DeflateDecoder<CryptoReader<'a>>>),
#[cfg(feature = "bzip2")] #[cfg(feature = "bzip2")]
Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>), Bzip2(Crc32Reader<BzDecoder<CryptoReader<'a>>>),
#[cfg(feature = "zstd")] #[cfg(feature = "zstd")]
@ -207,11 +207,11 @@ pub(crate) fn find_content<'a>(
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub(crate) fn make_crypto_reader<'a>( pub(crate) fn make_crypto_reader<'a>(
compression_method: crate::compression::CompressionMethod, compression_method: CompressionMethod,
crc32: u32, crc32: u32,
last_modified_time: DateTime, last_modified_time: DateTime,
using_data_descriptor: bool, using_data_descriptor: bool,
reader: io::Take<&'a mut dyn io::Read>, reader: io::Take<&'a mut dyn Read>,
password: Option<&[u8]>, password: Option<&[u8]>,
aes_info: Option<(AesMode, AesVendorVersion)>, aes_info: Option<(AesMode, AesVendorVersion)>,
#[cfg(feature = "aes-crypto")] compressed_size: u64, #[cfg(feature = "aes-crypto")] compressed_size: u64,
@ -291,10 +291,28 @@ pub(crate) fn make_reader(
} }
} }
impl<R: Read + io::Seek> ZipArchive<R> { impl<R: Read + Seek> ZipArchive<R> {
/// Get the directory start offset and number of files. This is done in a fn get_directory_counts_zip32(
/// separate function to ease the control flow design. footer: &spec::CentralDirectoryEnd,
pub(crate) fn get_directory_counts( cde_start_pos: u64,
) -> ZipResult<(u64, u64, usize)> {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let archive_offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize;
Ok((archive_offset, directory_start, number_of_files))
}
fn get_directory_counts_zip64(
reader: &mut R, reader: &mut R,
footer: &spec::CentralDirectoryEnd, footer: &spec::CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
@ -303,96 +321,68 @@ impl<R: Read + io::Seek> ZipArchive<R> {
// have its signature 20 bytes in front of the standard footer. The // have its signature 20 bytes in front of the standard footer. The
// standard footer, in turn, is 22+N bytes large, where N is the // standard footer, in turn, is 22+N bytes large, where N is the
// comment length. Therefore: // comment length. Therefore:
let zip64locator = if reader let locator64 = reader
.seek(io::SeekFrom::End( .seek(io::SeekFrom::End(
-(20 + 22 + footer.zip_file_comment.len() as i64), -(20 + 22 + footer.zip_file_comment.len() as i64),
)) ))?;
.is_ok() if !footer.record_too_small()
&& footer.disk_number as u32 != locator64.disk_with_central_directory
{ {
match spec::Zip64CentralDirectoryEndLocator::parse(reader) { return unsupported_zip_error(
Ok(loc) => Some(loc), "Support for multi-disk files is not implemented",
Err(ZipError::InvalidArchive(_)) => { );
// No ZIP64 header; that's actually fine. We're done here.
None
}
Err(e) => {
// Yikes, a real problem
return Err(e);
}
}
} else {
// Empty Zip files will have nothing else so this error might be fine. If
// not, we'll find out soon.
None
};
match zip64locator {
None => {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let archive_offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize;
Ok((archive_offset, directory_start, number_of_files))
}
Some(locator64) => {
// If we got here, this is indeed a ZIP64 file.
if !footer.record_too_small()
&& footer.disk_number as u32 != locator64.disk_with_central_directory
{
return unsupported_zip_error(
"Support for multi-disk files is not implemented",
);
}
// We need to reassess `archive_offset`. We know where the ZIP64
// central-directory-end structure *should* be, but unfortunately we
// don't know how to precisely relate that location to our current
// actual offset in the file, since there may be junk at its
// beginning. Therefore we need to perform another search, as in
// read::CentralDirectoryEnd::find_and_parse, except now we search
// forward.
let search_upper_bound = cde_start_pos
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
.ok_or(ZipError::InvalidArchive(
"File cannot contain ZIP64 central directory end",
))?;
let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
reader,
locator64.end_of_central_directory_offset,
search_upper_bound,
)?;
if footer.disk_number != footer.disk_with_central_directory {
return unsupported_zip_error(
"Support for multi-disk files is not implemented",
);
}
let directory_start = footer
.central_directory_offset
.checked_add(archive_offset)
.ok_or({
ZipError::InvalidArchive("Invalid central directory size or offset")
})?;
Ok((
archive_offset,
directory_start,
footer.number_of_files as usize,
))
}
} }
// We need to reassess `archive_offset`. We know where the ZIP64
// central-directory-end structure *should* be, but unfortunately we
// don't know how to precisely relate that location to our current
// actual offset in the file, since there may be junk at its
// beginning. Therefore we need to perform another search, as in
// read::CentralDirectoryEnd::find_and_parse, except now we search
// forward.
let search_upper_bound = cde_start_pos
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
.ok_or(ZipError::InvalidArchive(
"File cannot contain ZIP64 central directory end",
))?;
let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
reader,
locator64.end_of_central_directory_offset,
search_upper_bound,
)?;
if footer.disk_number != footer.disk_with_central_directory {
return unsupported_zip_error(
"Support for multi-disk files is not implemented",
);
}
let directory_start = footer
.central_directory_offset
.checked_add(archive_offset)
.ok_or({
ZipError::InvalidArchive("Invalid central directory size or offset")
})?;
Ok((
archive_offset,
directory_start,
footer.number_of_files as usize,
))
}
/// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design.
pub(crate) fn get_directory_counts(
reader: &mut R,
footer: &spec::CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<(u64, u64, usize)> {
// Check if file is valid as ZIP64 first; if not, try it as ZIP32
Self::get_directory_counts_zip64(reader, footer, cde_start_pos).or_else(
|| get_directory_counts_zip32(footer, cde_start_pos)
)
} }
/// Read a ZIP archive, collecting the files it contains /// Read a ZIP archive, collecting the files it contains
@ -643,7 +633,7 @@ fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
} }
/// Parse a central directory entry to collect the information for the file. /// Parse a central directory entry to collect the information for the file.
pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>( pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
reader: &mut R, reader: &mut R,
archive_offset: u64, archive_offset: u64,
) -> ZipResult<ZipFileData> { ) -> ZipResult<ZipFileData> {
@ -873,7 +863,7 @@ impl<'a> ZipFile<'a> {
note = "by stripping `..`s from the path, the meaning of paths can change. note = "by stripping `..`s from the path, the meaning of paths can change.
`mangled_name` can be used if this behaviour is desirable" `mangled_name` can be used if this behaviour is desirable"
)] )]
pub fn sanitized_name(&self) -> ::std::path::PathBuf { pub fn sanitized_name(&self) -> std::path::PathBuf {
self.mangled_name() self.mangled_name()
} }
@ -889,7 +879,7 @@ impl<'a> ZipFile<'a> {
/// [`ZipFile::enclosed_name`] is the better option in most scenarios. /// [`ZipFile::enclosed_name`] is the better option in most scenarios.
/// ///
/// [`ParentDir`]: `Component::ParentDir` /// [`ParentDir`]: `Component::ParentDir`
pub fn mangled_name(&self) -> ::std::path::PathBuf { pub fn mangled_name(&self) -> std::path::PathBuf {
self.data.file_name_sanitized() self.data.file_name_sanitized()
} }
@ -989,13 +979,13 @@ impl<'a> Drop for ZipFile<'a> {
let mut buffer = [0; 1 << 16]; let mut buffer = [0; 1 << 16];
// Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader { let mut reader: io::Take<&mut dyn Read> = match &mut self.reader {
ZipFileReader::NoReader => { ZipFileReader::NoReader => {
let innerreader = self.crypto_reader.take(); let innerreader = self.crypto_reader.take();
innerreader.expect("Invalid reader state").into_inner() innerreader.expect("Invalid reader state").into_inner()
} }
reader => { reader => {
let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader); let innerreader = std::mem::replace(reader, ZipFileReader::NoReader);
innerreader.into_inner() innerreader.into_inner()
} }
}; };
@ -1029,7 +1019,7 @@ impl<'a> Drop for ZipFile<'a> {
/// * `comment`: set to an empty string /// * `comment`: set to an empty string
/// * `data_start`: set to 0 /// * `data_start`: set to 0
/// * `external_attributes`: `unix_mode()`: will return None /// * `external_attributes`: `unix_mode()`: will return None
pub fn read_zipfile_from_stream<'a, R: io::Read>( pub fn read_zipfile_from_stream<'a, R: Read>(
reader: &'a mut R, reader: &'a mut R,
) -> ZipResult<Option<ZipFile<'_>>> { ) -> ZipResult<Option<ZipFile<'_>>> {
let signature = reader.read_u32::<LittleEndian>()?; let signature = reader.read_u32::<LittleEndian>()?;
@ -1105,7 +1095,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
return unsupported_zip_error("The file length is not available in the local header"); return unsupported_zip_error("The file length is not available in the local header");
} }
let limit_reader = (reader as &'a mut dyn io::Read).take(result.compressed_size); let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size);
let result_crc32 = result.crc32; let result_crc32 = result.crc32;
let result_compression_method = result.compression_method; let result_compression_method = result.compression_method;