feat: Allow the archive offset behavior of the reader to be configured.

Aside from supporting the current behavior which allows archives to be
preceded by arbitrary data (added in fc749a09), this also allows
detection of the offset to use by checking whether a central directory
header is at the expected location. This is configurable because if the
behavior were based only on detection, there could be false positives if
archive data happened to contain a central directory header at the right
spot.
This commit is contained in:
Alex Franchuk 2024-05-10 16:08:13 -04:00
parent 8ef61cc5fc
commit a8875b0226
No known key found for this signature in database
GPG key ID: 5E36A95E6D39C562
3 changed files with 120 additions and 20 deletions

View file

@ -40,6 +40,10 @@ use bzip2::read::BzDecoder;
#[cfg(feature = "zstd")] #[cfg(feature = "zstd")]
use zstd::stream::read::Decoder as ZstdDecoder; use zstd::stream::read::Decoder as ZstdDecoder;
mod config;
pub use config::*;
/// Provides high level API for reading from a stream. /// Provides high level API for reading from a stream.
pub(crate) mod stream; pub(crate) mod stream;
@ -56,6 +60,9 @@ pub(crate) mod zip_archive {
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>, pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
pub(super) offset: u64, pub(super) offset: u64,
pub(super) dir_start: u64, pub(super) dir_start: u64,
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
} }
/// ZIP archive reader /// ZIP archive reader
@ -382,12 +389,15 @@ impl<R> ZipArchive<R> {
) -> ZipResult<Self> { ) -> ZipResult<Self> {
let initial_offset = match files.first() { let initial_offset = match files.first() {
Some((_, file)) => file.header_start, Some((_, file)) => file.header_start,
None => 0, None => central_start,
}; };
let shared = Arc::new(zip_archive::Shared { let shared = Arc::new(zip_archive::Shared {
files, files,
offset: initial_offset, offset: initial_offset,
dir_start: central_start, dir_start: central_start,
config: Config {
archive_offset: ArchiveOffset::Known(initial_offset),
},
}); });
Ok(Self { Ok(Self {
reader, reader,
@ -473,19 +483,44 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
fn get_directory_info_zip32( fn get_directory_info_zip32(
config: &Config,
reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> { ) -> ZipResult<CentralDirectoryInfo> {
// Some zip files have data prepended to them, resulting in the let archive_offset = match config.archive_offset {
// offsets all being too small. Get the amount of error by comparing ArchiveOffset::Known(n) => n,
// the actual file position we found the CDE at with the offset ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
// recorded in the CDE. // Some zip files have data prepended to them, resulting in the
let archive_offset = cde_start_pos // offsets all being too small. Get the amount of error by comparing
.checked_sub(footer.central_directory_size as u64) // the actual file position we found the CDE at with the offset
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) // recorded in the CDE.
.ok_or(ZipError::InvalidArchive( let mut offset = cde_start_pos
"Invalid central directory size or offset", .checked_sub(footer.central_directory_size as u64)
))?; .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
if config.archive_offset == ArchiveOffset::Detect {
// Check whether the archive offset makes sense by peeking at the directory start. If it
// doesn't, fall back to using no archive offset. This supports zips with the central
// directory entries somewhere other than directly preceding the end of central directory.
reader.seek(io::SeekFrom::Start(
offset + footer.central_directory_offset as u64,
))?;
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
if spec::Magic::from_le_bytes(buf)
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
{
offset = 0;
}
}
offset
}
};
let directory_start = footer.central_directory_offset as u64 + archive_offset; let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize; let number_of_files = footer.number_of_files_on_this_disk as usize;
@ -512,6 +547,7 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
fn get_directory_info_zip64( fn get_directory_info_zip64(
config: &Config,
reader: &mut R, reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
@ -549,6 +585,27 @@ impl<R: Read + Seek> ZipArchive<R> {
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> = let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| { search_results.into_iter().map(|(footer64, archive_offset)| {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory => archive_offset,
ArchiveOffset::Detect => {
archive_offset.checked_add(footer64.central_directory_offset)
.and_then(|start| {
// Check whether the archive offset makes sense by peeking at the directory start.
//
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
reader.seek(io::SeekFrom::Start(start)).ok()?;
let mut buf = [0; 4];
reader.read_exact(&mut buf).ok()?;
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
None
} else {
Some(archive_offset)
}
})
.unwrap_or(0)
}
};
let directory_start = footer64 let directory_start = footer64
.central_directory_offset .central_directory_offset
.checked_add(archive_offset) .checked_add(archive_offset)
@ -584,14 +641,15 @@ impl<R: Read + Seek> ZipArchive<R> {
/// Get the directory start offset and number of files. This is done in a /// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design. /// separate function to ease the control flow design.
pub(crate) fn get_metadata( pub(crate) fn get_metadata(
config: Config,
reader: &mut R, reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
) -> ZipResult<Shared> { ) -> ZipResult<Shared> {
// Check if file has a zip64 footer // Check if file has a zip64 footer
let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos) let mut results = Self::get_directory_info_zip64(&config, reader, footer, cde_start_pos)
.unwrap_or_else(|e| vec![Err(e)]); .unwrap_or_else(|e| vec![Err(e)]);
let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos); let zip32_result = Self::get_directory_info_zip32(&config, reader, footer, cde_start_pos);
let mut invalid_errors = Vec::new(); let mut invalid_errors = Vec::new();
let mut unsupported_errors = Vec::new(); let mut unsupported_errors = Vec::new();
let mut ok_results = Vec::new(); let mut ok_results = Vec::new();
@ -652,6 +710,7 @@ impl<R: Read + Seek> ZipArchive<R> {
files, files,
offset: dir_info.archive_offset, offset: dir_info.archive_offset,
dir_start: dir_info.directory_start, dir_start: dir_info.directory_start,
config,
}) })
} }
}) })
@ -712,18 +771,28 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
} }
/// Read a ZIP archive, collecting the files it contains /// Read a ZIP archive, collecting the files it contains.
/// ///
/// This uses the central directory record of the ZIP file, and ignores local file headers /// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> { ///
/// A default [`Config`] is used.
pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
Self::with_config(Default::default(), reader)
}
/// Read a ZIP archive providing a read configuration, collecting the files it contains.
///
/// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?; let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; let shared = Self::get_metadata(config, &mut reader, &footer, cde_start_pos)?;
Ok(ZipArchive { Ok(ZipArchive {
reader, reader,
shared: shared.into(), shared: shared.into(),
comment: footer.zip_file_comment.into(), comment: footer.zip_file_comment.into(),
}) })
} }
/// Extract a Zip archive into a directory, overwriting files if they /// Extract a Zip archive into a directory, overwriting files if they
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
/// ///

22
src/read/config.rs Normal file
View file

@ -0,0 +1,22 @@
/// Configuration for reading ZIP archives.
///
/// `Default` is derived, so a default `Config` uses the default
/// [`ArchiveOffset`] variant for `archive_offset`.
// NOTE(review): `#[repr(transparent)]` only permits a single non-zero-sized
// field, so it will have to be removed if further options are added to this
// struct — confirm that this layout guarantee is intended.
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy)]
pub struct Config {
/// An offset into the reader to use to find the start of the archive.
///
/// See [`ArchiveOffset`] for the available ways of determining it.
pub archive_offset: ArchiveOffset,
}
/// The offset of the start of the archive from the beginning of the reader.
///
/// The offset is added to the central directory offset recorded in the
/// archive to locate the central directory within the reader.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArchiveOffset {
/// Try to detect the archive offset automatically.
///
/// This computes the offset in the same way as
/// [`ArchiveOffset::FromCentralDirectory`] and then checks whether a central
/// directory header signature is present at the resulting central directory
/// location. If the signature is missing, an archive offset of zero is used,
/// i.e. this behaves as if `Known(0)` were specified.
///
/// This is the default.
#[default]
Detect,
/// Use the central directory length and offset to determine the start of the archive.
///
/// This supports archives that are preceded by arbitrary data.
FromCentralDirectory,
/// Specify a fixed archive offset.
///
/// The given value is used as-is; no detection is performed.
Known(u64),
}

View file

@ -3,7 +3,7 @@
#[cfg(feature = "aes-crypto")] #[cfg(feature = "aes-crypto")]
use crate::aes::AesWriter; use crate::aes::AesWriter;
use crate::compression::CompressionMethod; use crate::compression::CompressionMethod;
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult}; use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block}; use crate::spec::{self, Block};
#[cfg(feature = "aes-crypto")] #[cfg(feature = "aes-crypto")]
@ -538,10 +538,19 @@ impl ZipWriterStats {
impl<A: Read + Write + Seek> ZipWriter<A> { impl<A: Read + Write + Seek> ZipWriter<A> {
/// Initializes the archive from an existing ZIP archive, making it ready for append. /// Initializes the archive from an existing ZIP archive, making it ready for append.
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> { ///
/// This uses a default configuration to initially read the archive.
pub fn new_append(readwriter: A) -> ZipResult<ZipWriter<A>> {
Self::new_append_with_config(Default::default(), readwriter)
}
/// Initializes the archive from an existing ZIP archive, making it ready for append.
///
/// This uses the given read configuration to initially read the archive.
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
let (footer, cde_start_pos) = let (footer, cde_start_pos) =
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?; spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; let metadata = ZipArchive::get_metadata(config, &mut readwriter, &footer, cde_start_pos)?;
Ok(ZipWriter { Ok(ZipWriter {
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)), inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),