feat: Allow the archive offset behavior of the reader to be configured.

Aside from supporting the current behavior, which allows archives to be
preceded by arbitrary data (added in fc749a09), this also allows the
archive offset to be detected by checking whether a central directory
header is at the expected location. This is configurable because, if the
behavior were based only on detection, there could be false positives if
archive data happened to contain a central directory header at the right
spot.
This commit is contained in:
Alex Franchuk 2024-05-10 16:08:13 -04:00
parent 8ef61cc5fc
commit a8875b0226
No known key found for this signature in database
GPG key ID: 5E36A95E6D39C562
3 changed files with 120 additions and 20 deletions

View file

@ -40,6 +40,10 @@ use bzip2::read::BzDecoder;
#[cfg(feature = "zstd")]
use zstd::stream::read::Decoder as ZstdDecoder;
mod config;
pub use config::*;
/// Provides high level API for reading from a stream.
pub(crate) mod stream;
@ -56,6 +60,9 @@ pub(crate) mod zip_archive {
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
pub(super) offset: u64,
pub(super) dir_start: u64,
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
}
/// ZIP archive reader
@ -382,12 +389,15 @@ impl<R> ZipArchive<R> {
) -> ZipResult<Self> {
let initial_offset = match files.first() {
Some((_, file)) => file.header_start,
None => 0,
None => central_start,
};
let shared = Arc::new(zip_archive::Shared {
files,
offset: initial_offset,
dir_start: central_start,
config: Config {
archive_offset: ArchiveOffset::Known(initial_offset),
},
});
Ok(Self {
reader,
@ -473,19 +483,44 @@ impl<R: Read + Seek> ZipArchive<R> {
}
fn get_directory_info_zip32(
config: &Config,
reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let archive_offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let mut offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
if config.archive_offset == ArchiveOffset::Detect {
// Check whether the archive offset makes sense by peeking at the directory start. If it
// doesn't, fall back to using no archive offset. This supports zips with the central
// directory entries somewhere other than directly preceding the end of central directory.
reader.seek(io::SeekFrom::Start(
offset + footer.central_directory_offset as u64,
))?;
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
if spec::Magic::from_le_bytes(buf)
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
{
offset = 0;
}
}
offset
}
};
let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize;
@ -512,6 +547,7 @@ impl<R: Read + Seek> ZipArchive<R> {
}
fn get_directory_info_zip64(
config: &Config,
reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
@ -549,6 +585,27 @@ impl<R: Read + Seek> ZipArchive<R> {
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory => archive_offset,
ArchiveOffset::Detect => {
archive_offset.checked_add(footer64.central_directory_offset)
.and_then(|start| {
// Check whether the archive offset makes sense by peeking at the directory start.
//
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
reader.seek(io::SeekFrom::Start(start)).ok()?;
let mut buf = [0; 4];
reader.read_exact(&mut buf).ok()?;
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
None
} else {
Some(archive_offset)
}
})
.unwrap_or(0)
}
};
let directory_start = footer64
.central_directory_offset
.checked_add(archive_offset)
@ -584,14 +641,15 @@ impl<R: Read + Seek> ZipArchive<R> {
/// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design.
pub(crate) fn get_metadata(
config: Config,
reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<Shared> {
// Check if file has a zip64 footer
let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos)
let mut results = Self::get_directory_info_zip64(&config, reader, footer, cde_start_pos)
.unwrap_or_else(|e| vec![Err(e)]);
let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos);
let zip32_result = Self::get_directory_info_zip32(&config, reader, footer, cde_start_pos);
let mut invalid_errors = Vec::new();
let mut unsupported_errors = Vec::new();
let mut ok_results = Vec::new();
@ -652,6 +710,7 @@ impl<R: Read + Seek> ZipArchive<R> {
files,
offset: dir_info.archive_offset,
dir_start: dir_info.directory_start,
config,
})
}
})
@ -712,18 +771,28 @@ impl<R: Read + Seek> ZipArchive<R> {
}
}
/// Read a ZIP archive, collecting the files it contains
/// Read a ZIP archive, collecting the files it contains.
///
/// This uses the central directory record of the ZIP file, and ignores local file headers
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
/// This uses the central directory record of the ZIP file, and ignores local file headers.
///
/// A default [`Config`] is used.
pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
Self::with_config(Default::default(), reader)
}
/// Read a ZIP archive providing a read configuration, collecting the files it contains.
///
/// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?;
let shared = Self::get_metadata(config, &mut reader, &footer, cde_start_pos)?;
Ok(ZipArchive {
reader,
shared: shared.into(),
comment: footer.zip_file_comment.into(),
})
}
/// Extract a Zip archive into a directory, overwriting files if they
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
///

22
src/read/config.rs Normal file
View file

@ -0,0 +1,22 @@
/// Configuration for reading ZIP archives.
///
/// Pass this to `ZipArchive::with_config` or `ZipWriter::new_append_with_config` to control how
/// an existing archive is read. The [`Default`] configuration uses [`ArchiveOffset::Detect`].
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy)]
pub struct Config {
/// How to determine the offset into the reader at which the archive starts.
pub archive_offset: ArchiveOffset,
}
/// The offset of the start of the archive from the beginning of the reader.
///
/// Some ZIP files have data prepended to them, which makes every offset recorded in the archive
/// too small by the length of that data. This enum selects how that discrepancy is determined.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArchiveOffset {
/// Try to detect the archive offset automatically.
///
/// This computes the offset the same way as [`FromCentralDirectory`](Self::FromCentralDirectory)
/// and then checks whether a central directory header signature is present at the resulting
/// directory start. If the signature is missing, this behaves as if `Known(0)` were specified.
#[default]
Detect,
/// Use the central directory length and offset to determine the start of the archive.
///
/// The offset is the difference between where the end of central directory record was actually
/// found and where the recorded central directory size and offset say it should be.
FromCentralDirectory,
/// Specify a fixed archive offset, in bytes from the beginning of the reader.
Known(u64),
}

View file

@ -3,7 +3,7 @@
#[cfg(feature = "aes-crypto")]
use crate::aes::AesWriter;
use crate::compression::CompressionMethod;
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block};
#[cfg(feature = "aes-crypto")]
@ -538,10 +538,19 @@ impl ZipWriterStats {
impl<A: Read + Write + Seek> ZipWriter<A> {
/// Initializes the archive from an existing ZIP archive, making it ready for append.
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
///
/// This uses a default configuration to initially read the archive.
pub fn new_append(readwriter: A) -> ZipResult<ZipWriter<A>> {
Self::new_append_with_config(Default::default(), readwriter)
}
/// Initializes the archive from an existing ZIP archive, making it ready for append.
///
/// This uses the given read configuration to initially read the archive.
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
let (footer, cde_start_pos) =
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?;
let metadata = ZipArchive::get_metadata(config, &mut readwriter, &footer, cde_start_pos)?;
Ok(ZipWriter {
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),