Merge pull request #109 from afranchuk/configure-archive-offset

feat: Allow the archive offset behavior of the reader to be configured.
This commit is contained in:
Chris Hennick 2024-05-30 04:21:03 +00:00 committed by GitHub
commit 6539545524
Signed by: DevComp
GPG key ID: B5690EEEBB952194
3 changed files with 120 additions and 20 deletions

View file

@ -40,6 +40,10 @@ use bzip2::read::BzDecoder;
#[cfg(feature = "zstd")] #[cfg(feature = "zstd")]
use zstd::stream::read::Decoder as ZstdDecoder; use zstd::stream::read::Decoder as ZstdDecoder;
mod config;
pub use config::*;
/// Provides high level API for reading from a stream. /// Provides high level API for reading from a stream.
pub(crate) mod stream; pub(crate) mod stream;
@ -56,6 +60,9 @@ pub(crate) mod zip_archive {
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>, pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
pub(super) offset: u64, pub(super) offset: u64,
pub(super) dir_start: u64, pub(super) dir_start: u64,
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
} }
/// ZIP archive reader /// ZIP archive reader
@ -382,12 +389,15 @@ impl<R> ZipArchive<R> {
) -> ZipResult<Self> { ) -> ZipResult<Self> {
let initial_offset = match files.first() { let initial_offset = match files.first() {
Some((_, file)) => file.header_start, Some((_, file)) => file.header_start,
None => 0, None => central_start,
}; };
let shared = Arc::new(zip_archive::Shared { let shared = Arc::new(zip_archive::Shared {
files, files,
offset: initial_offset, offset: initial_offset,
dir_start: central_start, dir_start: central_start,
config: Config {
archive_offset: ArchiveOffset::Known(initial_offset),
},
}); });
Ok(Self { Ok(Self {
reader, reader,
@ -473,19 +483,44 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
fn get_directory_info_zip32( fn get_directory_info_zip32(
config: &Config,
reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> { ) -> ZipResult<CentralDirectoryInfo> {
// Some zip files have data prepended to them, resulting in the let archive_offset = match config.archive_offset {
// offsets all being too small. Get the amount of error by comparing ArchiveOffset::Known(n) => n,
// the actual file position we found the CDE at with the offset ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
// recorded in the CDE. // Some zip files have data prepended to them, resulting in the
let archive_offset = cde_start_pos // offsets all being too small. Get the amount of error by comparing
.checked_sub(footer.central_directory_size as u64) // the actual file position we found the CDE at with the offset
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) // recorded in the CDE.
.ok_or(ZipError::InvalidArchive( let mut offset = cde_start_pos
"Invalid central directory size or offset", .checked_sub(footer.central_directory_size as u64)
))?; .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))?;
if config.archive_offset == ArchiveOffset::Detect {
// Check whether the archive offset makes sense by peeking at the directory start. If it
// doesn't, fall back to using no archive offset. This supports zips with the central
// directory entries somewhere other than directly preceding the end of central directory.
reader.seek(io::SeekFrom::Start(
offset + footer.central_directory_offset as u64,
))?;
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
if spec::Magic::from_le_bytes(buf)
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
{
offset = 0;
}
}
offset
}
};
let directory_start = footer.central_directory_offset as u64 + archive_offset; let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize; let number_of_files = footer.number_of_files_on_this_disk as usize;
@ -512,6 +547,7 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
fn get_directory_info_zip64( fn get_directory_info_zip64(
config: &Config,
reader: &mut R, reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
@ -549,6 +585,27 @@ impl<R: Read + Seek> ZipArchive<R> {
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> = let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| { search_results.into_iter().map(|(footer64, archive_offset)| {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory => archive_offset,
ArchiveOffset::Detect => {
archive_offset.checked_add(footer64.central_directory_offset)
.and_then(|start| {
// Check whether the archive offset makes sense by peeking at the directory start.
//
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
reader.seek(io::SeekFrom::Start(start)).ok()?;
let mut buf = [0; 4];
reader.read_exact(&mut buf).ok()?;
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
None
} else {
Some(archive_offset)
}
})
.unwrap_or(0)
}
};
let directory_start = footer64 let directory_start = footer64
.central_directory_offset .central_directory_offset
.checked_add(archive_offset) .checked_add(archive_offset)
@ -584,14 +641,15 @@ impl<R: Read + Seek> ZipArchive<R> {
/// Get the directory start offset and number of files. This is done in a /// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design. /// separate function to ease the control flow design.
pub(crate) fn get_metadata( pub(crate) fn get_metadata(
config: Config,
reader: &mut R, reader: &mut R,
footer: &spec::Zip32CentralDirectoryEnd, footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64, cde_start_pos: u64,
) -> ZipResult<Shared> { ) -> ZipResult<Shared> {
// Check if file has a zip64 footer // Check if file has a zip64 footer
let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos) let mut results = Self::get_directory_info_zip64(&config, reader, footer, cde_start_pos)
.unwrap_or_else(|e| vec![Err(e)]); .unwrap_or_else(|e| vec![Err(e)]);
let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos); let zip32_result = Self::get_directory_info_zip32(&config, reader, footer, cde_start_pos);
let mut invalid_errors = Vec::new(); let mut invalid_errors = Vec::new();
let mut unsupported_errors = Vec::new(); let mut unsupported_errors = Vec::new();
let mut ok_results = Vec::new(); let mut ok_results = Vec::new();
@ -652,6 +710,7 @@ impl<R: Read + Seek> ZipArchive<R> {
files, files,
offset: dir_info.archive_offset, offset: dir_info.archive_offset,
dir_start: dir_info.directory_start, dir_start: dir_info.directory_start,
config,
}) })
} }
}) })
@ -712,18 +771,28 @@ impl<R: Read + Seek> ZipArchive<R> {
} }
} }
/// Read a ZIP archive, collecting the files it contains /// Read a ZIP archive, collecting the files it contains.
/// ///
/// This uses the central directory record of the ZIP file, and ignores local file headers /// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> { ///
/// A default [`Config`] is used.
pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
Self::with_config(Default::default(), reader)
}
/// Read a ZIP archive providing a read configuration, collecting the files it contains.
///
/// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?; let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; let shared = Self::get_metadata(config, &mut reader, &footer, cde_start_pos)?;
Ok(ZipArchive { Ok(ZipArchive {
reader, reader,
shared: shared.into(), shared: shared.into(),
comment: footer.zip_file_comment.into(), comment: footer.zip_file_comment.into(),
}) })
} }
/// Extract a Zip archive into a directory, overwriting files if they /// Extract a Zip archive into a directory, overwriting files if they
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
/// ///

22
src/read/config.rs Normal file
View file

@ -0,0 +1,22 @@
/// Configuration for reading ZIP archives.
///
/// A value of this type is accepted by `ZipArchive::with_config` and
/// `ZipWriter::new_append_with_config`. The derived [`Default`] leaves
/// `archive_offset` at [`ArchiveOffset::Detect`].
#[repr(transparent)]
#[derive(Debug, Default, Clone, Copy)]
pub struct Config {
/// How to determine the offset into the reader at which the archive starts.
///
/// This is a strategy rather than a raw number; use [`ArchiveOffset::Known`] to supply a
/// fixed offset.
pub archive_offset: ArchiveOffset,
}
/// The offset of the start of the archive from the beginning of the reader.
///
/// Offsets recorded inside the archive are shifted by this amount when the central directory
/// is located, which allows reading archives that have other data prepended to them.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArchiveOffset {
/// Try to detect the archive offset automatically.
///
/// This computes the same offset as `FromCentralDirectory` and then looks for a central
/// directory header signature at the resulting directory start.
/// If the signature is missing, this will behave as if `Known(0)` were specified.
#[default]
Detect,
/// Use the central directory length and offset to determine the start of the archive.
FromCentralDirectory,
/// Specify a fixed archive offset.
///
/// The given value is used as-is; no detection is attempted.
Known(u64),
}

View file

@ -3,7 +3,7 @@
#[cfg(feature = "aes-crypto")] #[cfg(feature = "aes-crypto")]
use crate::aes::AesWriter; use crate::aes::AesWriter;
use crate::compression::CompressionMethod; use crate::compression::CompressionMethod;
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult}; use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block}; use crate::spec::{self, Block};
#[cfg(feature = "aes-crypto")] #[cfg(feature = "aes-crypto")]
@ -538,10 +538,19 @@ impl ZipWriterStats {
impl<A: Read + Write + Seek> ZipWriter<A> { impl<A: Read + Write + Seek> ZipWriter<A> {
/// Initializes the archive from an existing ZIP archive, making it ready for append. /// Initializes the archive from an existing ZIP archive, making it ready for append.
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> { ///
/// This uses a default configuration to initially read the archive.
pub fn new_append(readwriter: A) -> ZipResult<ZipWriter<A>> {
Self::new_append_with_config(Default::default(), readwriter)
}
/// Initializes the archive from an existing ZIP archive, making it ready for append.
///
/// This uses the given read configuration to initially read the archive.
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
let (footer, cde_start_pos) = let (footer, cde_start_pos) =
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?; spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; let metadata = ZipArchive::get_metadata(config, &mut readwriter, &footer, cde_start_pos)?;
Ok(ZipWriter { Ok(ZipWriter {
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)), inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),