Merge pull request #109 from afranchuk/configure-archive-offset
feat: Allow the archive offset behavior of the reader to be configured.
This commit is contained in:
commit
6539545524
3 changed files with 120 additions and 20 deletions
103
src/read.rs
103
src/read.rs
|
@ -40,6 +40,10 @@ use bzip2::read::BzDecoder;
|
||||||
#[cfg(feature = "zstd")]
|
#[cfg(feature = "zstd")]
|
||||||
use zstd::stream::read::Decoder as ZstdDecoder;
|
use zstd::stream::read::Decoder as ZstdDecoder;
|
||||||
|
|
||||||
|
mod config;
|
||||||
|
|
||||||
|
pub use config::*;
|
||||||
|
|
||||||
/// Provides high level API for reading from a stream.
|
/// Provides high level API for reading from a stream.
|
||||||
pub(crate) mod stream;
|
pub(crate) mod stream;
|
||||||
|
|
||||||
|
@ -56,6 +60,9 @@ pub(crate) mod zip_archive {
|
||||||
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
|
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
|
||||||
pub(super) offset: u64,
|
pub(super) offset: u64,
|
||||||
pub(super) dir_start: u64,
|
pub(super) dir_start: u64,
|
||||||
|
// This isn't yet used anywhere, but it is here for use cases in the future.
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub(super) config: super::Config,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// ZIP archive reader
|
/// ZIP archive reader
|
||||||
|
@ -382,12 +389,15 @@ impl<R> ZipArchive<R> {
|
||||||
) -> ZipResult<Self> {
|
) -> ZipResult<Self> {
|
||||||
let initial_offset = match files.first() {
|
let initial_offset = match files.first() {
|
||||||
Some((_, file)) => file.header_start,
|
Some((_, file)) => file.header_start,
|
||||||
None => 0,
|
None => central_start,
|
||||||
};
|
};
|
||||||
let shared = Arc::new(zip_archive::Shared {
|
let shared = Arc::new(zip_archive::Shared {
|
||||||
files,
|
files,
|
||||||
offset: initial_offset,
|
offset: initial_offset,
|
||||||
dir_start: central_start,
|
dir_start: central_start,
|
||||||
|
config: Config {
|
||||||
|
archive_offset: ArchiveOffset::Known(initial_offset),
|
||||||
|
},
|
||||||
});
|
});
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
reader,
|
reader,
|
||||||
|
@ -473,19 +483,44 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip32(
|
fn get_directory_info_zip32(
|
||||||
|
config: &Config,
|
||||||
|
reader: &mut R,
|
||||||
footer: &spec::Zip32CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
) -> ZipResult<CentralDirectoryInfo> {
|
) -> ZipResult<CentralDirectoryInfo> {
|
||||||
// Some zip files have data prepended to them, resulting in the
|
let archive_offset = match config.archive_offset {
|
||||||
// offsets all being too small. Get the amount of error by comparing
|
ArchiveOffset::Known(n) => n,
|
||||||
// the actual file position we found the CDE at with the offset
|
ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
|
||||||
// recorded in the CDE.
|
// Some zip files have data prepended to them, resulting in the
|
||||||
let archive_offset = cde_start_pos
|
// offsets all being too small. Get the amount of error by comparing
|
||||||
.checked_sub(footer.central_directory_size as u64)
|
// the actual file position we found the CDE at with the offset
|
||||||
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
|
// recorded in the CDE.
|
||||||
.ok_or(ZipError::InvalidArchive(
|
let mut offset = cde_start_pos
|
||||||
"Invalid central directory size or offset",
|
.checked_sub(footer.central_directory_size as u64)
|
||||||
))?;
|
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
|
||||||
|
.ok_or(ZipError::InvalidArchive(
|
||||||
|
"Invalid central directory size or offset",
|
||||||
|
))?;
|
||||||
|
|
||||||
|
if config.archive_offset == ArchiveOffset::Detect {
|
||||||
|
// Check whether the archive offset makes sense by peeking at the directory start. If it
|
||||||
|
// doesn't, fall back to using no archive offset. This supports zips with the central
|
||||||
|
// directory entries somewhere other than directly preceding the end of central directory.
|
||||||
|
reader.seek(io::SeekFrom::Start(
|
||||||
|
offset + footer.central_directory_offset as u64,
|
||||||
|
))?;
|
||||||
|
let mut buf = [0; 4];
|
||||||
|
reader.read_exact(&mut buf)?;
|
||||||
|
if spec::Magic::from_le_bytes(buf)
|
||||||
|
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
|
||||||
|
{
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offset
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
||||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
||||||
|
@ -512,6 +547,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip64(
|
fn get_directory_info_zip64(
|
||||||
|
config: &Config,
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
footer: &spec::Zip32CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
|
@ -549,6 +585,27 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
|
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
|
||||||
let results: Vec<ZipResult<CentralDirectoryInfo>> =
|
let results: Vec<ZipResult<CentralDirectoryInfo>> =
|
||||||
search_results.into_iter().map(|(footer64, archive_offset)| {
|
search_results.into_iter().map(|(footer64, archive_offset)| {
|
||||||
|
let archive_offset = match config.archive_offset {
|
||||||
|
ArchiveOffset::Known(n) => n,
|
||||||
|
ArchiveOffset::FromCentralDirectory => archive_offset,
|
||||||
|
ArchiveOffset::Detect => {
|
||||||
|
archive_offset.checked_add(footer64.central_directory_offset)
|
||||||
|
.and_then(|start| {
|
||||||
|
// Check whether the archive offset makes sense by peeking at the directory start.
|
||||||
|
//
|
||||||
|
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
|
||||||
|
reader.seek(io::SeekFrom::Start(start)).ok()?;
|
||||||
|
let mut buf = [0; 4];
|
||||||
|
reader.read_exact(&mut buf).ok()?;
|
||||||
|
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(archive_offset)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
};
|
||||||
let directory_start = footer64
|
let directory_start = footer64
|
||||||
.central_directory_offset
|
.central_directory_offset
|
||||||
.checked_add(archive_offset)
|
.checked_add(archive_offset)
|
||||||
|
@ -584,14 +641,15 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
/// Get the directory start offset and number of files. This is done in a
|
/// Get the directory start offset and number of files. This is done in a
|
||||||
/// separate function to ease the control flow design.
|
/// separate function to ease the control flow design.
|
||||||
pub(crate) fn get_metadata(
|
pub(crate) fn get_metadata(
|
||||||
|
config: Config,
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
footer: &spec::Zip32CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
) -> ZipResult<Shared> {
|
) -> ZipResult<Shared> {
|
||||||
// Check if file has a zip64 footer
|
// Check if file has a zip64 footer
|
||||||
let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos)
|
let mut results = Self::get_directory_info_zip64(&config, reader, footer, cde_start_pos)
|
||||||
.unwrap_or_else(|e| vec![Err(e)]);
|
.unwrap_or_else(|e| vec![Err(e)]);
|
||||||
let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos);
|
let zip32_result = Self::get_directory_info_zip32(&config, reader, footer, cde_start_pos);
|
||||||
let mut invalid_errors = Vec::new();
|
let mut invalid_errors = Vec::new();
|
||||||
let mut unsupported_errors = Vec::new();
|
let mut unsupported_errors = Vec::new();
|
||||||
let mut ok_results = Vec::new();
|
let mut ok_results = Vec::new();
|
||||||
|
@ -652,6 +710,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
files,
|
files,
|
||||||
offset: dir_info.archive_offset,
|
offset: dir_info.archive_offset,
|
||||||
dir_start: dir_info.directory_start,
|
dir_start: dir_info.directory_start,
|
||||||
|
config,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -712,18 +771,28 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a ZIP archive, collecting the files it contains
|
/// Read a ZIP archive, collecting the files it contains.
|
||||||
///
|
///
|
||||||
/// This uses the central directory record of the ZIP file, and ignores local file headers
|
/// This uses the central directory record of the ZIP file, and ignores local file headers.
|
||||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
///
|
||||||
|
/// A default [`Config`] is used.
|
||||||
|
pub fn new(reader: R) -> ZipResult<ZipArchive<R>> {
|
||||||
|
Self::with_config(Default::default(), reader)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a ZIP archive providing a read configuration, collecting the files it contains.
|
||||||
|
///
|
||||||
|
/// This uses the central directory record of the ZIP file, and ignores local file headers.
|
||||||
|
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||||
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
|
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
|
||||||
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?;
|
let shared = Self::get_metadata(config, &mut reader, &footer, cde_start_pos)?;
|
||||||
Ok(ZipArchive {
|
Ok(ZipArchive {
|
||||||
reader,
|
reader,
|
||||||
shared: shared.into(),
|
shared: shared.into(),
|
||||||
comment: footer.zip_file_comment.into(),
|
comment: footer.zip_file_comment.into(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract a Zip archive into a directory, overwriting files if they
|
/// Extract a Zip archive into a directory, overwriting files if they
|
||||||
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
|
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
|
||||||
///
|
///
|
||||||
|
|
22
src/read/config.rs
Normal file
22
src/read/config.rs
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
/// Configuration for reading ZIP archives.
|
||||||
|
#[repr(transparent)]
|
||||||
|
#[derive(Debug, Default, Clone, Copy)]
|
||||||
|
pub struct Config {
|
||||||
|
/// An offset into the reader to use to find the start of the archive.
|
||||||
|
pub archive_offset: ArchiveOffset,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The offset of the start of the archive from the beginning of the reader.
|
||||||
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub enum ArchiveOffset {
|
||||||
|
/// Try to detect the archive offset automatically.
|
||||||
|
///
|
||||||
|
/// This will look for a central directory header at the location implied by `FromCentralDirectory`.
|
||||||
|
/// If missing, this will behave as if `Known(0)` were specified.
|
||||||
|
#[default]
|
||||||
|
Detect,
|
||||||
|
/// Use the central directory length and offset to determine the start of the archive.
|
||||||
|
FromCentralDirectory,
|
||||||
|
/// Specify a fixed archive offset.
|
||||||
|
Known(u64),
|
||||||
|
}
|
15
src/write.rs
15
src/write.rs
|
@ -3,7 +3,7 @@
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
use crate::aes::AesWriter;
|
use crate::aes::AesWriter;
|
||||||
use crate::compression::CompressionMethod;
|
use crate::compression::CompressionMethod;
|
||||||
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
|
use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use crate::spec::{self, Block};
|
use crate::spec::{self, Block};
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
|
@ -538,10 +538,19 @@ impl ZipWriterStats {
|
||||||
|
|
||||||
impl<A: Read + Write + Seek> ZipWriter<A> {
|
impl<A: Read + Write + Seek> ZipWriter<A> {
|
||||||
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
||||||
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
///
|
||||||
|
/// This uses a default configuration to initially read the archive.
|
||||||
|
pub fn new_append(readwriter: A) -> ZipResult<ZipWriter<A>> {
|
||||||
|
Self::new_append_with_config(Default::default(), readwriter)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
||||||
|
///
|
||||||
|
/// This uses the given read configuration to initially read the archive.
|
||||||
|
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
||||||
let (footer, cde_start_pos) =
|
let (footer, cde_start_pos) =
|
||||||
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
|
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
|
||||||
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?;
|
let metadata = ZipArchive::get_metadata(config, &mut readwriter, &footer, cde_start_pos)?;
|
||||||
|
|
||||||
Ok(ZipWriter {
|
Ok(ZipWriter {
|
||||||
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
||||||
|
|
Loading…
Add table
Reference in a new issue