fix: rewrite the EOCD/EOCD64 detection to fix extreme performance regression (#247)

* fix: resolve clippy warning in nightly

* wip: major rework of cde location

* wip: rework CDE lookup

* refactor: magic finder, eocd lookup retry

* wip: handle empty zips

* fix: satisfy tests, add documentation

* chore: remove unused dependencies

* feat: support both zip32 and zip64 comments

* feat: add zip64 comment functions to ZipWriter

* fix: first pass on maintainer comments

* fix: continue searching for EOCD when the central directory is invalid

* chore: satisfy clippy lints

* chore: satisfy style_and_docs

* feat: support both directions in MagicFinder, correctly find first CDFH

* fix: add more checks to EOCD parsing, move comment size error from parse to write

* fix: use saturating add when checking eocd64 record_size upper bound

* fix: correctly handle mid window offsets in forward mode

* fix: compare maximum possible comment length against file size, not search region end

* feat: handle zip64 detection as a hint

* fix: detect oversized central directories when locating EOCD64

* fix: oopsie

---------

Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
Co-authored-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
This commit is contained in:
Richard Ivánek 2024-12-16 04:32:55 +01:00 committed by GitHub
parent 810d18a9a1
commit 33c71ccc80
Signed by: DevComp
GPG key ID: B5690EEEBB952194
6 changed files with 800 additions and 517 deletions

View file

@ -8,10 +8,7 @@ use crate::crc32::Crc32Reader;
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
use crate::read::zip_archive::{Shared, SharedBuilder};
use crate::result::{ZipError, ZipResult};
use crate::spec::{
self, FixedSizeBlock, Pod, Zip32CentralDirectoryEnd, Zip64CDELocatorBlock,
Zip64CentralDirectoryEnd, ZIP64_ENTRY_THR,
};
use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
use crate::types::{
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
ZipLocalEntryBlock,
@ -26,7 +23,6 @@ use std::mem;
use std::mem::size_of;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use std::sync::{Arc, OnceLock};
mod config;
@ -42,6 +38,8 @@ pub(crate) mod lzma;
#[cfg(feature = "xz")]
pub(crate) mod xz;
pub(crate) mod magic_finder;
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
pub(crate) mod zip_archive {
use indexmap::IndexMap;
@ -56,6 +54,8 @@ pub(crate) mod zip_archive {
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
pub(crate) comment: Box<[u8]>,
pub(crate) zip64_comment: Option<Box<[u8]>>,
}
#[derive(Debug)]
@ -69,7 +69,7 @@ pub(crate) mod zip_archive {
}
impl SharedBuilder {
pub fn build(self) -> Shared {
pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
let mut index_map = IndexMap::with_capacity(self.files.len());
self.files.into_iter().for_each(|file| {
index_map.insert(file.file_name.clone(), file);
@ -79,6 +79,8 @@ pub(crate) mod zip_archive {
offset: self.offset,
dir_start: self.dir_start,
config: self.config,
comment,
zip64_comment,
}
}
}
@ -108,7 +110,6 @@ pub(crate) mod zip_archive {
pub struct ZipArchive<R> {
pub(super) reader: R,
pub(super) shared: Arc<Shared>,
pub(super) comment: Arc<[u8]>,
}
}
@ -360,6 +361,7 @@ fn find_data_start(
block.file_name_length as u64 + block.extra_field_length as u64;
let data_start =
data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
// Set the value so we don't have to read it again.
match data.data_start.set(data_start) {
Ok(()) => (),
@ -369,6 +371,7 @@ fn find_data_start(
debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
}
}
Ok(data_start)
}
@ -434,17 +437,62 @@ pub(crate) fn make_reader(
pub(crate) struct CentralDirectoryInfo {
pub(crate) archive_offset: u64,
pub(crate) directory_start: u64,
pub(crate) cde_position: u64,
pub(crate) number_of_files: usize,
pub(crate) disk_number: u32,
pub(crate) disk_with_central_directory: u32,
pub(crate) is_zip64: bool,
}
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
    type Error = ZipError;

    /// Converts a located EOCD (and optional EOCD64) record into the internal
    /// central-directory description, validating the footer fields.
    fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
        // Prefer the ZIP64 record when present; it supersedes the (possibly
        // saturated) ZIP32 footer fields.
        let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
            match &value.eocd64 {
                Some(DataAndPosition { data: eocd64, .. }) => {
                    if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
                        return Err(InvalidArchive(
                            "ZIP64 footer indicates more files on this disk than in the whole archive",
                        ));
                    } else if eocd64.version_needed_to_extract > eocd64.version_made_by {
                        // NOTE: error message wording fixed ("a new version … than" was ungrammatical).
                        return Err(InvalidArchive(
                            "ZIP64 footer indicates a newer version is needed to extract this archive than the \
                            version that wrote it",
                        ));
                    }
                    (
                        eocd64.central_directory_offset,
                        eocd64.number_of_files as usize,
                        eocd64.disk_number,
                        eocd64.disk_with_central_directory,
                    )
                }
                _ => (
                    value.eocd.data.central_directory_offset as u64,
                    value.eocd.data.number_of_files_on_this_disk as usize,
                    value.eocd.data.disk_number as u32,
                    value.eocd.data.disk_with_central_directory as u32,
                ),
            };

        // The recorded offset is relative to the start of the archive data;
        // add the detected archive offset, guarding against overflow.
        let directory_start = relative_cd_offset
            .checked_add(value.archive_offset)
            .ok_or(InvalidArchive("Invalid central directory size or offset"))?;

        Ok(Self {
            archive_offset: value.archive_offset,
            directory_start,
            number_of_files,
            disk_number,
            disk_with_central_directory,
        })
    }
}
impl<R> ZipArchive<R> {
pub(crate) fn from_finalized_writer(
files: IndexMap<Box<str>, ZipFileData>,
comment: Box<[u8]>,
zip64_comment: Option<Box<[u8]>>,
reader: R,
central_start: u64,
) -> ZipResult<Self> {
@ -459,12 +507,10 @@ impl<R> ZipArchive<R> {
config: Config {
archive_offset: ArchiveOffset::Known(initial_offset),
},
comment,
zip64_comment,
});
Ok(Self {
reader,
shared,
comment: comment.into(),
})
Ok(Self { reader, shared })
}
/// Total size of the files in the archive, if it can be known. Doesn't include directories or
@ -549,264 +595,36 @@ impl<R: Read + Seek> ZipArchive<R> {
Ok(new_files)
}
fn get_directory_info_zip32(
config: &Config,
reader: &mut R,
footer: &Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
// Some zip files have data prepended to them, resulting in the
// offsets all being too small. Get the amount of error by comparing
// the actual file position we found the CDE at with the offset
// recorded in the CDE.
let mut offset = cde_start_pos
.checked_sub(footer.central_directory_size as u64)
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
.ok_or(InvalidArchive("Invalid central directory size or offset"))?;
if config.archive_offset == ArchiveOffset::Detect {
// Check whether the archive offset makes sense by peeking at the directory start. If it
// doesn't, fall back to using no archive offset. This supports zips with the central
// directory entries somewhere other than directly preceding the end of central directory.
reader.seek(SeekFrom::Start(
offset + footer.central_directory_offset as u64,
))?;
let mut buf = [0; 4];
reader.read_exact(&mut buf)?;
if spec::Magic::from_le_bytes(buf)
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
{
offset = 0;
}
}
offset
}
};
let directory_start = footer.central_directory_offset as u64 + archive_offset;
let number_of_files = footer.number_of_files_on_this_disk as usize;
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files,
disk_number: footer.disk_number as u32,
disk_with_central_directory: footer.disk_with_central_directory as u32,
cde_position: cde_start_pos,
is_zip64: false,
})
}
const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) {
if a > b {
(b, a)
} else {
(a, b)
}
}
fn get_directory_info_zip64(
config: &Config,
reader: &mut R,
cde_start_pos: u64,
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
// See if there's a ZIP64 footer. The ZIP64 locator if present will
// have its signature 20 bytes in front of the standard footer. The
// standard footer, in turn, is 22+N bytes large, where N is the
// comment length. Therefore:
reader.seek(SeekFrom::Start(
cde_start_pos
.checked_sub(size_of::<Zip64CDELocatorBlock>() as u64)
.ok_or(InvalidArchive(
"No room for ZIP64 locator before central directory end",
))?,
))?;
let locator64 = spec::Zip64CentralDirectoryEndLocator::parse(reader)?;
// We need to reassess `archive_offset`. We know where the ZIP64
// central-directory-end structure *should* be, but unfortunately we
// don't know how to precisely relate that location to our current
// actual offset in the file, since there may be junk at its
// beginning. Therefore we need to perform another search, as in
// read::Zip32CentralDirectoryEnd::find_and_parse, except now we search
// forward. There may be multiple results because of Zip64 central-directory signatures in
// ZIP comment data.
let search_upper_bound = cde_start_pos
.checked_sub(
(size_of::<Zip64CentralDirectoryEnd>()
+ size_of::<spec::Zip64CentralDirectoryEndLocator>()) as u64,
)
.ok_or(InvalidArchive(
"File cannot contain ZIP64 central directory end",
))?;
let (lower, upper) = Self::order_lower_upper_bounds(
locator64.end_of_central_directory_offset,
search_upper_bound,
);
let search_results = Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| {
let archive_offset = match config.archive_offset {
ArchiveOffset::Known(n) => n,
ArchiveOffset::FromCentralDirectory => archive_offset,
ArchiveOffset::Detect => {
archive_offset.checked_add(footer64.central_directory_offset)
.and_then(|start| {
// Check whether the archive offset makes sense by peeking at the directory start.
//
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
reader.seek(SeekFrom::Start(start)).ok()?;
let mut buf = [0; 4];
reader.read_exact(&mut buf).ok()?;
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
None
} else {
Some(archive_offset)
}
})
.unwrap_or(0)
}
};
let directory_start = footer64
.central_directory_offset
.checked_add(archive_offset)
.ok_or(InvalidArchive(
"Invalid central directory size or offset",
))?;
if directory_start > search_upper_bound {
Err(InvalidArchive(
"Invalid central directory size or offset",
))
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
Err(InvalidArchive(
"ZIP64 footer indicates more files on this disk than in the whole archive",
))
} else if footer64.version_needed_to_extract > footer64.version_made_by {
Err(InvalidArchive(
"ZIP64 footer indicates a new version is needed to extract this archive than the \
version that wrote it",
))
} else {
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files: footer64.number_of_files as usize,
disk_number: footer64.disk_number,
disk_with_central_directory: footer64.disk_with_central_directory,
cde_position: cde_start_pos,
is_zip64: true,
})
}
}).collect();
Ok(results)
}
/// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design.
pub(crate) fn get_metadata(
config: Config,
reader: &mut R,
) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> {
let mut invalid_errors_32 = Vec::new();
let mut unsupported_errors_32 = Vec::new();
let mut invalid_errors_64 = Vec::new();
let mut unsupported_errors_64 = Vec::new();
let mut ok_results = Vec::new();
let cde_locations = Zip32CentralDirectoryEnd::find_and_parse(reader)?;
cde_locations
.into_vec()
.into_iter()
.for_each(|(footer, cde_start_pos)| {
let zip32_result =
Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos);
Self::sort_result(
zip32_result,
&mut invalid_errors_32,
&mut unsupported_errors_32,
&mut ok_results,
&footer,
);
let mut inner_results = Vec::with_capacity(1);
// Check if file has a zip64 footer
let zip64_vec_result =
Self::get_directory_info_zip64(&config, reader, cde_start_pos);
Self::sort_result(
zip64_vec_result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut inner_results,
&(),
);
inner_results.into_iter().for_each(|(_, results)| {
results.into_iter().for_each(|result| {
Self::sort_result(
result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut ok_results,
&footer,
);
});
});
});
ok_results.sort_by_key(|(_, result)| {
(
u64::MAX - result.cde_position, // try the last one first
!result.is_zip64, // try ZIP64 first
)
});
let mut best_result = None;
for (footer, result) in ok_results {
let mut inner_result = Vec::with_capacity(1);
let is_zip64 = result.is_zip64;
Self::sort_result(
Self::read_central_header(result, config, reader),
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
},
if is_zip64 {
&mut unsupported_errors_64
} else {
&mut unsupported_errors_32
},
&mut inner_result,
&(),
);
if let Some((_, shared)) = inner_result.into_iter().next() {
if shared.files.len() == footer.number_of_files as usize
|| (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16)
{
best_result = Some((footer, shared));
break;
} else {
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
}
.push(InvalidArchive("wrong number of files"))
}
}
pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
// End of the probed region, initially set to the end of the file
let file_len = reader.seek(io::SeekFrom::End(0))?;
let mut end_exclusive = file_len;
loop {
// Find the EOCD and possibly EOCD64 entries and determine the archive offset.
let cde = spec::find_central_directory(
reader,
config.archive_offset,
end_exclusive,
file_len,
)?;
// Turn EOCD into internal representation.
let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
.and_then(|info| Self::read_central_header(info, config, reader))
else {
// The next EOCD candidate should start before the current one.
end_exclusive = cde.eocd.position;
continue;
};
return Ok(shared.build(
cde.eocd.data.zip_file_comment,
cde.eocd64.map(|v| v.data.extensible_data_sector),
));
}
let Some((footer, shared)) = best_result else {
return Err(unsupported_errors_32
.into_iter()
.chain(unsupported_errors_64)
.chain(invalid_errors_32)
.chain(invalid_errors_64)
.next()
.unwrap());
};
reader.seek(SeekFrom::Start(shared.dir_start))?;
Ok((Rc::try_unwrap(footer).unwrap(), shared.build()))
}
fn read_central_header(
@ -821,15 +639,22 @@ impl<R: Read + Seek> ZipArchive<R> {
} else {
dir_info.number_of_files
};
if dir_info.disk_number != dir_info.disk_with_central_directory {
return unsupported_zip_error("Support for multi-disk files is not implemented");
}
if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
return unsupported_zip_error("Oversized central directory");
}
let mut files = Vec::with_capacity(file_capacity);
reader.seek(SeekFrom::Start(dir_info.directory_start))?;
for _ in 0..dir_info.number_of_files {
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
let file = central_header_to_zip_file(reader, &dir_info)?;
files.push(file);
}
Ok(SharedBuilder {
files,
offset: dir_info.archive_offset,
@ -838,22 +663,6 @@ impl<R: Read + Seek> ZipArchive<R> {
})
}
fn sort_result<T, U: Clone>(
result: ZipResult<T>,
invalid_errors: &mut Vec<ZipError>,
unsupported_errors: &mut Vec<ZipError>,
ok_results: &mut Vec<(U, T)>,
footer: &U,
) {
match result {
Err(ZipError::UnsupportedArchive(e)) => {
unsupported_errors.push(ZipError::UnsupportedArchive(e))
}
Err(e) => invalid_errors.push(e),
Ok(o) => ok_results.push((footer.clone(), o)),
}
}
/// Returns the verification value and salt for the AES encryption of the file
///
/// It fails if the file number is invalid.
@ -902,15 +711,12 @@ impl<R: Read + Seek> ZipArchive<R> {
///
/// This uses the central directory record of the ZIP file, and ignores local file headers.
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
reader.seek(SeekFrom::Start(0))?;
if let Ok((footer, shared)) = Self::get_metadata(config, &mut reader) {
return Ok(ZipArchive {
reader,
shared: shared.into(),
comment: footer.zip_file_comment.into(),
});
}
Err(InvalidArchive("No valid central directory found"))
let shared = Self::get_metadata(config, &mut reader)?;
Ok(ZipArchive {
reader,
shared: shared.into(),
})
}
/// Extract a Zip archive into a directory, overwriting files if they
@ -1050,7 +856,12 @@ impl<R: Read + Seek> ZipArchive<R> {
/// Get the comment of the zip archive.
pub fn comment(&self) -> &[u8] {
&self.comment
&self.shared.comment
}
/// Get the ZIP64 comment of the zip archive, if it is ZIP64.
pub fn zip64_comment(&self) -> Option<&[u8]> {
self.shared.zip64_comment.as_deref()
}
/// Returns an iterator over all the file and directory names in this archive.
@ -1235,21 +1046,36 @@ const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
/// Parse a central directory entry to collect the information for the file.
///
/// On entry the reader must be positioned at the start of a central directory
/// file header; on success it is left positioned at the start of the next one.
pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
    reader: &mut R,
    central_directory: &CentralDirectoryInfo,
) -> ZipResult<ZipFileData> {
    // Remember where this central header starts so the inner parser can
    // record it alongside the entry.
    let central_header_start = reader.stream_position()?;

    // Parse central header
    let block = ZipCentralEntryBlock::parse(reader)?;

    let file = central_header_to_zip_file_inner(
        reader,
        central_directory.archive_offset,
        central_header_start,
        block,
    )?;

    // Position just past this entry (including its variable-length fields),
    // so it can be restored after probing the local header below.
    let central_header_end = reader.stream_position()?;

    // Sanity check: a local file header must precede the central directory.
    if file.header_start >= central_directory.directory_start {
        return Err(InvalidArchive(
            "A local file entry can't start after the central directory",
        ));
    }

    // find_data_start seeks to the local header; the computed data offset
    // must also lie before the central directory.
    let data_start = find_data_start(&file, reader)?;

    if data_start > central_directory.directory_start {
        return Err(InvalidArchive(
            "File data can't start after the central directory",
        ));
    }

    // Restore the reader so the caller can parse the next central header.
    reader.seek(SeekFrom::Start(central_header_end))?;

    Ok(file)
}

View file

@ -16,6 +16,7 @@ pub enum ArchiveOffset {
#[default]
Detect,
/// Use the central directory length and offset to determine the start of the archive.
#[deprecated(since = "2.3.0", note = "use `Detect` instead")]
FromCentralDirectory,
/// Specify a fixed archive offset.
Known(u64),

279
src/read/magic_finder.rs Normal file
View file

@ -0,0 +1,279 @@
use std::io::{Read, Seek, SeekFrom};
use memchr::memmem::{Finder, FinderRev};
use crate::result::ZipResult;
/// Abstracts the direction (forward or backward) in which a
/// [`MagicFinder`] scans a reader for a magic-byte needle.
pub trait FinderDirection<'a> {
    /// Creates a finder for the given needle.
    fn new(needle: &'a [u8]) -> Self;
    /// Returns the initial cursor position for the given `(start_inclusive,
    /// end_exclusive)` bounds and window size.
    fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
    /// Restricts `window` so an in-window search resumes relative to
    /// `mid_window_offset`; returns the sub-window and its offset within
    /// the original window.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);

    /// Returns the needle being searched for.
    fn needle(&self) -> &[u8];
    /// Searches `haystack` for the needle in this direction.
    fn find(&self, haystack: &[u8]) -> Option<usize>;
    /// Advances `cursor` to the next window start, or `None` when the
    /// bounds are exhausted.
    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
    /// Maps a match offset to the position from which a subsequent
    /// in-window search should resume.
    fn move_scope(&self, offset: usize) -> usize;
}
/// Forward-scanning direction: wraps [`memchr::memmem::Finder`] and walks
/// windows from the start of the bounds toward the end.
pub struct Forward<'a>(Finder<'a>);

impl<'a> FinderDirection<'a> for Forward<'a> {
    fn new(needle: &'a [u8]) -> Self {
        Self(Finder::new(needle))
    }

    // Forward searches begin at the inclusive lower bound.
    fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
        start_inclusive
    }

    // Resume searching *after* a previous in-window match: keep the tail
    // of the window starting at `mid_window_offset`.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
        (&window[mid_window_offset..], mid_window_offset)
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        self.0.find(haystack)
    }

    fn needle(&self) -> &[u8] {
        self.0.needle()
    }

    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
        // Overlap consecutive windows by `needle_len - 1` bytes so a match
        // straddling a window boundary is still found.
        let magic_overlap = self.needle().len().saturating_sub(1) as u64;
        let next = cursor.saturating_add(window_size as u64 - magic_overlap);

        if next >= bounds.1 {
            None
        } else {
            Some(next)
        }
    }

    // After a match, the in-window search continues just past the matched needle.
    fn move_scope(&self, offset: usize) -> usize {
        offset + self.needle().len()
    }
}
/// Backward-scanning direction: wraps [`memchr::memmem::FinderRev`] and walks
/// windows from the end of the bounds toward the start.
pub struct Backwards<'a>(FinderRev<'a>);

impl<'a> FinderDirection<'a> for Backwards<'a> {
    fn new(needle: &'a [u8]) -> Self {
        Self(FinderRev::new(needle))
    }

    // Backward searches begin one window before the exclusive upper bound,
    // clamped so the cursor never leaves the bounds.
    fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
        bounds
            .1
            .saturating_sub(window_size as u64)
            .clamp(bounds.0, bounds.1)
    }

    // Resume searching *before* a previous in-window match: keep the head
    // of the window up to `mid_window_offset`.
    fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
        (&window[..mid_window_offset], 0)
    }

    fn find(&self, haystack: &[u8]) -> Option<usize> {
        self.0.rfind(haystack)
    }

    fn needle(&self) -> &[u8] {
        self.0.needle()
    }

    fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
        // Overlap consecutive windows by `needle_len - 1` bytes so a match
        // straddling a window boundary is still found.
        let magic_overlap = self.needle().len().saturating_sub(1) as u64;

        if cursor <= bounds.0 {
            None
        } else {
            Some(
                cursor
                    .saturating_add(magic_overlap)
                    .saturating_sub(window_size as u64)
                    .clamp(bounds.0, bounds.1),
            )
        }
    }

    // After a match, the scoped sub-window already excludes the matched
    // position, so the resume offset is unchanged.
    fn move_scope(&self, offset: usize) -> usize {
        offset
    }
}
/// A utility for finding magic symbols from the end of a seekable reader.
///
/// Can be repurposed to recycle the internal buffer.
pub struct MagicFinder<Direction> {
    // Scratch window reused across reads (and across `repurpose` calls).
    buffer: Box<[u8]>,
    // Direction-specific needle searcher (forward or backward).
    pub(self) finder: Direction,
    // Absolute offset of the current window's start within the reader.
    cursor: u64,
    // When `Some`, the buffer still holds valid data from the last read and
    // the in-window search resumes from this offset; `None` forces a fresh
    // read on the next call to `next`.
    mid_buffer_offset: Option<usize>,
    // Search bounds as (start_inclusive, end_exclusive) offsets.
    bounds: (u64, u64),
}
impl<'a, T: FinderDirection<'a>> MagicFinder<T> {
    /// Create a new magic bytes finder to look within specific bounds.
    pub fn new(magic_bytes: &'a [u8], start_inclusive: u64, end_exclusive: u64) -> Self {
        const BUFFER_SIZE: usize = 2048;

        // Smaller buffer size would be unable to locate bytes.
        // Equal buffer size would stall (the window could not be moved).
        debug_assert!(BUFFER_SIZE >= magic_bytes.len());

        Self {
            buffer: vec![0; BUFFER_SIZE].into_boxed_slice(),
            finder: T::new(magic_bytes),
            cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
            mid_buffer_offset: None,
            bounds: (start_inclusive, end_exclusive),
        }
    }

    /// Repurpose the finder for different bytes or bounds.
    ///
    /// Reuses the existing buffer allocation.
    pub fn repurpose(&mut self, magic_bytes: &'a [u8], bounds: (u64, u64)) -> &mut Self {
        // The recycled buffer must still be able to hold the new needle.
        debug_assert!(self.buffer.len() >= magic_bytes.len());

        self.finder = T::new(magic_bytes);
        self.cursor = T::reset_cursor(bounds, self.buffer.len());
        self.bounds = bounds;

        // Reset the mid-buffer offset, to invalidate buffer content.
        self.mid_buffer_offset = None;

        self
    }

    /// Find the next magic bytes in the direction specified in the type.
    ///
    /// On a match, returns the absolute position of the magic bytes and
    /// leaves the reader seeked to it; returns `Ok(None)` once the bounds
    /// are exhausted.
    pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
        loop {
            if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
                // The finder is consumed
                break;
            }

            /* Position the window and ensure correct length */
            let window_start = self.cursor;
            let window_end = self
                .cursor
                .saturating_add(self.buffer.len() as u64)
                .min(self.bounds.1);

            if window_end <= window_start {
                // Short-circuit on zero-sized windows to prevent loop
                break;
            }

            let window = &mut self.buffer[..(window_end - window_start) as usize];

            // Only hit the reader when the buffer no longer holds valid data
            // for this window (i.e. we are not resuming a mid-window search).
            if self.mid_buffer_offset.is_none() {
                reader.seek(SeekFrom::Start(window_start))?;
                reader.read_exact(window)?;
            }

            // When resuming, restrict the window so previously yielded
            // matches are not reported again.
            let (window, window_start_offset) = match self.mid_buffer_offset {
                Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
                None => (&*window, 0usize),
            };

            if let Some(offset) = self.finder.find(window) {
                let magic_pos = window_start + window_start_offset as u64 + offset as u64;
                // Leave the reader positioned at the match for the caller.
                reader.seek(SeekFrom::Start(magic_pos))?;

                // Remember where to resume so repeated calls yield further
                // matches within the same window.
                self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));

                return Ok(Some(magic_pos));
            }

            // No match in this window; force a fresh read at the next cursor.
            self.mid_buffer_offset = None;

            match self
                .finder
                .move_cursor(self.cursor, self.bounds, self.buffer.len())
            {
                Some(new_cursor) => {
                    self.cursor = new_cursor;
                }
                None => {
                    // Destroy the finder when we've reached the end of the bounds.
                    self.bounds.0 = self.bounds.1;
                    break;
                }
            }
        }

        Ok(None)
    }
}
/// A magic bytes finder with an optimistic guess that is tried before
/// the inner finder begins searching from end. This enables much faster
/// lookup in files without appended junk, because the magic bytes will be
/// found directly.
///
/// The guess can be marked as mandatory to produce an error. This is useful
/// if the ArchiveOffset is known and auto-detection is not desired.
pub struct OptimisticMagicFinder<Direction> {
    // Fallback exhaustive finder, used when the guess fails or is absent.
    inner: MagicFinder<Direction>,
    // Optional `(position, mandatory)` guess checked before scanning.
    initial_guess: Option<(u64, bool)>,
}

/// This is a temporary restriction, to avoid heap allocation in
/// [`OptimisticMagicFinder::next`].
///
/// We only use magic bytes of size 4 at the moment.
const STACK_BUFFER_SIZE: usize = 8;
impl<'a, Direction: FinderDirection<'a>> OptimisticMagicFinder<Direction> {
    /// Create a new empty optimistic magic bytes finder.
    ///
    /// Useless until `repurpose`d with real bytes and bounds.
    pub fn new_empty() -> Self {
        Self {
            inner: MagicFinder::new(&[], 0, 0),
            initial_guess: None,
        }
    }

    /// Repurpose the finder for different bytes, bounds and initial guesses.
    pub fn repurpose(
        &mut self,
        magic_bytes: &'a [u8],
        bounds: (u64, u64),
        initial_guess: Option<(u64, bool)>,
    ) -> &mut Self {
        // The stack buffer used in `next` must be able to hold the whole needle.
        debug_assert!(magic_bytes.len() <= STACK_BUFFER_SIZE);

        self.inner.repurpose(magic_bytes, bounds);
        self.initial_guess = initial_guess;

        self
    }

    /// Equivalent to `next_back`, with an optional initial guess attempted before
    /// proceeding with reading from the back of the reader.
    pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
        if let Some((v, mandatory)) = self.initial_guess {
            reader.seek(SeekFrom::Start(v))?;

            let mut buffer = [0; STACK_BUFFER_SIZE];
            let buffer = &mut buffer[..self.inner.finder.needle().len()];

            // Attempt to match only if there's enough space for the needle
            if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
                reader.read_exact(buffer)?;

                // If a match is found, yield it.
                if self.inner.finder.needle() == buffer {
                    self.initial_guess.take();
                    reader.seek(SeekFrom::Start(v))?;
                    return Ok(Some(v));
                }
            }

            // If a match is not found but the guess was mandatory, stop here:
            // `None` tells the caller the magic bytes are not at the required
            // position, without falling back to a scan.
            if mandatory {
                return Ok(None);
            }

            // If the initial guess was not mandatory, remove it, as it was not found.
            self.initial_guess.take();
        }

        // No guess (or guess exhausted): fall back to the exhaustive scan.
        self.inner.next(reader)
    }
}

View file

@ -1,11 +1,11 @@
#![macro_use]
use crate::read::magic_finder::{Backwards, Forward, MagicFinder, OptimisticMagicFinder};
use crate::read::ArchiveOffset;
use crate::result::{ZipError, ZipResult};
use core::mem;
use memchr::memmem::FinderRev;
use std::io;
use std::io::prelude::*;
use std::rc::Rc;
use std::slice;
/// "Magic" header values used in the zip spec to locate metadata records.
@ -22,6 +22,7 @@ impl Magic {
}
#[inline(always)]
#[allow(dead_code)]
pub const fn from_le_bytes(bytes: [u8; 4]) -> Self {
Self(u32::from_le_bytes(bytes))
}
@ -289,7 +290,7 @@ pub(crate) struct Zip32CentralDirectoryEnd {
}
impl Zip32CentralDirectoryEnd {
fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> {
fn into_block_and_comment(self) -> (Zip32CDEBlock, Box<[u8]>) {
let Self {
disk_number,
disk_with_central_directory,
@ -307,12 +308,10 @@ impl Zip32CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
zip_file_comment_length: zip_file_comment
.len()
.try_into()
.map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?,
zip_file_comment_length: zip_file_comment.len() as u16,
};
Ok((block, zip_file_comment))
(block, zip_file_comment)
}
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
@ -329,7 +328,15 @@ impl Zip32CentralDirectoryEnd {
} = Zip32CDEBlock::parse(reader)?;
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice();
reader.read_exact(&mut zip_file_comment)?;
if let Err(e) = reader.read_exact(&mut zip_file_comment) {
if e.kind() == io::ErrorKind::UnexpectedEof {
return Err(ZipError::InvalidArchive(
"EOCD comment exceeds file boundary",
));
}
return Err(e.into());
}
Ok(Zip32CentralDirectoryEnd {
disk_number,
@ -342,99 +349,23 @@ impl Zip32CentralDirectoryEnd {
})
}
#[allow(clippy::type_complexity)]
pub fn find_and_parse<T: Read + Seek>(
reader: &mut T,
) -> ZipResult<Box<[(Rc<Zip32CentralDirectoryEnd>, u64)]>> {
let mut results = vec![];
let file_length = reader.seek(io::SeekFrom::End(0))?;
if file_length < mem::size_of::<Zip32CDEBlock>() as u64 {
return Err(ZipError::InvalidArchive("Invalid zip header"));
}
// The End Of Central Directory Record should be the last thing in
// the file and so searching the last 65557 bytes of the file should
// be enough. However, not all zips are well-formed and other
// programs may consume zips with extra junk at the end without
// error, so we go back 128K to be compatible with them. 128K is
// arbitrary, but it matches what Info-Zip does.
const EOCDR_SEARCH_SIZE: u64 = 128 * 1024;
let search_lower_bound = file_length.saturating_sub(EOCDR_SEARCH_SIZE);
const END_WINDOW_SIZE: usize = 8192;
/* TODO: use static_assertions!() */
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
let finder = FinderRev::new(&SIG_BYTES);
let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64);
let mut window = [0u8; END_WINDOW_SIZE];
while window_start >= search_lower_bound {
/* Go to the start of the window in the file. */
reader.seek(io::SeekFrom::Start(window_start))?;
/* Identify how many bytes to read (this may be less than the window size for files
* smaller than END_WINDOW_SIZE). */
let end = (window_start + END_WINDOW_SIZE as u64).min(file_length);
let cur_len = (end - window_start) as usize;
debug_assert!(cur_len > 0);
debug_assert!(cur_len <= END_WINDOW_SIZE);
let cur_window: &mut [u8] = &mut window[..cur_len];
/* Read the window into the bytes! */
reader.read_exact(cur_window)?;
/* Find instances of the magic signature. */
for offset in finder.rfind_iter(cur_window) {
let cde_start_pos = window_start + offset as u64;
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
/* Drop any headers that don't parse. */
if let Ok(cde) = Self::parse(reader) {
results.push((Rc::new(cde), cde_start_pos));
}
}
/* We always want to make sure we go allllll the way back to the start of the file if
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
* avoid infinite looping by checking at the end of the loop. */
if window_start == search_lower_bound {
break;
}
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
* overlap our nice neat window boundaries! */
window_start = (window_start
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
* by the width of the pattern to match. */
+ mem::size_of::<Magic>() as u64)
/* This should never happen, but make sure we don't go past the end of the file. */
.min(file_length);
window_start = window_start
.saturating_sub(
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
* once (unless limited by file_length). */
END_WINDOW_SIZE as u64,
)
/* This will never go below the value of `search_lower_bound`, so we have a special
* `if window_start == search_lower_bound` check above. */
.max(search_lower_bound);
}
if results.is_empty() {
Err(ZipError::InvalidArchive(
"Could not find central directory end",
))
} else {
Ok(results.into_boxed_slice())
}
}
/// Serializes the EOCD record: the fixed-size block first, then the comment.
///
/// Fails if the comment is too long to be represented by the 16-bit
/// `zip_file_comment_length` field.
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
    let (block, comment) = self.into_block_and_comment();

    // The length field is a u16; reject comments that would otherwise be
    // silently truncated by the `as u16` cast in `into_block_and_comment`.
    if comment.len() > u16::MAX as usize {
        return Err(ZipError::InvalidArchive(
            "EOCD comment length exceeds u16::MAX",
        ));
    }

    block.write(writer)?;
    writer.write_all(&comment)?;

    Ok(())
}
/// Returns `true` when the ZIP32 footer fields are saturated, hinting that
/// a ZIP64 end-of-central-directory record may be present.
pub fn may_be_zip64(&self) -> bool {
    self.number_of_files == u16::MAX || self.central_directory_offset == u32::MAX
}
}
#[derive(Copy, Clone)]
@ -551,6 +482,7 @@ impl FixedSizeBlock for Zip64CDEBlock {
}
pub(crate) struct Zip64CentralDirectoryEnd {
pub record_size: u64,
pub version_made_by: u16,
pub version_needed_to_extract: u16,
pub disk_number: u32,
@ -559,13 +491,13 @@ pub(crate) struct Zip64CentralDirectoryEnd {
pub number_of_files: u64,
pub central_directory_size: u64,
pub central_directory_offset: u64,
//pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
pub extensible_data_sector: Box<[u8]>,
}
impl Zip64CentralDirectoryEnd {
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEnd> {
pub fn parse<T: Read>(reader: &mut T, max_size: u64) -> ZipResult<Zip64CentralDirectoryEnd> {
let Zip64CDEBlock {
// record_size,
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -576,7 +508,20 @@ impl Zip64CentralDirectoryEnd {
central_directory_offset,
..
} = Zip64CDEBlock::parse(reader)?;
if record_size < 44 {
return Err(ZipError::InvalidArchive("Low EOCD64 record size"));
} else if record_size.saturating_add(12) > max_size {
return Err(ZipError::InvalidArchive(
"EOCD64 extends beyond EOCD64 locator",
));
}
let mut zip_file_comment = vec![0u8; record_size as usize - 44].into_boxed_slice();
reader.read_exact(&mut zip_file_comment)?;
Ok(Self {
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -585,94 +530,13 @@ impl Zip64CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
extensible_data_sector: zip_file_comment,
})
}
/// Scan backwards through `[search_lower_bound, search_upper_bound)` for
/// every ZIP64 end-of-central-directory signature, parsing a record at
/// each hit.
///
/// Returns each parsed record paired with its offset relative to
/// `search_lower_bound` (treated as the archive offset by callers).
///
/// # Errors
///
/// Fails if no signature is found anywhere in the range, or if parsing a
/// record at a found signature fails (see NOTE below).
pub fn find_and_parse<T: Read + Seek>(
    reader: &mut T,
    search_lower_bound: u64,
    search_upper_bound: u64,
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
    let mut results = Vec::new();
    // Fixed-size chunk scanned per iteration while walking backwards.
    const END_WINDOW_SIZE: usize = 2048;
    /* TODO: use static_assertions!() */
    debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
    const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    let finder = FinderRev::new(&SIG_BYTES);
    // Start with the window butted up against the upper end of the range.
    let mut window_start: u64 = search_upper_bound
        .saturating_sub(END_WINDOW_SIZE as u64)
        .max(search_lower_bound);
    let mut window = [0u8; END_WINDOW_SIZE];
    while window_start >= search_lower_bound {
        reader.seek(io::SeekFrom::Start(window_start))?;
        /* Identify how many bytes to read (this may be less than the window size for files
         * smaller than END_WINDOW_SIZE). */
        let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound);
        debug_assert!(end >= window_start);
        let cur_len = (end - window_start) as usize;
        if cur_len == 0 {
            break;
        }
        debug_assert!(cur_len <= END_WINDOW_SIZE);
        let cur_window: &mut [u8] = &mut window[..cur_len];
        /* Read the window into the bytes! */
        reader.read_exact(cur_window)?;
        /* Find instances of the magic signature. */
        for offset in finder.rfind_iter(cur_window) {
            let cde_start_pos = window_start + offset as u64;
            reader.seek(io::SeekFrom::Start(cde_start_pos))?;
            debug_assert!(cde_start_pos >= search_lower_bound);
            let archive_offset = cde_start_pos - search_lower_bound;
            // NOTE(review): a parse failure on any candidate aborts the whole
            // search via `?` instead of trying earlier candidates — confirm
            // this is intended rather than `continue`-on-error.
            let cde = Self::parse(reader)?;
            results.push((cde, archive_offset));
        }
        /* We always want to make sure we go allllll the way back to the start of the file if
         * we can't find it elsewhere. However, our `while` condition doesn't check that. So we
         * avoid infinite looping by checking at the end of the loop. */
        if window_start == search_lower_bound {
            break;
        }
        /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
         * overlap our nice neat window boundaries! */
        window_start = (window_start
            /* NB: To catch matches across window boundaries, we need to make our blocks overlap
             * by the width of the pattern to match. */
            + mem::size_of::<Magic>() as u64)
            /* This may never happen, but make sure we don't go past the end of the specified
             * range. */
            .min(search_upper_bound);
        window_start = window_start
            .saturating_sub(
                /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
                 * once (unless limited by search_upper_bound). */
                END_WINDOW_SIZE as u64,
            )
            /* This will never go below the value of `search_lower_bound`, so we have a special
             * `if window_start == search_lower_bound` check above. */
            .max(search_lower_bound);
    }
    if results.is_empty() {
        Err(ZipError::InvalidArchive(
            "Could not find ZIP64 central directory end",
        ))
    } else {
        Ok(results)
    }
}
pub fn block(self) -> Zip64CDEBlock {
pub fn into_block_and_comment(self) -> (Zip64CDEBlock, Box<[u8]>) {
let Self {
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
@ -681,27 +545,277 @@ impl Zip64CentralDirectoryEnd {
number_of_files,
central_directory_size,
central_directory_offset,
extensible_data_sector,
} = self;
Zip64CDEBlock {
magic: Zip64CDEBlock::MAGIC,
/* currently unused */
record_size: 44,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
}
(
Zip64CDEBlock {
magic: Zip64CDEBlock::MAGIC,
record_size,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
},
extensible_data_sector,
)
}
/// Serialize this ZIP64 end-of-central-directory record into `writer`:
/// first the fixed-size EOCD64 block, then the variable-length
/// extensible data sector that follows it on disk.
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
    let (block, comment) = self.into_block_and_comment();
    block.write(writer)?;
    writer.write_all(&comment)?;
    Ok(())
}
}
/// A parsed value paired with the absolute stream position it was read from.
pub(crate) struct DataAndPosition<T> {
    pub data: T,
    #[allow(dead_code)]
    pub position: u64,
}

impl<T> From<(T, u64)> for DataAndPosition<T> {
    fn from((data, position): (T, u64)) -> Self {
        Self { data, position }
    }
}
/// Everything learned while locating the central directory: the mandatory
/// zip32 EOCD, the optional ZIP64 EOCD, and the computed archive offset.
pub(crate) struct CentralDirectoryEndInfo {
    // The zip32 EOCD record together with the position it was found at.
    pub eocd: DataAndPosition<Zip32CentralDirectoryEnd>,
    // The ZIP64 EOCD record, present only when detected and validated.
    pub eocd64: Option<DataAndPosition<Zip64CentralDirectoryEnd>>,
    // Number of bytes of non-archive data preceding the archive proper.
    pub archive_offset: u64,
}
/// Finds the EOCD and possibly the EOCD64 block and determines the archive offset.
///
/// Iterates backwards over every zip32 EOCD signature candidate; each one is
/// cross-checked against the rest of the archive (the first CDFH for zip32, or
/// the EOCD64 locator/record chain when the zip32 counters are saturated)
/// before being accepted. If every candidate fails, the most relevant parse
/// error encountered is returned.
///
/// In the best case scenario (no prepended junk), this function will not backtrack
/// in the reader.
pub(crate) fn find_central_directory<R: Read + Seek>(
    reader: &mut R,
    archive_offset: ArchiveOffset,
    end_exclusive: u64,
    file_len: u64,
) -> ZipResult<CentralDirectoryEndInfo> {
    const EOCD_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    const EOCD64_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
    const CDFH_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE.to_le_bytes();
    // Instantiate the mandatory finder
    let mut eocd_finder = MagicFinder::<Backwards<'static>>::new(&EOCD_SIG_BYTES, 0, end_exclusive);
    // Lazily-created forward finder, reused (repurposed) for the CDFH and
    // EOCD64 sub-searches below.
    let mut subfinder: Option<OptimisticMagicFinder<Forward<'static>>> = None;
    // Keep the last errors for cases of improper EOCD instances.
    let mut parsing_error = None;
    while let Some(eocd_offset) = eocd_finder.next(reader)? {
        // Attempt to parse the EOCD block
        let eocd = match Zip32CentralDirectoryEnd::parse(reader) {
            Ok(eocd) => eocd,
            Err(e) => {
                // Remember only the first parse error; it is the most likely
                // to describe the "real" (outermost) EOCD candidate.
                if parsing_error.is_none() {
                    parsing_error = Some(e);
                }
                continue;
            }
        };
        // ! Relaxed (inequality) due to garbage-after-comment Python files
        // Consistency check: the EOCD comment must terminate before the end of file
        // (22 is the fixed size of the zip32 EOCD block itself).
        if eocd.zip_file_comment.len() as u64 + eocd_offset + 22 > file_len {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD comment length"));
            continue;
        }
        let zip64_metadata = if eocd.may_be_zip64() {
            fn try_read_eocd64_locator(
                reader: &mut (impl Read + Seek),
                eocd_offset: u64,
            ) -> ZipResult<(u64, Zip64CentralDirectoryEndLocator)> {
                if eocd_offset < mem::size_of::<Zip64CDELocatorBlock>() as u64 {
                    return Err(ZipError::InvalidArchive(
                        "EOCD64 Locator does not fit in file",
                    ));
                }
                // The locator, if present, sits immediately before the EOCD.
                let locator64_offset = eocd_offset - mem::size_of::<Zip64CDELocatorBlock>() as u64;
                reader.seek(io::SeekFrom::Start(locator64_offset))?;
                Ok((
                    locator64_offset,
                    Zip64CentralDirectoryEndLocator::parse(reader)?,
                ))
            }
            // ZIP64 is only a hint: a missing/broken locator falls back to zip32.
            try_read_eocd64_locator(reader, eocd_offset).ok()
        } else {
            None
        };
        let Some((locator64_offset, locator64)) = zip64_metadata else {
            // Branch out for zip32
            let relative_cd_offset = eocd.central_directory_offset as u64;
            // If the archive is empty, there is nothing more to be checked, the archive is correct.
            if eocd.number_of_files == 0 {
                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset: eocd_offset.saturating_sub(relative_cd_offset),
                });
            }
            // Consistency check: the CD relative offset cannot be after the EOCD
            if relative_cd_offset >= eocd_offset {
                parsing_error = Some(ZipError::InvalidArchive("Invalid CDFH offset in EOCD"));
                continue;
            }
            // Attempt to find the first CDFH
            let subfinder = subfinder
                .get_or_insert_with(OptimisticMagicFinder::new_empty)
                .repurpose(
                    &CDFH_SIG_BYTES,
                    // The CDFH must be before the EOCD and after the relative offset,
                    // because prepended junk can only move it forward.
                    (relative_cd_offset, eocd_offset),
                    match archive_offset {
                        // A known offset gives an exact first guess (mandatory hit);
                        // otherwise start at the relative offset and scan forward.
                        ArchiveOffset::Known(n) => {
                            Some((relative_cd_offset.saturating_add(n).min(eocd_offset), true))
                        }
                        _ => Some((relative_cd_offset, false)),
                    },
                );
            // Consistency check: find the first CDFH
            if let Some(cd_offset) = subfinder.next(reader)? {
                // The first CDFH will define the archive offset
                let archive_offset = cd_offset - relative_cd_offset;
                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset,
                });
            }
            parsing_error = Some(ZipError::InvalidArchive("No CDFH found"));
            continue;
        };
        // Consistency check: the EOCD64 offset must be before the EOCD64 Locator offset
        if locator64.end_of_central_directory_offset >= locator64_offset {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD64 Locator CD offset"));
            continue;
        }
        if locator64.number_of_disks > 1 {
            parsing_error = Some(ZipError::InvalidArchive(
                "Multi-disk ZIP files are not supported",
            ));
            continue;
        }
        // This was hidden inside a function to collect errors in a single place.
        // Once try blocks are stabilized, this can go away.
        fn try_read_eocd64<R: Read + Seek>(
            reader: &mut R,
            locator64: &Zip64CentralDirectoryEndLocator,
            expected_length: u64,
        ) -> ZipResult<Zip64CentralDirectoryEnd> {
            let z64 = Zip64CentralDirectoryEnd::parse(reader, expected_length)?;
            // Consistency check: EOCD64 locator should agree with the EOCD64
            if z64.disk_with_central_directory != locator64.disk_with_central_directory {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistency with Locator data",
                ));
            }
            // Consistency check: the EOCD64 must have the expected length
            // (record_size excludes the 12 leading bytes of signature + size field).
            if z64.record_size + 12 != expected_length {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistent length",
                ));
            }
            Ok(z64)
        }
        // Attempt to find the EOCD64 with an initial guess
        let subfinder = subfinder
            .get_or_insert_with(OptimisticMagicFinder::new_empty)
            .repurpose(
                &EOCD64_SIG_BYTES,
                (locator64.end_of_central_directory_offset, locator64_offset),
                match archive_offset {
                    ArchiveOffset::Known(n) => Some((
                        locator64
                            .end_of_central_directory_offset
                            .saturating_add(n)
                            .min(locator64_offset),
                        true,
                    )),
                    _ => Some((locator64.end_of_central_directory_offset, false)),
                },
            );
        // Consistency check: Find the EOCD64
        let mut local_error = None;
        while let Some(eocd64_offset) = subfinder.next(reader)? {
            // Offsets returned by the finder never precede the search range start,
            // so this subtraction cannot underflow.
            let archive_offset = eocd64_offset - locator64.end_of_central_directory_offset;
            match try_read_eocd64(
                reader,
                &locator64,
                locator64_offset.saturating_sub(eocd64_offset),
            ) {
                Ok(eocd64) => {
                    // Reject candidates whose claimed central directory could not
                    // possibly fit before the EOCD64 (oversized CD detection).
                    if eocd64_offset
                        < eocd64
                            .number_of_files
                            .saturating_mul(
                                mem::size_of::<crate::types::ZipCentralEntryBlock>() as u64
                            )
                            .saturating_add(eocd64.central_directory_offset)
                    {
                        local_error = Some(ZipError::InvalidArchive(
                            "Invalid EOCD64: inconsistent number of files",
                        ));
                        continue;
                    }
                    return Ok(CentralDirectoryEndInfo {
                        eocd: (eocd, eocd_offset).into(),
                        eocd64: Some((eocd64, eocd64_offset).into()),
                        archive_offset,
                    });
                }
                Err(e) => {
                    local_error = Some(e);
                }
            }
        }
        parsing_error = local_error.or(Some(ZipError::InvalidArchive("Could not find EOCD64")));
    }
    Err(parsing_error.unwrap_or(ZipError::InvalidArchive("Could not find EOCD")))
}
pub(crate) fn is_dir(filename: &str) -> bool {
filename
.chars()

View file

@ -160,6 +160,7 @@ pub(crate) mod zip_writer {
pub(super) writing_to_file: bool,
pub(super) writing_raw: bool,
pub(super) comment: Box<[u8]>,
pub(super) zip64_comment: Option<Box<[u8]>>,
pub(super) flush_on_finish_file: bool,
}
@ -628,19 +629,19 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
/// This uses the given read configuration to initially read the archive.
///
/// Reads the existing central directory via `ZipArchive::get_metadata`, then
/// builds a writer that appends to the archive, preserving existing entries
/// and both the zip32 and ZIP64 archive comments.
///
/// # Errors
///
/// Propagates any error from reading the archive metadata (e.g. when no
/// end-of-central-directory record can be found).
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
    readwriter.seek(SeekFrom::Start(0))?;
    // Propagate the underlying metadata error directly instead of collapsing
    // every failure into a generic "no central directory" message.
    let shared = ZipArchive::get_metadata(config, &mut readwriter)?;
    Ok(ZipWriter {
        inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
        files: shared.files,
        stats: Default::default(),
        writing_to_file: false,
        comment: shared.comment,
        zip64_comment: shared.zip64_comment,
        writing_raw: true, // avoid recomputing the last file's header
        flush_on_finish_file: false,
    })
}
/// `flush_on_finish_file` is designed to support a streaming `inner` that may unload flushed
@ -774,8 +775,11 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
let central_start = self.finalize()?;
let inner = mem::replace(&mut self.inner, Closed).unwrap();
let comment = mem::take(&mut self.comment);
let zip64_comment = mem::take(&mut self.zip64_comment);
let files = mem::take(&mut self.files);
let archive = ZipArchive::from_finalized_writer(files, comment, inner, central_start)?;
let archive =
ZipArchive::from_finalized_writer(files, comment, zip64_comment, inner, central_start)?;
Ok(archive)
}
}
@ -794,6 +798,7 @@ impl<W: Write + Seek> ZipWriter<W> {
writing_to_file: false,
writing_raw: false,
comment: Box::new([]),
zip64_comment: None,
flush_on_finish_file: false,
}
}
@ -832,6 +837,35 @@ impl<W: Write + Seek> ZipWriter<W> {
&self.comment
}
/// Set ZIP64 archive comment.
pub fn set_zip64_comment<S>(&mut self, comment: Option<S>)
where
    S: Into<Box<str>>,
{
    // Convert the string-ish comment into raw bytes and delegate.
    let raw = comment.map(|c| c.into().into_boxed_bytes());
    self.set_raw_zip64_comment(raw);
}
/// Set ZIP64 archive comment.
///
/// This sets the raw bytes of the comment. The comment
/// is typically expected to be encoded in UTF-8.
///
/// Setting `Some(..)` forces a ZIP64 end-of-central-directory record to be
/// written when the archive is finished; `None` removes the ZIP64 comment.
pub fn set_raw_zip64_comment(&mut self, comment: Option<Box<[u8]>>) {
    self.zip64_comment = comment;
}
/// Get ZIP64 archive comment.
///
/// Returns `None` when no ZIP64 comment is set; otherwise the comment bytes
/// interpreted as UTF-8 (`Err` if the bytes are not valid UTF-8).
pub fn get_zip64_comment(&mut self) -> Option<Result<&str, Utf8Error>> {
    self.get_raw_zip64_comment().map(from_utf8)
}
/// Get ZIP64 archive comment.
///
/// This returns the raw bytes of the comment. The comment
/// is typically expected to be encoded in UTF-8.
pub fn get_raw_zip64_comment(&self) -> Option<&[u8]> {
    self.zip64_comment.as_deref()
}
/// Set the file length and crc32 manually.
///
/// # Safety
@ -1516,11 +1550,15 @@ impl<W: Write + Seek> ZipWriter<W> {
version_needed = version_needed.max(file.version_needed());
}
let central_size = writer.stream_position()? - central_start;
if self.files.len() > spec::ZIP64_ENTRY_THR
let is64 = self.files.len() > spec::ZIP64_ENTRY_THR
|| central_size.max(central_start) > spec::ZIP64_BYTES_THR
{
|| self.zip64_comment.is_some();
if is64 {
let comment = self.zip64_comment.clone().unwrap_or_default();
let zip64_footer = spec::Zip64CentralDirectoryEnd {
record_size: comment.len() as u64 + 44,
version_made_by: version_needed,
version_needed_to_extract: version_needed,
disk_number: 0,
@ -1529,6 +1567,7 @@ impl<W: Write + Seek> ZipWriter<W> {
number_of_files: self.files.len() as u64,
central_directory_size: central_size,
central_directory_offset: central_start,
extensible_data_sector: comment,
};
zip64_footer.write(writer)?;

View file

@ -0,0 +1,24 @@
use std::io::Cursor;
use zip::ZipArchive;
#[test]
fn test_prepended_garbage() {
    // Prepend four junk bytes to a known-good archive: EOCD detection must
    // still locate the central directory and report the correct entry count.
    let mut data = vec![0, 1, 2, 3];
    data.extend_from_slice(include_bytes!("../tests/data/extended_timestamp.zip"));

    let mut archive = ZipArchive::new(Cursor::new(data)).expect("couldn't open test zip file");
    assert_eq!(2, archive.len());

    // Every entry must still be readable by index with a valid enclosed name.
    for index in 0..archive.len() {
        let entry = archive.by_index(index).unwrap();
        let name = entry.enclosed_name().unwrap();
        println!(
            "Entry {} has name \"{}\" ({} bytes)",
            index,
            name.display(),
            entry.size()
        );
    }
}