fix: rewrite the EOCD/EOCD64 detection to fix extreme performance regression (#247)
* fix: resolve clippy warning in nightly
* wip: major rework of cde location
* wip: rework CDE lookup
* refactor: magic finder, eocd lookup retry
* wip: handle empty zips
* fix: satisfy tests, add documentation
* chore: remove unused dependencies
* feat: support both zip32 and zip64 comments
* feat: add zip64 comment functions to ZipWriter
* fix: first pass on maintainer comments
* fix: continue searching for EOCD when the central directory is invalid
* chore: satisfy clippy lints
* chore: satisfy style_and_docs
* feat: support both directions in MagicFinder, correctly find first CDFH
* fix: more checks to EOCD parsing, move comment size error from parse to write
* fix: use saturating add when checking eocd64 record_size upper bound
* fix: correctly handle mid window offsets in forward mode
* fix: compare maximum possible comment length against file size, not search region end
* feat: handle zip64 detection as a hint
* fix: detect oversized central directories when locating EOCD64
* fix: oopsie

---------

Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
Co-authored-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com>
This commit is contained in:
parent
810d18a9a1
commit
33c71ccc80
6 changed files with 800 additions and 517 deletions
430
src/read.rs
430
src/read.rs
|
@ -8,10 +8,7 @@ use crate::crc32::Crc32Reader;
|
||||||
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
|
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
|
||||||
use crate::read::zip_archive::{Shared, SharedBuilder};
|
use crate::read::zip_archive::{Shared, SharedBuilder};
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use crate::spec::{
|
use crate::spec::{self, CentralDirectoryEndInfo, DataAndPosition, FixedSizeBlock, Pod};
|
||||||
self, FixedSizeBlock, Pod, Zip32CentralDirectoryEnd, Zip64CDELocatorBlock,
|
|
||||||
Zip64CentralDirectoryEnd, ZIP64_ENTRY_THR,
|
|
||||||
};
|
|
||||||
use crate::types::{
|
use crate::types::{
|
||||||
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
|
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
|
||||||
ZipLocalEntryBlock,
|
ZipLocalEntryBlock,
|
||||||
|
@ -26,7 +23,6 @@ use std::mem;
|
||||||
use std::mem::size_of;
|
use std::mem::size_of;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::rc::Rc;
|
|
||||||
use std::sync::{Arc, OnceLock};
|
use std::sync::{Arc, OnceLock};
|
||||||
|
|
||||||
mod config;
|
mod config;
|
||||||
|
@ -42,6 +38,8 @@ pub(crate) mod lzma;
|
||||||
#[cfg(feature = "xz")]
|
#[cfg(feature = "xz")]
|
||||||
pub(crate) mod xz;
|
pub(crate) mod xz;
|
||||||
|
|
||||||
|
pub(crate) mod magic_finder;
|
||||||
|
|
||||||
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
|
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
|
||||||
pub(crate) mod zip_archive {
|
pub(crate) mod zip_archive {
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
|
@ -56,6 +54,8 @@ pub(crate) mod zip_archive {
|
||||||
// This isn't yet used anywhere, but it is here for use cases in the future.
|
// This isn't yet used anywhere, but it is here for use cases in the future.
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub(super) config: super::Config,
|
pub(super) config: super::Config,
|
||||||
|
pub(crate) comment: Box<[u8]>,
|
||||||
|
pub(crate) zip64_comment: Option<Box<[u8]>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -69,7 +69,7 @@ pub(crate) mod zip_archive {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SharedBuilder {
|
impl SharedBuilder {
|
||||||
pub fn build(self) -> Shared {
|
pub fn build(self, comment: Box<[u8]>, zip64_comment: Option<Box<[u8]>>) -> Shared {
|
||||||
let mut index_map = IndexMap::with_capacity(self.files.len());
|
let mut index_map = IndexMap::with_capacity(self.files.len());
|
||||||
self.files.into_iter().for_each(|file| {
|
self.files.into_iter().for_each(|file| {
|
||||||
index_map.insert(file.file_name.clone(), file);
|
index_map.insert(file.file_name.clone(), file);
|
||||||
|
@ -79,6 +79,8 @@ pub(crate) mod zip_archive {
|
||||||
offset: self.offset,
|
offset: self.offset,
|
||||||
dir_start: self.dir_start,
|
dir_start: self.dir_start,
|
||||||
config: self.config,
|
config: self.config,
|
||||||
|
comment,
|
||||||
|
zip64_comment,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -108,7 +110,6 @@ pub(crate) mod zip_archive {
|
||||||
pub struct ZipArchive<R> {
|
pub struct ZipArchive<R> {
|
||||||
pub(super) reader: R,
|
pub(super) reader: R,
|
||||||
pub(super) shared: Arc<Shared>,
|
pub(super) shared: Arc<Shared>,
|
||||||
pub(super) comment: Arc<[u8]>,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -360,6 +361,7 @@ fn find_data_start(
|
||||||
block.file_name_length as u64 + block.extra_field_length as u64;
|
block.file_name_length as u64 + block.extra_field_length as u64;
|
||||||
let data_start =
|
let data_start =
|
||||||
data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
|
data.header_start + size_of::<ZipLocalEntryBlock>() as u64 + variable_fields_len;
|
||||||
|
|
||||||
// Set the value so we don't have to read it again.
|
// Set the value so we don't have to read it again.
|
||||||
match data.data_start.set(data_start) {
|
match data.data_start.set(data_start) {
|
||||||
Ok(()) => (),
|
Ok(()) => (),
|
||||||
|
@ -369,6 +371,7 @@ fn find_data_start(
|
||||||
debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
|
debug_assert_eq!(*data.data_start.get().unwrap(), data_start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(data_start)
|
Ok(data_start)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -434,17 +437,62 @@ pub(crate) fn make_reader(
|
||||||
pub(crate) struct CentralDirectoryInfo {
|
pub(crate) struct CentralDirectoryInfo {
|
||||||
pub(crate) archive_offset: u64,
|
pub(crate) archive_offset: u64,
|
||||||
pub(crate) directory_start: u64,
|
pub(crate) directory_start: u64,
|
||||||
pub(crate) cde_position: u64,
|
|
||||||
pub(crate) number_of_files: usize,
|
pub(crate) number_of_files: usize,
|
||||||
pub(crate) disk_number: u32,
|
pub(crate) disk_number: u32,
|
||||||
pub(crate) disk_with_central_directory: u32,
|
pub(crate) disk_with_central_directory: u32,
|
||||||
pub(crate) is_zip64: bool,
|
}
|
||||||
|
|
||||||
|
impl<'a> TryFrom<&'a CentralDirectoryEndInfo> for CentralDirectoryInfo {
|
||||||
|
type Error = ZipError;
|
||||||
|
|
||||||
|
fn try_from(value: &'a CentralDirectoryEndInfo) -> Result<Self, Self::Error> {
|
||||||
|
let (relative_cd_offset, number_of_files, disk_number, disk_with_central_directory) =
|
||||||
|
match &value.eocd64 {
|
||||||
|
Some(DataAndPosition { data: eocd64, .. }) => {
|
||||||
|
if eocd64.number_of_files_on_this_disk > eocd64.number_of_files {
|
||||||
|
return Err(InvalidArchive(
|
||||||
|
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
||||||
|
));
|
||||||
|
} else if eocd64.version_needed_to_extract > eocd64.version_made_by {
|
||||||
|
return Err(InvalidArchive(
|
||||||
|
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
||||||
|
version that wrote it",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
(
|
||||||
|
eocd64.central_directory_offset,
|
||||||
|
eocd64.number_of_files as usize,
|
||||||
|
eocd64.disk_number,
|
||||||
|
eocd64.disk_with_central_directory,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
_ => (
|
||||||
|
value.eocd.data.central_directory_offset as u64,
|
||||||
|
value.eocd.data.number_of_files_on_this_disk as usize,
|
||||||
|
value.eocd.data.disk_number as u32,
|
||||||
|
value.eocd.data.disk_with_central_directory as u32,
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let directory_start = relative_cd_offset
|
||||||
|
.checked_add(value.archive_offset)
|
||||||
|
.ok_or(InvalidArchive("Invalid central directory size or offset"))?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
archive_offset: value.archive_offset,
|
||||||
|
directory_start,
|
||||||
|
number_of_files,
|
||||||
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R> ZipArchive<R> {
|
impl<R> ZipArchive<R> {
|
||||||
pub(crate) fn from_finalized_writer(
|
pub(crate) fn from_finalized_writer(
|
||||||
files: IndexMap<Box<str>, ZipFileData>,
|
files: IndexMap<Box<str>, ZipFileData>,
|
||||||
comment: Box<[u8]>,
|
comment: Box<[u8]>,
|
||||||
|
zip64_comment: Option<Box<[u8]>>,
|
||||||
reader: R,
|
reader: R,
|
||||||
central_start: u64,
|
central_start: u64,
|
||||||
) -> ZipResult<Self> {
|
) -> ZipResult<Self> {
|
||||||
|
@ -459,12 +507,10 @@ impl<R> ZipArchive<R> {
|
||||||
config: Config {
|
config: Config {
|
||||||
archive_offset: ArchiveOffset::Known(initial_offset),
|
archive_offset: ArchiveOffset::Known(initial_offset),
|
||||||
},
|
},
|
||||||
|
comment,
|
||||||
|
zip64_comment,
|
||||||
});
|
});
|
||||||
Ok(Self {
|
Ok(Self { reader, shared })
|
||||||
reader,
|
|
||||||
shared,
|
|
||||||
comment: comment.into(),
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Total size of the files in the archive, if it can be known. Doesn't include directories or
|
/// Total size of the files in the archive, if it can be known. Doesn't include directories or
|
||||||
|
@ -549,264 +595,36 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
Ok(new_files)
|
Ok(new_files)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip32(
|
|
||||||
config: &Config,
|
|
||||||
reader: &mut R,
|
|
||||||
footer: &Zip32CentralDirectoryEnd,
|
|
||||||
cde_start_pos: u64,
|
|
||||||
) -> ZipResult<CentralDirectoryInfo> {
|
|
||||||
let archive_offset = match config.archive_offset {
|
|
||||||
ArchiveOffset::Known(n) => n,
|
|
||||||
ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => {
|
|
||||||
// Some zip files have data prepended to them, resulting in the
|
|
||||||
// offsets all being too small. Get the amount of error by comparing
|
|
||||||
// the actual file position we found the CDE at with the offset
|
|
||||||
// recorded in the CDE.
|
|
||||||
let mut offset = cde_start_pos
|
|
||||||
.checked_sub(footer.central_directory_size as u64)
|
|
||||||
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
|
|
||||||
.ok_or(InvalidArchive("Invalid central directory size or offset"))?;
|
|
||||||
|
|
||||||
if config.archive_offset == ArchiveOffset::Detect {
|
|
||||||
// Check whether the archive offset makes sense by peeking at the directory start. If it
|
|
||||||
// doesn't, fall back to using no archive offset. This supports zips with the central
|
|
||||||
// directory entries somewhere other than directly preceding the end of central directory.
|
|
||||||
reader.seek(SeekFrom::Start(
|
|
||||||
offset + footer.central_directory_offset as u64,
|
|
||||||
))?;
|
|
||||||
let mut buf = [0; 4];
|
|
||||||
reader.read_exact(&mut buf)?;
|
|
||||||
if spec::Magic::from_le_bytes(buf)
|
|
||||||
!= spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE
|
|
||||||
{
|
|
||||||
offset = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
offset
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
|
||||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
|
||||||
Ok(CentralDirectoryInfo {
|
|
||||||
archive_offset,
|
|
||||||
directory_start,
|
|
||||||
number_of_files,
|
|
||||||
disk_number: footer.disk_number as u32,
|
|
||||||
disk_with_central_directory: footer.disk_with_central_directory as u32,
|
|
||||||
cde_position: cde_start_pos,
|
|
||||||
is_zip64: false,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) {
|
|
||||||
if a > b {
|
|
||||||
(b, a)
|
|
||||||
} else {
|
|
||||||
(a, b)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_directory_info_zip64(
|
|
||||||
config: &Config,
|
|
||||||
reader: &mut R,
|
|
||||||
cde_start_pos: u64,
|
|
||||||
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
|
|
||||||
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
|
||||||
// have its signature 20 bytes in front of the standard footer. The
|
|
||||||
// standard footer, in turn, is 22+N bytes large, where N is the
|
|
||||||
// comment length. Therefore:
|
|
||||||
reader.seek(SeekFrom::Start(
|
|
||||||
cde_start_pos
|
|
||||||
.checked_sub(size_of::<Zip64CDELocatorBlock>() as u64)
|
|
||||||
.ok_or(InvalidArchive(
|
|
||||||
"No room for ZIP64 locator before central directory end",
|
|
||||||
))?,
|
|
||||||
))?;
|
|
||||||
let locator64 = spec::Zip64CentralDirectoryEndLocator::parse(reader)?;
|
|
||||||
|
|
||||||
// We need to reassess `archive_offset`. We know where the ZIP64
|
|
||||||
// central-directory-end structure *should* be, but unfortunately we
|
|
||||||
// don't know how to precisely relate that location to our current
|
|
||||||
// actual offset in the file, since there may be junk at its
|
|
||||||
// beginning. Therefore we need to perform another search, as in
|
|
||||||
// read::Zip32CentralDirectoryEnd::find_and_parse, except now we search
|
|
||||||
// forward. There may be multiple results because of Zip64 central-directory signatures in
|
|
||||||
// ZIP comment data.
|
|
||||||
|
|
||||||
let search_upper_bound = cde_start_pos
|
|
||||||
.checked_sub(
|
|
||||||
(size_of::<Zip64CentralDirectoryEnd>()
|
|
||||||
+ size_of::<spec::Zip64CentralDirectoryEndLocator>()) as u64,
|
|
||||||
)
|
|
||||||
.ok_or(InvalidArchive(
|
|
||||||
"File cannot contain ZIP64 central directory end",
|
|
||||||
))?;
|
|
||||||
|
|
||||||
let (lower, upper) = Self::order_lower_upper_bounds(
|
|
||||||
locator64.end_of_central_directory_offset,
|
|
||||||
search_upper_bound,
|
|
||||||
);
|
|
||||||
|
|
||||||
let search_results = Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
|
|
||||||
let results: Vec<ZipResult<CentralDirectoryInfo>> =
|
|
||||||
search_results.into_iter().map(|(footer64, archive_offset)| {
|
|
||||||
let archive_offset = match config.archive_offset {
|
|
||||||
ArchiveOffset::Known(n) => n,
|
|
||||||
ArchiveOffset::FromCentralDirectory => archive_offset,
|
|
||||||
ArchiveOffset::Detect => {
|
|
||||||
archive_offset.checked_add(footer64.central_directory_offset)
|
|
||||||
.and_then(|start| {
|
|
||||||
// Check whether the archive offset makes sense by peeking at the directory start.
|
|
||||||
//
|
|
||||||
// If any errors occur or no header signature is found, fall back to no offset to see if that works.
|
|
||||||
reader.seek(SeekFrom::Start(start)).ok()?;
|
|
||||||
let mut buf = [0; 4];
|
|
||||||
reader.read_exact(&mut buf).ok()?;
|
|
||||||
if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(archive_offset)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.unwrap_or(0)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let directory_start = footer64
|
|
||||||
.central_directory_offset
|
|
||||||
.checked_add(archive_offset)
|
|
||||||
.ok_or(InvalidArchive(
|
|
||||||
"Invalid central directory size or offset",
|
|
||||||
))?;
|
|
||||||
if directory_start > search_upper_bound {
|
|
||||||
Err(InvalidArchive(
|
|
||||||
"Invalid central directory size or offset",
|
|
||||||
))
|
|
||||||
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
|
|
||||||
Err(InvalidArchive(
|
|
||||||
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
|
||||||
))
|
|
||||||
} else if footer64.version_needed_to_extract > footer64.version_made_by {
|
|
||||||
Err(InvalidArchive(
|
|
||||||
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
|
||||||
version that wrote it",
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Ok(CentralDirectoryInfo {
|
|
||||||
archive_offset,
|
|
||||||
directory_start,
|
|
||||||
number_of_files: footer64.number_of_files as usize,
|
|
||||||
disk_number: footer64.disk_number,
|
|
||||||
disk_with_central_directory: footer64.disk_with_central_directory,
|
|
||||||
cde_position: cde_start_pos,
|
|
||||||
is_zip64: true,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}).collect();
|
|
||||||
Ok(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get the directory start offset and number of files. This is done in a
|
/// Get the directory start offset and number of files. This is done in a
|
||||||
/// separate function to ease the control flow design.
|
/// separate function to ease the control flow design.
|
||||||
pub(crate) fn get_metadata(
|
pub(crate) fn get_metadata(config: Config, reader: &mut R) -> ZipResult<Shared> {
|
||||||
config: Config,
|
// End of the probed region, initially set to the end of the file
|
||||||
reader: &mut R,
|
let file_len = reader.seek(io::SeekFrom::End(0))?;
|
||||||
) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> {
|
let mut end_exclusive = file_len;
|
||||||
let mut invalid_errors_32 = Vec::new();
|
|
||||||
let mut unsupported_errors_32 = Vec::new();
|
loop {
|
||||||
let mut invalid_errors_64 = Vec::new();
|
// Find the EOCD and possibly EOCD64 entries and determine the archive offset.
|
||||||
let mut unsupported_errors_64 = Vec::new();
|
let cde = spec::find_central_directory(
|
||||||
let mut ok_results = Vec::new();
|
reader,
|
||||||
let cde_locations = Zip32CentralDirectoryEnd::find_and_parse(reader)?;
|
config.archive_offset,
|
||||||
cde_locations
|
end_exclusive,
|
||||||
.into_vec()
|
file_len,
|
||||||
.into_iter()
|
)?;
|
||||||
.for_each(|(footer, cde_start_pos)| {
|
|
||||||
let zip32_result =
|
// Turn EOCD into internal representation.
|
||||||
Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos);
|
let Ok(shared) = CentralDirectoryInfo::try_from(&cde)
|
||||||
Self::sort_result(
|
.and_then(|info| Self::read_central_header(info, config, reader))
|
||||||
zip32_result,
|
else {
|
||||||
&mut invalid_errors_32,
|
// The next EOCD candidate should start before the current one.
|
||||||
&mut unsupported_errors_32,
|
end_exclusive = cde.eocd.position;
|
||||||
&mut ok_results,
|
continue;
|
||||||
&footer,
|
};
|
||||||
);
|
|
||||||
let mut inner_results = Vec::with_capacity(1);
|
return Ok(shared.build(
|
||||||
// Check if file has a zip64 footer
|
cde.eocd.data.zip_file_comment,
|
||||||
let zip64_vec_result =
|
cde.eocd64.map(|v| v.data.extensible_data_sector),
|
||||||
Self::get_directory_info_zip64(&config, reader, cde_start_pos);
|
));
|
||||||
Self::sort_result(
|
|
||||||
zip64_vec_result,
|
|
||||||
&mut invalid_errors_64,
|
|
||||||
&mut unsupported_errors_64,
|
|
||||||
&mut inner_results,
|
|
||||||
&(),
|
|
||||||
);
|
|
||||||
inner_results.into_iter().for_each(|(_, results)| {
|
|
||||||
results.into_iter().for_each(|result| {
|
|
||||||
Self::sort_result(
|
|
||||||
result,
|
|
||||||
&mut invalid_errors_64,
|
|
||||||
&mut unsupported_errors_64,
|
|
||||||
&mut ok_results,
|
|
||||||
&footer,
|
|
||||||
);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
ok_results.sort_by_key(|(_, result)| {
|
|
||||||
(
|
|
||||||
u64::MAX - result.cde_position, // try the last one first
|
|
||||||
!result.is_zip64, // try ZIP64 first
|
|
||||||
)
|
|
||||||
});
|
|
||||||
let mut best_result = None;
|
|
||||||
for (footer, result) in ok_results {
|
|
||||||
let mut inner_result = Vec::with_capacity(1);
|
|
||||||
let is_zip64 = result.is_zip64;
|
|
||||||
Self::sort_result(
|
|
||||||
Self::read_central_header(result, config, reader),
|
|
||||||
if is_zip64 {
|
|
||||||
&mut invalid_errors_64
|
|
||||||
} else {
|
|
||||||
&mut invalid_errors_32
|
|
||||||
},
|
|
||||||
if is_zip64 {
|
|
||||||
&mut unsupported_errors_64
|
|
||||||
} else {
|
|
||||||
&mut unsupported_errors_32
|
|
||||||
},
|
|
||||||
&mut inner_result,
|
|
||||||
&(),
|
|
||||||
);
|
|
||||||
if let Some((_, shared)) = inner_result.into_iter().next() {
|
|
||||||
if shared.files.len() == footer.number_of_files as usize
|
|
||||||
|| (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16)
|
|
||||||
{
|
|
||||||
best_result = Some((footer, shared));
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
if is_zip64 {
|
|
||||||
&mut invalid_errors_64
|
|
||||||
} else {
|
|
||||||
&mut invalid_errors_32
|
|
||||||
}
|
|
||||||
.push(InvalidArchive("wrong number of files"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
let Some((footer, shared)) = best_result else {
|
|
||||||
return Err(unsupported_errors_32
|
|
||||||
.into_iter()
|
|
||||||
.chain(unsupported_errors_64)
|
|
||||||
.chain(invalid_errors_32)
|
|
||||||
.chain(invalid_errors_64)
|
|
||||||
.next()
|
|
||||||
.unwrap());
|
|
||||||
};
|
|
||||||
reader.seek(SeekFrom::Start(shared.dir_start))?;
|
|
||||||
Ok((Rc::try_unwrap(footer).unwrap(), shared.build()))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_central_header(
|
fn read_central_header(
|
||||||
|
@ -821,15 +639,22 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
} else {
|
} else {
|
||||||
dir_info.number_of_files
|
dir_info.number_of_files
|
||||||
};
|
};
|
||||||
|
|
||||||
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
||||||
return unsupported_zip_error("Support for multi-disk files is not implemented");
|
return unsupported_zip_error("Support for multi-disk files is not implemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if file_capacity.saturating_mul(size_of::<ZipFileData>()) > isize::MAX as usize {
|
||||||
|
return unsupported_zip_error("Oversized central directory");
|
||||||
|
}
|
||||||
|
|
||||||
let mut files = Vec::with_capacity(file_capacity);
|
let mut files = Vec::with_capacity(file_capacity);
|
||||||
reader.seek(SeekFrom::Start(dir_info.directory_start))?;
|
reader.seek(SeekFrom::Start(dir_info.directory_start))?;
|
||||||
for _ in 0..dir_info.number_of_files {
|
for _ in 0..dir_info.number_of_files {
|
||||||
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
|
let file = central_header_to_zip_file(reader, &dir_info)?;
|
||||||
files.push(file);
|
files.push(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(SharedBuilder {
|
Ok(SharedBuilder {
|
||||||
files,
|
files,
|
||||||
offset: dir_info.archive_offset,
|
offset: dir_info.archive_offset,
|
||||||
|
@ -838,22 +663,6 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sort_result<T, U: Clone>(
|
|
||||||
result: ZipResult<T>,
|
|
||||||
invalid_errors: &mut Vec<ZipError>,
|
|
||||||
unsupported_errors: &mut Vec<ZipError>,
|
|
||||||
ok_results: &mut Vec<(U, T)>,
|
|
||||||
footer: &U,
|
|
||||||
) {
|
|
||||||
match result {
|
|
||||||
Err(ZipError::UnsupportedArchive(e)) => {
|
|
||||||
unsupported_errors.push(ZipError::UnsupportedArchive(e))
|
|
||||||
}
|
|
||||||
Err(e) => invalid_errors.push(e),
|
|
||||||
Ok(o) => ok_results.push((footer.clone(), o)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the verification value and salt for the AES encryption of the file
|
/// Returns the verification value and salt for the AES encryption of the file
|
||||||
///
|
///
|
||||||
/// It fails if the file number is invalid.
|
/// It fails if the file number is invalid.
|
||||||
|
@ -902,15 +711,12 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
///
|
///
|
||||||
/// This uses the central directory record of the ZIP file, and ignores local file headers.
|
/// This uses the central directory record of the ZIP file, and ignores local file headers.
|
||||||
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
|
pub fn with_config(config: Config, mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||||
reader.seek(SeekFrom::Start(0))?;
|
let shared = Self::get_metadata(config, &mut reader)?;
|
||||||
if let Ok((footer, shared)) = Self::get_metadata(config, &mut reader) {
|
|
||||||
return Ok(ZipArchive {
|
Ok(ZipArchive {
|
||||||
reader,
|
reader,
|
||||||
shared: shared.into(),
|
shared: shared.into(),
|
||||||
comment: footer.zip_file_comment.into(),
|
})
|
||||||
});
|
|
||||||
}
|
|
||||||
Err(InvalidArchive("No valid central directory found"))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract a Zip archive into a directory, overwriting files if they
|
/// Extract a Zip archive into a directory, overwriting files if they
|
||||||
|
@ -1050,7 +856,12 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
|
|
||||||
/// Get the comment of the zip archive.
|
/// Get the comment of the zip archive.
|
||||||
pub fn comment(&self) -> &[u8] {
|
pub fn comment(&self) -> &[u8] {
|
||||||
&self.comment
|
&self.shared.comment
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the ZIP64 comment of the zip archive, if it is ZIP64.
|
||||||
|
pub fn zip64_comment(&self) -> Option<&[u8]> {
|
||||||
|
self.shared.zip64_comment.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator over all the file and directory names in this archive.
|
/// Returns an iterator over all the file and directory names in this archive.
|
||||||
|
@ -1235,21 +1046,36 @@ const fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T> {
|
||||||
/// Parse a central directory entry to collect the information for the file.
|
/// Parse a central directory entry to collect the information for the file.
|
||||||
pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
|
pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
archive_offset: u64,
|
central_directory: &CentralDirectoryInfo,
|
||||||
) -> ZipResult<ZipFileData> {
|
) -> ZipResult<ZipFileData> {
|
||||||
let central_header_start = reader.stream_position()?;
|
let central_header_start = reader.stream_position()?;
|
||||||
|
|
||||||
// Parse central header
|
// Parse central header
|
||||||
let block = ZipCentralEntryBlock::parse(reader)?;
|
let block = ZipCentralEntryBlock::parse(reader)?;
|
||||||
let file =
|
|
||||||
central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block)?;
|
let file = central_header_to_zip_file_inner(
|
||||||
|
reader,
|
||||||
|
central_directory.archive_offset,
|
||||||
|
central_header_start,
|
||||||
|
block,
|
||||||
|
)?;
|
||||||
|
|
||||||
let central_header_end = reader.stream_position()?;
|
let central_header_end = reader.stream_position()?;
|
||||||
let data_start = find_data_start(&file, reader)?;
|
|
||||||
if data_start > central_header_start {
|
if file.header_start >= central_directory.directory_start {
|
||||||
return Err(InvalidArchive(
|
return Err(InvalidArchive(
|
||||||
"A file can't start after its central-directory header",
|
"A local file entry can't start after the central directory",
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let data_start = find_data_start(&file, reader)?;
|
||||||
|
|
||||||
|
if data_start > central_directory.directory_start {
|
||||||
|
return Err(InvalidArchive(
|
||||||
|
"File data can't start after the central directory",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
reader.seek(SeekFrom::Start(central_header_end))?;
|
reader.seek(SeekFrom::Start(central_header_end))?;
|
||||||
Ok(file)
|
Ok(file)
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ pub enum ArchiveOffset {
|
||||||
#[default]
|
#[default]
|
||||||
Detect,
|
Detect,
|
||||||
/// Use the central directory length and offset to determine the start of the archive.
|
/// Use the central directory length and offset to determine the start of the archive.
|
||||||
|
#[deprecated(since = "2.3.0", note = "use `Detect` instead")]
|
||||||
FromCentralDirectory,
|
FromCentralDirectory,
|
||||||
/// Specify a fixed archive offset.
|
/// Specify a fixed archive offset.
|
||||||
Known(u64),
|
Known(u64),
|
||||||
|
|
279
src/read/magic_finder.rs
Normal file
279
src/read/magic_finder.rs
Normal file
|
@ -0,0 +1,279 @@
|
||||||
|
use std::io::{Read, Seek, SeekFrom};
|
||||||
|
|
||||||
|
use memchr::memmem::{Finder, FinderRev};
|
||||||
|
|
||||||
|
use crate::result::ZipResult;
|
||||||
|
|
||||||
|
pub trait FinderDirection<'a> {
|
||||||
|
fn new(needle: &'a [u8]) -> Self;
|
||||||
|
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
|
||||||
|
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);
|
||||||
|
|
||||||
|
fn needle(&self) -> &[u8];
|
||||||
|
fn find(&self, haystack: &[u8]) -> Option<usize>;
|
||||||
|
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
|
||||||
|
fn move_scope(&self, offset: usize) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Forward<'a>(Finder<'a>);
|
||||||
|
impl<'a> FinderDirection<'a> for Forward<'a> {
|
||||||
|
fn new(needle: &'a [u8]) -> Self {
|
||||||
|
Self(Finder::new(needle))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
|
||||||
|
start_inclusive
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
|
||||||
|
(&window[mid_window_offset..], mid_window_offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find(&self, haystack: &[u8]) -> Option<usize> {
|
||||||
|
self.0.find(haystack)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn needle(&self) -> &[u8] {
|
||||||
|
self.0.needle()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
|
||||||
|
let magic_overlap = self.needle().len().saturating_sub(1) as u64;
|
||||||
|
let next = cursor.saturating_add(window_size as u64 - magic_overlap);
|
||||||
|
|
||||||
|
if next >= bounds.1 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(next)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn move_scope(&self, offset: usize) -> usize {
|
||||||
|
offset + self.needle().len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Backwards<'a>(FinderRev<'a>);
|
||||||
|
impl<'a> FinderDirection<'a> for Backwards<'a> {
|
||||||
|
fn new(needle: &'a [u8]) -> Self {
|
||||||
|
Self(FinderRev::new(needle))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
|
||||||
|
bounds
|
||||||
|
.1
|
||||||
|
.saturating_sub(window_size as u64)
|
||||||
|
.clamp(bounds.0, bounds.1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
|
||||||
|
(&window[..mid_window_offset], 0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find(&self, haystack: &[u8]) -> Option<usize> {
|
||||||
|
self.0.rfind(haystack)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn needle(&self) -> &[u8] {
|
||||||
|
self.0.needle()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
|
||||||
|
let magic_overlap = self.needle().len().saturating_sub(1) as u64;
|
||||||
|
|
||||||
|
if cursor <= bounds.0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(
|
||||||
|
cursor
|
||||||
|
.saturating_add(magic_overlap)
|
||||||
|
.saturating_sub(window_size as u64)
|
||||||
|
.clamp(bounds.0, bounds.1),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn move_scope(&self, offset: usize) -> usize {
|
||||||
|
offset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A utility for finding magic symbols from the end of a seekable reader.
|
||||||
|
///
|
||||||
|
/// Can be repurposed to recycle the internal buffer.
|
||||||
|
pub struct MagicFinder<Direction> {
|
||||||
|
buffer: Box<[u8]>,
|
||||||
|
pub(self) finder: Direction,
|
||||||
|
cursor: u64,
|
||||||
|
mid_buffer_offset: Option<usize>,
|
||||||
|
bounds: (u64, u64),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: FinderDirection<'a>> MagicFinder<T> {
|
||||||
|
/// Create a new magic bytes finder to look within specific bounds.
|
||||||
|
pub fn new(magic_bytes: &'a [u8], start_inclusive: u64, end_exclusive: u64) -> Self {
|
||||||
|
const BUFFER_SIZE: usize = 2048;
|
||||||
|
|
||||||
|
// Smaller buffer size would be unable to locate bytes.
|
||||||
|
// Equal buffer size would stall (the window could not be moved).
|
||||||
|
debug_assert!(BUFFER_SIZE >= magic_bytes.len());
|
||||||
|
|
||||||
|
Self {
|
||||||
|
buffer: vec![0; BUFFER_SIZE].into_boxed_slice(),
|
||||||
|
finder: T::new(magic_bytes),
|
||||||
|
cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
|
||||||
|
mid_buffer_offset: None,
|
||||||
|
bounds: (start_inclusive, end_exclusive),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Repurpose the finder for different bytes or bounds.
|
||||||
|
pub fn repurpose(&mut self, magic_bytes: &'a [u8], bounds: (u64, u64)) -> &mut Self {
|
||||||
|
debug_assert!(self.buffer.len() >= magic_bytes.len());
|
||||||
|
|
||||||
|
self.finder = T::new(magic_bytes);
|
||||||
|
self.cursor = T::reset_cursor(bounds, self.buffer.len());
|
||||||
|
self.bounds = bounds;
|
||||||
|
|
||||||
|
// Reset the mid-buffer offset, to invalidate buffer content.
|
||||||
|
self.mid_buffer_offset = None;
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the next magic bytes in the direction specified in the type.
|
||||||
|
pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
|
||||||
|
loop {
|
||||||
|
if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
|
||||||
|
// The finder is consumed
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Position the window and ensure correct length */
|
||||||
|
let window_start = self.cursor;
|
||||||
|
let window_end = self
|
||||||
|
.cursor
|
||||||
|
.saturating_add(self.buffer.len() as u64)
|
||||||
|
.min(self.bounds.1);
|
||||||
|
|
||||||
|
if window_end <= window_start {
|
||||||
|
// Short-circuit on zero-sized windows to prevent loop
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let window = &mut self.buffer[..(window_end - window_start) as usize];
|
||||||
|
|
||||||
|
if self.mid_buffer_offset.is_none() {
|
||||||
|
reader.seek(SeekFrom::Start(window_start))?;
|
||||||
|
reader.read_exact(window)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let (window, window_start_offset) = match self.mid_buffer_offset {
|
||||||
|
Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
|
||||||
|
None => (&*window, 0usize),
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(offset) = self.finder.find(window) {
|
||||||
|
let magic_pos = window_start + window_start_offset as u64 + offset as u64;
|
||||||
|
reader.seek(SeekFrom::Start(magic_pos))?;
|
||||||
|
|
||||||
|
self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));
|
||||||
|
|
||||||
|
return Ok(Some(magic_pos));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.mid_buffer_offset = None;
|
||||||
|
|
||||||
|
match self
|
||||||
|
.finder
|
||||||
|
.move_cursor(self.cursor, self.bounds, self.buffer.len())
|
||||||
|
{
|
||||||
|
Some(new_cursor) => {
|
||||||
|
self.cursor = new_cursor;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
// Destroy the finder when we've reached the end of the bounds.
|
||||||
|
self.bounds.0 = self.bounds.1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A magic bytes finder with an optimistic guess that is tried before
|
||||||
|
/// the inner finder begins searching from end. This enables much faster
|
||||||
|
/// lookup in files without appended junk, because the magic bytes will be
|
||||||
|
/// found directly.
|
||||||
|
///
|
||||||
|
/// The guess can be marked as mandatory to produce an error. This is useful
|
||||||
|
/// if the ArchiveOffset is known and auto-detection is not desired.
|
||||||
|
pub struct OptimisticMagicFinder<Direction> {
|
||||||
|
inner: MagicFinder<Direction>,
|
||||||
|
initial_guess: Option<(u64, bool)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is a temporary restriction, to avoid heap allocation in [`Self::next_back`].
|
||||||
|
///
|
||||||
|
/// We only use magic bytes of size 4 at the moment.
|
||||||
|
const STACK_BUFFER_SIZE: usize = 8;
|
||||||
|
|
||||||
|
impl<'a, Direction: FinderDirection<'a>> OptimisticMagicFinder<Direction> {
|
||||||
|
/// Create a new empty optimistic magic bytes finder.
|
||||||
|
pub fn new_empty() -> Self {
|
||||||
|
Self {
|
||||||
|
inner: MagicFinder::new(&[], 0, 0),
|
||||||
|
initial_guess: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Repurpose the finder for different bytes, bounds and initial guesses.
|
||||||
|
pub fn repurpose(
|
||||||
|
&mut self,
|
||||||
|
magic_bytes: &'a [u8],
|
||||||
|
bounds: (u64, u64),
|
||||||
|
initial_guess: Option<(u64, bool)>,
|
||||||
|
) -> &mut Self {
|
||||||
|
debug_assert!(magic_bytes.len() <= STACK_BUFFER_SIZE);
|
||||||
|
|
||||||
|
self.inner.repurpose(magic_bytes, bounds);
|
||||||
|
self.initial_guess = initial_guess;
|
||||||
|
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Equivalent to `next_back`, with an optional initial guess attempted before
|
||||||
|
/// proceeding with reading from the back of the reader.
|
||||||
|
pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
|
||||||
|
if let Some((v, mandatory)) = self.initial_guess {
|
||||||
|
reader.seek(SeekFrom::Start(v))?;
|
||||||
|
|
||||||
|
let mut buffer = [0; STACK_BUFFER_SIZE];
|
||||||
|
let buffer = &mut buffer[..self.inner.finder.needle().len()];
|
||||||
|
|
||||||
|
// Attempt to match only if there's enough space for the needle
|
||||||
|
if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
|
||||||
|
reader.read_exact(buffer)?;
|
||||||
|
|
||||||
|
// If a match is found, yield it.
|
||||||
|
if self.inner.finder.needle() == buffer {
|
||||||
|
self.initial_guess.take();
|
||||||
|
reader.seek(SeekFrom::Start(v))?;
|
||||||
|
return Ok(Some(v));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a match is not found, but the initial guess was mandatory, return an error.
|
||||||
|
if mandatory {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the initial guess was not mandatory, remove it, as it was not found.
|
||||||
|
self.initial_guess.take();
|
||||||
|
}
|
||||||
|
|
||||||
|
self.inner.next(reader)
|
||||||
|
}
|
||||||
|
}
|
510
src/spec.rs
510
src/spec.rs
|
@ -1,11 +1,11 @@
|
||||||
#![macro_use]
|
#![macro_use]
|
||||||
|
|
||||||
|
use crate::read::magic_finder::{Backwards, Forward, MagicFinder, OptimisticMagicFinder};
|
||||||
|
use crate::read::ArchiveOffset;
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use core::mem;
|
use core::mem;
|
||||||
use memchr::memmem::FinderRev;
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::rc::Rc;
|
|
||||||
use std::slice;
|
use std::slice;
|
||||||
|
|
||||||
/// "Magic" header values used in the zip spec to locate metadata records.
|
/// "Magic" header values used in the zip spec to locate metadata records.
|
||||||
|
@ -22,6 +22,7 @@ impl Magic {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
|
#[allow(dead_code)]
|
||||||
pub const fn from_le_bytes(bytes: [u8; 4]) -> Self {
|
pub const fn from_le_bytes(bytes: [u8; 4]) -> Self {
|
||||||
Self(u32::from_le_bytes(bytes))
|
Self(u32::from_le_bytes(bytes))
|
||||||
}
|
}
|
||||||
|
@ -289,7 +290,7 @@ pub(crate) struct Zip32CentralDirectoryEnd {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Zip32CentralDirectoryEnd {
|
impl Zip32CentralDirectoryEnd {
|
||||||
fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> {
|
fn into_block_and_comment(self) -> (Zip32CDEBlock, Box<[u8]>) {
|
||||||
let Self {
|
let Self {
|
||||||
disk_number,
|
disk_number,
|
||||||
disk_with_central_directory,
|
disk_with_central_directory,
|
||||||
|
@ -307,12 +308,10 @@ impl Zip32CentralDirectoryEnd {
|
||||||
number_of_files,
|
number_of_files,
|
||||||
central_directory_size,
|
central_directory_size,
|
||||||
central_directory_offset,
|
central_directory_offset,
|
||||||
zip_file_comment_length: zip_file_comment
|
zip_file_comment_length: zip_file_comment.len() as u16,
|
||||||
.len()
|
|
||||||
.try_into()
|
|
||||||
.map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?,
|
|
||||||
};
|
};
|
||||||
Ok((block, zip_file_comment))
|
|
||||||
|
(block, zip_file_comment)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
|
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
|
||||||
|
@ -329,7 +328,15 @@ impl Zip32CentralDirectoryEnd {
|
||||||
} = Zip32CDEBlock::parse(reader)?;
|
} = Zip32CDEBlock::parse(reader)?;
|
||||||
|
|
||||||
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice();
|
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice();
|
||||||
reader.read_exact(&mut zip_file_comment)?;
|
if let Err(e) = reader.read_exact(&mut zip_file_comment) {
|
||||||
|
if e.kind() == io::ErrorKind::UnexpectedEof {
|
||||||
|
return Err(ZipError::InvalidArchive(
|
||||||
|
"EOCD comment exceeds file boundary",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
return Err(e.into());
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Zip32CentralDirectoryEnd {
|
Ok(Zip32CentralDirectoryEnd {
|
||||||
disk_number,
|
disk_number,
|
||||||
|
@ -342,99 +349,23 @@ impl Zip32CentralDirectoryEnd {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
|
||||||
pub fn find_and_parse<T: Read + Seek>(
|
|
||||||
reader: &mut T,
|
|
||||||
) -> ZipResult<Box<[(Rc<Zip32CentralDirectoryEnd>, u64)]>> {
|
|
||||||
let mut results = vec![];
|
|
||||||
let file_length = reader.seek(io::SeekFrom::End(0))?;
|
|
||||||
|
|
||||||
if file_length < mem::size_of::<Zip32CDEBlock>() as u64 {
|
|
||||||
return Err(ZipError::InvalidArchive("Invalid zip header"));
|
|
||||||
}
|
|
||||||
|
|
||||||
// The End Of Central Directory Record should be the last thing in
|
|
||||||
// the file and so searching the last 65557 bytes of the file should
|
|
||||||
// be enough. However, not all zips are well-formed and other
|
|
||||||
// programs may consume zips with extra junk at the end without
|
|
||||||
// error, so we go back 128K to be compatible with them. 128K is
|
|
||||||
// arbitrary, but it matches what Info-Zip does.
|
|
||||||
const EOCDR_SEARCH_SIZE: u64 = 128 * 1024;
|
|
||||||
let search_lower_bound = file_length.saturating_sub(EOCDR_SEARCH_SIZE);
|
|
||||||
|
|
||||||
const END_WINDOW_SIZE: usize = 8192;
|
|
||||||
/* TODO: use static_assertions!() */
|
|
||||||
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
|
|
||||||
|
|
||||||
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
|
|
||||||
Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
|
|
||||||
let finder = FinderRev::new(&SIG_BYTES);
|
|
||||||
|
|
||||||
let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64);
|
|
||||||
let mut window = [0u8; END_WINDOW_SIZE];
|
|
||||||
while window_start >= search_lower_bound {
|
|
||||||
/* Go to the start of the window in the file. */
|
|
||||||
reader.seek(io::SeekFrom::Start(window_start))?;
|
|
||||||
|
|
||||||
/* Identify how many bytes to read (this may be less than the window size for files
|
|
||||||
* smaller than END_WINDOW_SIZE). */
|
|
||||||
let end = (window_start + END_WINDOW_SIZE as u64).min(file_length);
|
|
||||||
let cur_len = (end - window_start) as usize;
|
|
||||||
debug_assert!(cur_len > 0);
|
|
||||||
debug_assert!(cur_len <= END_WINDOW_SIZE);
|
|
||||||
let cur_window: &mut [u8] = &mut window[..cur_len];
|
|
||||||
/* Read the window into the bytes! */
|
|
||||||
reader.read_exact(cur_window)?;
|
|
||||||
|
|
||||||
/* Find instances of the magic signature. */
|
|
||||||
for offset in finder.rfind_iter(cur_window) {
|
|
||||||
let cde_start_pos = window_start + offset as u64;
|
|
||||||
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
|
|
||||||
/* Drop any headers that don't parse. */
|
|
||||||
if let Ok(cde) = Self::parse(reader) {
|
|
||||||
results.push((Rc::new(cde), cde_start_pos));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We always want to make sure we go allllll the way back to the start of the file if
|
|
||||||
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
|
|
||||||
* avoid infinite looping by checking at the end of the loop. */
|
|
||||||
if window_start == search_lower_bound {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
|
|
||||||
* overlap our nice neat window boundaries! */
|
|
||||||
window_start = (window_start
|
|
||||||
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
|
|
||||||
* by the width of the pattern to match. */
|
|
||||||
+ mem::size_of::<Magic>() as u64)
|
|
||||||
/* This should never happen, but make sure we don't go past the end of the file. */
|
|
||||||
.min(file_length);
|
|
||||||
window_start = window_start
|
|
||||||
.saturating_sub(
|
|
||||||
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
|
|
||||||
* once (unless limited by file_length). */
|
|
||||||
END_WINDOW_SIZE as u64,
|
|
||||||
)
|
|
||||||
/* This will never go below the value of `search_lower_bound`, so we have a special
|
|
||||||
* `if window_start == search_lower_bound` check above. */
|
|
||||||
.max(search_lower_bound);
|
|
||||||
}
|
|
||||||
if results.is_empty() {
|
|
||||||
Err(ZipError::InvalidArchive(
|
|
||||||
"Could not find central directory end",
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Ok(results.into_boxed_slice())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
let (block, comment) = self.block_and_comment()?;
|
let (block, comment) = self.into_block_and_comment();
|
||||||
|
|
||||||
|
if comment.len() > u16::MAX as usize {
|
||||||
|
return Err(ZipError::InvalidArchive(
|
||||||
|
"EOCD comment length exceeds u16::MAX",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
block.write(writer)?;
|
block.write(writer)?;
|
||||||
writer.write_all(&comment)?;
|
writer.write_all(&comment)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn may_be_zip64(&self) -> bool {
|
||||||
|
self.number_of_files == u16::MAX || self.central_directory_offset == u32::MAX
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
@ -551,6 +482,7 @@ impl FixedSizeBlock for Zip64CDEBlock {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct Zip64CentralDirectoryEnd {
|
pub(crate) struct Zip64CentralDirectoryEnd {
|
||||||
|
pub record_size: u64,
|
||||||
pub version_made_by: u16,
|
pub version_made_by: u16,
|
||||||
pub version_needed_to_extract: u16,
|
pub version_needed_to_extract: u16,
|
||||||
pub disk_number: u32,
|
pub disk_number: u32,
|
||||||
|
@ -559,13 +491,13 @@ pub(crate) struct Zip64CentralDirectoryEnd {
|
||||||
pub number_of_files: u64,
|
pub number_of_files: u64,
|
||||||
pub central_directory_size: u64,
|
pub central_directory_size: u64,
|
||||||
pub central_directory_offset: u64,
|
pub central_directory_offset: u64,
|
||||||
//pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
|
pub extensible_data_sector: Box<[u8]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Zip64CentralDirectoryEnd {
|
impl Zip64CentralDirectoryEnd {
|
||||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEnd> {
|
pub fn parse<T: Read>(reader: &mut T, max_size: u64) -> ZipResult<Zip64CentralDirectoryEnd> {
|
||||||
let Zip64CDEBlock {
|
let Zip64CDEBlock {
|
||||||
// record_size,
|
record_size,
|
||||||
version_made_by,
|
version_made_by,
|
||||||
version_needed_to_extract,
|
version_needed_to_extract,
|
||||||
disk_number,
|
disk_number,
|
||||||
|
@ -576,7 +508,20 @@ impl Zip64CentralDirectoryEnd {
|
||||||
central_directory_offset,
|
central_directory_offset,
|
||||||
..
|
..
|
||||||
} = Zip64CDEBlock::parse(reader)?;
|
} = Zip64CDEBlock::parse(reader)?;
|
||||||
|
|
||||||
|
if record_size < 44 {
|
||||||
|
return Err(ZipError::InvalidArchive("Low EOCD64 record size"));
|
||||||
|
} else if record_size.saturating_add(12) > max_size {
|
||||||
|
return Err(ZipError::InvalidArchive(
|
||||||
|
"EOCD64 extends beyond EOCD64 locator",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut zip_file_comment = vec![0u8; record_size as usize - 44].into_boxed_slice();
|
||||||
|
reader.read_exact(&mut zip_file_comment)?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
record_size,
|
||||||
version_made_by,
|
version_made_by,
|
||||||
version_needed_to_extract,
|
version_needed_to_extract,
|
||||||
disk_number,
|
disk_number,
|
||||||
|
@ -585,94 +530,13 @@ impl Zip64CentralDirectoryEnd {
|
||||||
number_of_files,
|
number_of_files,
|
||||||
central_directory_size,
|
central_directory_size,
|
||||||
central_directory_offset,
|
central_directory_offset,
|
||||||
|
extensible_data_sector: zip_file_comment,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_and_parse<T: Read + Seek>(
|
pub fn into_block_and_comment(self) -> (Zip64CDEBlock, Box<[u8]>) {
|
||||||
reader: &mut T,
|
|
||||||
search_lower_bound: u64,
|
|
||||||
search_upper_bound: u64,
|
|
||||||
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
|
|
||||||
let mut results = Vec::new();
|
|
||||||
|
|
||||||
const END_WINDOW_SIZE: usize = 2048;
|
|
||||||
/* TODO: use static_assertions!() */
|
|
||||||
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
|
|
||||||
|
|
||||||
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
|
|
||||||
Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
|
|
||||||
let finder = FinderRev::new(&SIG_BYTES);
|
|
||||||
|
|
||||||
let mut window_start: u64 = search_upper_bound
|
|
||||||
.saturating_sub(END_WINDOW_SIZE as u64)
|
|
||||||
.max(search_lower_bound);
|
|
||||||
let mut window = [0u8; END_WINDOW_SIZE];
|
|
||||||
while window_start >= search_lower_bound {
|
|
||||||
reader.seek(io::SeekFrom::Start(window_start))?;
|
|
||||||
|
|
||||||
/* Identify how many bytes to read (this may be less than the window size for files
|
|
||||||
* smaller than END_WINDOW_SIZE). */
|
|
||||||
let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound);
|
|
||||||
|
|
||||||
debug_assert!(end >= window_start);
|
|
||||||
let cur_len = (end - window_start) as usize;
|
|
||||||
if cur_len == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
debug_assert!(cur_len <= END_WINDOW_SIZE);
|
|
||||||
let cur_window: &mut [u8] = &mut window[..cur_len];
|
|
||||||
/* Read the window into the bytes! */
|
|
||||||
reader.read_exact(cur_window)?;
|
|
||||||
|
|
||||||
/* Find instances of the magic signature. */
|
|
||||||
for offset in finder.rfind_iter(cur_window) {
|
|
||||||
let cde_start_pos = window_start + offset as u64;
|
|
||||||
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
|
|
||||||
|
|
||||||
debug_assert!(cde_start_pos >= search_lower_bound);
|
|
||||||
let archive_offset = cde_start_pos - search_lower_bound;
|
|
||||||
let cde = Self::parse(reader)?;
|
|
||||||
|
|
||||||
results.push((cde, archive_offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We always want to make sure we go allllll the way back to the start of the file if
|
|
||||||
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
|
|
||||||
* avoid infinite looping by checking at the end of the loop. */
|
|
||||||
if window_start == search_lower_bound {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
|
|
||||||
* overlap our nice neat window boundaries! */
|
|
||||||
window_start = (window_start
|
|
||||||
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
|
|
||||||
* by the width of the pattern to match. */
|
|
||||||
+ mem::size_of::<Magic>() as u64)
|
|
||||||
/* This may never happen, but make sure we don't go past the end of the specified
|
|
||||||
* range. */
|
|
||||||
.min(search_upper_bound);
|
|
||||||
window_start = window_start
|
|
||||||
.saturating_sub(
|
|
||||||
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
|
|
||||||
* once (unless limited by search_upper_bound). */
|
|
||||||
END_WINDOW_SIZE as u64,
|
|
||||||
)
|
|
||||||
/* This will never go below the value of `search_lower_bound`, so we have a special
|
|
||||||
* `if window_start == search_lower_bound` check above. */
|
|
||||||
.max(search_lower_bound);
|
|
||||||
}
|
|
||||||
|
|
||||||
if results.is_empty() {
|
|
||||||
Err(ZipError::InvalidArchive(
|
|
||||||
"Could not find ZIP64 central directory end",
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Ok(results)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn block(self) -> Zip64CDEBlock {
|
|
||||||
let Self {
|
let Self {
|
||||||
|
record_size,
|
||||||
version_made_by,
|
version_made_by,
|
||||||
version_needed_to_extract,
|
version_needed_to_extract,
|
||||||
disk_number,
|
disk_number,
|
||||||
|
@ -681,27 +545,277 @@ impl Zip64CentralDirectoryEnd {
|
||||||
number_of_files,
|
number_of_files,
|
||||||
central_directory_size,
|
central_directory_size,
|
||||||
central_directory_offset,
|
central_directory_offset,
|
||||||
|
extensible_data_sector,
|
||||||
} = self;
|
} = self;
|
||||||
Zip64CDEBlock {
|
|
||||||
magic: Zip64CDEBlock::MAGIC,
|
(
|
||||||
/* currently unused */
|
Zip64CDEBlock {
|
||||||
record_size: 44,
|
magic: Zip64CDEBlock::MAGIC,
|
||||||
version_made_by,
|
record_size,
|
||||||
version_needed_to_extract,
|
version_made_by,
|
||||||
disk_number,
|
version_needed_to_extract,
|
||||||
disk_with_central_directory,
|
disk_number,
|
||||||
number_of_files_on_this_disk,
|
disk_with_central_directory,
|
||||||
number_of_files,
|
number_of_files_on_this_disk,
|
||||||
central_directory_size,
|
number_of_files,
|
||||||
central_directory_offset,
|
central_directory_size,
|
||||||
}
|
central_directory_offset,
|
||||||
|
},
|
||||||
|
extensible_data_sector,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
self.block().write(writer)
|
let (block, comment) = self.into_block_and_comment();
|
||||||
|
block.write(writer)?;
|
||||||
|
writer.write_all(&comment)?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A value bundled with a `u64` position.
pub(crate) struct DataAndPosition<T> {
    /// The wrapped value.
    pub data: T,
    /// The position associated with `data`.
    #[allow(dead_code)]
    pub position: u64,
}

impl<T> From<(T, u64)> for DataAndPosition<T> {
    /// Builds the pair from a `(data, position)` tuple.
    fn from((data, position): (T, u64)) -> Self {
        Self { data, position }
    }
}
|
||||||
|
|
||||||
|
/// Outcome of a successful end-of-central-directory lookup.
pub(crate) struct CentralDirectoryEndInfo {
    /// The parsed EOCD record together with the position it was found at.
    pub eocd: DataAndPosition<Zip32CentralDirectoryEnd>,
    /// The parsed EOCD64 record and its position, when one was located.
    pub eocd64: Option<DataAndPosition<Zip64CentralDirectoryEnd>>,

    /// Difference between where a record was actually found and the offset
    /// recorded for it in the archive metadata.
    pub archive_offset: u64,
}
|
||||||
|
|
||||||
|
/// Finds the EOCD and possibly the EOCD64 block and determines the archive offset.
///
/// EOCD signatures are searched backwards within `0..end_exclusive`. Each
/// candidate is parsed and validated; a candidate that fails any check is
/// skipped and the search continues with the next signature.
///
/// In the best case scenario (no prepended junk), this function will not backtrack
/// in the reader.
///
/// # Errors
///
/// I/O errors raised by the finders are propagated immediately. If no candidate
/// passes validation, the most recently recorded validation error is returned,
/// or `InvalidArchive("Could not find EOCD")` when none was recorded.
pub(crate) fn find_central_directory<R: Read + Seek>(
    reader: &mut R,
    archive_offset: ArchiveOffset,
    end_exclusive: u64,
    file_len: u64,
) -> ZipResult<CentralDirectoryEndInfo> {
    const EOCD_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();

    const EOCD64_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();

    const CDFH_SIG_BYTES: [u8; mem::size_of::<Magic>()] =
        Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE.to_le_bytes();

    // Instantiate the mandatory finder
    let mut eocd_finder = MagicFinder::<Backwards<'static>>::new(&EOCD_SIG_BYTES, 0, end_exclusive);
    // The secondary finder is created lazily and reused (repurposed) across candidates.
    let mut subfinder: Option<OptimisticMagicFinder<Forward<'static>>> = None;

    // Keep the last errors for cases of improper EOCD instances.
    let mut parsing_error = None;

    while let Some(eocd_offset) = eocd_finder.next(reader)? {
        // Attempt to parse the EOCD block
        let eocd = match Zip32CentralDirectoryEnd::parse(reader) {
            Ok(eocd) => eocd,
            Err(e) => {
                // A parse failure is only recorded when no other error is stored yet.
                if parsing_error.is_none() {
                    parsing_error = Some(e);
                }
                continue;
            }
        };

        // ! Relaxed (inequality) due to garbage-after-comment Python files
        // Consistency check: the EOCD comment must terminate before the end of file
        // NOTE(review): 22 is presumably the size of the fixed EOCD block — confirm.
        if eocd.zip_file_comment.len() as u64 + eocd_offset + 22 > file_len {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD comment length"));
            continue;
        }

        // The zip64 markers are only a hint: if the locator cannot be read,
        // the candidate is handled as a plain zip32 archive.
        let zip64_metadata = if eocd.may_be_zip64() {
            // Reads the EOCD64 locator expected directly in front of the EOCD.
            fn try_read_eocd64_locator(
                reader: &mut (impl Read + Seek),
                eocd_offset: u64,
            ) -> ZipResult<(u64, Zip64CentralDirectoryEndLocator)> {
                if eocd_offset < mem::size_of::<Zip64CDELocatorBlock>() as u64 {
                    return Err(ZipError::InvalidArchive(
                        "EOCD64 Locator does not fit in file",
                    ));
                }

                let locator64_offset = eocd_offset - mem::size_of::<Zip64CDELocatorBlock>() as u64;

                reader.seek(io::SeekFrom::Start(locator64_offset))?;
                Ok((
                    locator64_offset,
                    Zip64CentralDirectoryEndLocator::parse(reader)?,
                ))
            }

            try_read_eocd64_locator(reader, eocd_offset).ok()
        } else {
            None
        };

        let Some((locator64_offset, locator64)) = zip64_metadata else {
            // Branch out for zip32
            let relative_cd_offset = eocd.central_directory_offset as u64;

            // If the archive is empty, there is nothing more to be checked, the archive is correct.
            if eocd.number_of_files == 0 {
                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset: eocd_offset.saturating_sub(relative_cd_offset),
                });
            }

            // Consistency check: the CD relative offset cannot be after the EOCD
            if relative_cd_offset >= eocd_offset {
                parsing_error = Some(ZipError::InvalidArchive("Invalid CDFH offset in EOCD"));
                continue;
            }

            // Attempt to find the first CDFH
            let subfinder = subfinder
                .get_or_insert_with(OptimisticMagicFinder::new_empty)
                .repurpose(
                    &CDFH_SIG_BYTES,
                    // The CDFH must be before the EOCD and after the relative offset,
                    // because prepended junk can only move it forward.
                    (relative_cd_offset, eocd_offset),
                    // A known archive offset yields a mandatory guess; otherwise the
                    // recorded offset is tried first as an optional guess.
                    match archive_offset {
                        ArchiveOffset::Known(n) => {
                            Some((relative_cd_offset.saturating_add(n).min(eocd_offset), true))
                        }
                        _ => Some((relative_cd_offset, false)),
                    },
                );

            // Consistency check: find the first CDFH
            if let Some(cd_offset) = subfinder.next(reader)? {
                // The first CDFH will define the archive offset
                let archive_offset = cd_offset - relative_cd_offset;

                return Ok(CentralDirectoryEndInfo {
                    eocd: (eocd, eocd_offset).into(),
                    eocd64: None,
                    archive_offset,
                });
            }

            parsing_error = Some(ZipError::InvalidArchive("No CDFH found"));
            continue;
        };

        // Consistency check: the EOCD64 offset must be before EOCD64 Locator offset
        if locator64.end_of_central_directory_offset >= locator64_offset {
            parsing_error = Some(ZipError::InvalidArchive("Invalid EOCD64 Locator CD offset"));
            continue;
        }

        if locator64.number_of_disks > 1 {
            parsing_error = Some(ZipError::InvalidArchive(
                "Multi-disk ZIP files are not supported",
            ));
            continue;
        }

        // This was hidden inside a function to collect errors in a single place.
        // Once try blocks are stabilized, this can go away.
        fn try_read_eocd64<R: Read + Seek>(
            reader: &mut R,
            locator64: &Zip64CentralDirectoryEndLocator,
            expected_length: u64,
        ) -> ZipResult<Zip64CentralDirectoryEnd> {
            let z64 = Zip64CentralDirectoryEnd::parse(reader, expected_length)?;

            // Consistency check: EOCD64 locator should agree with the EOCD64
            if z64.disk_with_central_directory != locator64.disk_with_central_directory {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistency with Locator data",
                ));
            }

            // Consistency check: the EOCD64 must have the expected length
            // NOTE(review): the 12 presumably covers the leading bytes that
            // `record_size` does not count — confirm against the ZIP specification.
            if z64.record_size + 12 != expected_length {
                return Err(ZipError::InvalidArchive(
                    "Invalid EOCD64: inconsistent length",
                ));
            }

            Ok(z64)
        }

        // Attempt to find the EOCD64 with an initial guess
        let subfinder = subfinder
            .get_or_insert_with(OptimisticMagicFinder::new_empty)
            .repurpose(
                &EOCD64_SIG_BYTES,
                // Search between the offset recorded in the locator and the locator itself.
                (locator64.end_of_central_directory_offset, locator64_offset),
                match archive_offset {
                    ArchiveOffset::Known(n) => Some((
                        locator64
                            .end_of_central_directory_offset
                            .saturating_add(n)
                            .min(locator64_offset),
                        true,
                    )),
                    _ => Some((locator64.end_of_central_directory_offset, false)),
                },
            );

        // Consistency check: Find the EOCD64
        let mut local_error = None;
        while let Some(eocd64_offset) = subfinder.next(reader)? {
            // Distance between where the EOCD64 was found and where the locator says it is.
            let archive_offset = eocd64_offset - locator64.end_of_central_directory_offset;

            match try_read_eocd64(
                reader,
                &locator64,
                // The record is expected to span exactly the gap up to the locator.
                locator64_offset.saturating_sub(eocd64_offset),
            ) {
                Ok(eocd64) => {
                    // Reject records whose declared entries, at the fixed header
                    // size each, would not fit between the central directory
                    // offset and the EOCD64 itself.
                    if eocd64_offset
                        < eocd64
                            .number_of_files
                            .saturating_mul(
                                mem::size_of::<crate::types::ZipCentralEntryBlock>() as u64
                            )
                            .saturating_add(eocd64.central_directory_offset)
                    {
                        local_error = Some(ZipError::InvalidArchive(
                            "Invalid EOCD64: inconsistent number of files",
                        ));
                        continue;
                    }

                    return Ok(CentralDirectoryEndInfo {
                        eocd: (eocd, eocd_offset).into(),
                        eocd64: Some((eocd64, eocd64_offset).into()),
                        archive_offset,
                    });
                }
                Err(e) => {
                    local_error = Some(e);
                }
            }
        }

        // No usable EOCD64 for this EOCD candidate; remember why and try the next one.
        parsing_error = local_error.or(Some(ZipError::InvalidArchive("Could not find EOCD64")));
    }

    Err(parsing_error.unwrap_or(ZipError::InvalidArchive("Could not find EOCD")))
}
|
||||||
|
|
||||||
pub(crate) fn is_dir(filename: &str) -> bool {
|
pub(crate) fn is_dir(filename: &str) -> bool {
|
||||||
filename
|
filename
|
||||||
.chars()
|
.chars()
|
||||||
|
|
73
src/write.rs
73
src/write.rs
|
@ -160,6 +160,7 @@ pub(crate) mod zip_writer {
|
||||||
pub(super) writing_to_file: bool,
|
pub(super) writing_to_file: bool,
|
||||||
pub(super) writing_raw: bool,
|
pub(super) writing_raw: bool,
|
||||||
pub(super) comment: Box<[u8]>,
|
pub(super) comment: Box<[u8]>,
|
||||||
|
pub(super) zip64_comment: Option<Box<[u8]>>,
|
||||||
pub(super) flush_on_finish_file: bool,
|
pub(super) flush_on_finish_file: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -628,19 +629,19 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
|
||||||
/// This uses the given read configuration to initially read the archive.
|
/// This uses the given read configuration to initially read the archive.
|
||||||
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
||||||
readwriter.seek(SeekFrom::Start(0))?;
|
readwriter.seek(SeekFrom::Start(0))?;
|
||||||
if let Ok((footer, shared)) = ZipArchive::get_metadata(config, &mut readwriter) {
|
|
||||||
Ok(ZipWriter {
|
let shared = ZipArchive::get_metadata(config, &mut readwriter)?;
|
||||||
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
|
||||||
files: shared.files,
|
Ok(ZipWriter {
|
||||||
stats: Default::default(),
|
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
||||||
writing_to_file: false,
|
files: shared.files,
|
||||||
comment: footer.zip_file_comment,
|
stats: Default::default(),
|
||||||
writing_raw: true, // avoid recomputing the last file's header
|
writing_to_file: false,
|
||||||
flush_on_finish_file: false,
|
comment: shared.comment,
|
||||||
})
|
zip64_comment: shared.zip64_comment,
|
||||||
} else {
|
writing_raw: true, // avoid recomputing the last file's header
|
||||||
Err(InvalidArchive("No central-directory end header found"))
|
flush_on_finish_file: false,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// `flush_on_finish_file` is designed to support a streaming `inner` that may unload flushed
|
/// `flush_on_finish_file` is designed to support a streaming `inner` that may unload flushed
|
||||||
|
@ -774,8 +775,11 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
|
||||||
let central_start = self.finalize()?;
|
let central_start = self.finalize()?;
|
||||||
let inner = mem::replace(&mut self.inner, Closed).unwrap();
|
let inner = mem::replace(&mut self.inner, Closed).unwrap();
|
||||||
let comment = mem::take(&mut self.comment);
|
let comment = mem::take(&mut self.comment);
|
||||||
|
let zip64_comment = mem::take(&mut self.zip64_comment);
|
||||||
let files = mem::take(&mut self.files);
|
let files = mem::take(&mut self.files);
|
||||||
let archive = ZipArchive::from_finalized_writer(files, comment, inner, central_start)?;
|
|
||||||
|
let archive =
|
||||||
|
ZipArchive::from_finalized_writer(files, comment, zip64_comment, inner, central_start)?;
|
||||||
Ok(archive)
|
Ok(archive)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -794,6 +798,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
writing_to_file: false,
|
writing_to_file: false,
|
||||||
writing_raw: false,
|
writing_raw: false,
|
||||||
comment: Box::new([]),
|
comment: Box::new([]),
|
||||||
|
zip64_comment: None,
|
||||||
flush_on_finish_file: false,
|
flush_on_finish_file: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -832,6 +837,35 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
&self.comment
|
&self.comment
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set ZIP64 archive comment.
|
||||||
|
pub fn set_zip64_comment<S>(&mut self, comment: Option<S>)
|
||||||
|
where
|
||||||
|
S: Into<Box<str>>,
|
||||||
|
{
|
||||||
|
self.set_raw_zip64_comment(comment.map(|v| v.into().into_boxed_bytes()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set ZIP64 archive comment.
|
||||||
|
///
|
||||||
|
/// This sets the raw bytes of the comment. The comment
|
||||||
|
/// is typically expected to be encoded in UTF-8.
|
||||||
|
pub fn set_raw_zip64_comment(&mut self, comment: Option<Box<[u8]>>) {
|
||||||
|
self.zip64_comment = comment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get ZIP64 archive comment.
|
||||||
|
pub fn get_zip64_comment(&mut self) -> Option<Result<&str, Utf8Error>> {
|
||||||
|
self.get_raw_zip64_comment().map(from_utf8)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get ZIP64 archive comment.
|
||||||
|
///
|
||||||
|
/// This returns the raw bytes of the comment. The comment
|
||||||
|
/// is typically expected to be encoded in UTF-8.
|
||||||
|
pub fn get_raw_zip64_comment(&self) -> Option<&[u8]> {
|
||||||
|
self.zip64_comment.as_deref()
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the file length and crc32 manually.
|
/// Set the file length and crc32 manually.
|
||||||
///
|
///
|
||||||
/// # Safety
|
/// # Safety
|
||||||
|
@ -1516,11 +1550,15 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
version_needed = version_needed.max(file.version_needed());
|
version_needed = version_needed.max(file.version_needed());
|
||||||
}
|
}
|
||||||
let central_size = writer.stream_position()? - central_start;
|
let central_size = writer.stream_position()? - central_start;
|
||||||
|
let is64 = self.files.len() > spec::ZIP64_ENTRY_THR
|
||||||
if self.files.len() > spec::ZIP64_ENTRY_THR
|
|
||||||
|| central_size.max(central_start) > spec::ZIP64_BYTES_THR
|
|| central_size.max(central_start) > spec::ZIP64_BYTES_THR
|
||||||
{
|
|| self.zip64_comment.is_some();
|
||||||
|
|
||||||
|
if is64 {
|
||||||
|
let comment = self.zip64_comment.clone().unwrap_or_default();
|
||||||
|
|
||||||
let zip64_footer = spec::Zip64CentralDirectoryEnd {
|
let zip64_footer = spec::Zip64CentralDirectoryEnd {
|
||||||
|
record_size: comment.len() as u64 + 44,
|
||||||
version_made_by: version_needed,
|
version_made_by: version_needed,
|
||||||
version_needed_to_extract: version_needed,
|
version_needed_to_extract: version_needed,
|
||||||
disk_number: 0,
|
disk_number: 0,
|
||||||
|
@ -1529,6 +1567,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
number_of_files: self.files.len() as u64,
|
number_of_files: self.files.len() as u64,
|
||||||
central_directory_size: central_size,
|
central_directory_size: central_size,
|
||||||
central_directory_offset: central_start,
|
central_directory_offset: central_start,
|
||||||
|
extensible_data_sector: comment,
|
||||||
};
|
};
|
||||||
|
|
||||||
zip64_footer.write(writer)?;
|
zip64_footer.write(writer)?;
|
||||||
|
|
24
tests/prepended_garbage.rs
Normal file
24
tests/prepended_garbage.rs
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
use std::io::Cursor;
|
||||||
|
use zip::ZipArchive;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_prepended_garbage() {
|
||||||
|
let mut v = vec![0, 1, 2, 3];
|
||||||
|
v.extend_from_slice(include_bytes!("../tests/data/extended_timestamp.zip"));
|
||||||
|
|
||||||
|
let mut archive = ZipArchive::new(Cursor::new(v)).expect("couldn't open test zip file");
|
||||||
|
|
||||||
|
assert_eq!(2, archive.len());
|
||||||
|
|
||||||
|
for file_idx in 0..archive.len() {
|
||||||
|
let file = archive.by_index(file_idx).unwrap();
|
||||||
|
let outpath = file.enclosed_name().unwrap();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Entry {} has name \"{}\" ({} bytes)",
|
||||||
|
file_idx,
|
||||||
|
outpath.display(),
|
||||||
|
file.size()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue