Merge pull request #93 from cosmicexplorer/bulk-parsing
perf: parse headers in blocks and scan for magic numbers with memchr
This commit is contained in:
commit
b057d0dca2
10 changed files with 1382 additions and 518 deletions
|
@ -35,6 +35,7 @@ displaydoc = { version = "0.2.4", default-features = false }
|
||||||
flate2 = { version = "1.0.28", default-features = false, optional = true }
|
flate2 = { version = "1.0.28", default-features = false, optional = true }
|
||||||
indexmap = "2"
|
indexmap = "2"
|
||||||
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
|
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
|
||||||
|
memchr = "2.7.2"
|
||||||
pbkdf2 = { version = "0.12.2", optional = true }
|
pbkdf2 = { version = "0.12.2", optional = true }
|
||||||
rand = { version = "0.8.5", optional = true }
|
rand = { version = "0.8.5", optional = true }
|
||||||
sha1 = { version = "0.10.6", optional = true }
|
sha1 = { version = "0.10.6", optional = true }
|
||||||
|
@ -56,7 +57,7 @@ arbitrary = { version = "1.3.2", features = ["derive"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
bencher = "0.1.5"
|
bencher = "0.1.5"
|
||||||
getrandom = { version = "0.2.14", features = ["js"] }
|
getrandom = { version = "0.2.14", features = ["js", "std"] }
|
||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
time = { workspace = true, features = ["formatting", "macros"] }
|
time = { workspace = true, features = ["formatting", "macros"] }
|
||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
|
|
|
@ -1,38 +1,126 @@
|
||||||
use bencher::{benchmark_group, benchmark_main};
|
use bencher::{benchmark_group, benchmark_main};
|
||||||
|
|
||||||
use std::io::{Cursor, Write};
|
use std::fs;
|
||||||
|
use std::io::{self, prelude::*, Cursor};
|
||||||
|
|
||||||
use bencher::Bencher;
|
use bencher::Bencher;
|
||||||
|
use getrandom::getrandom;
|
||||||
|
use tempdir::TempDir;
|
||||||
use zip::write::SimpleFileOptions;
|
use zip::write::SimpleFileOptions;
|
||||||
use zip::{CompressionMethod, ZipArchive, ZipWriter};
|
use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter};
|
||||||
|
|
||||||
const FILE_COUNT: usize = 15_000;
|
const FILE_COUNT: usize = 15_000;
|
||||||
const FILE_SIZE: usize = 1024;
|
const FILE_SIZE: usize = 1024;
|
||||||
|
|
||||||
fn generate_random_archive(count_files: usize, file_size: usize) -> Vec<u8> {
|
fn generate_random_archive(count_files: usize, file_size: usize) -> ZipResult<Vec<u8>> {
|
||||||
let data = Vec::new();
|
let data = Vec::new();
|
||||||
let mut writer = ZipWriter::new(Cursor::new(data));
|
let mut writer = ZipWriter::new(Cursor::new(data));
|
||||||
let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
|
let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
|
||||||
|
|
||||||
let bytes = vec![0u8; file_size];
|
let mut bytes = vec![0u8; file_size];
|
||||||
|
|
||||||
for i in 0..count_files {
|
for i in 0..count_files {
|
||||||
let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat");
|
let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat");
|
||||||
writer.start_file(name, options).unwrap();
|
writer.start_file(name, options)?;
|
||||||
writer.write_all(&bytes).unwrap();
|
getrandom(&mut bytes).map_err(io::Error::from)?;
|
||||||
|
writer.write_all(&bytes)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
writer.finish().unwrap().into_inner()
|
Ok(writer.finish()?.into_inner())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_metadata(bench: &mut Bencher) {
|
fn read_metadata(bench: &mut Bencher) {
|
||||||
let bytes = generate_random_archive(FILE_COUNT, FILE_SIZE);
|
let bytes = generate_random_archive(FILE_COUNT, FILE_SIZE).unwrap();
|
||||||
|
|
||||||
bench.iter(|| {
|
bench.iter(|| {
|
||||||
let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap();
|
let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap();
|
||||||
archive.len()
|
archive.len()
|
||||||
});
|
});
|
||||||
|
bench.bytes = bytes.len() as u64;
|
||||||
}
|
}
|
||||||
|
|
||||||
benchmark_group!(benches, read_metadata);
|
const COMMENT_SIZE: usize = 50_000;
|
||||||
|
|
||||||
|
fn generate_zip32_archive_with_random_comment(comment_length: usize) -> ZipResult<Vec<u8>> {
|
||||||
|
let data = Vec::new();
|
||||||
|
let mut writer = ZipWriter::new(Cursor::new(data));
|
||||||
|
let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
|
||||||
|
|
||||||
|
let mut bytes = vec![0u8; comment_length];
|
||||||
|
getrandom(&mut bytes).unwrap();
|
||||||
|
writer.set_raw_comment(bytes.into_boxed_slice());
|
||||||
|
|
||||||
|
writer.start_file("asdf.txt", options)?;
|
||||||
|
writer.write_all(b"asdf")?;
|
||||||
|
|
||||||
|
Ok(writer.finish()?.into_inner())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_archive_with_comment(bench: &mut Bencher) {
|
||||||
|
let bytes = generate_zip32_archive_with_random_comment(COMMENT_SIZE).unwrap();
|
||||||
|
|
||||||
|
bench.bench_n(1, |_| {
|
||||||
|
let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap();
|
||||||
|
let _ = archive.comment().len();
|
||||||
|
});
|
||||||
|
bench.bytes = bytes.len() as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
const COMMENT_SIZE_64: usize = 500_000;
|
||||||
|
|
||||||
|
fn generate_zip64_archive_with_random_comment(comment_length: usize) -> ZipResult<Vec<u8>> {
|
||||||
|
let data = Vec::new();
|
||||||
|
let mut writer = ZipWriter::new(Cursor::new(data));
|
||||||
|
let options = SimpleFileOptions::default()
|
||||||
|
.compression_method(CompressionMethod::Stored)
|
||||||
|
.large_file(true);
|
||||||
|
|
||||||
|
let mut bytes = vec![0u8; comment_length];
|
||||||
|
getrandom(&mut bytes).unwrap();
|
||||||
|
writer.set_raw_comment(bytes.into_boxed_slice());
|
||||||
|
|
||||||
|
writer.start_file("asdf.txt", options)?;
|
||||||
|
writer.write_all(b"asdf")?;
|
||||||
|
|
||||||
|
Ok(writer.finish()?.into_inner())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_zip64_archive_with_comment(bench: &mut Bencher) {
|
||||||
|
let bytes = generate_zip64_archive_with_random_comment(COMMENT_SIZE_64).unwrap();
|
||||||
|
|
||||||
|
bench.iter(|| {
|
||||||
|
let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap();
|
||||||
|
archive.comment().len()
|
||||||
|
});
|
||||||
|
bench.bytes = bytes.len() as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_stream_archive(bench: &mut Bencher) {
|
||||||
|
const STREAM_ZIP_ENTRIES: usize = 5;
|
||||||
|
const STREAM_FILE_SIZE: usize = 5;
|
||||||
|
|
||||||
|
let bytes = generate_random_archive(STREAM_ZIP_ENTRIES, STREAM_FILE_SIZE).unwrap();
|
||||||
|
|
||||||
|
/* Write to a temporary file path to incur some filesystem overhead from repeated reads */
|
||||||
|
let dir = TempDir::new("stream-bench").unwrap();
|
||||||
|
let out = dir.path().join("bench-out.zip");
|
||||||
|
fs::write(&out, &bytes).unwrap();
|
||||||
|
|
||||||
|
bench.iter(|| {
|
||||||
|
let mut f = fs::File::open(&out).unwrap();
|
||||||
|
while zip::read::read_zipfile_from_stream(&mut f)
|
||||||
|
.unwrap()
|
||||||
|
.is_some()
|
||||||
|
{}
|
||||||
|
});
|
||||||
|
bench.bytes = bytes.len() as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
benchmark_group!(
|
||||||
|
benches,
|
||||||
|
read_metadata,
|
||||||
|
parse_archive_with_comment,
|
||||||
|
parse_zip64_archive_with_comment,
|
||||||
|
parse_stream_archive,
|
||||||
|
);
|
||||||
benchmark_main!(benches);
|
benchmark_main!(benches);
|
||||||
|
|
|
@ -90,13 +90,7 @@ impl CompressionMethod {
|
||||||
pub const AES: Self = CompressionMethod::Unsupported(99);
|
pub const AES: Self = CompressionMethod::Unsupported(99);
|
||||||
}
|
}
|
||||||
impl CompressionMethod {
|
impl CompressionMethod {
|
||||||
/// Converts an u16 to its corresponding CompressionMethod
|
pub(crate) const fn parse_from_u16(val: u16) -> Self {
|
||||||
#[deprecated(
|
|
||||||
since = "0.5.7",
|
|
||||||
note = "use a constant to construct a compression method"
|
|
||||||
)]
|
|
||||||
pub const fn from_u16(val: u16) -> CompressionMethod {
|
|
||||||
#[allow(deprecated)]
|
|
||||||
match val {
|
match val {
|
||||||
0 => CompressionMethod::Stored,
|
0 => CompressionMethod::Stored,
|
||||||
#[cfg(feature = "_deflate-any")]
|
#[cfg(feature = "_deflate-any")]
|
||||||
|
@ -111,18 +105,21 @@ impl CompressionMethod {
|
||||||
93 => CompressionMethod::Zstd,
|
93 => CompressionMethod::Zstd,
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
99 => CompressionMethod::Aes,
|
99 => CompressionMethod::Aes,
|
||||||
|
#[allow(deprecated)]
|
||||||
v => CompressionMethod::Unsupported(v),
|
v => CompressionMethod::Unsupported(v),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a CompressionMethod to a u16
|
/// Converts a u16 to its corresponding CompressionMethod
|
||||||
#[deprecated(
|
#[deprecated(
|
||||||
since = "0.5.7",
|
since = "0.5.7",
|
||||||
note = "to match on other compression methods, use a constant"
|
note = "use a constant to construct a compression method"
|
||||||
)]
|
)]
|
||||||
pub const fn to_u16(self) -> u16 {
|
pub const fn from_u16(val: u16) -> CompressionMethod {
|
||||||
#[allow(deprecated)]
|
Self::parse_from_u16(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) const fn serialize_to_u16(self) -> u16 {
|
||||||
match self {
|
match self {
|
||||||
CompressionMethod::Stored => 0,
|
CompressionMethod::Stored => 0,
|
||||||
#[cfg(feature = "_deflate-any")]
|
#[cfg(feature = "_deflate-any")]
|
||||||
|
@ -137,10 +134,19 @@ impl CompressionMethod {
|
||||||
CompressionMethod::Zstd => 93,
|
CompressionMethod::Zstd => 93,
|
||||||
#[cfg(feature = "lzma")]
|
#[cfg(feature = "lzma")]
|
||||||
CompressionMethod::Lzma => 14,
|
CompressionMethod::Lzma => 14,
|
||||||
|
#[allow(deprecated)]
|
||||||
CompressionMethod::Unsupported(v) => v,
|
CompressionMethod::Unsupported(v) => v,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a CompressionMethod to a u16
|
||||||
|
#[deprecated(
|
||||||
|
since = "0.5.7",
|
||||||
|
note = "to match on other compression methods, use a constant"
|
||||||
|
)]
|
||||||
|
pub const fn to_u16(self) -> u16 {
|
||||||
|
self.serialize_to_u16()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CompressionMethod {
|
impl Default for CompressionMethod {
|
||||||
|
@ -180,10 +186,8 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn from_eq_to() {
|
fn from_eq_to() {
|
||||||
for v in 0..(u16::MAX as u32 + 1) {
|
for v in 0..(u16::MAX as u32 + 1) {
|
||||||
#[allow(deprecated)]
|
let from = CompressionMethod::parse_from_u16(v as u16);
|
||||||
let from = CompressionMethod::from_u16(v as u16);
|
let to = from.serialize_to_u16() as u32;
|
||||||
#[allow(deprecated)]
|
|
||||||
let to = from.to_u16() as u32;
|
|
||||||
assert_eq!(v, to);
|
assert_eq!(v, to);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,12 +195,9 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn to_eq_from() {
|
fn to_eq_from() {
|
||||||
fn check_match(method: CompressionMethod) {
|
fn check_match(method: CompressionMethod) {
|
||||||
#[allow(deprecated)]
|
let to = method.serialize_to_u16();
|
||||||
let to = method.to_u16();
|
let from = CompressionMethod::parse_from_u16(to);
|
||||||
#[allow(deprecated)]
|
let back = from.serialize_to_u16();
|
||||||
let from = CompressionMethod::from_u16(to);
|
|
||||||
#[allow(deprecated)]
|
|
||||||
let back = from.to_u16();
|
|
||||||
assert_eq!(to, back);
|
assert_eq!(to, back);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
304
src/read.rs
304
src/read.rs
|
@ -8,14 +8,18 @@ use crate::crc32::Crc32Reader;
|
||||||
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
|
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
|
||||||
use crate::read::zip_archive::Shared;
|
use crate::read::zip_archive::Shared;
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use crate::spec;
|
use crate::spec::{self, Block};
|
||||||
use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
|
use crate::types::{
|
||||||
|
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
|
||||||
|
ZipLocalEntryBlock,
|
||||||
|
};
|
||||||
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
|
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::fs::create_dir_all;
|
use std::fs::create_dir_all;
|
||||||
use std::io::{self, copy, prelude::*, sink};
|
use std::io::{self, copy, prelude::*, sink};
|
||||||
|
use std::mem;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{Arc, OnceLock};
|
use std::sync::{Arc, OnceLock};
|
||||||
|
@ -216,24 +220,35 @@ pub(crate) fn find_content<'a>(
|
||||||
data: &ZipFileData,
|
data: &ZipFileData,
|
||||||
reader: &'a mut (impl Read + Seek),
|
reader: &'a mut (impl Read + Seek),
|
||||||
) -> ZipResult<io::Take<&'a mut dyn Read>> {
|
) -> ZipResult<io::Take<&'a mut dyn Read>> {
|
||||||
// Parse local header
|
// TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result!
|
||||||
reader.seek(io::SeekFrom::Start(data.header_start))?;
|
|
||||||
let signature = reader.read_u32_le()?;
|
|
||||||
if signature != spec::LOCAL_FILE_HEADER_SIGNATURE {
|
|
||||||
return Err(ZipError::InvalidArchive("Invalid local file header"));
|
|
||||||
}
|
|
||||||
let data_start = match data.data_start.get() {
|
let data_start = match data.data_start.get() {
|
||||||
|
Some(data_start) => *data_start,
|
||||||
None => {
|
None => {
|
||||||
reader.seek(io::SeekFrom::Current(22))?;
|
// Go to start of data.
|
||||||
let file_name_length = reader.read_u16_le()? as u64;
|
reader.seek(io::SeekFrom::Start(data.header_start))?;
|
||||||
let extra_field_length = reader.read_u16_le()? as u64;
|
|
||||||
let magic_and_header = 4 + 22 + 2 + 2;
|
// Parse static-sized fields and check the magic value.
|
||||||
let data_start =
|
let block = ZipLocalEntryBlock::parse(reader)?;
|
||||||
data.header_start + magic_and_header + file_name_length + extra_field_length;
|
|
||||||
data.data_start.get_or_init(|| data_start);
|
// Calculate the end of the local header from the fields we just parsed.
|
||||||
|
let variable_fields_len =
|
||||||
|
// Each of these fields must be converted to u64 before adding, as the result may
|
||||||
|
// easily overflow a u16.
|
||||||
|
block.file_name_length as u64 + block.extra_field_length as u64;
|
||||||
|
let data_start = data.header_start
|
||||||
|
+ mem::size_of::<ZipLocalEntryBlock>() as u64
|
||||||
|
+ variable_fields_len;
|
||||||
|
// Set the value so we don't have to read it again.
|
||||||
|
match data.data_start.set(data_start) {
|
||||||
|
Ok(()) => (),
|
||||||
|
// If the value was already set in the meantime, ensure it matches (this is probably
|
||||||
|
// unnecessary).
|
||||||
|
Err(_) => {
|
||||||
|
assert_eq!(*data.data_start.get().unwrap(), data_start);
|
||||||
|
}
|
||||||
|
}
|
||||||
data_start
|
data_start
|
||||||
}
|
}
|
||||||
Some(start) => *start,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
reader.seek(io::SeekFrom::Start(data_start))?;
|
reader.seek(io::SeekFrom::Start(data_start))?;
|
||||||
|
@ -349,6 +364,7 @@ pub(crate) fn make_reader(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub(crate) struct CentralDirectoryInfo {
|
pub(crate) struct CentralDirectoryInfo {
|
||||||
pub(crate) archive_offset: u64,
|
pub(crate) archive_offset: u64,
|
||||||
pub(crate) directory_start: u64,
|
pub(crate) directory_start: u64,
|
||||||
|
@ -457,7 +473,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip32(
|
fn get_directory_info_zip32(
|
||||||
footer: &spec::CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
) -> ZipResult<CentralDirectoryInfo> {
|
) -> ZipResult<CentralDirectoryInfo> {
|
||||||
// Some zip files have data prepended to them, resulting in the
|
// Some zip files have data prepended to them, resulting in the
|
||||||
|
@ -482,15 +498,29 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const fn zip64_cde_len() -> usize {
|
||||||
|
mem::size_of::<spec::Zip64CentralDirectoryEnd>()
|
||||||
|
+ mem::size_of::<spec::Zip64CentralDirectoryEndLocator>()
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) {
|
||||||
|
if a > b {
|
||||||
|
(b, a)
|
||||||
|
} else {
|
||||||
|
(a, b)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip64(
|
fn get_directory_info_zip64(
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
footer: &spec::CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
|
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
|
||||||
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
||||||
// have its signature 20 bytes in front of the standard footer. The
|
// have its signature 20 bytes in front of the standard footer. The
|
||||||
// standard footer, in turn, is 22+N bytes large, where N is the
|
// standard footer, in turn, is 22+N bytes large, where N is the
|
||||||
// comment length. Therefore:
|
// comment length. Therefore:
|
||||||
|
/* TODO: compute this from constant sizes and offsets! */
|
||||||
reader.seek(io::SeekFrom::End(
|
reader.seek(io::SeekFrom::End(
|
||||||
-(20 + 22 + footer.zip_file_comment.len() as i64),
|
-(20 + 22 + footer.zip_file_comment.len() as i64),
|
||||||
))?;
|
))?;
|
||||||
|
@ -501,56 +531,53 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
// don't know how to precisely relate that location to our current
|
// don't know how to precisely relate that location to our current
|
||||||
// actual offset in the file, since there may be junk at its
|
// actual offset in the file, since there may be junk at its
|
||||||
// beginning. Therefore we need to perform another search, as in
|
// beginning. Therefore we need to perform another search, as in
|
||||||
// read::CentralDirectoryEnd::find_and_parse, except now we search
|
// read::Zip32CentralDirectoryEnd::find_and_parse, except now we search
|
||||||
// forward. There may be multiple results because of Zip64 central-directory signatures in
|
// forward. There may be multiple results because of Zip64 central-directory signatures in
|
||||||
// ZIP comment data.
|
// ZIP comment data.
|
||||||
|
|
||||||
let mut results = Vec::new();
|
|
||||||
|
|
||||||
let search_upper_bound = cde_start_pos
|
let search_upper_bound = cde_start_pos
|
||||||
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
|
.checked_sub(Self::zip64_cde_len() as u64)
|
||||||
.ok_or(ZipError::InvalidArchive(
|
.ok_or(ZipError::InvalidArchive(
|
||||||
"File cannot contain ZIP64 central directory end",
|
"File cannot contain ZIP64 central directory end",
|
||||||
))?;
|
))?;
|
||||||
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(
|
|
||||||
reader,
|
let (lower, upper) = Self::order_lower_upper_bounds(
|
||||||
locator64.end_of_central_directory_offset,
|
locator64.end_of_central_directory_offset,
|
||||||
search_upper_bound,
|
search_upper_bound,
|
||||||
)?;
|
);
|
||||||
search_results.into_iter().for_each(|(footer64, archive_offset)| {
|
|
||||||
results.push({
|
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
|
||||||
let directory_start_result = footer64
|
let results: Vec<ZipResult<CentralDirectoryInfo>> =
|
||||||
|
search_results.into_iter().map(|(footer64, archive_offset)| {
|
||||||
|
let directory_start = footer64
|
||||||
.central_directory_offset
|
.central_directory_offset
|
||||||
.checked_add(archive_offset)
|
.checked_add(archive_offset)
|
||||||
.ok_or(ZipError::InvalidArchive(
|
.ok_or(ZipError::InvalidArchive(
|
||||||
"Invalid central directory size or offset",
|
"Invalid central directory size or offset",
|
||||||
));
|
))?;
|
||||||
directory_start_result.and_then(|directory_start| {
|
if directory_start > search_upper_bound {
|
||||||
if directory_start > search_upper_bound {
|
Err(ZipError::InvalidArchive(
|
||||||
Err(ZipError::InvalidArchive(
|
"Invalid central directory size or offset",
|
||||||
"Invalid central directory size or offset",
|
))
|
||||||
))
|
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
|
||||||
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
|
Err(ZipError::InvalidArchive(
|
||||||
Err(ZipError::InvalidArchive(
|
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
||||||
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
))
|
||||||
))
|
} else if footer64.version_needed_to_extract > footer64.version_made_by {
|
||||||
} else if footer64.version_needed_to_extract > footer64.version_made_by {
|
Err(ZipError::InvalidArchive(
|
||||||
Err(ZipError::InvalidArchive(
|
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
||||||
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
version that wrote it",
|
||||||
version that wrote it",
|
))
|
||||||
))
|
} else {
|
||||||
} else {
|
Ok(CentralDirectoryInfo {
|
||||||
Ok(CentralDirectoryInfo {
|
archive_offset,
|
||||||
archive_offset,
|
directory_start,
|
||||||
directory_start,
|
number_of_files: footer64.number_of_files as usize,
|
||||||
number_of_files: footer64.number_of_files as usize,
|
disk_number: footer64.disk_number,
|
||||||
disk_number: footer64.disk_number,
|
disk_with_central_directory: footer64.disk_with_central_directory,
|
||||||
disk_with_central_directory: footer64.disk_with_central_directory,
|
})
|
||||||
})
|
}
|
||||||
}
|
}).collect();
|
||||||
})
|
|
||||||
});
|
|
||||||
});
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -558,7 +585,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
/// separate function to ease the control flow design.
|
/// separate function to ease the control flow design.
|
||||||
pub(crate) fn get_metadata(
|
pub(crate) fn get_metadata(
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
footer: &spec::CentralDirectoryEnd,
|
footer: &spec::Zip32CentralDirectoryEnd,
|
||||||
cde_start_pos: u64,
|
cde_start_pos: u64,
|
||||||
) -> ZipResult<Shared> {
|
) -> ZipResult<Shared> {
|
||||||
// Check if file has a zip64 footer
|
// Check if file has a zip64 footer
|
||||||
|
@ -689,7 +716,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
///
|
///
|
||||||
/// This uses the central directory record of the ZIP file, and ignores local file headers
|
/// This uses the central directory record of the ZIP file, and ignores local file headers
|
||||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||||
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
|
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
|
||||||
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?;
|
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?;
|
||||||
Ok(ZipArchive {
|
Ok(ZipArchive {
|
||||||
reader,
|
reader,
|
||||||
|
@ -1001,12 +1028,15 @@ pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
|
||||||
let central_header_start = reader.stream_position()?;
|
let central_header_start = reader.stream_position()?;
|
||||||
|
|
||||||
// Parse central header
|
// Parse central header
|
||||||
let signature = reader.read_u32_le()?;
|
let block = ZipCentralEntryBlock::parse(reader)?;
|
||||||
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
|
central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block)
|
||||||
Err(ZipError::InvalidArchive("Invalid Central Directory header"))
|
}
|
||||||
} else {
|
|
||||||
central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
|
#[inline]
|
||||||
}
|
fn read_variable_length_byte_field<R: Read>(reader: &mut R, len: usize) -> io::Result<Box<[u8]>> {
|
||||||
|
let mut data = vec![0; len].into_boxed_slice();
|
||||||
|
reader.read_exact(&mut data)?;
|
||||||
|
Ok(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a central directory entry to collect the information for the file.
|
/// Parse a central directory entry to collect the information for the file.
|
||||||
|
@ -1014,63 +1044,65 @@ fn central_header_to_zip_file_inner<R: Read>(
|
||||||
reader: &mut R,
|
reader: &mut R,
|
||||||
archive_offset: u64,
|
archive_offset: u64,
|
||||||
central_header_start: u64,
|
central_header_start: u64,
|
||||||
|
block: ZipCentralEntryBlock,
|
||||||
) -> ZipResult<ZipFileData> {
|
) -> ZipResult<ZipFileData> {
|
||||||
let version_made_by = reader.read_u16_le()?;
|
let ZipCentralEntryBlock {
|
||||||
let _version_to_extract = reader.read_u16_le()?;
|
// magic,
|
||||||
let flags = reader.read_u16_le()?;
|
version_made_by,
|
||||||
|
// version_to_extract,
|
||||||
|
flags,
|
||||||
|
compression_method,
|
||||||
|
last_mod_time,
|
||||||
|
last_mod_date,
|
||||||
|
crc32,
|
||||||
|
compressed_size,
|
||||||
|
uncompressed_size,
|
||||||
|
file_name_length,
|
||||||
|
extra_field_length,
|
||||||
|
file_comment_length,
|
||||||
|
// disk_number,
|
||||||
|
// internal_file_attributes,
|
||||||
|
external_file_attributes,
|
||||||
|
offset,
|
||||||
|
..
|
||||||
|
} = block;
|
||||||
|
|
||||||
let encrypted = flags & 1 == 1;
|
let encrypted = flags & 1 == 1;
|
||||||
let is_utf8 = flags & (1 << 11) != 0;
|
let is_utf8 = flags & (1 << 11) != 0;
|
||||||
let using_data_descriptor = flags & (1 << 3) != 0;
|
let using_data_descriptor = flags & (1 << 3) != 0;
|
||||||
let compression_method = reader.read_u16_le()?;
|
|
||||||
let last_mod_time = reader.read_u16_le()?;
|
let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?;
|
||||||
let last_mod_date = reader.read_u16_le()?;
|
let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?;
|
||||||
let crc32 = reader.read_u32_le()?;
|
let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?;
|
||||||
let compressed_size = reader.read_u32_le()?;
|
|
||||||
let uncompressed_size = reader.read_u32_le()?;
|
|
||||||
let file_name_length = reader.read_u16_le()? as usize;
|
|
||||||
let extra_field_length = reader.read_u16_le()? as usize;
|
|
||||||
let file_comment_length = reader.read_u16_le()? as usize;
|
|
||||||
let _disk_number = reader.read_u16_le()?;
|
|
||||||
let _internal_file_attributes = reader.read_u16_le()?;
|
|
||||||
let external_file_attributes = reader.read_u32_le()?;
|
|
||||||
let offset = reader.read_u32_le()? as u64;
|
|
||||||
let mut file_name_raw = vec![0; file_name_length];
|
|
||||||
reader.read_exact(&mut file_name_raw)?;
|
|
||||||
let mut extra_field = vec![0; extra_field_length];
|
|
||||||
reader.read_exact(&mut extra_field)?;
|
|
||||||
let mut file_comment_raw = vec![0; file_comment_length];
|
|
||||||
reader.read_exact(&mut file_comment_raw)?;
|
|
||||||
|
|
||||||
let file_name: Box<str> = match is_utf8 {
|
let file_name: Box<str> = match is_utf8 {
|
||||||
true => String::from_utf8_lossy(&file_name_raw).into(),
|
true => String::from_utf8_lossy(&file_name_raw).into(),
|
||||||
false => file_name_raw.from_cp437().into(),
|
false => file_name_raw.clone().from_cp437(),
|
||||||
};
|
};
|
||||||
let file_comment: Box<str> = match is_utf8 {
|
let file_comment: Box<str> = match is_utf8 {
|
||||||
true => String::from_utf8_lossy(&file_comment_raw).into(),
|
true => String::from_utf8_lossy(&file_comment_raw).into(),
|
||||||
false => file_comment_raw.from_cp437().into(),
|
false => file_comment_raw.from_cp437(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Construct the result
|
// Construct the result
|
||||||
let mut result = ZipFileData {
|
let mut result = ZipFileData {
|
||||||
system: System::from((version_made_by >> 8) as u8),
|
system: System::from((version_made_by >> 8) as u8),
|
||||||
|
/* NB: this strips the top 8 bits! */
|
||||||
version_made_by: version_made_by as u8,
|
version_made_by: version_made_by as u8,
|
||||||
encrypted,
|
encrypted,
|
||||||
using_data_descriptor,
|
using_data_descriptor,
|
||||||
compression_method: {
|
compression_method: CompressionMethod::parse_from_u16(compression_method),
|
||||||
#[allow(deprecated)]
|
|
||||||
CompressionMethod::from_u16(compression_method)
|
|
||||||
},
|
|
||||||
compression_level: None,
|
compression_level: None,
|
||||||
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
||||||
crc32,
|
crc32,
|
||||||
compressed_size: compressed_size as u64,
|
compressed_size: compressed_size.into(),
|
||||||
uncompressed_size: uncompressed_size as u64,
|
uncompressed_size: uncompressed_size.into(),
|
||||||
file_name,
|
file_name,
|
||||||
file_name_raw: file_name_raw.into(),
|
file_name_raw,
|
||||||
extra_field: Some(Arc::new(extra_field)),
|
extra_field: Some(Arc::new(extra_field.to_vec())),
|
||||||
central_extra_field: None,
|
central_extra_field: None,
|
||||||
file_comment,
|
file_comment,
|
||||||
header_start: offset,
|
header_start: offset.into(),
|
||||||
extra_data_start: None,
|
extra_data_start: None,
|
||||||
central_header_start,
|
central_header_start,
|
||||||
data_start: OnceLock::new(),
|
data_start: OnceLock::new(),
|
||||||
|
@ -1108,6 +1140,7 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
|
||||||
};
|
};
|
||||||
let mut reader = io::Cursor::new(extra_field.as_ref());
|
let mut reader = io::Cursor::new(extra_field.as_ref());
|
||||||
|
|
||||||
|
/* TODO: codify this structure into Zip64ExtraFieldBlock fields! */
|
||||||
while (reader.position() as usize) < extra_field.len() {
|
while (reader.position() as usize) < extra_field.len() {
|
||||||
let kind = reader.read_u16_le()?;
|
let kind = reader.read_u16_le()?;
|
||||||
let len = reader.read_u16_le()?;
|
let len = reader.read_u16_le()?;
|
||||||
|
@ -1142,8 +1175,7 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
|
||||||
let mut out = [0u8];
|
let mut out = [0u8];
|
||||||
reader.read_exact(&mut out)?;
|
reader.read_exact(&mut out)?;
|
||||||
let aes_mode = out[0];
|
let aes_mode = out[0];
|
||||||
#[allow(deprecated)]
|
let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?);
|
||||||
let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?);
|
|
||||||
|
|
||||||
if vendor_id != 0x4541 {
|
if vendor_id != 0x4541 {
|
||||||
return Err(ZipError::InvalidArchive("Invalid AES vendor"));
|
return Err(ZipError::InvalidArchive("Invalid AES vendor"));
|
||||||
|
@ -1403,75 +1435,25 @@ impl<'a> Drop for ZipFile<'a> {
|
||||||
/// * `data_start`: set to 0
|
/// * `data_start`: set to 0
|
||||||
/// * `external_attributes`: `unix_mode()`: will return None
|
/// * `external_attributes`: `unix_mode()`: will return None
|
||||||
pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Option<ZipFile<'_>>> {
|
pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Option<ZipFile<'_>>> {
|
||||||
let signature = reader.read_u32_le()?;
|
// We can't use the typical ::parse() method, as we follow separate code paths depending on the
|
||||||
|
// "magic" value (since the magic value will be from the central directory header if we've
|
||||||
|
// finished iterating over all the actual files).
|
||||||
|
/* TODO: smallvec? */
|
||||||
|
let mut block = [0u8; mem::size_of::<ZipLocalEntryBlock>()];
|
||||||
|
reader.read_exact(&mut block)?;
|
||||||
|
let block: Box<[u8]> = block.into();
|
||||||
|
|
||||||
|
let signature = spec::Magic::from_first_le_bytes(&block);
|
||||||
|
|
||||||
match signature {
|
match signature {
|
||||||
spec::LOCAL_FILE_HEADER_SIGNATURE => (),
|
spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (),
|
||||||
spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
|
spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
|
||||||
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
|
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
|
||||||
}
|
}
|
||||||
|
|
||||||
let version_made_by = reader.read_u16_le()?;
|
let block = ZipLocalEntryBlock::interpret(block)?;
|
||||||
let flags = reader.read_u16_le()?;
|
|
||||||
if flags & 1 == 1 {
|
|
||||||
return unsupported_zip_error("Encrypted files are not supported");
|
|
||||||
}
|
|
||||||
if flags & (1 << 3) == 1 << 3 {
|
|
||||||
// using_data_descriptor flag is set
|
|
||||||
return unsupported_zip_error("The file length is not available in the local header");
|
|
||||||
}
|
|
||||||
let is_utf8 = flags & (1 << 11) != 0;
|
|
||||||
#[allow(deprecated)]
|
|
||||||
let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?);
|
|
||||||
let last_mod_time = reader.read_u16_le()?;
|
|
||||||
let last_mod_date = reader.read_u16_le()?;
|
|
||||||
let crc32 = reader.read_u32_le()?;
|
|
||||||
let compressed_size = reader.read_u32_le()?;
|
|
||||||
let uncompressed_size = reader.read_u32_le()?;
|
|
||||||
let file_name_length = reader.read_u16_le()? as usize;
|
|
||||||
let extra_field_length = reader.read_u16_le()? as usize;
|
|
||||||
|
|
||||||
let mut file_name_raw = vec![0; file_name_length];
|
let mut result = ZipFileData::from_local_block(block, reader)?;
|
||||||
reader.read_exact(&mut file_name_raw)?;
|
|
||||||
let mut extra_field = vec![0; extra_field_length];
|
|
||||||
reader.read_exact(&mut extra_field)?;
|
|
||||||
|
|
||||||
let file_name: Box<str> = match is_utf8 {
|
|
||||||
true => String::from_utf8_lossy(&file_name_raw).into(),
|
|
||||||
false => file_name_raw.clone().from_cp437().into(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut result = ZipFileData {
|
|
||||||
system: System::from((version_made_by >> 8) as u8),
|
|
||||||
version_made_by: version_made_by as u8,
|
|
||||||
encrypted: flags & 1 == 1,
|
|
||||||
using_data_descriptor: false,
|
|
||||||
compression_method,
|
|
||||||
compression_level: None,
|
|
||||||
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
|
||||||
crc32,
|
|
||||||
compressed_size: compressed_size as u64,
|
|
||||||
uncompressed_size: uncompressed_size as u64,
|
|
||||||
file_name,
|
|
||||||
file_name_raw: file_name_raw.into(),
|
|
||||||
extra_field: Some(Arc::new(extra_field)),
|
|
||||||
central_extra_field: None,
|
|
||||||
file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory
|
|
||||||
// header_start and data start are not available, but also don't matter, since seeking is
|
|
||||||
// not available.
|
|
||||||
header_start: 0,
|
|
||||||
extra_data_start: None,
|
|
||||||
data_start: OnceLock::new(),
|
|
||||||
central_header_start: 0,
|
|
||||||
// The external_attributes field is only available in the central directory.
|
|
||||||
// We set this to zero, which should be valid as the docs state 'If input came
|
|
||||||
// from standard input, this field is set to zero.'
|
|
||||||
external_attributes: 0,
|
|
||||||
large_file: false,
|
|
||||||
aes_mode: None,
|
|
||||||
aes_extra_data_start: 0,
|
|
||||||
extra_fields: Vec::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
match parse_extra_field(&mut result) {
|
match parse_extra_field(&mut result) {
|
||||||
Ok(..) | Err(ZipError::Io(..)) => {}
|
Ok(..) | Err(ZipError::Io(..)) => {}
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
use crate::unstable::LittleEndianReadExt;
|
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
|
central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError,
|
||||||
ZipFileData, ZipResult,
|
ZipFile, ZipFileData, ZipResult,
|
||||||
};
|
};
|
||||||
|
use crate::spec::Block;
|
||||||
|
|
||||||
/// Stream decoder for zip.
|
/// Stream decoder for zip.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -20,31 +20,31 @@ impl<R> ZipStreamReader<R> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<R: Read> ZipStreamReader<R> {
|
impl<R: Read> ZipStreamReader<R> {
|
||||||
fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
|
fn parse_central_directory(&mut self) -> ZipResult<ZipStreamFileMetadata> {
|
||||||
// Give archive_offset and central_header_start dummy value 0, since
|
// Give archive_offset and central_header_start dummy value 0, since
|
||||||
// they are not used in the output.
|
// they are not used in the output.
|
||||||
let archive_offset = 0;
|
let archive_offset = 0;
|
||||||
let central_header_start = 0;
|
let central_header_start = 0;
|
||||||
|
|
||||||
// Parse central header
|
// Parse central header
|
||||||
let signature = self.0.read_u32_le()?;
|
let block = ZipCentralEntryBlock::parse(&mut self.0)?;
|
||||||
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
|
let file = central_header_to_zip_file_inner(
|
||||||
Ok(None)
|
&mut self.0,
|
||||||
} else {
|
archive_offset,
|
||||||
central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
|
central_header_start,
|
||||||
.map(ZipStreamFileMetadata)
|
block,
|
||||||
.map(Some)
|
)?;
|
||||||
}
|
Ok(ZipStreamFileMetadata(file))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iteraate over the stream and extract all file and their
|
/// Iterate over the stream and extract all file and their
|
||||||
/// metadata.
|
/// metadata.
|
||||||
pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
|
pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
|
||||||
while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
|
while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
|
||||||
visitor.visit_file(&mut file)?;
|
visitor.visit_file(&mut file)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(metadata) = self.parse_central_directory()? {
|
while let Ok(metadata) = self.parse_central_directory() {
|
||||||
visitor.visit_additional_metadata(&metadata)?;
|
visitor.visit_additional_metadata(&metadata)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
708
src/spec.rs
708
src/spec.rs
|
@ -1,21 +1,241 @@
|
||||||
|
#![macro_use]
|
||||||
|
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt};
|
use memchr::memmem::FinderRev;
|
||||||
use core::mem::size_of_val;
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
|
use std::mem;
|
||||||
use std::path::{Component, Path, MAIN_SEPARATOR};
|
use std::path::{Component, Path, MAIN_SEPARATOR};
|
||||||
|
|
||||||
pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50;
|
/// "Magic" header values used in the zip spec to locate metadata records.
|
||||||
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50;
|
///
|
||||||
pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50;
|
/// These values currently always take up a fixed four bytes, so we can parse and wrap them in this
|
||||||
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06064b50;
|
/// struct to enforce some small amount of type safety.
|
||||||
pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: u32 = 0x07064b50;
|
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub(crate) struct Magic(u32);
|
||||||
|
|
||||||
|
impl Magic {
|
||||||
|
pub const fn literal(x: u32) -> Self {
|
||||||
|
Self(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub const fn from_le_bytes(bytes: [u8; 4]) -> Self {
|
||||||
|
Self(u32::from_le_bytes(bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn from_first_le_bytes(data: &[u8]) -> Self {
|
||||||
|
let first_bytes: [u8; 4] = data[..mem::size_of::<Self>()].try_into().unwrap();
|
||||||
|
Self::from_le_bytes(first_bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub const fn to_le_bytes(self) -> [u8; 4] {
|
||||||
|
self.0.to_le_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn from_le(self) -> Self {
|
||||||
|
Self(u32::from_le(self.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn to_le(self) -> Self {
|
||||||
|
Self(u32::to_le(self.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const LOCAL_FILE_HEADER_SIGNATURE: Self = Self::literal(0x04034b50);
|
||||||
|
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Self = Self::literal(0x02014b50);
|
||||||
|
pub const CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06054b50);
|
||||||
|
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06064b50);
|
||||||
|
pub const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Self = Self::literal(0x07064b50);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Similar to [`Magic`], but used for extra field tags as per section 4.5.3 of APPNOTE.TXT.
|
||||||
|
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
|
||||||
|
#[repr(transparent)]
|
||||||
|
pub(crate) struct ExtraFieldMagic(u16);
|
||||||
|
|
||||||
|
/* TODO: maybe try to use this for parsing extra fields as well as writing them? */
|
||||||
|
#[allow(dead_code)]
|
||||||
|
impl ExtraFieldMagic {
|
||||||
|
pub const fn literal(x: u16) -> Self {
|
||||||
|
Self(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub const fn from_le_bytes(bytes: [u8; 2]) -> Self {
|
||||||
|
Self(u16::from_le_bytes(bytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
pub const fn to_le_bytes(self) -> [u8; 2] {
|
||||||
|
self.0.to_le_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn from_le(self) -> Self {
|
||||||
|
Self(u16::from_le(self.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn to_le(self) -> Self {
|
||||||
|
Self(u16::to_le(self.0))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const ZIP64_EXTRA_FIELD_TAG: Self = Self::literal(0x0001);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This should be equal to `0xFFFFFFFF`.
|
||||||
pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64;
|
pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64;
|
||||||
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;
|
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;
|
||||||
|
|
||||||
pub struct CentralDirectoryEnd {
|
pub(crate) trait Block: Sized + Copy {
|
||||||
|
const MAGIC: Magic;
|
||||||
|
|
||||||
|
fn magic(self) -> Magic;
|
||||||
|
|
||||||
|
const ERROR: ZipError;
|
||||||
|
|
||||||
|
/* TODO: use smallvec? */
|
||||||
|
fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
|
||||||
|
let block = Self::deserialize(&bytes).from_le();
|
||||||
|
if block.magic() != Self::MAGIC {
|
||||||
|
return Err(Self::ERROR);
|
||||||
|
}
|
||||||
|
Ok(block)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn deserialize(block: &[u8]) -> Self {
|
||||||
|
assert_eq!(block.len(), mem::size_of::<Self>());
|
||||||
|
let block_ptr: *const Self = block.as_ptr().cast();
|
||||||
|
unsafe { block_ptr.read() }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
fn from_le(self) -> Self;
|
||||||
|
|
||||||
|
fn parse<T: Read>(reader: &mut T) -> ZipResult<Self> {
|
||||||
|
let mut block = vec![0u8; mem::size_of::<Self>()].into_boxed_slice();
|
||||||
|
reader.read_exact(&mut block)?;
|
||||||
|
Self::interpret(block)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode(self) -> Box<[u8]> {
|
||||||
|
self.to_le().serialize()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_le(self) -> Self;
|
||||||
|
|
||||||
|
/* TODO: use Box<[u8; mem::size_of::<Self>()]> when generic_const_exprs are stabilized! */
|
||||||
|
fn serialize(self) -> Box<[u8]> {
|
||||||
|
/* TODO: use Box::new_zeroed() when stabilized! */
|
||||||
|
/* TODO: also consider using smallvec! */
|
||||||
|
let mut out_block = vec![0u8; mem::size_of::<Self>()].into_boxed_slice();
|
||||||
|
let out_ptr: *mut Self = out_block.as_mut_ptr().cast();
|
||||||
|
unsafe {
|
||||||
|
out_ptr.write(self);
|
||||||
|
}
|
||||||
|
out_block
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
|
let block = self.encode();
|
||||||
|
writer.write_all(&block)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert all the fields of a struct *from* little-endian representations.
|
||||||
|
macro_rules! from_le {
|
||||||
|
($obj:ident, $field:ident, $type:ty) => {
|
||||||
|
$obj.$field = <$type>::from_le($obj.$field);
|
||||||
|
};
|
||||||
|
($obj:ident, [($field:ident, $type:ty) $(,)?]) => {
|
||||||
|
from_le![$obj, $field, $type];
|
||||||
|
};
|
||||||
|
($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => {
|
||||||
|
from_le![$obj, $field, $type];
|
||||||
|
from_le!($obj, [$($rest),+]);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert all the fields of a struct *into* little-endian representations.
|
||||||
|
macro_rules! to_le {
|
||||||
|
($obj:ident, $field:ident, $type:ty) => {
|
||||||
|
$obj.$field = <$type>::to_le($obj.$field);
|
||||||
|
};
|
||||||
|
($obj:ident, [($field:ident, $type:ty) $(,)?]) => {
|
||||||
|
to_le![$obj, $field, $type];
|
||||||
|
};
|
||||||
|
($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => {
|
||||||
|
to_le![$obj, $field, $type];
|
||||||
|
to_le!($obj, [$($rest),+]);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/* TODO: derive macro to generate these fields? */
|
||||||
|
/// Implement `from_le()` and `to_le()`, providing the field specification to both macros
|
||||||
|
/// and methods.
|
||||||
|
macro_rules! to_and_from_le {
|
||||||
|
($($args:tt),+ $(,)?) => {
|
||||||
|
#[inline(always)]
|
||||||
|
fn from_le(mut self) -> Self {
|
||||||
|
from_le![self, [$($args),+]];
|
||||||
|
self
|
||||||
|
}
|
||||||
|
#[inline(always)]
|
||||||
|
fn to_le(mut self) -> Self {
|
||||||
|
to_le![self, [$($args),+]];
|
||||||
|
self
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(packed)]
|
||||||
|
pub(crate) struct Zip32CDEBlock {
|
||||||
|
magic: Magic,
|
||||||
|
pub disk_number: u16,
|
||||||
|
pub disk_with_central_directory: u16,
|
||||||
|
pub number_of_files_on_this_disk: u16,
|
||||||
|
pub number_of_files: u16,
|
||||||
|
pub central_directory_size: u32,
|
||||||
|
pub central_directory_offset: u32,
|
||||||
|
pub zip_file_comment_length: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Block for Zip32CDEBlock {
|
||||||
|
const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn magic(self) -> Magic {
|
||||||
|
self.magic
|
||||||
|
}
|
||||||
|
|
||||||
|
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
|
||||||
|
|
||||||
|
to_and_from_le![
|
||||||
|
(magic, Magic),
|
||||||
|
(disk_number, u16),
|
||||||
|
(disk_with_central_directory, u16),
|
||||||
|
(number_of_files_on_this_disk, u16),
|
||||||
|
(number_of_files, u16),
|
||||||
|
(central_directory_size, u32),
|
||||||
|
(central_directory_offset, u32),
|
||||||
|
(zip_file_comment_length, u16)
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(crate) struct Zip32CentralDirectoryEnd {
|
||||||
pub disk_number: u16,
|
pub disk_number: u16,
|
||||||
pub disk_with_central_directory: u16,
|
pub disk_with_central_directory: u16,
|
||||||
pub number_of_files_on_this_disk: u16,
|
pub number_of_files_on_this_disk: u16,
|
||||||
|
@ -25,23 +245,50 @@ pub struct CentralDirectoryEnd {
|
||||||
pub zip_file_comment: Box<[u8]>,
|
pub zip_file_comment: Box<[u8]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CentralDirectoryEnd {
|
impl Zip32CentralDirectoryEnd {
|
||||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<CentralDirectoryEnd> {
|
fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> {
|
||||||
let magic = reader.read_u32_le()?;
|
let Self {
|
||||||
if magic != CENTRAL_DIRECTORY_END_SIGNATURE {
|
disk_number,
|
||||||
return Err(ZipError::InvalidArchive("Invalid digital signature header"));
|
disk_with_central_directory,
|
||||||
}
|
number_of_files_on_this_disk,
|
||||||
let disk_number = reader.read_u16_le()?;
|
number_of_files,
|
||||||
let disk_with_central_directory = reader.read_u16_le()?;
|
central_directory_size,
|
||||||
let number_of_files_on_this_disk = reader.read_u16_le()?;
|
central_directory_offset,
|
||||||
let number_of_files = reader.read_u16_le()?;
|
zip_file_comment,
|
||||||
let central_directory_size = reader.read_u32_le()?;
|
} = self;
|
||||||
let central_directory_offset = reader.read_u32_le()?;
|
let block = Zip32CDEBlock {
|
||||||
let zip_file_comment_length = reader.read_u16_le()? as usize;
|
magic: Zip32CDEBlock::MAGIC,
|
||||||
let mut zip_file_comment = vec![0; zip_file_comment_length].into_boxed_slice();
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
number_of_files_on_this_disk,
|
||||||
|
number_of_files,
|
||||||
|
central_directory_size,
|
||||||
|
central_directory_offset,
|
||||||
|
zip_file_comment_length: zip_file_comment
|
||||||
|
.len()
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?,
|
||||||
|
};
|
||||||
|
Ok((block, zip_file_comment))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
|
||||||
|
let Zip32CDEBlock {
|
||||||
|
// magic,
|
||||||
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
number_of_files_on_this_disk,
|
||||||
|
number_of_files,
|
||||||
|
central_directory_size,
|
||||||
|
central_directory_offset,
|
||||||
|
zip_file_comment_length,
|
||||||
|
..
|
||||||
|
} = Zip32CDEBlock::parse(reader)?;
|
||||||
|
|
||||||
|
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice();
|
||||||
reader.read_exact(&mut zip_file_comment)?;
|
reader.read_exact(&mut zip_file_comment)?;
|
||||||
|
|
||||||
Ok(CentralDirectoryEnd {
|
Ok(Zip32CentralDirectoryEnd {
|
||||||
disk_number,
|
disk_number,
|
||||||
disk_with_central_directory,
|
disk_with_central_directory,
|
||||||
number_of_files_on_this_disk,
|
number_of_files_on_this_disk,
|
||||||
|
@ -52,61 +299,118 @@ impl CentralDirectoryEnd {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_and_parse<T: Read + Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)> {
|
pub fn find_and_parse<T: Read + Seek>(
|
||||||
const HEADER_SIZE: u64 = 22;
|
reader: &mut T,
|
||||||
const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000;
|
) -> ZipResult<(Zip32CentralDirectoryEnd, u64)> {
|
||||||
const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6;
|
|
||||||
let file_length = reader.seek(io::SeekFrom::End(0))?;
|
let file_length = reader.seek(io::SeekFrom::End(0))?;
|
||||||
|
|
||||||
let search_upper_bound = file_length.saturating_sub(MAX_HEADER_AND_COMMENT_SIZE);
|
if file_length < mem::size_of::<Zip32CDEBlock>() as u64 {
|
||||||
|
|
||||||
if file_length < HEADER_SIZE {
|
|
||||||
return Err(ZipError::InvalidArchive("Invalid zip header"));
|
return Err(ZipError::InvalidArchive("Invalid zip header"));
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut pos = file_length - HEADER_SIZE;
|
let search_lower_bound = 0;
|
||||||
while pos >= search_upper_bound {
|
|
||||||
let mut have_signature = false;
|
const END_WINDOW_SIZE: usize = 512;
|
||||||
reader.seek(io::SeekFrom::Start(pos))?;
|
/* TODO: use static_assertions!() */
|
||||||
if reader.read_u32_le()? == CENTRAL_DIRECTORY_END_SIGNATURE {
|
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
|
||||||
have_signature = true;
|
|
||||||
reader.seek(io::SeekFrom::Current(
|
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
|
||||||
BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64,
|
Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
|
||||||
))?;
|
let finder = FinderRev::new(&SIG_BYTES);
|
||||||
let cde_start_pos = reader.seek(io::SeekFrom::Start(pos))?;
|
|
||||||
if let Ok(end_header) = CentralDirectoryEnd::parse(reader) {
|
let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64);
|
||||||
return Ok((end_header, cde_start_pos));
|
let mut window = [0u8; END_WINDOW_SIZE];
|
||||||
|
while window_start >= search_lower_bound {
|
||||||
|
/* Go to the start of the window in the file. */
|
||||||
|
reader.seek(io::SeekFrom::Start(window_start))?;
|
||||||
|
|
||||||
|
/* Identify how many bytes to read (this may be less than the window size for files
|
||||||
|
* smaller than END_WINDOW_SIZE). */
|
||||||
|
let end = (window_start + END_WINDOW_SIZE as u64).min(file_length);
|
||||||
|
let cur_len = (end - window_start) as usize;
|
||||||
|
debug_assert!(cur_len > 0);
|
||||||
|
debug_assert!(cur_len <= END_WINDOW_SIZE);
|
||||||
|
let cur_window: &mut [u8] = &mut window[..cur_len];
|
||||||
|
/* Read the window into the bytes! */
|
||||||
|
reader.read_exact(cur_window)?;
|
||||||
|
|
||||||
|
/* Find instances of the magic signature. */
|
||||||
|
for offset in finder.rfind_iter(cur_window) {
|
||||||
|
let cde_start_pos = window_start + offset as u64;
|
||||||
|
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
|
||||||
|
/* Drop any headers that don't parse. */
|
||||||
|
if let Ok(cde) = Self::parse(reader) {
|
||||||
|
return Ok((cde, cde_start_pos));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pos = match pos.checked_sub(if have_signature {
|
|
||||||
size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64
|
/* We always want to make sure we go allllll the way back to the start of the file if
|
||||||
} else {
|
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
|
||||||
1
|
* avoid infinite looping by checking at the end of the loop. */
|
||||||
}) {
|
if window_start == search_lower_bound {
|
||||||
Some(p) => p,
|
break;
|
||||||
None => break,
|
}
|
||||||
};
|
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
|
||||||
|
* overlap our nice neat window boundaries! */
|
||||||
|
window_start = (window_start
|
||||||
|
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
|
||||||
|
* by the width of the pattern to match. */
|
||||||
|
+ mem::size_of::<Magic>() as u64)
|
||||||
|
/* This should never happen, but make sure we don't go past the end of the file. */
|
||||||
|
.min(file_length);
|
||||||
|
window_start = window_start
|
||||||
|
.saturating_sub(
|
||||||
|
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
|
||||||
|
* once (unless limited by file_length). */
|
||||||
|
END_WINDOW_SIZE as u64,
|
||||||
|
)
|
||||||
|
/* This will never go below the value of `search_lower_bound`, so we have a special
|
||||||
|
* `if window_start == search_lower_bound` check above. */
|
||||||
|
.max(search_lower_bound);
|
||||||
}
|
}
|
||||||
|
|
||||||
Err(ZipError::InvalidArchive(
|
Err(ZipError::InvalidArchive(
|
||||||
"Could not find central directory end",
|
"Could not find central directory end",
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
|
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
writer.write_u32_le(CENTRAL_DIRECTORY_END_SIGNATURE)?;
|
let (block, comment) = self.block_and_comment()?;
|
||||||
writer.write_u16_le(self.disk_number)?;
|
block.write(writer)?;
|
||||||
writer.write_u16_le(self.disk_with_central_directory)?;
|
writer.write_all(&comment)?;
|
||||||
writer.write_u16_le(self.number_of_files_on_this_disk)?;
|
|
||||||
writer.write_u16_le(self.number_of_files)?;
|
|
||||||
writer.write_u32_le(self.central_directory_size)?;
|
|
||||||
writer.write_u32_le(self.central_directory_offset)?;
|
|
||||||
writer.write_u16_le(self.zip_file_comment.len() as u16)?;
|
|
||||||
writer.write_all(&self.zip_file_comment)?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Zip64CentralDirectoryEndLocator {
|
#[derive(Copy, Clone)]
|
||||||
|
#[repr(packed)]
|
||||||
|
pub(crate) struct Zip64CDELocatorBlock {
|
||||||
|
magic: Magic,
|
||||||
|
pub disk_with_central_directory: u32,
|
||||||
|
pub end_of_central_directory_offset: u64,
|
||||||
|
pub number_of_disks: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Block for Zip64CDELocatorBlock {
|
||||||
|
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn magic(self) -> Magic {
|
||||||
|
self.magic
|
||||||
|
}
|
||||||
|
|
||||||
|
const ERROR: ZipError =
|
||||||
|
ZipError::InvalidArchive("Invalid zip64 locator digital signature header");
|
||||||
|
|
||||||
|
to_and_from_le![
|
||||||
|
(magic, Magic),
|
||||||
|
(disk_with_central_directory, u32),
|
||||||
|
(end_of_central_directory_offset, u64),
|
||||||
|
(number_of_disks, u32),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct Zip64CentralDirectoryEndLocator {
|
||||||
pub disk_with_central_directory: u32,
|
pub disk_with_central_directory: u32,
|
||||||
pub end_of_central_directory_offset: u64,
|
pub end_of_central_directory_offset: u64,
|
||||||
pub number_of_disks: u32,
|
pub number_of_disks: u32,
|
||||||
|
@ -114,15 +418,13 @@ pub struct Zip64CentralDirectoryEndLocator {
|
||||||
|
|
||||||
impl Zip64CentralDirectoryEndLocator {
|
impl Zip64CentralDirectoryEndLocator {
|
||||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator> {
|
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator> {
|
||||||
let magic = reader.read_u32_le()?;
|
let Zip64CDELocatorBlock {
|
||||||
if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE {
|
// magic,
|
||||||
return Err(ZipError::InvalidArchive(
|
disk_with_central_directory,
|
||||||
"Invalid zip64 locator digital signature header",
|
end_of_central_directory_offset,
|
||||||
));
|
number_of_disks,
|
||||||
}
|
..
|
||||||
let disk_with_central_directory = reader.read_u32_le()?;
|
} = Zip64CDELocatorBlock::parse(reader)?;
|
||||||
let end_of_central_directory_offset = reader.read_u64_le()?;
|
|
||||||
let number_of_disks = reader.read_u32_le()?;
|
|
||||||
|
|
||||||
Ok(Zip64CentralDirectoryEndLocator {
|
Ok(Zip64CentralDirectoryEndLocator {
|
||||||
disk_with_central_directory,
|
disk_with_central_directory,
|
||||||
|
@ -131,16 +433,64 @@ impl Zip64CentralDirectoryEndLocator {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
|
pub fn block(self) -> Zip64CDELocatorBlock {
|
||||||
writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?;
|
let Self {
|
||||||
writer.write_u32_le(self.disk_with_central_directory)?;
|
disk_with_central_directory,
|
||||||
writer.write_u64_le(self.end_of_central_directory_offset)?;
|
end_of_central_directory_offset,
|
||||||
writer.write_u32_le(self.number_of_disks)?;
|
number_of_disks,
|
||||||
Ok(())
|
} = self;
|
||||||
|
Zip64CDELocatorBlock {
|
||||||
|
magic: Zip64CDELocatorBlock::MAGIC,
|
||||||
|
disk_with_central_directory,
|
||||||
|
end_of_central_directory_offset,
|
||||||
|
number_of_disks,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
|
self.block().write(writer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Zip64CentralDirectoryEnd {
|
#[derive(Copy, Clone)]
|
||||||
|
#[repr(packed)]
|
||||||
|
pub(crate) struct Zip64CDEBlock {
|
||||||
|
magic: Magic,
|
||||||
|
pub record_size: u64,
|
||||||
|
pub version_made_by: u16,
|
||||||
|
pub version_needed_to_extract: u16,
|
||||||
|
pub disk_number: u32,
|
||||||
|
pub disk_with_central_directory: u32,
|
||||||
|
pub number_of_files_on_this_disk: u64,
|
||||||
|
pub number_of_files: u64,
|
||||||
|
pub central_directory_size: u64,
|
||||||
|
pub central_directory_offset: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Block for Zip64CDEBlock {
|
||||||
|
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE;
|
||||||
|
|
||||||
|
fn magic(self) -> Magic {
|
||||||
|
self.magic
|
||||||
|
}
|
||||||
|
|
||||||
|
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
|
||||||
|
|
||||||
|
to_and_from_le![
|
||||||
|
(magic, Magic),
|
||||||
|
(record_size, u64),
|
||||||
|
(version_made_by, u16),
|
||||||
|
(version_needed_to_extract, u16),
|
||||||
|
(disk_number, u32),
|
||||||
|
(disk_with_central_directory, u32),
|
||||||
|
(number_of_files_on_this_disk, u64),
|
||||||
|
(number_of_files, u64),
|
||||||
|
(central_directory_size, u64),
|
||||||
|
(central_directory_offset, u64),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct Zip64CentralDirectoryEnd {
|
||||||
pub version_made_by: u16,
|
pub version_made_by: u16,
|
||||||
pub version_needed_to_extract: u16,
|
pub version_needed_to_extract: u16,
|
||||||
pub disk_number: u32,
|
pub disk_number: u32,
|
||||||
|
@ -153,56 +503,105 @@ pub struct Zip64CentralDirectoryEnd {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Zip64CentralDirectoryEnd {
|
impl Zip64CentralDirectoryEnd {
|
||||||
|
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEnd> {
|
||||||
|
let Zip64CDEBlock {
|
||||||
|
// record_size,
|
||||||
|
version_made_by,
|
||||||
|
version_needed_to_extract,
|
||||||
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
number_of_files_on_this_disk,
|
||||||
|
number_of_files,
|
||||||
|
central_directory_size,
|
||||||
|
central_directory_offset,
|
||||||
|
..
|
||||||
|
} = Zip64CDEBlock::parse(reader)?;
|
||||||
|
Ok(Self {
|
||||||
|
version_made_by,
|
||||||
|
version_needed_to_extract,
|
||||||
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
number_of_files_on_this_disk,
|
||||||
|
number_of_files,
|
||||||
|
central_directory_size,
|
||||||
|
central_directory_offset,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub fn find_and_parse<T: Read + Seek>(
|
pub fn find_and_parse<T: Read + Seek>(
|
||||||
reader: &mut T,
|
reader: &mut T,
|
||||||
nominal_offset: u64,
|
search_lower_bound: u64,
|
||||||
search_upper_bound: u64,
|
search_upper_bound: u64,
|
||||||
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
|
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
let mut pos = search_upper_bound;
|
|
||||||
|
|
||||||
while pos >= nominal_offset {
|
const END_WINDOW_SIZE: usize = 2048;
|
||||||
let mut have_signature = false;
|
/* TODO: use static_assertions!() */
|
||||||
reader.seek(io::SeekFrom::Start(pos))?;
|
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());
|
||||||
if reader.read_u32_le()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE {
|
|
||||||
have_signature = true;
|
|
||||||
let archive_offset = pos - nominal_offset;
|
|
||||||
|
|
||||||
let _record_size = reader.read_u64_le()?;
|
const SIG_BYTES: [u8; mem::size_of::<Magic>()] =
|
||||||
// We would use this value if we did anything with the "zip64 extensible data sector".
|
Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
|
||||||
|
let finder = FinderRev::new(&SIG_BYTES);
|
||||||
|
|
||||||
let version_made_by = reader.read_u16_le()?;
|
let mut window_start: u64 = search_upper_bound
|
||||||
let version_needed_to_extract = reader.read_u16_le()?;
|
.saturating_sub(END_WINDOW_SIZE as u64)
|
||||||
let disk_number = reader.read_u32_le()?;
|
.max(search_lower_bound);
|
||||||
let disk_with_central_directory = reader.read_u32_le()?;
|
let mut window = [0u8; END_WINDOW_SIZE];
|
||||||
let number_of_files_on_this_disk = reader.read_u64_le()?;
|
while window_start >= search_lower_bound {
|
||||||
let number_of_files = reader.read_u64_le()?;
|
reader.seek(io::SeekFrom::Start(window_start))?;
|
||||||
let central_directory_size = reader.read_u64_le()?;
|
|
||||||
let central_directory_offset = reader.read_u64_le()?;
|
|
||||||
|
|
||||||
results.push((
|
/* Identify how many bytes to read (this may be less than the window size for files
|
||||||
Zip64CentralDirectoryEnd {
|
* smaller than END_WINDOW_SIZE). */
|
||||||
version_made_by,
|
let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound);
|
||||||
version_needed_to_extract,
|
|
||||||
disk_number,
|
debug_assert!(end >= window_start);
|
||||||
disk_with_central_directory,
|
let cur_len = (end - window_start) as usize;
|
||||||
number_of_files_on_this_disk,
|
if cur_len == 0 {
|
||||||
number_of_files,
|
break;
|
||||||
central_directory_size,
|
|
||||||
central_directory_offset,
|
|
||||||
},
|
|
||||||
archive_offset,
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
pos = match pos.checked_sub(if have_signature {
|
debug_assert!(cur_len <= END_WINDOW_SIZE);
|
||||||
size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64
|
let cur_window: &mut [u8] = &mut window[..cur_len];
|
||||||
} else {
|
/* Read the window into the bytes! */
|
||||||
1
|
reader.read_exact(cur_window)?;
|
||||||
}) {
|
|
||||||
None => break,
|
/* Find instances of the magic signature. */
|
||||||
Some(p) => p,
|
for offset in finder.rfind_iter(cur_window) {
|
||||||
|
let cde_start_pos = window_start + offset as u64;
|
||||||
|
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
|
||||||
|
|
||||||
|
debug_assert!(cde_start_pos >= search_lower_bound);
|
||||||
|
let archive_offset = cde_start_pos - search_lower_bound;
|
||||||
|
let cde = Self::parse(reader)?;
|
||||||
|
|
||||||
|
results.push((cde, archive_offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We always want to make sure we go allllll the way back to the start of the file if
|
||||||
|
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
|
||||||
|
* avoid infinite looping by checking at the end of the loop. */
|
||||||
|
if window_start == search_lower_bound {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
|
||||||
|
* overlap our nice neat window boundaries! */
|
||||||
|
window_start = (window_start
|
||||||
|
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
|
||||||
|
* by the width of the pattern to match. */
|
||||||
|
+ mem::size_of::<Magic>() as u64)
|
||||||
|
/* This may never happen, but make sure we don't go past the end of the specified
|
||||||
|
* range. */
|
||||||
|
.min(search_upper_bound);
|
||||||
|
window_start = window_start
|
||||||
|
.saturating_sub(
|
||||||
|
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
|
||||||
|
* once (unless limited by search_upper_bound). */
|
||||||
|
END_WINDOW_SIZE as u64,
|
||||||
|
)
|
||||||
|
/* This will never go below the value of `search_lower_bound`, so we have a special
|
||||||
|
* `if window_start == search_lower_bound` check above. */
|
||||||
|
.max(search_lower_bound);
|
||||||
}
|
}
|
||||||
|
|
||||||
if results.is_empty() {
|
if results.is_empty() {
|
||||||
Err(ZipError::InvalidArchive(
|
Err(ZipError::InvalidArchive(
|
||||||
"Could not find ZIP64 central directory end",
|
"Could not find ZIP64 central directory end",
|
||||||
|
@ -212,18 +611,34 @@ impl Zip64CentralDirectoryEnd {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
|
pub fn block(self) -> Zip64CDEBlock {
|
||||||
writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?;
|
let Self {
|
||||||
writer.write_u64_le(44)?; // record size
|
version_made_by,
|
||||||
writer.write_u16_le(self.version_made_by)?;
|
version_needed_to_extract,
|
||||||
writer.write_u16_le(self.version_needed_to_extract)?;
|
disk_number,
|
||||||
writer.write_u32_le(self.disk_number)?;
|
disk_with_central_directory,
|
||||||
writer.write_u32_le(self.disk_with_central_directory)?;
|
number_of_files_on_this_disk,
|
||||||
writer.write_u64_le(self.number_of_files_on_this_disk)?;
|
number_of_files,
|
||||||
writer.write_u64_le(self.number_of_files)?;
|
central_directory_size,
|
||||||
writer.write_u64_le(self.central_directory_size)?;
|
central_directory_offset,
|
||||||
writer.write_u64_le(self.central_directory_offset)?;
|
} = self;
|
||||||
Ok(())
|
Zip64CDEBlock {
|
||||||
|
magic: Zip64CDEBlock::MAGIC,
|
||||||
|
/* currently unused */
|
||||||
|
record_size: 44,
|
||||||
|
version_made_by,
|
||||||
|
version_needed_to_extract,
|
||||||
|
disk_number,
|
||||||
|
disk_with_central_directory,
|
||||||
|
number_of_files_on_this_disk,
|
||||||
|
number_of_files,
|
||||||
|
central_directory_size,
|
||||||
|
central_directory_offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
|
||||||
|
self.block().write(writer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -280,3 +695,42 @@ pub(crate) fn path_to_string<T: AsRef<Path>>(path: T) -> Box<str> {
|
||||||
maybe_original.unwrap().into()
|
maybe_original.unwrap().into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Cursor;

    /// Minimal two-field block used to exercise the [`Block`] trait in isolation.
    #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
    #[repr(packed)]
    pub struct TestBlock {
        magic: Magic,
        pub file_name_length: u16,
    }

    impl Block for TestBlock {
        const MAGIC: Magic = Magic::literal(0x01111);

        const ERROR: ZipError = ZipError::InvalidArchive("unreachable");

        fn magic(self) -> Magic {
            self.magic
        }

        to_and_from_le![(magic, Magic), (file_name_length, u16)];
    }

    /// A block that is written out and then parsed back must compare equal to the original.
    #[test]
    fn block_serde() {
        let mut buffer = Cursor::new(Vec::new());
        let written = TestBlock {
            magic: TestBlock::MAGIC,
            file_name_length: 3,
        };
        written.write(&mut buffer).unwrap();

        // Rewind so parsing starts at the first byte that was written.
        buffer.set_position(0);
        let parsed = TestBlock::parse(&mut buffer).unwrap();
        assert_eq!(written, parsed);
    }
}
|
||||||
|
|
429
src/types.rs
429
src/types.rs
|
@ -1,13 +1,17 @@
|
||||||
//! Types that specify what is contained in a ZIP.
|
//! Types that specify what is contained in a ZIP.
|
||||||
|
use crate::cp437::FromCp437;
|
||||||
|
use crate::write::{FileOptionExtension, FileOptions};
|
||||||
use path::{Component, Path, PathBuf};
|
use path::{Component, Path, PathBuf};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::mem;
|
||||||
use std::path;
|
use std::path;
|
||||||
use std::sync::{Arc, OnceLock};
|
use std::sync::{Arc, OnceLock};
|
||||||
|
|
||||||
#[cfg(feature = "chrono")]
|
#[cfg(feature = "chrono")]
|
||||||
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
|
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
|
||||||
#[cfg(doc)]
|
|
||||||
use {crate::read::ZipFile, crate::write::FileOptions};
|
use crate::result::{ZipError, ZipResult};
|
||||||
|
use crate::spec::{self, Block};
|
||||||
|
|
||||||
pub(crate) mod ffi {
|
pub(crate) mod ffi {
|
||||||
pub const S_IFDIR: u32 = 0o0040000;
|
pub const S_IFDIR: u32 = 0o0040000;
|
||||||
|
@ -23,6 +27,12 @@ use crate::CompressionMethod;
|
||||||
#[cfg(feature = "time")]
|
#[cfg(feature = "time")]
|
||||||
use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
|
use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
|
||||||
|
|
||||||
|
/// Checksum and size values for an entry, passed as one bundle.
///
/// `ZipFileData::initialize_local_block` copies these three values verbatim into the
/// corresponding `ZipFileData` fields.
pub(crate) struct ZipRawValues {
    /// CRC-32 value stored for the entry.
    pub(crate) crc32: u32,
    /// Size of the entry's data as stored in the archive, in bytes.
    pub(crate) compressed_size: u64,
    /// Size of the entry's data once decompressed, in bytes.
    pub(crate) uncompressed_size: u64,
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||||
#[repr(u8)]
|
#[repr(u8)]
|
||||||
pub enum System {
|
pub enum System {
|
||||||
|
@ -58,7 +68,7 @@ impl From<System> for u8 {
|
||||||
/// For example, it has a resolution of 2 seconds!
|
/// For example, it has a resolution of 2 seconds!
|
||||||
///
|
///
|
||||||
/// A [`DateTime`] can be stored directly in a zipfile with [`FileOptions::last_modified_time`],
|
/// A [`DateTime`] can be stored directly in a zipfile with [`FileOptions::last_modified_time`],
|
||||||
/// or read from one with [`ZipFile::last_modified`]
|
/// or read from one with [`ZipFile::last_modified`](crate::read::ZipFile::last_modified).
|
||||||
///
|
///
|
||||||
/// # Warning
|
/// # Warning
|
||||||
///
|
///
|
||||||
|
@ -562,6 +572,419 @@ impl ZipFileData {
|
||||||
.map(|v| v.len())
|
.map(|v| v.len())
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
pub(crate) fn initialize_local_block<S, T: FileOptionExtension>(
|
||||||
|
name: S,
|
||||||
|
options: &FileOptions<T>,
|
||||||
|
raw_values: ZipRawValues,
|
||||||
|
header_start: u64,
|
||||||
|
extra_data_start: Option<u64>,
|
||||||
|
aes_extra_data_start: u64,
|
||||||
|
compression_method: crate::compression::CompressionMethod,
|
||||||
|
aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
|
||||||
|
extra_field: Option<Arc<Vec<u8>>>,
|
||||||
|
) -> Self
|
||||||
|
where
|
||||||
|
S: Into<Box<str>>,
|
||||||
|
{
|
||||||
|
let permissions = options.permissions.unwrap_or(0o100644);
|
||||||
|
let file_name: Box<str> = name.into();
|
||||||
|
let file_name_raw: Box<[u8]> = file_name.bytes().collect();
|
||||||
|
ZipFileData {
|
||||||
|
system: System::Unix,
|
||||||
|
version_made_by: DEFAULT_VERSION,
|
||||||
|
encrypted: options.encrypt_with.is_some(),
|
||||||
|
using_data_descriptor: false,
|
||||||
|
compression_method,
|
||||||
|
compression_level: options.compression_level,
|
||||||
|
last_modified_time: Some(options.last_modified_time),
|
||||||
|
crc32: raw_values.crc32,
|
||||||
|
compressed_size: raw_values.compressed_size,
|
||||||
|
uncompressed_size: raw_values.uncompressed_size,
|
||||||
|
file_name, // Never used for saving, but used as map key in insert_file_data()
|
||||||
|
file_name_raw,
|
||||||
|
extra_field,
|
||||||
|
central_extra_field: options.extended_options.central_extra_data().cloned(),
|
||||||
|
file_comment: String::with_capacity(0).into_boxed_str(),
|
||||||
|
header_start,
|
||||||
|
data_start: OnceLock::new(),
|
||||||
|
central_header_start: 0,
|
||||||
|
external_attributes: permissions << 16,
|
||||||
|
large_file: options.large_file,
|
||||||
|
aes_mode,
|
||||||
|
extra_fields: Vec::new(),
|
||||||
|
extra_data_start,
|
||||||
|
aes_extra_data_start,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn from_local_block<R: std::io::Read>(
|
||||||
|
block: ZipLocalEntryBlock,
|
||||||
|
reader: &mut R,
|
||||||
|
) -> ZipResult<Self> {
|
||||||
|
let ZipLocalEntryBlock {
|
||||||
|
// magic,
|
||||||
|
version_made_by,
|
||||||
|
flags,
|
||||||
|
compression_method,
|
||||||
|
last_mod_time,
|
||||||
|
last_mod_date,
|
||||||
|
crc32,
|
||||||
|
compressed_size,
|
||||||
|
uncompressed_size,
|
||||||
|
file_name_length,
|
||||||
|
extra_field_length,
|
||||||
|
..
|
||||||
|
} = block;
|
||||||
|
|
||||||
|
let encrypted: bool = flags & 1 == 1;
|
||||||
|
if encrypted {
|
||||||
|
return Err(ZipError::UnsupportedArchive(
|
||||||
|
"Encrypted files are not supported",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* FIXME: these were previously incorrect: add testing! */
|
||||||
|
/* flags & (1 << 3) != 0 */
|
||||||
|
let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3;
|
||||||
|
if using_data_descriptor {
|
||||||
|
return Err(ZipError::UnsupportedArchive(
|
||||||
|
"The file length is not available in the local header",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* flags & (1 << 1) != 0 */
|
||||||
|
let is_utf8: bool = flags & (1 << 11) != 0;
|
||||||
|
let compression_method = crate::CompressionMethod::parse_from_u16(compression_method);
|
||||||
|
let file_name_length: usize = file_name_length.into();
|
||||||
|
let extra_field_length: usize = extra_field_length.into();
|
||||||
|
|
||||||
|
let mut file_name_raw = vec![0u8; file_name_length];
|
||||||
|
reader.read_exact(&mut file_name_raw)?;
|
||||||
|
let mut extra_field = vec![0u8; extra_field_length];
|
||||||
|
reader.read_exact(&mut extra_field)?;
|
||||||
|
|
||||||
|
let file_name: Box<str> = match is_utf8 {
|
||||||
|
true => String::from_utf8_lossy(&file_name_raw).into(),
|
||||||
|
false => file_name_raw.clone().from_cp437().into(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let system: u8 = (version_made_by >> 8).try_into().unwrap();
|
||||||
|
Ok(ZipFileData {
|
||||||
|
system: System::from(system),
|
||||||
|
/* NB: this strips the top 8 bits! */
|
||||||
|
version_made_by: version_made_by as u8,
|
||||||
|
encrypted,
|
||||||
|
using_data_descriptor,
|
||||||
|
compression_method,
|
||||||
|
compression_level: None,
|
||||||
|
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
||||||
|
crc32,
|
||||||
|
compressed_size: compressed_size.into(),
|
||||||
|
uncompressed_size: uncompressed_size.into(),
|
||||||
|
file_name,
|
||||||
|
file_name_raw: file_name_raw.into(),
|
||||||
|
extra_field: Some(Arc::new(extra_field)),
|
||||||
|
central_extra_field: None,
|
||||||
|
file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory
|
||||||
|
// header_start and data start are not available, but also don't matter, since seeking is
|
||||||
|
// not available.
|
||||||
|
header_start: 0,
|
||||||
|
data_start: OnceLock::new(),
|
||||||
|
central_header_start: 0,
|
||||||
|
// The external_attributes field is only available in the central directory.
|
||||||
|
// We set this to zero, which should be valid as the docs state 'If input came
|
||||||
|
// from standard input, this field is set to zero.'
|
||||||
|
external_attributes: 0,
|
||||||
|
large_file: false,
|
||||||
|
aes_mode: None,
|
||||||
|
extra_fields: Vec::new(),
|
||||||
|
extra_data_start: None,
|
||||||
|
aes_extra_data_start: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_utf8(&self) -> bool {
|
||||||
|
std::str::from_utf8(&self.file_name_raw).is_ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_ascii(&self) -> bool {
|
||||||
|
self.file_name_raw.is_ascii()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flags(&self) -> u16 {
|
||||||
|
let utf8_bit: u16 = if self.is_utf8() && !self.is_ascii() {
|
||||||
|
1u16 << 11
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let encrypted_bit: u16 = if self.encrypted { 1u16 << 0 } else { 0 };
|
||||||
|
|
||||||
|
utf8_bit | encrypted_bit
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clamp_size_field(&self, field: u64) -> u32 {
|
||||||
|
if self.large_file {
|
||||||
|
spec::ZIP64_BYTES_THR as u32
|
||||||
|
} else {
|
||||||
|
field.min(spec::ZIP64_BYTES_THR).try_into().unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn local_block(&self) -> ZipResult<ZipLocalEntryBlock> {
|
||||||
|
let compressed_size: u32 = self.clamp_size_field(self.compressed_size);
|
||||||
|
let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size);
|
||||||
|
|
||||||
|
let extra_block_len: usize = self
|
||||||
|
.zip64_extra_field_block()
|
||||||
|
.map(|block| block.full_size())
|
||||||
|
.unwrap_or(0);
|
||||||
|
let extra_field_length: u16 = (self.extra_field_len() + extra_block_len)
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| ZipError::InvalidArchive("Extra data field is too large"))?;
|
||||||
|
|
||||||
|
let last_modified_time = self
|
||||||
|
.last_modified_time
|
||||||
|
.unwrap_or_else(DateTime::default_for_write);
|
||||||
|
Ok(ZipLocalEntryBlock {
|
||||||
|
magic: ZipLocalEntryBlock::MAGIC,
|
||||||
|
version_made_by: self.version_needed(),
|
||||||
|
flags: self.flags(),
|
||||||
|
compression_method: self.compression_method.serialize_to_u16(),
|
||||||
|
last_mod_time: last_modified_time.timepart(),
|
||||||
|
last_mod_date: last_modified_time.datepart(),
|
||||||
|
crc32: self.crc32,
|
||||||
|
compressed_size,
|
||||||
|
uncompressed_size,
|
||||||
|
file_name_length: self.file_name_raw.len().try_into().unwrap(),
|
||||||
|
extra_field_length,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipCentralEntryBlock {
|
||||||
|
let extra_field_len: u16 = self.extra_field_len().try_into().unwrap();
|
||||||
|
let central_extra_field_len: u16 = self.central_extra_field_len().try_into().unwrap();
|
||||||
|
let last_modified_time = self
|
||||||
|
.last_modified_time
|
||||||
|
.unwrap_or_else(DateTime::default_for_write);
|
||||||
|
ZipCentralEntryBlock {
|
||||||
|
magic: ZipCentralEntryBlock::MAGIC,
|
||||||
|
version_made_by: (self.system as u16) << 8 | (self.version_made_by as u16),
|
||||||
|
version_to_extract: self.version_needed(),
|
||||||
|
flags: self.flags(),
|
||||||
|
compression_method: self.compression_method.serialize_to_u16(),
|
||||||
|
last_mod_time: last_modified_time.timepart(),
|
||||||
|
last_mod_date: last_modified_time.datepart(),
|
||||||
|
crc32: self.crc32,
|
||||||
|
compressed_size: self
|
||||||
|
.compressed_size
|
||||||
|
.min(spec::ZIP64_BYTES_THR)
|
||||||
|
.try_into()
|
||||||
|
.unwrap(),
|
||||||
|
uncompressed_size: self
|
||||||
|
.uncompressed_size
|
||||||
|
.min(spec::ZIP64_BYTES_THR)
|
||||||
|
.try_into()
|
||||||
|
.unwrap(),
|
||||||
|
file_name_length: self.file_name_raw.len().try_into().unwrap(),
|
||||||
|
extra_field_length: zip64_extra_field_length
|
||||||
|
+ extra_field_len
|
||||||
|
+ central_extra_field_len,
|
||||||
|
file_comment_length: self.file_comment.as_bytes().len().try_into().unwrap(),
|
||||||
|
disk_number: 0,
|
||||||
|
internal_file_attributes: 0,
|
||||||
|
external_file_attributes: self.external_attributes,
|
||||||
|
offset: self
|
||||||
|
.header_start
|
||||||
|
.min(spec::ZIP64_BYTES_THR)
|
||||||
|
.try_into()
|
||||||
|
.unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn zip64_extra_field_block(&self) -> Option<Zip64ExtraFieldBlock> {
|
||||||
|
let uncompressed_size: Option<u64> =
|
||||||
|
if self.uncompressed_size >= spec::ZIP64_BYTES_THR || self.large_file {
|
||||||
|
Some(spec::ZIP64_BYTES_THR)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
let compressed_size: Option<u64> =
|
||||||
|
if self.compressed_size >= spec::ZIP64_BYTES_THR || self.large_file {
|
||||||
|
Some(spec::ZIP64_BYTES_THR)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
let header_start: Option<u64> = if self.header_start >= spec::ZIP64_BYTES_THR {
|
||||||
|
Some(spec::ZIP64_BYTES_THR)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut size: u16 = 0;
|
||||||
|
if uncompressed_size.is_some() {
|
||||||
|
size += mem::size_of::<u64>() as u16;
|
||||||
|
}
|
||||||
|
if compressed_size.is_some() {
|
||||||
|
size += mem::size_of::<u64>() as u16;
|
||||||
|
}
|
||||||
|
if header_start.is_some() {
|
||||||
|
size += mem::size_of::<u64>() as u16;
|
||||||
|
}
|
||||||
|
if size == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Zip64ExtraFieldBlock {
|
||||||
|
magic: spec::ExtraFieldMagic::ZIP64_EXTRA_FIELD_TAG,
|
||||||
|
size,
|
||||||
|
uncompressed_size,
|
||||||
|
compressed_size,
|
||||||
|
header_start,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(packed)]
|
||||||
|
pub(crate) struct ZipCentralEntryBlock {
|
||||||
|
magic: spec::Magic,
|
||||||
|
pub version_made_by: u16,
|
||||||
|
pub version_to_extract: u16,
|
||||||
|
pub flags: u16,
|
||||||
|
pub compression_method: u16,
|
||||||
|
pub last_mod_time: u16,
|
||||||
|
pub last_mod_date: u16,
|
||||||
|
pub crc32: u32,
|
||||||
|
pub compressed_size: u32,
|
||||||
|
pub uncompressed_size: u32,
|
||||||
|
pub file_name_length: u16,
|
||||||
|
pub extra_field_length: u16,
|
||||||
|
pub file_comment_length: u16,
|
||||||
|
pub disk_number: u16,
|
||||||
|
pub internal_file_attributes: u16,
|
||||||
|
pub external_file_attributes: u32,
|
||||||
|
pub offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Block for ZipCentralEntryBlock {
|
||||||
|
const MAGIC: spec::Magic = spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn magic(self) -> spec::Magic {
|
||||||
|
self.magic
|
||||||
|
}
|
||||||
|
|
||||||
|
const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header");
|
||||||
|
|
||||||
|
to_and_from_le![
|
||||||
|
(magic, spec::Magic),
|
||||||
|
(version_made_by, u16),
|
||||||
|
(version_to_extract, u16),
|
||||||
|
(flags, u16),
|
||||||
|
(compression_method, u16),
|
||||||
|
(last_mod_time, u16),
|
||||||
|
(last_mod_date, u16),
|
||||||
|
(crc32, u32),
|
||||||
|
(compressed_size, u32),
|
||||||
|
(uncompressed_size, u32),
|
||||||
|
(file_name_length, u16),
|
||||||
|
(extra_field_length, u16),
|
||||||
|
(file_comment_length, u16),
|
||||||
|
(disk_number, u16),
|
||||||
|
(internal_file_attributes, u16),
|
||||||
|
(external_file_attributes, u32),
|
||||||
|
(offset, u32),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(packed)]
|
||||||
|
pub(crate) struct ZipLocalEntryBlock {
|
||||||
|
magic: spec::Magic,
|
||||||
|
pub version_made_by: u16,
|
||||||
|
pub flags: u16,
|
||||||
|
pub compression_method: u16,
|
||||||
|
pub last_mod_time: u16,
|
||||||
|
pub last_mod_date: u16,
|
||||||
|
pub crc32: u32,
|
||||||
|
pub compressed_size: u32,
|
||||||
|
pub uncompressed_size: u32,
|
||||||
|
pub file_name_length: u16,
|
||||||
|
pub extra_field_length: u16,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Block for ZipLocalEntryBlock {
|
||||||
|
const MAGIC: spec::Magic = spec::Magic::LOCAL_FILE_HEADER_SIGNATURE;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn magic(self) -> spec::Magic {
|
||||||
|
self.magic
|
||||||
|
}
|
||||||
|
|
||||||
|
const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header");
|
||||||
|
|
||||||
|
to_and_from_le![
|
||||||
|
(magic, spec::Magic),
|
||||||
|
(version_made_by, u16),
|
||||||
|
(flags, u16),
|
||||||
|
(compression_method, u16),
|
||||||
|
(last_mod_time, u16),
|
||||||
|
(last_mod_date, u16),
|
||||||
|
(crc32, u32),
|
||||||
|
(compressed_size, u32),
|
||||||
|
(uncompressed_size, u32),
|
||||||
|
(file_name_length, u16),
|
||||||
|
(extra_field_length, u16),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub(crate) struct Zip64ExtraFieldBlock {
|
||||||
|
magic: spec::ExtraFieldMagic,
|
||||||
|
size: u16,
|
||||||
|
uncompressed_size: Option<u64>,
|
||||||
|
compressed_size: Option<u64>,
|
||||||
|
header_start: Option<u64>,
|
||||||
|
// Excluded fields:
|
||||||
|
// u32: disk start number
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Zip64ExtraFieldBlock {
|
||||||
|
pub fn full_size(&self) -> usize {
|
||||||
|
assert!(self.size > 0);
|
||||||
|
self.size as usize + mem::size_of::<spec::ExtraFieldMagic>() + mem::size_of::<u16>()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn serialize(self) -> Box<[u8]> {
|
||||||
|
let Self {
|
||||||
|
magic,
|
||||||
|
size,
|
||||||
|
uncompressed_size,
|
||||||
|
compressed_size,
|
||||||
|
header_start,
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
let full_size = self.full_size();
|
||||||
|
|
||||||
|
let mut ret = Vec::with_capacity(full_size);
|
||||||
|
ret.extend(magic.to_le_bytes());
|
||||||
|
ret.extend(u16::to_le_bytes(size));
|
||||||
|
|
||||||
|
if let Some(uncompressed_size) = uncompressed_size {
|
||||||
|
ret.extend(u64::to_le_bytes(uncompressed_size));
|
||||||
|
}
|
||||||
|
if let Some(compressed_size) = compressed_size {
|
||||||
|
ret.extend(u64::to_le_bytes(compressed_size));
|
||||||
|
}
|
||||||
|
if let Some(header_start) = header_start {
|
||||||
|
ret.extend(u64::to_le_bytes(header_start));
|
||||||
|
}
|
||||||
|
debug_assert_eq!(ret.len(), full_size);
|
||||||
|
|
||||||
|
ret.into_boxed_slice()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The encryption specification used to encrypt a file with AES.
|
/// The encryption specification used to encrypt a file with AES.
|
||||||
|
|
257
src/write.rs
257
src/write.rs
|
@ -5,10 +5,12 @@ use crate::aes::AesWriter;
|
||||||
use crate::compression::CompressionMethod;
|
use crate::compression::CompressionMethod;
|
||||||
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
|
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
|
||||||
use crate::result::{ZipError, ZipResult};
|
use crate::result::{ZipError, ZipResult};
|
||||||
use crate::spec;
|
use crate::spec::{self, Block};
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
use crate::types::AesMode;
|
use crate::types::AesMode;
|
||||||
use crate::types::{ffi, AesVendorVersion, DateTime, System, ZipFileData, DEFAULT_VERSION};
|
use crate::types::{
|
||||||
|
ffi, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, DEFAULT_VERSION,
|
||||||
|
};
|
||||||
use crate::write::ffi::S_IFLNK;
|
use crate::write::ffi::S_IFLNK;
|
||||||
#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
|
#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
|
||||||
use core::num::NonZeroU64;
|
use core::num::NonZeroU64;
|
||||||
|
@ -22,7 +24,7 @@ use std::io::{BufReader, SeekFrom};
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::str::{from_utf8, Utf8Error};
|
use std::str::{from_utf8, Utf8Error};
|
||||||
use std::sync::{Arc, OnceLock};
|
use std::sync::Arc;
|
||||||
|
|
||||||
#[cfg(any(
|
#[cfg(any(
|
||||||
feature = "deflate",
|
feature = "deflate",
|
||||||
|
@ -147,11 +149,6 @@ struct ZipWriterStats {
|
||||||
bytes_written: u64,
|
bytes_written: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ZipRawValues {
|
|
||||||
crc32: u32,
|
|
||||||
compressed_size: u64,
|
|
||||||
uncompressed_size: u64,
|
|
||||||
}
|
|
||||||
mod sealed {
|
mod sealed {
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
@ -188,7 +185,7 @@ mod sealed {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
enum EncryptWith<'k> {
|
pub(crate) enum EncryptWith<'k> {
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
Aes {
|
Aes {
|
||||||
mode: AesMode,
|
mode: AesMode,
|
||||||
|
@ -223,9 +220,9 @@ pub struct FileOptions<'k, T: FileOptionExtension> {
|
||||||
pub(crate) last_modified_time: DateTime,
|
pub(crate) last_modified_time: DateTime,
|
||||||
pub(crate) permissions: Option<u32>,
|
pub(crate) permissions: Option<u32>,
|
||||||
pub(crate) large_file: bool,
|
pub(crate) large_file: bool,
|
||||||
encrypt_with: Option<EncryptWith<'k>>,
|
pub(crate) encrypt_with: Option<EncryptWith<'k>>,
|
||||||
extended_options: T,
|
pub(crate) extended_options: T,
|
||||||
alignment: u16,
|
pub(crate) alignment: u16,
|
||||||
#[cfg(feature = "deflate-zopfli")]
|
#[cfg(feature = "deflate-zopfli")]
|
||||||
pub(super) zopfli_buffer_size: Option<usize>,
|
pub(super) zopfli_buffer_size: Option<usize>,
|
||||||
}
|
}
|
||||||
|
@ -509,7 +506,8 @@ impl ZipWriterStats {
|
||||||
impl<A: Read + Write + Seek> ZipWriter<A> {
|
impl<A: Read + Write + Seek> ZipWriter<A> {
|
||||||
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
||||||
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
||||||
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
|
let (footer, cde_start_pos) =
|
||||||
|
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
|
||||||
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?;
|
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?;
|
||||||
|
|
||||||
Ok(ZipWriter {
|
Ok(ZipWriter {
|
||||||
|
@ -704,7 +702,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
/// Set ZIP archive comment.
|
/// Set ZIP archive comment.
|
||||||
///
|
///
|
||||||
/// This sets the raw bytes of the comment. The comment
|
/// This sets the raw bytes of the comment. The comment
|
||||||
/// is typically expected to be encoded in UTF-8
|
/// is typically expected to be encoded in UTF-8.
|
||||||
pub fn set_raw_comment(&mut self, comment: Box<[u8]>) {
|
pub fn set_raw_comment(&mut self, comment: Box<[u8]>) {
|
||||||
self.comment = comment;
|
self.comment = comment;
|
||||||
}
|
}
|
||||||
|
@ -717,7 +715,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
/// Get ZIP archive comment.
|
/// Get ZIP archive comment.
|
||||||
///
|
///
|
||||||
/// This returns the raw bytes of the comment. The comment
|
/// This returns the raw bytes of the comment. The comment
|
||||||
/// is typically expected to be encoded in UTF-8
|
/// is typically expected to be encoded in UTF-8.
|
||||||
pub const fn get_raw_comment(&self) -> &[u8] {
|
pub const fn get_raw_comment(&self) -> &[u8] {
|
||||||
&self.comment
|
&self.comment
|
||||||
}
|
}
|
||||||
|
@ -777,7 +775,6 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
{
|
{
|
||||||
let header_start = self.inner.get_plain().stream_position()?;
|
let header_start = self.inner.get_plain().stream_position()?;
|
||||||
|
|
||||||
let permissions = options.permissions.unwrap_or(0o100644);
|
|
||||||
let (compression_method, aes_mode) = match options.encrypt_with {
|
let (compression_method, aes_mode) = match options.encrypt_with {
|
||||||
#[cfg(feature = "aes-crypto")]
|
#[cfg(feature = "aes-crypto")]
|
||||||
Some(EncryptWith::Aes { mode, .. }) => (
|
Some(EncryptWith::Aes { mode, .. }) => (
|
||||||
|
@ -786,78 +783,38 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
),
|
),
|
||||||
_ => (options.compression_method, None),
|
_ => (options.compression_method, None),
|
||||||
};
|
};
|
||||||
let last_modified_time = options.last_modified_time;
|
|
||||||
let mut file = ZipFileData {
|
|
||||||
system: System::Unix,
|
|
||||||
version_made_by: DEFAULT_VERSION,
|
|
||||||
encrypted: options.encrypt_with.is_some(),
|
|
||||||
using_data_descriptor: false,
|
|
||||||
compression_method,
|
|
||||||
compression_level: options.compression_level,
|
|
||||||
last_modified_time: Some(options.last_modified_time),
|
|
||||||
crc32: raw_values.crc32,
|
|
||||||
compressed_size: raw_values.compressed_size,
|
|
||||||
uncompressed_size: raw_values.uncompressed_size,
|
|
||||||
file_name: name.to_owned().into(), // Never used for saving, but used as map key in insert_file_data()
|
|
||||||
file_name_raw: name.into().bytes().collect(),
|
|
||||||
extra_field,
|
|
||||||
central_extra_field: options.extended_options.central_extra_data().cloned(),
|
|
||||||
file_comment: String::with_capacity(0).into_boxed_str(),
|
|
||||||
header_start,
|
|
||||||
extra_data_start: None,
|
|
||||||
data_start: OnceLock::new(),
|
|
||||||
central_header_start: 0,
|
|
||||||
external_attributes: permissions << 16,
|
|
||||||
large_file: options.large_file,
|
|
||||||
aes_mode,
|
|
||||||
aes_extra_data_start,
|
|
||||||
|
|
||||||
extra_fields: Vec::new(),
|
let mut file = ZipFileData::initialize_local_block(
|
||||||
};
|
name,
|
||||||
|
&options,
|
||||||
|
raw_values,
|
||||||
|
header_start,
|
||||||
|
None,
|
||||||
|
aes_extra_data_start,
|
||||||
|
compression_method,
|
||||||
|
aes_mode,
|
||||||
|
extra_field,
|
||||||
|
);
|
||||||
let version_needed = file.version_needed();
|
let version_needed = file.version_needed();
|
||||||
file.version_made_by = file.version_made_by.max(version_needed as u8);
|
file.version_made_by = file.version_made_by.max(version_needed as u8);
|
||||||
let index = self.insert_file_data(file)?;
|
let index = self.insert_file_data(file)?;
|
||||||
let file = &mut self.files[index];
|
let file = &mut self.files[index];
|
||||||
let writer = self.inner.get_plain();
|
let writer = self.inner.get_plain();
|
||||||
// local file header signature
|
|
||||||
writer.write_u32_le(spec::LOCAL_FILE_HEADER_SIGNATURE)?;
|
let block = match file.local_block() {
|
||||||
// version needed to extract
|
Ok(block) => block,
|
||||||
writer.write_u16_le(version_needed)?;
|
Err(e) => {
|
||||||
// general purpose bit flag
|
let _ = self.abort_file();
|
||||||
let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok();
|
return Err(e);
|
||||||
let is_ascii = file.file_name_raw.is_ascii();
|
}
|
||||||
let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 }
|
};
|
||||||
| if file.encrypted { 1u16 << 0 } else { 0 };
|
match block.write(writer) {
|
||||||
writer.write_u16_le(flag)?;
|
Ok(()) => (),
|
||||||
// Compression method
|
Err(e) => {
|
||||||
#[allow(deprecated)]
|
let _ = self.abort_file();
|
||||||
writer.write_u16_le(file.compression_method.to_u16())?;
|
return Err(e);
|
||||||
// last mod file time and last mod file date
|
}
|
||||||
writer.write_u16_le(last_modified_time.timepart())?;
|
|
||||||
writer.write_u16_le(last_modified_time.datepart())?;
|
|
||||||
// crc-32
|
|
||||||
writer.write_u32_le(file.crc32)?;
|
|
||||||
// compressed size and uncompressed size
|
|
||||||
if file.large_file {
|
|
||||||
writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?;
|
|
||||||
writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?;
|
|
||||||
} else {
|
|
||||||
writer.write_u32_le(file.compressed_size as u32)?;
|
|
||||||
writer.write_u32_le(file.uncompressed_size as u32)?;
|
|
||||||
}
|
}
|
||||||
// file name length
|
|
||||||
writer.write_u16_le(file.file_name_raw.len() as u16)?;
|
|
||||||
// extra field length
|
|
||||||
let mut extra_field_length = file.extra_field_len();
|
|
||||||
if file.large_file {
|
|
||||||
extra_field_length += 20;
|
|
||||||
}
|
|
||||||
if extra_field_length + file.central_extra_field_len() > u16::MAX as usize {
|
|
||||||
let _ = self.abort_file();
|
|
||||||
return Err(InvalidArchive("Extra data field is too large"));
|
|
||||||
}
|
|
||||||
let extra_field_length = extra_field_length as u16;
|
|
||||||
writer.write_u16_le(extra_field_length)?;
|
|
||||||
// file name
|
// file name
|
||||||
writer.write_all(&file.file_name_raw)?;
|
writer.write_all(&file.file_name_raw)?;
|
||||||
// zip64 extra field
|
// zip64 extra field
|
||||||
|
@ -875,7 +832,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
if unaligned_header_bytes != 0 {
|
if unaligned_header_bytes != 0 {
|
||||||
let pad_length = (align - unaligned_header_bytes) as usize;
|
let pad_length = (align - unaligned_header_bytes) as usize;
|
||||||
let Some(new_extra_field_length) =
|
let Some(new_extra_field_length) =
|
||||||
(pad_length as u16).checked_add(extra_field_length)
|
(pad_length as u16).checked_add(block.extra_field_length)
|
||||||
else {
|
else {
|
||||||
let _ = self.abort_file();
|
let _ = self.abort_file();
|
||||||
return Err(InvalidArchive(
|
return Err(InvalidArchive(
|
||||||
|
@ -1435,7 +1392,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let number_of_files = self.files.len().min(spec::ZIP64_ENTRY_THR) as u16;
|
let number_of_files = self.files.len().min(spec::ZIP64_ENTRY_THR) as u16;
|
||||||
let footer = spec::CentralDirectoryEnd {
|
let footer = spec::Zip32CentralDirectoryEnd {
|
||||||
disk_number: 0,
|
disk_number: 0,
|
||||||
disk_with_central_directory: 0,
|
disk_with_central_directory: 0,
|
||||||
zip_file_comment: self.comment.clone(),
|
zip_file_comment: self.comment.clone(),
|
||||||
|
@ -1754,6 +1711,7 @@ fn update_aes_extra_data<W: Write + io::Seek>(
|
||||||
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
|
||||||
|
/* TODO: implement this using the Block trait! */
|
||||||
// Extra field header ID.
|
// Extra field header ID.
|
||||||
buf.write_u16_le(0x9901)?;
|
buf.write_u16_le(0x9901)?;
|
||||||
// Data size.
|
// Data size.
|
||||||
|
@ -1765,8 +1723,7 @@ fn update_aes_extra_data<W: Write + io::Seek>(
|
||||||
// AES encryption strength.
|
// AES encryption strength.
|
||||||
buf.write_all(&[aes_mode as u8])?;
|
buf.write_all(&[aes_mode as u8])?;
|
||||||
// Real compression method.
|
// Real compression method.
|
||||||
#[allow(deprecated)]
|
buf.write_u16_le(compression_method.serialize_to_u16())?;
|
||||||
buf.write_u16_le(compression_method.to_u16())?;
|
|
||||||
|
|
||||||
writer.write_all(&buf)?;
|
writer.write_all(&buf)?;
|
||||||
|
|
||||||
|
@ -1808,55 +1765,8 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
|
||||||
let mut zip64_extra_field = [0; 28];
|
let mut zip64_extra_field = [0; 28];
|
||||||
let zip64_extra_field_length =
|
let zip64_extra_field_length =
|
||||||
write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?;
|
write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?;
|
||||||
|
let block = file.block(zip64_extra_field_length);
|
||||||
// central file header signature
|
block.write(writer)?;
|
||||||
writer.write_u32_le(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?;
|
|
||||||
let version_needed = file.version_needed();
|
|
||||||
// version made by
|
|
||||||
let version_made_by =
|
|
||||||
(file.system as u16) << 8 | (file.version_made_by as u16).max(version_needed);
|
|
||||||
writer.write_u16_le(version_made_by)?;
|
|
||||||
// version needed to extract
|
|
||||||
writer.write_u16_le(version_needed)?;
|
|
||||||
// general puprose bit flag
|
|
||||||
let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok();
|
|
||||||
let is_ascii = file.file_name_raw.is_ascii();
|
|
||||||
let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 }
|
|
||||||
| if file.encrypted { 1u16 << 0 } else { 0 };
|
|
||||||
writer.write_u16_le(flag)?;
|
|
||||||
// compression method
|
|
||||||
#[allow(deprecated)]
|
|
||||||
writer.write_u16_le(file.compression_method.to_u16())?;
|
|
||||||
let last_modified_time = file
|
|
||||||
.last_modified_time
|
|
||||||
.unwrap_or_else(DateTime::default_for_write);
|
|
||||||
// last mod file time + date
|
|
||||||
writer.write_u16_le(last_modified_time.timepart())?;
|
|
||||||
writer.write_u16_le(last_modified_time.datepart())?;
|
|
||||||
// crc-32
|
|
||||||
writer.write_u32_le(file.crc32)?;
|
|
||||||
// compressed size
|
|
||||||
writer.write_u32_le(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?;
|
|
||||||
// uncompressed size
|
|
||||||
writer.write_u32_le(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?;
|
|
||||||
// file name length
|
|
||||||
writer.write_u16_le(file.file_name_raw.len() as u16)?;
|
|
||||||
// extra field length
|
|
||||||
writer.write_u16_le(
|
|
||||||
zip64_extra_field_length
|
|
||||||
+ file.extra_field_len() as u16
|
|
||||||
+ file.central_extra_field_len() as u16,
|
|
||||||
)?;
|
|
||||||
// file comment length
|
|
||||||
writer.write_u16_le(0)?;
|
|
||||||
// disk number start
|
|
||||||
writer.write_u16_le(0)?;
|
|
||||||
// internal file attributes
|
|
||||||
writer.write_u16_le(0)?;
|
|
||||||
// external file attributes
|
|
||||||
writer.write_u32_le(file.external_attributes)?;
|
|
||||||
// relative offset of local header
|
|
||||||
writer.write_u32_le(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?;
|
|
||||||
// file name
|
// file name
|
||||||
writer.write_all(&file.file_name_raw)?;
|
writer.write_all(&file.file_name_raw)?;
|
||||||
// zip64 extra field
|
// zip64 extra field
|
||||||
|
@ -1869,7 +1779,7 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
|
||||||
writer.write_all(central_extra_field)?;
|
writer.write_all(central_extra_field)?;
|
||||||
}
|
}
|
||||||
// file comment
|
// file comment
|
||||||
// <none>
|
writer.write_all(file.file_comment.as_bytes())?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -1910,12 +1820,13 @@ fn validate_extra_data(header_id: u16, data: &[u8]) -> ZipResult<()> {
|
||||||
fn write_local_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
|
fn write_local_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
|
||||||
// This entry in the Local header MUST include BOTH original
|
// This entry in the Local header MUST include BOTH original
|
||||||
// and compressed file size fields.
|
// and compressed file size fields.
|
||||||
writer.write_u16_le(0x0001)?;
|
let Some(block) = file.zip64_extra_field_block() else {
|
||||||
writer.write_u16_le(16)?;
|
return Err(ZipError::InvalidArchive(
|
||||||
writer.write_u64_le(file.uncompressed_size)?;
|
"Attempted to write a ZIP64 extra field for a file that's within zip32 limits",
|
||||||
writer.write_u64_le(file.compressed_size)?;
|
));
|
||||||
// Excluded fields:
|
};
|
||||||
// u32: disk start number
|
let block = block.serialize();
|
||||||
|
writer.write_all(&block)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1923,12 +1834,21 @@ fn update_local_zip64_extra_field<T: Write + Seek>(
|
||||||
writer: &mut T,
|
writer: &mut T,
|
||||||
file: &ZipFileData,
|
file: &ZipFileData,
|
||||||
) -> ZipResult<()> {
|
) -> ZipResult<()> {
|
||||||
let zip64_extra_field = file.header_start + 30 + file.file_name_raw.len() as u64;
|
if !file.large_file {
|
||||||
writer.seek(SeekFrom::Start(zip64_extra_field + 4))?;
|
return Err(ZipError::InvalidArchive(
|
||||||
writer.write_u64_le(file.uncompressed_size)?;
|
"Attempted to update a nonexistent ZIP64 extra field",
|
||||||
writer.write_u64_le(file.compressed_size)?;
|
));
|
||||||
// Excluded fields:
|
}
|
||||||
// u32: disk start number
|
|
||||||
|
let zip64_extra_field = file.header_start
|
||||||
|
+ mem::size_of::<ZipLocalEntryBlock>() as u64
|
||||||
|
+ file.file_name_raw.len() as u64;
|
||||||
|
|
||||||
|
writer.seek(SeekFrom::Start(zip64_extra_field))?;
|
||||||
|
|
||||||
|
let block = file.zip64_extra_field_block().unwrap();
|
||||||
|
let block = block.serialize();
|
||||||
|
writer.write_all(&block)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1937,37 +1857,15 @@ fn write_central_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData)
|
||||||
// information record is fixed, but the fields MUST
|
// information record is fixed, but the fields MUST
|
||||||
// only appear if the corresponding Local or Central
|
// only appear if the corresponding Local or Central
|
||||||
// directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
// directory record field is set to 0xFFFF or 0xFFFFFFFF.
|
||||||
let mut size = 0;
|
match file.zip64_extra_field_block() {
|
||||||
let uncompressed_size = file.uncompressed_size > spec::ZIP64_BYTES_THR;
|
None => Ok(0),
|
||||||
let compressed_size = file.compressed_size > spec::ZIP64_BYTES_THR;
|
Some(block) => {
|
||||||
let header_start = file.header_start > spec::ZIP64_BYTES_THR;
|
let block = block.serialize();
|
||||||
if uncompressed_size {
|
writer.write_all(&block)?;
|
||||||
size += 8;
|
let len: u16 = block.len().try_into().unwrap();
|
||||||
}
|
Ok(len)
|
||||||
if compressed_size {
|
|
||||||
size += 8;
|
|
||||||
}
|
|
||||||
if header_start {
|
|
||||||
size += 8;
|
|
||||||
}
|
|
||||||
if size > 0 {
|
|
||||||
writer.write_u16_le(0x0001)?;
|
|
||||||
writer.write_u16_le(size)?;
|
|
||||||
size += 4;
|
|
||||||
|
|
||||||
if uncompressed_size {
|
|
||||||
writer.write_u64_le(file.uncompressed_size)?;
|
|
||||||
}
|
}
|
||||||
if compressed_size {
|
|
||||||
writer.write_u64_le(file.compressed_size)?;
|
|
||||||
}
|
|
||||||
if header_start {
|
|
||||||
writer.write_u64_le(file.header_start)?;
|
|
||||||
}
|
|
||||||
// Excluded fields:
|
|
||||||
// u32: disk start number
|
|
||||||
}
|
}
|
||||||
Ok(size)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(feature = "unreserved"))]
|
#[cfg(not(feature = "unreserved"))]
|
||||||
|
@ -2080,7 +1978,7 @@ mod test {
|
||||||
writer
|
writer
|
||||||
.start_file_from_path(path, SimpleFileOptions::default())
|
.start_file_from_path(path, SimpleFileOptions::default())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let archive = ZipArchive::new(writer.finish().unwrap()).unwrap();
|
let archive = writer.finish_into_readable().unwrap();
|
||||||
assert_eq!(Some("foo/example.txt"), archive.name_for_index(0));
|
assert_eq!(Some("foo/example.txt"), archive.name_for_index(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2233,8 +2131,7 @@ mod test {
|
||||||
writer
|
writer
|
||||||
.shallow_copy_file(SECOND_FILENAME, SECOND_FILENAME)
|
.shallow_copy_file(SECOND_FILENAME, SECOND_FILENAME)
|
||||||
.expect_err("Duplicate filename");
|
.expect_err("Duplicate filename");
|
||||||
let zip = writer.finish().unwrap();
|
let mut reader = writer.finish_into_readable().unwrap();
|
||||||
let mut reader = ZipArchive::new(zip).unwrap();
|
|
||||||
let mut file_names: Vec<&str> = reader.file_names().collect();
|
let mut file_names: Vec<&str> = reader.file_names().collect();
|
||||||
file_names.sort();
|
file_names.sort();
|
||||||
let mut expected_file_names = vec![RT_TEST_FILENAME, SECOND_FILENAME];
|
let mut expected_file_names = vec![RT_TEST_FILENAME, SECOND_FILENAME];
|
||||||
|
@ -2518,7 +2415,7 @@ mod test {
|
||||||
let contents = b"sleeping";
|
let contents = b"sleeping";
|
||||||
let () = zip.start_file("sleep", options).unwrap();
|
let () = zip.start_file("sleep", options).unwrap();
|
||||||
let _count = zip.write(&contents[..]).unwrap();
|
let _count = zip.write(&contents[..]).unwrap();
|
||||||
let mut zip = ZipArchive::new(zip.finish().unwrap()).unwrap();
|
let mut zip = zip.finish_into_readable().unwrap();
|
||||||
let file = zip.by_index(0).unwrap();
|
let file = zip.by_index(0).unwrap();
|
||||||
assert_eq!(file.name(), "sleep");
|
assert_eq!(file.name(), "sleep");
|
||||||
assert_eq!(file.data_start(), page_size.into());
|
assert_eq!(file.data_start(), page_size.into());
|
||||||
|
|
BIN
tests/data/misaligned_comment.zip
Normal file
BIN
tests/data/misaligned_comment.zip
Normal file
Binary file not shown.
|
@ -28,3 +28,21 @@ fn correctly_handle_zip_with_garbage_after_comment() {
|
||||||
|
|
||||||
assert_eq!(archive.comment(), "short.".as_bytes());
|
assert_eq!(archive.comment(), "short.".as_bytes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ensure that a file which has the signature misaligned with the window size is still
|
||||||
|
/// successfully located.
|
||||||
|
#[test]
|
||||||
|
fn correctly_handle_cde_on_window() {
|
||||||
|
let mut v = Vec::new();
|
||||||
|
v.extend_from_slice(include_bytes!("../tests/data/misaligned_comment.zip"));
|
||||||
|
assert_eq!(v.len(), 512 + 1);
|
||||||
|
let sig: [u8; 4] = v[..4].try_into().unwrap();
|
||||||
|
let sig = u32::from_le_bytes(sig);
|
||||||
|
|
||||||
|
const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50;
|
||||||
|
assert_eq!(sig, CENTRAL_DIRECTORY_END_SIGNATURE);
|
||||||
|
|
||||||
|
let archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip");
|
||||||
|
|
||||||
|
assert_eq!(archive.comment(), "short.".as_bytes());
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue