bulk parsing and bulk writing

- use blocks for reading individual file headers
- remove unnecessary option wrapping for stream entries
- create Block trait
- add coerce method to reduce some boilerplate
- add serialize method to reduce more boilerplate
- use to_le! and from_le!
- add test case
- add some docs
- rename a few structs to clarify zip32-only
This commit is contained in:
Danny McClanahan 2024-05-03 14:28:28 -04:00
parent 011e5afe7b
commit ea308499af
No known key found for this signature in database
GPG key ID: 6105C10F1A199CC7
7 changed files with 1135 additions and 406 deletions

View file

@ -35,6 +35,7 @@ displaydoc = { version = "0.2.4", default-features = false }
flate2 = { version = "1.0.28", default-features = false, optional = true }
indexmap = "2"
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
memchr = "2.7.2"
pbkdf2 = { version = "0.12.2", optional = true }
rand = { version = "0.8.5", optional = true }
sha1 = { version = "0.10.6", optional = true }

View file

@ -48,7 +48,7 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul
let mut bytes = vec![0u8; comment_length];
getrandom(&mut bytes).unwrap();
writer.set_raw_comment(bytes.into());
writer.set_raw_comment(bytes.into_boxed_slice());
writer.start_file("asdf.txt", options)?;
writer.write_all(b"asdf")?;
@ -77,7 +77,7 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul
let mut bytes = vec![0u8; comment_length];
getrandom(&mut bytes).unwrap();
writer.set_raw_comment(bytes.into());
writer.set_raw_comment(bytes.into_boxed_slice());
writer.start_file("asdf.txt", options)?;
writer.write_all(b"asdf")?;

View file

@ -8,14 +8,17 @@ use crate::crc32::Crc32Reader;
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
use crate::read::zip_archive::Shared;
use crate::result::{ZipError, ZipResult};
use crate::spec;
use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
use crate::spec::{self, Block};
use crate::types::{
AesMode, AesVendorVersion, DateTime, System, ZipEntryBlock, ZipFileData, ZipLocalEntryBlock,
};
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
use indexmap::IndexMap;
use std::borrow::Cow;
use std::ffi::OsString;
use std::fs::create_dir_all;
use std::io::{self, copy, prelude::*, sink};
use std::mem;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock};
@ -458,7 +461,7 @@ impl<R: Read + Seek> ZipArchive<R> {
}
fn get_directory_info_zip32(
footer: &spec::CentralDirectoryEnd,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<CentralDirectoryInfo> {
// Some zip files have data prepended to them, resulting in the
@ -485,7 +488,7 @@ impl<R: Read + Seek> ZipArchive<R> {
fn get_directory_info_zip64(
reader: &mut R,
footer: &spec::CentralDirectoryEnd,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
// See if there's a ZIP64 footer. The ZIP64 locator if present will
@ -502,56 +505,59 @@ impl<R: Read + Seek> ZipArchive<R> {
// don't know how to precisely relate that location to our current
// actual offset in the file, since there may be junk at its
// beginning. Therefore we need to perform another search, as in
// read::CentralDirectoryEnd::find_and_parse, except now we search
// read::Zip32CentralDirectoryEnd::find_and_parse, except now we search
// forward. There may be multiple results because of Zip64 central-directory signatures in
// ZIP comment data.
let mut results = Vec::new();
let search_upper_bound = cde_start_pos
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
// minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
.checked_sub(60)
.ok_or(ZipError::InvalidArchive(
"File cannot contain ZIP64 central directory end",
))?;
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(
reader,
locator64.end_of_central_directory_offset,
search_upper_bound,
)?;
search_results.into_iter().for_each(|(footer64, archive_offset)| {
results.push({
let directory_start_result = footer64
let (lower, upper) = if locator64.end_of_central_directory_offset > search_upper_bound {
(
search_upper_bound,
locator64.end_of_central_directory_offset,
)
} else {
(
locator64.end_of_central_directory_offset,
search_upper_bound,
)
};
let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?;
let results: Vec<ZipResult<CentralDirectoryInfo>> =
search_results.into_iter().map(|(footer64, archive_offset)| {
let directory_start = footer64
.central_directory_offset
.checked_add(archive_offset)
.ok_or(ZipError::InvalidArchive(
"Invalid central directory size or offset",
));
directory_start_result.and_then(|directory_start| {
if directory_start > search_upper_bound {
Err(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
Err(ZipError::InvalidArchive(
"ZIP64 footer indicates more files on this disk than in the whole archive",
))
} else if footer64.version_needed_to_extract > footer64.version_made_by {
Err(ZipError::InvalidArchive(
"ZIP64 footer indicates a new version is needed to extract this archive than the \
version that wrote it",
))
} else {
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files: footer64.number_of_files as usize,
disk_number: footer64.disk_number,
disk_with_central_directory: footer64.disk_with_central_directory,
})
}
})
});
});
))?;
if directory_start > search_upper_bound {
Err(ZipError::InvalidArchive(
"Invalid central directory size or offset",
))
} else if footer64.number_of_files_on_this_disk > footer64.number_of_files {
Err(ZipError::InvalidArchive(
"ZIP64 footer indicates more files on this disk than in the whole archive",
))
} else if footer64.version_needed_to_extract > footer64.version_made_by {
Err(ZipError::InvalidArchive(
"ZIP64 footer indicates a new version is needed to extract this archive than the \
version that wrote it",
))
} else {
Ok(CentralDirectoryInfo {
archive_offset,
directory_start,
number_of_files: footer64.number_of_files as usize,
disk_number: footer64.disk_number,
disk_with_central_directory: footer64.disk_with_central_directory,
})
}
}).collect();
Ok(results)
}
@ -559,7 +565,7 @@ impl<R: Read + Seek> ZipArchive<R> {
/// separate function to ease the control flow design.
pub(crate) fn get_metadata(
reader: &mut R,
footer: &spec::CentralDirectoryEnd,
footer: &spec::Zip32CentralDirectoryEnd,
cde_start_pos: u64,
) -> ZipResult<Shared> {
// Check if file has a zip64 footer
@ -690,7 +696,7 @@ impl<R: Read + Seek> ZipArchive<R> {
///
/// This uses the central directory record of the ZIP file, and ignores local file headers
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?;
let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?;
Ok(ZipArchive {
reader,
@ -1002,12 +1008,8 @@ pub(crate) fn central_header_to_zip_file<R: Read + Seek>(
let central_header_start = reader.stream_position()?;
// Parse central header
let signature = reader.read_u32_le()?;
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
Err(ZipError::InvalidArchive("Invalid Central Directory header"))
} else {
central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
}
let block = ZipEntryBlock::parse(reader)?;
central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block)
}
/// Parse a central directory entry to collect the information for the file.
@ -1015,31 +1017,38 @@ fn central_header_to_zip_file_inner<R: Read>(
reader: &mut R,
archive_offset: u64,
central_header_start: u64,
block: ZipEntryBlock,
) -> ZipResult<ZipFileData> {
let version_made_by = reader.read_u16_le()?;
let _version_to_extract = reader.read_u16_le()?;
let flags = reader.read_u16_le()?;
let ZipEntryBlock {
// magic,
version_made_by,
// version_to_extract,
flags,
compression_method,
last_mod_time,
last_mod_date,
crc32,
compressed_size,
uncompressed_size,
file_name_length,
extra_field_length,
file_comment_length,
// disk_number,
// internal_file_attributes,
external_file_attributes,
offset,
..
} = block;
let encrypted = flags & 1 == 1;
let is_utf8 = flags & (1 << 11) != 0;
let using_data_descriptor = flags & (1 << 3) != 0;
let compression_method = reader.read_u16_le()?;
let last_mod_time = reader.read_u16_le()?;
let last_mod_date = reader.read_u16_le()?;
let crc32 = reader.read_u32_le()?;
let compressed_size = reader.read_u32_le()?;
let uncompressed_size = reader.read_u32_le()?;
let file_name_length = reader.read_u16_le()? as usize;
let extra_field_length = reader.read_u16_le()? as usize;
let file_comment_length = reader.read_u16_le()? as usize;
let _disk_number = reader.read_u16_le()?;
let _internal_file_attributes = reader.read_u16_le()?;
let external_file_attributes = reader.read_u32_le()?;
let offset = reader.read_u32_le()? as u64;
let mut file_name_raw = vec![0; file_name_length];
let mut file_name_raw = vec![0; file_name_length as usize];
reader.read_exact(&mut file_name_raw)?;
let mut extra_field = vec![0; extra_field_length];
let mut extra_field = vec![0; extra_field_length as usize];
reader.read_exact(&mut extra_field)?;
let mut file_comment_raw = vec![0; file_comment_length];
let mut file_comment_raw = vec![0; file_comment_length as usize];
reader.read_exact(&mut file_comment_raw)?;
let file_name: Box<str> = match is_utf8 {
@ -1054,6 +1063,7 @@ fn central_header_to_zip_file_inner<R: Read>(
// Construct the result
let mut result = ZipFileData {
system: System::from((version_made_by >> 8) as u8),
/* NB: this strips the top 8 bits! */
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
@ -1071,7 +1081,7 @@ fn central_header_to_zip_file_inner<R: Read>(
extra_field: Some(Arc::new(extra_field)),
central_extra_field: None,
file_comment,
header_start: offset,
header_start: offset.into(),
extra_data_start: None,
central_header_start,
data_start: OnceLock::new(),
@ -1404,7 +1414,15 @@ impl<'a> Drop for ZipFile<'a> {
/// * `data_start`: set to 0
/// * `external_attributes`: `unix_mode()`: will return None
pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Option<ZipFile<'_>>> {
let signature = reader.read_u32_le()?;
let mut block = [0u8; mem::size_of::<ZipLocalEntryBlock>()];
reader.read_exact(&mut block)?;
let block: Box<[u8]> = block.into();
let signature = spec::Magic::from_le_bytes(
block[..mem::size_of_val(&spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)]
.try_into()
.unwrap(),
);
match signature {
spec::LOCAL_FILE_HEADER_SIGNATURE => (),
@ -1412,67 +1430,9 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
}
let version_made_by = reader.read_u16_le()?;
let flags = reader.read_u16_le()?;
if flags & 1 == 1 {
return unsupported_zip_error("Encrypted files are not supported");
}
if flags & (1 << 3) == 1 << 3 {
// using_data_descriptor flag is set
return unsupported_zip_error("The file length is not available in the local header");
}
let is_utf8 = flags & (1 << 11) != 0;
#[allow(deprecated)]
let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?);
let last_mod_time = reader.read_u16_le()?;
let last_mod_date = reader.read_u16_le()?;
let crc32 = reader.read_u32_le()?;
let compressed_size = reader.read_u32_le()?;
let uncompressed_size = reader.read_u32_le()?;
let file_name_length = reader.read_u16_le()? as usize;
let extra_field_length = reader.read_u16_le()? as usize;
let block = ZipLocalEntryBlock::interpret(block)?;
let mut file_name_raw = vec![0; file_name_length];
reader.read_exact(&mut file_name_raw)?;
let mut extra_field = vec![0; extra_field_length];
reader.read_exact(&mut extra_field)?;
let file_name: Box<str> = match is_utf8 {
true => String::from_utf8_lossy(&file_name_raw).into(),
false => file_name_raw.clone().from_cp437().into(),
};
let mut result = ZipFileData {
system: System::from((version_made_by >> 8) as u8),
version_made_by: version_made_by as u8,
encrypted: flags & 1 == 1,
using_data_descriptor: false,
compression_method,
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
crc32,
compressed_size: compressed_size as u64,
uncompressed_size: uncompressed_size as u64,
file_name,
file_name_raw: file_name_raw.into(),
extra_field: Some(Arc::new(extra_field)),
central_extra_field: None,
file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory
// header_start and data start are not available, but also don't matter, since seeking is
// not available.
header_start: 0,
extra_data_start: None,
data_start: OnceLock::new(),
central_header_start: 0,
// The external_attributes field is only available in the central directory.
// We set this to zero, which should be valid as the docs state 'If input came
// from standard input, this field is set to zero.'
external_attributes: 0,
large_file: false,
aes_mode: None,
aes_extra_data_start: 0,
extra_fields: Vec::new(),
};
let mut result = ZipFileData::from_local_block(block, reader)?;
match parse_extra_field(&mut result) {
Ok(..) | Err(ZipError::Io(..)) => {}

View file

@ -1,12 +1,12 @@
use crate::unstable::LittleEndianReadExt;
use std::fs;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
use super::{
central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
central_header_to_zip_file_inner, read_zipfile_from_stream, ZipEntryBlock, ZipError, ZipFile,
ZipFileData, ZipResult,
};
use crate::spec::Block;
/// Stream decoder for zip.
#[derive(Debug)]
@ -20,31 +20,31 @@ impl<R> ZipStreamReader<R> {
}
impl<R: Read> ZipStreamReader<R> {
fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
fn parse_central_directory(&mut self) -> ZipResult<ZipStreamFileMetadata> {
// Give archive_offset and central_header_start dummy value 0, since
// they are not used in the output.
let archive_offset = 0;
let central_header_start = 0;
// Parse central header
let signature = self.0.read_u32_le()?;
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
Ok(None)
} else {
central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
.map(ZipStreamFileMetadata)
.map(Some)
}
let block = ZipEntryBlock::parse(&mut self.0)?;
let file = central_header_to_zip_file_inner(
&mut self.0,
archive_offset,
central_header_start,
block,
)?;
Ok(ZipStreamFileMetadata(file))
}
/// Iteraate over the stream and extract all file and their
/// Iterate over the stream and extract all files and their
/// metadata.
pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
visitor.visit_file(&mut file)?;
}
while let Some(metadata) = self.parse_central_directory()? {
while let Ok(metadata) = self.parse_central_directory() {
visitor.visit_additional_metadata(&metadata)?;
}

View file

@ -1,21 +1,157 @@
#![macro_use]
use crate::result::{ZipError, ZipResult};
use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt};
use core::mem::size_of_val;
use memchr::memmem::FinderRev;
use std::borrow::Cow;
use std::io;
use std::io::prelude::*;
use std::mem;
use std::path::{Component, Path, MAIN_SEPARATOR};
pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50;
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50;
pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50;
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06064b50;
pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: u32 = 0x07064b50;
pub type Magic = u32;
pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = 0x04034b50;
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = 0x02014b50;
pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06054b50;
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06064b50;
pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = 0x07064b50;
pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64;
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;
pub struct CentralDirectoryEnd {
/// A fixed-size record that can be converted to and from a raw byte buffer.
///
/// Implementors supply [`Block::interpret`] (bytes -> validated value) and [`Block::encode`]
/// (value -> bytes). The remaining methods are helpers layered on top of those two.
pub trait Block: Sized + Copy {
    /* TODO: use smallvec? */
    /// Build a value from a buffer holding one raw record.
    ///
    /// # Errors
    /// Implementation-defined; returns a `ZipResult` error when the bytes are rejected.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self>;

    /// Reinterpret `block` bit-for-bit as a `Self`, performing no byte-order conversion and no
    /// validation.
    ///
    /// # Panics
    /// Panics if `block.len()` is not exactly `size_of::<Self>()`.
    fn deserialize(block: &[u8]) -> Self {
        assert_eq!(block.len(), mem::size_of::<Self>());
        let block_ptr: *const Self = block.as_ptr().cast();
        // SAFETY: the assertion above guarantees that `block` covers exactly
        // `size_of::<Self>()` readable bytes. A byte slice gives no alignment guarantee for
        // `Self`, so the value must be loaded with an unaligned read.
        // NOTE(review): soundness also requires every bit pattern to be a valid `Self`;
        // implementors are expected to be plain-integer structs -- confirm for new impls.
        unsafe { block_ptr.read_unaligned() }
    }

    /// Read exactly `size_of::<Self>()` bytes from `reader` and hand them to
    /// [`Block::interpret`].
    ///
    /// # Errors
    /// Propagates the read error (e.g. a short read) or whatever `interpret` returns.
    fn parse<T: Read>(reader: &mut T) -> ZipResult<Self> {
        let mut block = vec![0u8; mem::size_of::<Self>()];
        reader.read_exact(&mut block)?;
        Self::interpret(block.into_boxed_slice())
    }

    /// Produce the byte representation that [`Block::write`] emits.
    fn encode(self) -> Box<[u8]>;

    /// Copy the in-memory representation of `self` into a freshly allocated buffer of
    /// `size_of::<Self>()` bytes, performing no byte-order conversion.
    fn serialize(self) -> Box<[u8]> {
        let mut out_block = vec![0u8; mem::size_of::<Self>()];
        let out_view: &mut [u8] = out_block.as_mut();
        let out_ptr: *mut Self = out_view.as_mut_ptr().cast();
        // SAFETY: `out_block` was allocated with exactly `size_of::<Self>()` writable bytes.
        // The heap buffer of a `Vec<u8>` is only guaranteed to be byte-aligned, so the store
        // must be unaligned.
        unsafe {
            out_ptr.write_unaligned(self);
        }
        out_block.into_boxed_slice()
    }

    /// Encode `self` and write all resulting bytes to `writer`.
    ///
    /// # Errors
    /// Propagates any I/O error reported by `writer`.
    fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
        let block = self.encode();
        writer.write_all(&block)?;
        Ok(())
    }
}
/// Convert fields of a struct *from* little-endian representations, in place.
///
/// Two invocation shapes are accepted:
/// * `from_le![obj, field, Type]` converts a single field;
/// * `from_le![obj, [(field, Type), ...]]` converts each listed field in order (a trailing
///   comma is allowed).
macro_rules! from_le {
    // Single field: overwrite it with its value decoded from little-endian.
    ($obj:ident, $field:ident, $type:ty) => {
        $obj.$field = <$type>::from_le($obj.$field);
    };
    // Non-empty list of `(field, Type)` pairs: apply the single-field rule to each one.
    ($obj:ident, [$(($field:ident, $type:ty)),+ $(,)?]) => {
        $(
            from_le!($obj, $field, $type);
        )+
    };
}
/// Convert fields of a struct *into* little-endian representations, in place.
///
/// Two invocation shapes are accepted:
/// * `to_le![obj, field, Type]` converts a single field;
/// * `to_le![obj, [(field, Type), ...]]` converts each listed field in order (a trailing
///   comma is allowed).
macro_rules! to_le {
    // Single field: overwrite it with its little-endian encoding.
    ($obj:ident, $field:ident, $type:ty) => {
        $obj.$field = <$type>::to_le($obj.$field);
    };
    // Non-empty list of `(field, Type)` pairs: apply the single-field rule to each one.
    ($obj:ident, [$(($field:ident, $type:ty)),+ $(,)?]) => {
        $(
            to_le!($obj, $field, $type);
        )+
    };
}
/// Raw, fixed-size portion of the zip32 end-of-central-directory record (the variable-length
/// archive comment is not part of this block).
///
/// The struct is reinterpreted directly from and to raw bytes, so its memory layout must match
/// the declaration order with no padding. `packed` removes the padding; `C` is what guarantees
/// that fields are laid out in declaration order (the default representation makes no such
/// promise).
#[derive(Copy, Clone, Debug)]
#[repr(C, packed)]
pub struct Zip32CDEBlock {
    /// Record signature; expected to equal `CENTRAL_DIRECTORY_END_SIGNATURE`.
    magic: Magic,
    pub disk_number: u16,
    pub disk_with_central_directory: u16,
    pub number_of_files_on_this_disk: u16,
    pub number_of_files: u16,
    pub central_directory_size: u32,
    pub central_directory_offset: u32,
    /// Length in bytes of the comment that follows this block.
    pub zip_file_comment_length: u16,
}
impl Zip32CDEBlock {
    /// Returns this block with every field decoded from little-endian into the host's native
    /// byte order.
    // The lint expects `from_*` methods not to take `self`; this one transforms a copy of the
    // receiver field by field.
    #[allow(clippy::wrong_self_convention)]
    #[inline(always)]
    fn from_le(mut self) -> Self {
        from_le!(self, magic, Magic);
        from_le!(self, disk_number, u16);
        from_le!(self, disk_with_central_directory, u16);
        from_le!(self, number_of_files_on_this_disk, u16);
        from_le!(self, number_of_files, u16);
        from_le!(self, central_directory_size, u32);
        from_le!(self, central_directory_offset, u32);
        from_le!(self, zip_file_comment_length, u16);
        self
    }

    /// Returns this block with every field encoded from the host's native byte order into
    /// little-endian.
    #[inline(always)]
    fn to_le(mut self) -> Self {
        to_le!(self, magic, Magic);
        to_le!(self, disk_number, u16);
        to_le!(self, disk_with_central_directory, u16);
        to_le!(self, number_of_files_on_this_disk, u16);
        to_le!(self, number_of_files, u16);
        to_le!(self, central_directory_size, u32);
        to_le!(self, central_directory_offset, u32);
        to_le!(self, zip_file_comment_length, u16);
        self
    }
}
impl Block for Zip32CDEBlock {
    /// Decode `bytes` as a little-endian block and accept it only when its signature equals
    /// `CENTRAL_DIRECTORY_END_SIGNATURE`.
    ///
    /// # Errors
    /// Returns `ZipError::InvalidArchive` on a signature mismatch.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
        let decoded = Self::deserialize(&bytes).from_le();
        if decoded.magic == CENTRAL_DIRECTORY_END_SIGNATURE {
            Ok(decoded)
        } else {
            Err(ZipError::InvalidArchive("Invalid digital signature header"))
        }
    }

    /// Convert every field to little-endian, then dump the block's bytes.
    fn encode(self) -> Box<[u8]> {
        let wire = self.to_le();
        wire.serialize()
    }
}
#[derive(Debug)]
pub struct Zip32CentralDirectoryEnd {
pub disk_number: u16,
pub disk_with_central_directory: u16,
pub number_of_files_on_this_disk: u16,
@ -25,23 +161,9 @@ pub struct CentralDirectoryEnd {
pub zip_file_comment: Box<[u8]>,
}
impl CentralDirectoryEnd {
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<CentralDirectoryEnd> {
let magic = reader.read_u32_le()?;
if magic != CENTRAL_DIRECTORY_END_SIGNATURE {
return Err(ZipError::InvalidArchive("Invalid digital signature header"));
}
let disk_number = reader.read_u16_le()?;
let disk_with_central_directory = reader.read_u16_le()?;
let number_of_files_on_this_disk = reader.read_u16_le()?;
let number_of_files = reader.read_u16_le()?;
let central_directory_size = reader.read_u32_le()?;
let central_directory_offset = reader.read_u32_le()?;
let zip_file_comment_length = reader.read_u16_le()? as usize;
let mut zip_file_comment = vec![0; zip_file_comment_length].into_boxed_slice();
reader.read_exact(&mut zip_file_comment)?;
Ok(CentralDirectoryEnd {
impl Zip32CentralDirectoryEnd {
fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> {
let Self {
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
@ -49,63 +171,187 @@ impl CentralDirectoryEnd {
central_directory_size,
central_directory_offset,
zip_file_comment,
} = self;
let block = Zip32CDEBlock {
magic: CENTRAL_DIRECTORY_END_SIGNATURE,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
zip_file_comment_length: zip_file_comment.len().try_into().unwrap_or(u16::MAX),
};
Ok((block, zip_file_comment))
}
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip32CentralDirectoryEnd> {
let Zip32CDEBlock {
// magic,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
zip_file_comment_length,
..
} = Zip32CDEBlock::parse(reader)?;
let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize];
reader.read_exact(&mut zip_file_comment)?;
Ok(Zip32CentralDirectoryEnd {
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
zip_file_comment: zip_file_comment.into_boxed_slice(),
})
}
pub fn find_and_parse<T: Read + Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)> {
const HEADER_SIZE: u64 = 22;
const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000;
const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6;
pub fn find_and_parse<T: Read + Seek>(
reader: &mut T,
) -> ZipResult<(Zip32CentralDirectoryEnd, u64)> {
let file_length = reader.seek(io::SeekFrom::End(0))?;
let search_upper_bound = 0;
if file_length < HEADER_SIZE {
if file_length < mem::size_of::<Zip32CDEBlock>() as u64 {
return Err(ZipError::InvalidArchive("Invalid zip header"));
}
let mut pos = file_length - HEADER_SIZE;
while pos >= search_upper_bound {
let mut have_signature = false;
reader.seek(io::SeekFrom::Start(pos))?;
if reader.read_u32_le()? == CENTRAL_DIRECTORY_END_SIGNATURE {
have_signature = true;
reader.seek(io::SeekFrom::Current(
BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64,
))?;
let cde_start_pos = reader.seek(io::SeekFrom::Start(pos))?;
if let Ok(end_header) = CentralDirectoryEnd::parse(reader) {
return Ok((end_header, cde_start_pos));
let search_upper_bound = 0;
const END_WINDOW_SIZE: usize = 512;
let sig_bytes = CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
let finder = FinderRev::new(&sig_bytes);
let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64);
let mut window = [0u8; END_WINDOW_SIZE];
while window_start >= search_upper_bound {
/* Go to the start of the window in the file. */
reader.seek(io::SeekFrom::Start(window_start))?;
/* Identify how many bytes to read (this may be less than the window size for files
* smaller than END_WINDOW_SIZE). */
let end = (window_start + END_WINDOW_SIZE as u64).min(file_length);
let cur_len = (end - window_start) as usize;
debug_assert!(cur_len > 0);
debug_assert!(cur_len <= END_WINDOW_SIZE);
let cur_window: &mut [u8] = &mut window[..cur_len];
/* Read the window into the bytes! */
reader.read_exact(cur_window)?;
/* Find instances of the magic signature. */
for offset in finder.rfind_iter(cur_window) {
let cde_start_pos = window_start + offset as u64;
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
/* Drop any headers that don't parse. */
if let Ok(cde) = Self::parse(reader) {
return Ok((cde, cde_start_pos));
}
}
pos = match pos.checked_sub(if have_signature {
size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64
} else {
1
}) {
Some(p) => p,
None => break,
};
/* We always want to make sure we go allllll the way back to the start of the file if
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
* avoid infinite looping by checking at the end of the loop. */
if window_start == search_upper_bound {
break;
}
debug_assert!(END_WINDOW_SIZE > mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE));
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
* overlap our nice neat window boundaries! */
window_start = (window_start
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
* by the width of the pattern to match. */
+ mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64)
/* This should never happen, but make sure we don't go past the end of the file. */
.min(file_length);
window_start = window_start
.saturating_sub(
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
* once (unless limited by file_length). */
END_WINDOW_SIZE as u64,
)
/* This will never go below the value of `search_upper_bound`, so we have a special
* `if window_start == search_upper_bound` check above. */
.max(search_upper_bound);
}
Err(ZipError::InvalidArchive(
"Could not find central directory end",
))
}
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
writer.write_u32_le(CENTRAL_DIRECTORY_END_SIGNATURE)?;
writer.write_u16_le(self.disk_number)?;
writer.write_u16_le(self.disk_with_central_directory)?;
writer.write_u16_le(self.number_of_files_on_this_disk)?;
writer.write_u16_le(self.number_of_files)?;
writer.write_u32_le(self.central_directory_size)?;
writer.write_u32_le(self.central_directory_offset)?;
writer.write_u16_le(self.zip_file_comment.len() as u16)?;
writer.write_all(&self.zip_file_comment)?;
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
let (block, comment) = self.block_and_comment()?;
block.write(writer)?;
writer.write_all(&comment)?;
Ok(())
}
}
/// Raw, fixed-size zip64 end-of-central-directory locator record.
///
/// The struct is reinterpreted directly from and to raw bytes, so its memory layout must match
/// the declaration order with no padding. `packed` removes the padding; `C` is what guarantees
/// that fields are laid out in declaration order (the default representation makes no such
/// promise).
#[derive(Copy, Clone)]
#[repr(C, packed)]
pub struct Zip64CDELocatorBlock {
    /// Record signature; expected to equal `ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE`.
    magic: Magic,
    pub disk_with_central_directory: u32,
    pub end_of_central_directory_offset: u64,
    pub number_of_disks: u32,
}
impl Zip64CDELocatorBlock {
    /// Returns this block with every field decoded from little-endian into the host's native
    /// byte order.
    // The lint expects `from_*` methods not to take `self`; this one transforms a copy of the
    // receiver field by field.
    #[allow(clippy::wrong_self_convention)]
    #[inline(always)]
    fn from_le(mut self) -> Self {
        from_le!(self, magic, Magic);
        from_le!(self, disk_with_central_directory, u32);
        from_le!(self, end_of_central_directory_offset, u64);
        from_le!(self, number_of_disks, u32);
        self
    }

    /// Returns this block with every field encoded from the host's native byte order into
    /// little-endian.
    #[inline(always)]
    fn to_le(mut self) -> Self {
        to_le!(self, magic, Magic);
        to_le!(self, disk_with_central_directory, u32);
        to_le!(self, end_of_central_directory_offset, u64);
        to_le!(self, number_of_disks, u32);
        self
    }
}
impl Block for Zip64CDELocatorBlock {
    /// Decode `bytes` as a little-endian block and accept it only when its signature equals
    /// `ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE`.
    ///
    /// # Errors
    /// Returns `ZipError::InvalidArchive` on a signature mismatch.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
        let decoded = Self::deserialize(&bytes).from_le();
        if decoded.magic == ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE {
            Ok(decoded)
        } else {
            Err(ZipError::InvalidArchive(
                "Invalid zip64 locator digital signature header",
            ))
        }
    }

    /// Convert every field to little-endian, then dump the block's bytes.
    fn encode(self) -> Box<[u8]> {
        let wire = self.to_le();
        wire.serialize()
    }
}
pub struct Zip64CentralDirectoryEndLocator {
pub disk_with_central_directory: u32,
pub end_of_central_directory_offset: u64,
@ -114,15 +360,13 @@ pub struct Zip64CentralDirectoryEndLocator {
impl Zip64CentralDirectoryEndLocator {
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator> {
let magic = reader.read_u32_le()?;
if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE {
return Err(ZipError::InvalidArchive(
"Invalid zip64 locator digital signature header",
));
}
let disk_with_central_directory = reader.read_u32_le()?;
let end_of_central_directory_offset = reader.read_u64_le()?;
let number_of_disks = reader.read_u32_le()?;
let Zip64CDELocatorBlock {
// magic,
disk_with_central_directory,
end_of_central_directory_offset,
number_of_disks,
..
} = Zip64CDELocatorBlock::parse(reader)?;
Ok(Zip64CentralDirectoryEndLocator {
disk_with_central_directory,
@ -131,12 +375,96 @@ impl Zip64CentralDirectoryEndLocator {
})
}
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?;
writer.write_u32_le(self.disk_with_central_directory)?;
writer.write_u64_le(self.end_of_central_directory_offset)?;
writer.write_u32_le(self.number_of_disks)?;
Ok(())
pub fn block(self) -> Zip64CDELocatorBlock {
let Self {
disk_with_central_directory,
end_of_central_directory_offset,
number_of_disks,
} = self;
Zip64CDELocatorBlock {
magic: ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE,
disk_with_central_directory,
end_of_central_directory_offset,
number_of_disks,
}
}
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
self.block().write(writer)
}
}
/// Raw, fixed-size portion of the zip64 end-of-central-directory record.
///
/// The struct is reinterpreted directly from and to raw bytes, so its memory layout must match
/// the declaration order with no padding. `packed` removes the padding; `C` is what guarantees
/// that fields are laid out in declaration order (the default representation makes no such
/// promise).
#[derive(Copy, Clone)]
#[repr(C, packed)]
pub struct Zip64CDEBlock {
    /// Record signature; expected to equal `ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE`.
    magic: Magic,
    pub record_size: u64,
    pub version_made_by: u16,
    pub version_needed_to_extract: u16,
    pub disk_number: u32,
    pub disk_with_central_directory: u32,
    pub number_of_files_on_this_disk: u64,
    pub number_of_files: u64,
    pub central_directory_size: u64,
    pub central_directory_offset: u64,
}
impl Zip64CDEBlock {
    /// Returns this block with every field decoded from little-endian into the host's native
    /// byte order.
    // The lint expects `from_*` methods not to take `self`; this one transforms a copy of the
    // receiver field by field.
    #[allow(clippy::wrong_self_convention)]
    #[inline(always)]
    fn from_le(mut self) -> Self {
        from_le!(self, magic, Magic);
        from_le!(self, record_size, u64);
        from_le!(self, version_made_by, u16);
        from_le!(self, version_needed_to_extract, u16);
        from_le!(self, disk_number, u32);
        from_le!(self, disk_with_central_directory, u32);
        from_le!(self, number_of_files_on_this_disk, u64);
        from_le!(self, number_of_files, u64);
        from_le!(self, central_directory_size, u64);
        from_le!(self, central_directory_offset, u64);
        self
    }

    /// Returns this block with every field encoded from the host's native byte order into
    /// little-endian.
    #[inline(always)]
    fn to_le(mut self) -> Self {
        to_le!(self, magic, Magic);
        to_le!(self, record_size, u64);
        to_le!(self, version_made_by, u16);
        to_le!(self, version_needed_to_extract, u16);
        to_le!(self, disk_number, u32);
        to_le!(self, disk_with_central_directory, u32);
        to_le!(self, number_of_files_on_this_disk, u64);
        to_le!(self, number_of_files, u64);
        to_le!(self, central_directory_size, u64);
        to_le!(self, central_directory_offset, u64);
        self
    }
}
impl Block for Zip64CDEBlock {
    /// Decode `bytes` as a little-endian block and accept it only when its signature equals
    /// `ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE`.
    ///
    /// # Errors
    /// Returns `ZipError::InvalidArchive` on a signature mismatch.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
        let decoded = Self::deserialize(&bytes).from_le();
        if decoded.magic == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE {
            Ok(decoded)
        } else {
            Err(ZipError::InvalidArchive("Invalid digital signature header"))
        }
    }

    /// Convert every field to little-endian, then dump the block's bytes.
    fn encode(self) -> Box<[u8]> {
        let wire = self.to_le();
        wire.serialize()
    }
}
@ -153,56 +481,105 @@ pub struct Zip64CentralDirectoryEnd {
}
impl Zip64CentralDirectoryEnd {
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEnd> {
let Zip64CDEBlock {
// record_size,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
..
} = Zip64CDEBlock::parse(reader)?;
Ok(Self {
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
})
}
pub fn find_and_parse<T: Read + Seek>(
reader: &mut T,
nominal_offset: u64,
search_upper_bound: u64,
) -> ZipResult<Vec<(Zip64CentralDirectoryEnd, u64)>> {
let mut results = Vec::new();
let mut pos = search_upper_bound;
while pos >= nominal_offset {
let mut have_signature = false;
reader.seek(io::SeekFrom::Start(pos))?;
if reader.read_u32_le()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE {
have_signature = true;
let archive_offset = pos - nominal_offset;
const END_WINDOW_SIZE: usize = 2048;
let _record_size = reader.read_u64_le()?;
// We would use this value if we did anything with the "zip64 extensible data sector".
let sig_bytes = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes();
let finder = FinderRev::new(&sig_bytes);
let version_made_by = reader.read_u16_le()?;
let version_needed_to_extract = reader.read_u16_le()?;
let disk_number = reader.read_u32_le()?;
let disk_with_central_directory = reader.read_u32_le()?;
let number_of_files_on_this_disk = reader.read_u64_le()?;
let number_of_files = reader.read_u64_le()?;
let central_directory_size = reader.read_u64_le()?;
let central_directory_offset = reader.read_u64_le()?;
let mut window_start: u64 = search_upper_bound
.saturating_sub(END_WINDOW_SIZE as u64)
.max(nominal_offset);
let mut window = [0u8; END_WINDOW_SIZE];
while window_start >= nominal_offset {
reader.seek(io::SeekFrom::Start(window_start))?;
results.push((
Zip64CentralDirectoryEnd {
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
},
archive_offset,
));
/* Identify how many bytes to read (this may be less than the window size for files
* smaller than END_WINDOW_SIZE). */
let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound);
debug_assert!(end >= window_start);
let cur_len = (end - window_start) as usize;
if cur_len == 0 {
break;
}
pos = match pos.checked_sub(if have_signature {
size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64
} else {
1
}) {
None => break,
Some(p) => p,
debug_assert!(cur_len <= END_WINDOW_SIZE);
let cur_window: &mut [u8] = &mut window[..cur_len];
/* Read the window into the bytes! */
reader.read_exact(cur_window)?;
/* Find instances of the magic signature. */
for offset in finder.rfind_iter(cur_window) {
let cde_start_pos = window_start + offset as u64;
reader.seek(io::SeekFrom::Start(cde_start_pos))?;
debug_assert!(cde_start_pos >= nominal_offset);
let archive_offset = cde_start_pos - nominal_offset;
let cde = Self::parse(reader)?;
results.push((cde, archive_offset));
}
/* We always want to make sure we go allllll the way back to the start of the file if
* we can't find it elsewhere. However, our `while` condition doesn't check that. So we
* avoid infinite looping by checking at the end of the loop. */
if window_start == nominal_offset {
break;
}
debug_assert!(
END_WINDOW_SIZE > mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)
);
/* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that
* overlap our nice neat window boundaries! */
window_start = (window_start
/* NB: To catch matches across window boundaries, we need to make our blocks overlap
* by the width of the pattern to match. */
+ mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64)
/* This may never happen, but make sure we don't go past the end of the specified
* range. */
.min(search_upper_bound);
window_start = window_start
.saturating_sub(
/* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at
* once (unless limited by search_upper_bound). */
END_WINDOW_SIZE as u64,
)
/* This will never go below the value of `nominal_offset`, so we have a special
* `if window_start == nominal_offset` check above. */
.max(nominal_offset);
}
if results.is_empty() {
Err(ZipError::InvalidArchive(
"Could not find ZIP64 central directory end",
@ -212,18 +589,34 @@ impl Zip64CentralDirectoryEnd {
}
}
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?;
writer.write_u64_le(44)?; // record size
writer.write_u16_le(self.version_made_by)?;
writer.write_u16_le(self.version_needed_to_extract)?;
writer.write_u32_le(self.disk_number)?;
writer.write_u32_le(self.disk_with_central_directory)?;
writer.write_u64_le(self.number_of_files_on_this_disk)?;
writer.write_u64_le(self.number_of_files)?;
writer.write_u64_le(self.central_directory_size)?;
writer.write_u64_le(self.central_directory_offset)?;
Ok(())
pub fn block(self) -> Zip64CDEBlock {
let Self {
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
} = self;
Zip64CDEBlock {
magic: ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE,
/* currently unused */
record_size: 44,
version_made_by,
version_needed_to_extract,
disk_number,
disk_with_central_directory,
number_of_files_on_this_disk,
number_of_files,
central_directory_size,
central_directory_offset,
}
}
/// Writes this record to `writer` by first converting it to a
/// `Zip64CDEBlock` and then delegating to that block's `write`.
pub fn write<T: Write>(self, writer: &mut T) -> ZipResult<()> {
    let raw = self.block();
    raw.write(writer)
}
}
@ -280,3 +673,51 @@ pub(crate) fn path_to_string<T: AsRef<Path>>(path: T) -> Box<str> {
maybe_original.unwrap().into()
}
}
// Unit tests for the `Block` round trip, using a minimal two-field block.
#[cfg(test)]
mod test {
use super::*;
use std::io::Cursor;
/// Smallest useful block: a magic value followed by one `u16` field.
/// `repr(packed)` removes padding so the struct's bytes are exactly its fields in order.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[repr(packed)]
pub struct TestBlock {
magic: Magic,
pub file_name_length: u16,
}
impl TestBlock {
// NOTE(review): `from_le!` / `to_le!` are defined outside this view; presumably they
// convert each listed field between little-endian and native byte order — confirm.
#[allow(clippy::wrong_self_convention)]
fn from_le(mut self) -> Self {
from_le![self, [(magic, Magic), (file_name_length, u16)]];
self
}
fn to_le(mut self) -> Self {
to_le![self, [(magic, Magic), (file_name_length, u16)]];
self
}
}
impl Block for TestBlock {
// Unlike the real blocks in this file, no magic check is performed here.
fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
Ok(Self::deserialize(&bytes).from_le())
}
fn encode(self) -> Box<[u8]> {
self.to_le().serialize()
}
}
/// Demonstrate that a block object can be safely written to memory and deserialized back out.
#[test]
fn block_serde() {
let block = TestBlock {
magic: 0x01111,
file_name_length: 3,
};
// `write` and `parse` are not defined on `TestBlock` itself, so they presumably come
// from the `Block` trait — confirm against its definition.
let mut c = Cursor::new(Vec::new());
block.write(&mut c).unwrap();
// Rewind so the block just written is read back from the start.
c.set_position(0);
let block2 = TestBlock::parse(&mut c).unwrap();
assert_eq!(block, block2);
}
}

View file

@ -1,13 +1,18 @@
//! Types that specify what is contained in a ZIP.
use crate::cp437::FromCp437;
use crate::write::{FileOptionExtension, FileOptions};
use path::{Component, Path, PathBuf};
use std::fmt;
use std::path;
use std::sync::{Arc, OnceLock};
#[cfg(doc)]
use crate::read::ZipFile;
#[cfg(feature = "chrono")]
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
#[cfg(doc)]
use {crate::read::ZipFile, crate::write::FileOptions};
use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block};
pub(crate) mod ffi {
pub const S_IFDIR: u32 = 0o0040000;
@ -23,6 +28,12 @@ use crate::CompressionMethod;
#[cfg(feature = "time")]
use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
/// CRC and size values supplied up front when initializing a file entry
/// (consumed by `ZipFileData::initialize_local_block`).
pub(crate) struct ZipRawValues {
// Copied into `ZipFileData::crc32`.
pub(crate) crc32: u32,
// Copied into `ZipFileData::compressed_size`.
pub(crate) compressed_size: u64,
// Copied into `ZipFileData::uncompressed_size`.
pub(crate) uncompressed_size: u64,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum System {
@ -546,6 +557,408 @@ impl ZipFileData {
.map(|v| v.len())
.unwrap_or_default()
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn initialize_local_block<S, T: FileOptionExtension>(
name: S,
options: &FileOptions<T>,
raw_values: ZipRawValues,
header_start: u64,
extra_data_start: Option<u64>,
aes_extra_data_start: u64,
compression_method: crate::compression::CompressionMethod,
aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>,
extra_field: Option<Arc<Vec<u8>>>,
) -> Self
where
S: Into<Box<str>>,
{
let permissions = options.permissions.unwrap_or(0o100644);
let file_name: Box<str> = name.into();
let file_name_raw: Box<[u8]> = file_name.bytes().collect();
ZipFileData {
system: System::Unix,
version_made_by: DEFAULT_VERSION,
encrypted: options.encrypt_with.is_some(),
using_data_descriptor: false,
compression_method,
compression_level: options.compression_level,
last_modified_time: Some(options.last_modified_time),
crc32: raw_values.crc32,
compressed_size: raw_values.compressed_size,
uncompressed_size: raw_values.uncompressed_size,
file_name, // Never used for saving, but used as map key in insert_file_data()
file_name_raw,
extra_field,
central_extra_field: options.extended_options.central_extra_data().cloned(),
file_comment: String::with_capacity(0).into_boxed_str(),
header_start,
data_start: OnceLock::new(),
central_header_start: 0,
external_attributes: permissions << 16,
large_file: options.large_file,
aes_mode,
extra_fields: Vec::new(),
extra_data_start,
aes_extra_data_start,
}
}
pub(crate) fn from_local_block<R: std::io::Read>(
block: ZipLocalEntryBlock,
reader: &mut R,
) -> ZipResult<Self> {
let ZipLocalEntryBlock {
// magic,
version_made_by,
flags,
compression_method,
last_mod_time,
last_mod_date,
crc32,
compressed_size,
uncompressed_size,
file_name_length,
extra_field_length,
..
} = block;
let encrypted: bool = flags & 1 == 1;
/* FIXME: these were previously incorrect: add testing! */
/* flags & (1 << 1) != 0 */
let is_utf8: bool = flags & (1 << 11) != 0;
/* flags & (1 << 3) != 0 */
let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3;
#[allow(deprecated)]
let compression_method = crate::CompressionMethod::from_u16(compression_method);
let file_name_length: usize = file_name_length.into();
let extra_field_length: usize = extra_field_length.into();
if encrypted {
return Err(ZipError::UnsupportedArchive(
"Encrypted files are not supported",
));
}
if using_data_descriptor {
return Err(ZipError::UnsupportedArchive(
"The file length is not available in the local header",
));
}
let mut file_name_raw = vec![0u8; file_name_length];
reader.read_exact(&mut file_name_raw)?;
let mut extra_field = vec![0u8; extra_field_length];
reader.read_exact(&mut extra_field)?;
let file_name: Box<str> = match is_utf8 {
true => String::from_utf8_lossy(&file_name_raw).into(),
false => file_name_raw.clone().from_cp437().into(),
};
let system: u8 = (version_made_by >> 8).try_into().unwrap();
Ok(ZipFileData {
system: System::from(system),
/* NB: this strips the top 8 bits! */
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
compression_method,
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
crc32,
compressed_size: compressed_size.into(),
uncompressed_size: uncompressed_size.into(),
file_name,
file_name_raw: file_name_raw.into(),
extra_field: Some(Arc::new(extra_field)),
central_extra_field: None,
file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory
// header_start and data start are not available, but also don't matter, since seeking is
// not available.
header_start: 0,
data_start: OnceLock::new(),
central_header_start: 0,
// The external_attributes field is only available in the central directory.
// We set this to zero, which should be valid as the docs state 'If input came
// from standard input, this field is set to zero.'
external_attributes: 0,
large_file: false,
aes_mode: None,
extra_fields: Vec::new(),
extra_data_start: None,
aes_extra_data_start: 0,
})
}
/// Returns `true` when the raw file name bytes form valid UTF-8.
fn is_utf8(&self) -> bool {
    matches!(std::str::from_utf8(&self.file_name_raw), Ok(_))
}
/// Returns `true` when every byte of the raw file name is ASCII.
fn is_ascii(&self) -> bool {
    self.file_name_raw.iter().all(u8::is_ascii)
}
/// Computes the general-purpose bit flags written for this entry.
///
/// Bit 11 is set when the name is valid UTF-8 and not pure ASCII; bit 0 is
/// set when the entry is encrypted. No other bits are ever set here.
fn flags(&self) -> u16 {
    const ENCRYPTED_BIT: u16 = 1u16 << 0;
    const UTF8_NAME_BIT: u16 = 1u16 << 11;

    let mut bits = 0u16;
    if self.is_utf8() && !self.is_ascii() {
        bits |= UTF8_NAME_BIT;
    }
    if self.encrypted {
        bits |= ENCRYPTED_BIT;
    }
    bits
}
/// Builds the fixed-size local file header block for this entry.
///
/// When `large_file` is set, both 32-bit size fields hold the zip64
/// placeholder (`spec::ZIP64_BYTES_THR`) and the reported extra field
/// length is increased to account for the zip64 extra field.
///
/// # Errors
///
/// Returns `ZipError::InvalidArchive` instead of panicking when:
/// - a size does not fit in 32 bits and `large_file` is not set,
/// - the local plus central extra data would exceed `u16::MAX` bytes,
/// - the raw file name is longer than `u16::MAX` bytes.
pub(crate) fn local_block(&self) -> ZipResult<ZipLocalEntryBlock> {
    // Bytes added to the local extra field for zip64 entries: 2-byte header
    // id + 2-byte data size + 8-byte uncompressed size + 8-byte compressed size.
    const ZIP64_LOCAL_EXTRA_FIELD_LEN: usize = 20;

    let (compressed_size, uncompressed_size) = if self.large_file {
        (spec::ZIP64_BYTES_THR as u32, spec::ZIP64_BYTES_THR as u32)
    } else {
        // Without the zip64 extra field the sizes must fit the 32-bit header fields.
        (
            u32::try_from(self.compressed_size).map_err(|_| {
                ZipError::InvalidArchive("File is too large and large_file is not set")
            })?,
            u32::try_from(self.uncompressed_size).map_err(|_| {
                ZipError::InvalidArchive("File is too large and large_file is not set")
            })?,
        )
    };

    let mut extra_field_length = self.extra_field_len();
    if self.large_file {
        extra_field_length += ZIP64_LOCAL_EXTRA_FIELD_LEN;
    }
    if extra_field_length + self.central_extra_field_len() > u16::MAX as usize {
        return Err(ZipError::InvalidArchive("Extra data field is too large"));
    }
    // Cannot fail: the check above bounds the value by u16::MAX.
    let extra_field_length: u16 = extra_field_length.try_into().unwrap();

    let file_name_length: u16 = self
        .file_name_raw
        .len()
        .try_into()
        .map_err(|_| ZipError::InvalidArchive("File name is too long"))?;

    let last_modified_time = self
        .last_modified_time
        .unwrap_or_else(DateTime::default_for_write);
    Ok(ZipLocalEntryBlock {
        magic: spec::LOCAL_FILE_HEADER_SIGNATURE,
        // The local header's version slot holds the version needed to extract.
        version_made_by: self.version_needed(),
        flags: self.flags(),
        #[allow(deprecated)]
        compression_method: self.compression_method.to_u16(),
        last_mod_time: last_modified_time.timepart(),
        last_mod_date: last_modified_time.datepart(),
        crc32: self.crc32,
        compressed_size,
        uncompressed_size,
        file_name_length,
        extra_field_length,
    })
}
/// Builds the fixed-size central directory header block for this entry.
///
/// `zip64_extra_field_length` is added to the local and central extra field
/// lengths to form the block's `extra_field_length`. Sizes and the header
/// offset are clamped to `spec::ZIP64_BYTES_THR` before narrowing to 32 bits.
///
/// # Panics
///
/// Panics if an extra field length, the file name length, or the comment
/// length does not fit in a `u16`.
pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipEntryBlock {
    let local_extra_len: u16 = self.extra_field_len().try_into().unwrap();
    let central_extra_len: u16 = self.central_extra_field_len().try_into().unwrap();
    let mod_time = self
        .last_modified_time
        .unwrap_or_else(DateTime::default_for_write);
    // Clamp a 64-bit value to the zip64 threshold, then narrow it to 32 bits.
    let clamp_u32 = |value: u64| -> u32 { value.min(spec::ZIP64_BYTES_THR).try_into().unwrap() };

    ZipEntryBlock {
        magic: spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE,
        // High byte: originating system; low byte: version.
        version_made_by: ((self.system as u16) << 8) | (self.version_made_by as u16),
        version_to_extract: self.version_needed(),
        flags: self.flags(),
        #[allow(deprecated)]
        compression_method: self.compression_method.to_u16(),
        last_mod_time: mod_time.timepart(),
        last_mod_date: mod_time.datepart(),
        crc32: self.crc32,
        compressed_size: clamp_u32(self.compressed_size),
        uncompressed_size: clamp_u32(self.uncompressed_size),
        file_name_length: self.file_name_raw.len().try_into().unwrap(),
        extra_field_length: zip64_extra_field_length + local_extra_len + central_extra_len,
        /* FIXME: this appears to be set to 0 in write_central_directory_header() on master? */
        file_comment_length: self.file_comment.as_bytes().len().try_into().unwrap(),
        disk_number: 0,
        internal_file_attributes: 0,
        external_file_attributes: self.external_attributes,
        offset: clamp_u32(self.header_start),
    }
}
}
/// Fixed-size portion of a central directory file header.
///
/// `repr(packed)` removes padding, so the struct's bytes are exactly its
/// fields in declaration order. Parsing (`Block::interpret`) rejects blocks
/// whose `magic` is not `spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE`.
#[derive(Copy, Clone, Debug)]
#[repr(packed)]
pub(crate) struct ZipEntryBlock {
// Signature identifying this header type.
pub magic: spec::Magic,
// When written by `ZipFileData::block`: system id in the high byte, version in the low byte.
pub version_made_by: u16,
pub version_to_extract: u16,
// General-purpose bit flags (bit 0: encrypted, bit 11: UTF-8 name, per `ZipFileData::flags`).
pub flags: u16,
pub compression_method: u16,
// Modification time and date as produced by `DateTime::timepart` / `DateTime::datepart`.
pub last_mod_time: u16,
pub last_mod_date: u16,
pub crc32: u32,
// 32-bit sizes; `ZipFileData::block` clamps larger values to `spec::ZIP64_BYTES_THR`.
pub compressed_size: u32,
pub uncompressed_size: u32,
// Byte lengths of the variable-length fields associated with this header.
pub file_name_length: u16,
pub extra_field_length: u16,
pub file_comment_length: u16,
pub disk_number: u16,
pub internal_file_attributes: u16,
pub external_file_attributes: u32,
// Local header offset; `ZipFileData::block` clamps it to `spec::ZIP64_BYTES_THR`.
pub offset: u32,
}
// Byte-order helpers for `ZipEntryBlock`. Both list every field of the struct with its type.
// NOTE(review): `from_le!` / `to_le!` are defined outside this view; presumably they convert
// each listed field between little-endian and native byte order — confirm.
impl ZipEntryBlock {
/// Applies `from_le!` to every field and returns the updated block.
#[allow(clippy::wrong_self_convention)]
#[inline(always)]
fn from_le(mut self) -> Self {
from_le![
self,
[
(magic, spec::Magic),
(version_made_by, u16),
(version_to_extract, u16),
(flags, u16),
(compression_method, u16),
(last_mod_time, u16),
(last_mod_date, u16),
(crc32, u32),
(compressed_size, u32),
(uncompressed_size, u32),
(file_name_length, u16),
(extra_field_length, u16),
(file_comment_length, u16),
(disk_number, u16),
(internal_file_attributes, u16),
(external_file_attributes, u32),
(offset, u32),
]
];
self
}
/// Applies `to_le!` to every field and returns the updated block.
#[inline(always)]
fn to_le(mut self) -> Self {
to_le![
self,
[
(magic, spec::Magic),
(version_made_by, u16),
(version_to_extract, u16),
(flags, u16),
(compression_method, u16),
(last_mod_time, u16),
(last_mod_date, u16),
(crc32, u32),
(compressed_size, u32),
(uncompressed_size, u32),
(file_name_length, u16),
(extra_field_length, u16),
(file_comment_length, u16),
(disk_number, u16),
(internal_file_attributes, u16),
(external_file_attributes, u32),
(offset, u32),
]
];
self
}
}
impl Block for ZipEntryBlock {
    /// Decodes `bytes` into a block and checks the central directory signature.
    ///
    /// Returns `ZipError::InvalidArchive` when the signature does not match.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
        let decoded = Self::deserialize(&bytes).from_le();
        // Copy the signature out of the packed struct before comparing it.
        let magic = decoded.magic;
        if magic == spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
            Ok(decoded)
        } else {
            Err(ZipError::InvalidArchive("Invalid Central Directory header"))
        }
    }

    /// Produces the serialized bytes of this block after `to_le` conversion.
    fn encode(self) -> Box<[u8]> {
        let converted = self.to_le();
        converted.serialize()
    }
}
/// Fixed-size portion of a local file header.
///
/// `repr(packed)` removes padding, so the struct's bytes are exactly its
/// fields in declaration order. Parsing (`Block::interpret`) rejects blocks
/// whose `magic` is not `spec::LOCAL_FILE_HEADER_SIGNATURE`.
#[derive(Copy, Clone, Debug)]
#[repr(packed)]
pub(crate) struct ZipLocalEntryBlock {
// Signature identifying this header type (private, unlike the other fields).
magic: spec::Magic,
// NOTE(review): `ZipFileData::local_block` stores `version_needed()` here, so despite the
// name this slot carries the version needed to extract when writing.
pub version_made_by: u16,
// General-purpose bit flags (bit 0: encrypted, bit 3: data descriptor, bit 11: UTF-8 name,
// as interpreted by `ZipFileData::from_local_block`).
pub flags: u16,
pub compression_method: u16,
// Modification time and date, decoded with `DateTime::try_from_msdos` when reading.
pub last_mod_time: u16,
pub last_mod_date: u16,
pub crc32: u32,
// 32-bit sizes; set to `spec::ZIP64_BYTES_THR` by `local_block` for `large_file` entries.
pub compressed_size: u32,
pub uncompressed_size: u32,
// Byte lengths of the file name and extra field that follow the header.
pub file_name_length: u16,
pub extra_field_length: u16,
}
// Byte-order helpers for `ZipLocalEntryBlock`. Both list every field of the struct with its type.
// NOTE(review): `from_le!` / `to_le!` are defined outside this view; presumably they convert
// each listed field between little-endian and native byte order — confirm.
impl ZipLocalEntryBlock {
/// Applies `from_le!` to every field and returns the updated block.
#[allow(clippy::wrong_self_convention)]
#[inline(always)]
fn from_le(mut self) -> Self {
from_le![
self,
[
(magic, spec::Magic),
(version_made_by, u16),
(flags, u16),
(compression_method, u16),
(last_mod_time, u16),
(last_mod_date, u16),
(crc32, u32),
(compressed_size, u32),
(uncompressed_size, u32),
(file_name_length, u16),
(extra_field_length, u16),
]
];
self
}
/// Applies `to_le!` to every field and returns the updated block.
#[inline(always)]
fn to_le(mut self) -> Self {
to_le![
self,
[
(magic, spec::Magic),
(version_made_by, u16),
(flags, u16),
(compression_method, u16),
(last_mod_time, u16),
(last_mod_date, u16),
(crc32, u32),
(compressed_size, u32),
(uncompressed_size, u32),
(file_name_length, u16),
(extra_field_length, u16),
]
];
self
}
}
impl Block for ZipLocalEntryBlock {
    /// Decodes `bytes` into a block and checks the local file header signature.
    ///
    /// Returns `ZipError::InvalidArchive` when the signature does not match.
    fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
        let decoded = Self::deserialize(&bytes).from_le();
        // Copy the signature out of the packed struct before comparing it.
        let magic = decoded.magic;
        if magic == spec::LOCAL_FILE_HEADER_SIGNATURE {
            Ok(decoded)
        } else {
            Err(ZipError::InvalidArchive("Invalid local file header"))
        }
    }

    /// Produces the serialized bytes of this block after `to_le` conversion.
    fn encode(self) -> Box<[u8]> {
        let converted = self.to_le();
        converted.serialize()
    }
}
/// The encryption specification used to encrypt a file with AES.

View file

@ -5,10 +5,10 @@ use crate::aes::AesWriter;
use crate::compression::CompressionMethod;
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult};
use crate::spec;
use crate::spec::{self, Block};
#[cfg(feature = "aes-crypto")]
use crate::types::AesMode;
use crate::types::{ffi, AesVendorVersion, DateTime, System, ZipFileData, DEFAULT_VERSION};
use crate::types::{ffi, AesVendorVersion, DateTime, ZipFileData, ZipRawValues, DEFAULT_VERSION};
use crate::write::ffi::S_IFLNK;
#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
use core::num::NonZeroU64;
@ -22,7 +22,7 @@ use std::io::{BufReader, SeekFrom};
use std::marker::PhantomData;
use std::mem;
use std::str::{from_utf8, Utf8Error};
use std::sync::{Arc, OnceLock};
use std::sync::Arc;
#[cfg(any(
feature = "deflate",
@ -147,11 +147,6 @@ struct ZipWriterStats {
bytes_written: u64,
}
struct ZipRawValues {
crc32: u32,
compressed_size: u64,
uncompressed_size: u64,
}
mod sealed {
use std::sync::Arc;
@ -188,7 +183,7 @@ mod sealed {
}
#[derive(Copy, Clone, Debug)]
enum EncryptWith<'k> {
pub(crate) enum EncryptWith<'k> {
#[cfg(feature = "aes-crypto")]
Aes {
mode: AesMode,
@ -223,9 +218,9 @@ pub struct FileOptions<'k, T: FileOptionExtension> {
pub(crate) last_modified_time: DateTime,
pub(crate) permissions: Option<u32>,
pub(crate) large_file: bool,
encrypt_with: Option<EncryptWith<'k>>,
extended_options: T,
alignment: u16,
pub(crate) encrypt_with: Option<EncryptWith<'k>>,
pub(crate) extended_options: T,
pub(crate) alignment: u16,
#[cfg(feature = "deflate-zopfli")]
pub(super) zopfli_buffer_size: Option<usize>,
}
@ -509,7 +504,8 @@ impl ZipWriterStats {
impl<A: Read + Write + Seek> ZipWriter<A> {
/// Initializes the archive from an existing ZIP archive, making it ready for append.
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
let (footer, cde_start_pos) =
spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?;
Ok(ZipWriter {
@ -777,7 +773,6 @@ impl<W: Write + Seek> ZipWriter<W> {
{
let header_start = self.inner.get_plain().stream_position()?;
let permissions = options.permissions.unwrap_or(0o100644);
let (compression_method, aes_mode) = match options.encrypt_with {
#[cfg(feature = "aes-crypto")]
Some(EncryptWith::Aes { mode, .. }) => (
@ -786,76 +781,38 @@ impl<W: Write + Seek> ZipWriter<W> {
),
_ => (options.compression_method, None),
};
let last_modified_time = options.last_modified_time;
let file = ZipFileData {
system: System::Unix,
version_made_by: DEFAULT_VERSION,
encrypted: options.encrypt_with.is_some(),
using_data_descriptor: false,
compression_method,
compression_level: options.compression_level,
last_modified_time: Some(options.last_modified_time),
crc32: raw_values.crc32,
compressed_size: raw_values.compressed_size,
uncompressed_size: raw_values.uncompressed_size,
file_name: name.to_owned().into(), // Never used for saving, but used as map key in insert_file_data()
file_name_raw: name.into().bytes().collect(),
extra_field,
central_extra_field: options.extended_options.central_extra_data().cloned(),
file_comment: String::with_capacity(0).into_boxed_str(),
header_start,
extra_data_start: None,
data_start: OnceLock::new(),
central_header_start: 0,
external_attributes: permissions << 16,
large_file: options.large_file,
aes_mode,
aes_extra_data_start,
extra_fields: Vec::new(),
};
let file = ZipFileData::initialize_local_block(
name,
&options,
raw_values,
header_start,
None,
aes_extra_data_start,
compression_method,
aes_mode,
extra_field,
);
let index = self.insert_file_data(file)?;
let file = &mut self.files[index];
let writer = self.inner.get_plain();
// local file header signature
writer.write_u32_le(spec::LOCAL_FILE_HEADER_SIGNATURE)?;
// version needed to extract
writer.write_u16_le(file.version_needed())?;
// general purpose bit flag
let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok();
let is_ascii = file.file_name_raw.is_ascii();
let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 }
| if file.encrypted { 1u16 << 0 } else { 0 };
writer.write_u16_le(flag)?;
// Compression method
#[allow(deprecated)]
writer.write_u16_le(file.compression_method.to_u16())?;
// last mod file time and last mod file date
writer.write_u16_le(last_modified_time.timepart())?;
writer.write_u16_le(last_modified_time.datepart())?;
// crc-32
writer.write_u32_le(file.crc32)?;
// compressed size and uncompressed size
if file.large_file {
writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?;
writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?;
} else {
writer.write_u32_le(file.compressed_size as u32)?;
writer.write_u32_le(file.uncompressed_size as u32)?;
let block = match file.local_block() {
Ok(block) => block,
Err(e) => {
let _ = self.abort_file();
return Err(e);
}
};
match block.write(writer) {
Ok(()) => (),
Err(e) => {
let _ = self.abort_file();
return Err(e);
}
}
// file name length
writer.write_u16_le(file.file_name_raw.len() as u16)?;
// extra field length
let mut extra_field_length = file.extra_field_len();
if file.large_file {
extra_field_length += 20;
}
if extra_field_length + file.central_extra_field_len() > u16::MAX as usize {
let _ = self.abort_file();
return Err(InvalidArchive("Extra data field is too large"));
}
let extra_field_length = extra_field_length as u16;
writer.write_u16_le(extra_field_length)?;
// file name
writer.write_all(&file.file_name_raw)?;
// zip64 extra field
@ -873,7 +830,7 @@ impl<W: Write + Seek> ZipWriter<W> {
if unaligned_header_bytes != 0 {
let pad_length = (align - unaligned_header_bytes) as usize;
let Some(new_extra_field_length) =
(pad_length as u16).checked_add(extra_field_length)
(pad_length as u16).checked_add(block.extra_field_length)
else {
let _ = self.abort_file();
return Err(InvalidArchive(
@ -1431,7 +1388,7 @@ impl<W: Write + Seek> ZipWriter<W> {
}
let number_of_files = self.files.len().min(spec::ZIP64_ENTRY_THR) as u16;
let footer = spec::CentralDirectoryEnd {
let footer = spec::Zip32CentralDirectoryEnd {
disk_number: 0,
disk_with_central_directory: 0,
zip_file_comment: self.comment.clone(),
@ -1805,52 +1762,9 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
let zip64_extra_field_length =
write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?;
// central file header signature
writer.write_u32_le(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?;
// version made by
let version_made_by = (file.system as u16) << 8 | (file.version_made_by as u16);
writer.write_u16_le(version_made_by)?;
// version needed to extract
writer.write_u16_le(file.version_needed())?;
// general puprose bit flag
let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok();
let is_ascii = file.file_name_raw.is_ascii();
let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 }
| if file.encrypted { 1u16 << 0 } else { 0 };
writer.write_u16_le(flag)?;
// compression method
#[allow(deprecated)]
writer.write_u16_le(file.compression_method.to_u16())?;
let last_modified_time = file
.last_modified_time
.unwrap_or_else(DateTime::default_for_write);
// last mod file time + date
writer.write_u16_le(last_modified_time.timepart())?;
writer.write_u16_le(last_modified_time.datepart())?;
// crc-32
writer.write_u32_le(file.crc32)?;
// compressed size
writer.write_u32_le(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?;
// uncompressed size
writer.write_u32_le(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?;
// file name length
writer.write_u16_le(file.file_name_raw.len() as u16)?;
// extra field length
writer.write_u16_le(
zip64_extra_field_length
+ file.extra_field_len() as u16
+ file.central_extra_field_len() as u16,
)?;
// file comment length
writer.write_u16_le(0)?;
// disk number start
writer.write_u16_le(0)?;
// internal file attributes
writer.write_u16_le(0)?;
// external file attributes
writer.write_u32_le(file.external_attributes)?;
// relative offset of local header
writer.write_u32_le(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?;
let block = file.block(zip64_extra_field_length);
block.write(writer)?;
// file name
writer.write_all(&file.file_name_raw)?;
// zip64 extra field
@ -1926,6 +1840,7 @@ fn update_local_zip64_extra_field<T: Write + Seek>(
Ok(())
}
/* TODO: make this use the Block trait somehow! */
fn write_central_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<u16> {
// The order of the fields in the zip64 extended
// information record is fixed, but the fields MUST
@ -2074,7 +1989,7 @@ mod test {
writer
.start_file_from_path(path, SimpleFileOptions::default())
.unwrap();
let archive = ZipArchive::new(writer.finish().unwrap()).unwrap();
let archive = writer.finish_into_readable().unwrap();
assert_eq!(Some("foo/example.txt"), archive.name_for_index(0));
}
@ -2227,8 +2142,7 @@ mod test {
writer
.shallow_copy_file(SECOND_FILENAME, SECOND_FILENAME)
.expect_err("Duplicate filename");
let zip = writer.finish().unwrap();
let mut reader = ZipArchive::new(zip).unwrap();
let mut reader = writer.finish_into_readable().unwrap();
let mut file_names: Vec<&str> = reader.file_names().collect();
file_names.sort();
let mut expected_file_names = vec![RT_TEST_FILENAME, SECOND_FILENAME];
@ -2512,7 +2426,7 @@ mod test {
let contents = b"sleeping";
let () = zip.start_file("sleep", options).unwrap();
let _count = zip.write(&contents[..]).unwrap();
let mut zip = ZipArchive::new(zip.finish().unwrap()).unwrap();
let mut zip = zip.finish_into_readable().unwrap();
let file = zip.by_index(0).unwrap();
assert_eq!(file.name(), "sleep");
assert_eq!(file.data_start(), page_size.into());