Add ZIP64 write support.

This commit is contained in:
Rouven Spreckels 2020-10-03 11:05:23 +02:00
parent ba8307abc7
commit 9397773a32
6 changed files with 293 additions and 31 deletions

View file

@ -29,6 +29,7 @@ walkdir = "2"
deflate = ["flate2/rust_backend"]
deflate-miniz = ["flate2/default"]
deflate-zlib = ["flate2/zlib"]
unreserved = []
default = ["bzip2", "deflate", "time"]
[[bench]]

View file

@ -508,6 +508,7 @@ fn central_header_to_zip_file<R: Read + io::Seek>(
central_header_start,
data_start: 0,
external_attributes: external_file_attributes,
large_file: false,
};
match parse_extra_field(&mut result) {
@ -530,6 +531,8 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
let mut len_left = len as i64;
// Zip64 extended information extra field
if kind == 0x0001 {
file.large_file = true;
if file.uncompressed_size == 0xFFFFFFFF {
file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
len_left -= 8;
@ -778,6 +781,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
// We set this to zero, which should be valid as the docs state 'If input came
// from standard input, this field is set to zero.'
external_attributes: 0,
large_file: false,
};
match parse_extra_field(&mut result) {

View file

@ -120,6 +120,14 @@ impl Zip64CentralDirectoryEndLocator {
number_of_disks,
})
}
/// Serializes this ZIP64 end-of-central-directory locator into `writer`.
///
/// The record is emitted as four little-endian integers, in this order:
/// the locator signature (`u32`), `disk_with_central_directory` (`u32`),
/// `end_of_central_directory_offset` (`u64`) and `number_of_disks` (`u32`).
///
/// # Errors
///
/// Stops at the first write that fails and returns that error.
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
    let signature = ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE.to_le_bytes();
    writer.write_all(&signature)?;

    let directory_disk = self.disk_with_central_directory.to_le_bytes();
    writer.write_all(&directory_disk)?;

    let record_offset = self.end_of_central_directory_offset.to_le_bytes();
    writer.write_all(&record_offset)?;

    let disk_count = self.number_of_disks.to_le_bytes();
    writer.write_all(&disk_count)?;

    Ok(())
}
}
pub struct Zip64CentralDirectoryEnd {
@ -182,4 +190,18 @@ impl Zip64CentralDirectoryEnd {
"Could not find ZIP64 central directory end",
))
}
pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
writer.write_u32::<LittleEndian>(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?;
writer.write_u64::<LittleEndian>(44)?; // record size
writer.write_u16::<LittleEndian>(self.version_made_by)?;
writer.write_u16::<LittleEndian>(self.version_needed_to_extract)?;
writer.write_u32::<LittleEndian>(self.disk_number)?;
writer.write_u32::<LittleEndian>(self.disk_with_central_directory)?;
writer.write_u64::<LittleEndian>(self.number_of_files_on_this_disk)?;
writer.write_u64::<LittleEndian>(self.number_of_files)?;
writer.write_u64::<LittleEndian>(self.central_directory_size)?;
writer.write_u64::<LittleEndian>(self.central_directory_offset)?;
Ok(())
}
}

View file

@ -244,6 +244,8 @@ pub struct ZipFileData {
pub data_start: u64,
/// External file attributes
pub external_attributes: u32,
/// Reserve local ZIP64 extra field
pub large_file: bool,
}
impl ZipFileData {
@ -277,10 +279,18 @@ impl ZipFileData {
})
}
/// Reports whether this entry has a value that no longer fits into 32 bits.
///
/// Returns `true` as soon as one of `uncompressed_size`, `compressed_size` or
/// `header_start` is greater than `0xFFFFFFFF`.
pub fn zip64_extension(&self) -> bool {
    const U32_LIMIT: u64 = 0xFFFFFFFF;

    [self.uncompressed_size, self.compressed_size, self.header_start]
        .iter()
        .any(|&value| value > U32_LIMIT)
}
pub fn version_needed(&self) -> u16 {
match self.compression_method {
// higher versions matched first
match (self.zip64_extension(), self.compression_method) {
#[cfg(feature = "bzip2")]
crate::compression::CompressionMethod::Bzip2 => 46,
(_, crate::compression::CompressionMethod::Bzip2) => 46,
(true, _) => 45,
_ => 20,
}
}
@ -318,6 +328,7 @@ mod test {
data_start: 0,
central_header_start: 0,
external_attributes: 0,
large_file: false,
};
assert_eq!(
data.file_name_sanitized(),

View file

@ -4,7 +4,7 @@ use crate::compression::CompressionMethod;
use crate::result::{ZipError, ZipResult};
use crate::spec;
use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION};
use byteorder::{LittleEndian, WriteBytesExt};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use crc32fast::Hasher;
use std::default::Default;
use std::io;
@ -85,6 +85,7 @@ pub struct FileOptions {
compression_method: CompressionMethod,
last_modified_time: DateTime,
permissions: Option<u32>,
large_file: bool,
}
impl FileOptions {
@ -108,6 +109,7 @@ impl FileOptions {
#[cfg(not(feature = "time"))]
last_modified_time: DateTime::default(),
permissions: None,
large_file: false,
}
}
@ -115,7 +117,6 @@ impl FileOptions {
///
/// The default is `CompressionMethod::Deflated`. If the deflate compression feature is
/// disabled, `CompressionMethod::Stored` becomes the default.
/// otherwise.
pub fn compression_method(mut self, method: CompressionMethod) -> FileOptions {
self.compression_method = method;
self
@ -139,6 +140,15 @@ impl FileOptions {
self.permissions = Some(mode & 0o777);
self
}
/// Set whether the new file's compressed and uncompressed size is less than 4 GiB.
///
/// If set to `false` and the file exceeds the limit, an I/O error is thrown. If set to `true`
/// and the file does not exceed the limit, 20 B are wasted. The default is `false`.
pub fn large_file(mut self, large: bool) -> FileOptions {
self.large_file = large;
self
}
}
impl Default for FileOptions {
@ -163,6 +173,14 @@ impl<W: Write + io::Seek> Write for ZipWriter<W> {
let write_result = w.write(buf);
if let Ok(count) = write_result {
self.stats.update(&buf[0..count]);
if self.stats.bytes_written > 0xFFFFFFFF
&& !self.files.last_mut().unwrap().large_file
{
return Err(io::Error::new(
io::ErrorKind::Other,
"Large file option has not been set",
));
}
}
write_result
}
@ -247,6 +265,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
data_start: 0,
central_header_start: 0,
external_attributes: permissions << 16,
large_file: options.large_file,
};
write_local_file_header(writer, &file)?;
@ -376,7 +395,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
///
/// zip.start_file_with_extra_data("identical_extra_data.txt", options)?;
/// let extra_data = b"local and central extra data";
/// zip.write_u16::<LittleEndian>(0x0000)?;
/// zip.write_u16::<LittleEndian>(0xbeef)?;
/// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
/// zip.write_all(extra_data)?;
/// zip.end_extra_data()?;
@ -384,20 +403,20 @@ impl<W: Write + io::Seek> ZipWriter<W> {
///
/// let data_start = zip.start_file_with_extra_data("different_extra_data.txt", options)?;
/// let extra_data = b"local extra data";
/// zip.write_u16::<LittleEndian>(0x0000)?;
/// zip.write_u16::<LittleEndian>(0xbeef)?;
/// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
/// zip.write_all(extra_data)?;
/// let data_start = data_start as usize + 4 + extra_data.len() + 4;
/// let align = 64;
/// let pad_length = (align - data_start % align) % align;
/// assert_eq!(pad_length, 19);
/// zip.write_u16::<LittleEndian>(0x0000)?;
/// zip.write_u16::<LittleEndian>(0xdead)?;
/// zip.write_u16::<LittleEndian>(pad_length as u16)?;
/// zip.write_all(&vec![0; pad_length])?;
/// let data_start = zip.end_local_start_central_extra_data()?;
/// assert_eq!(data_start as usize % align, 0);
/// let extra_data = b"central extra data";
/// zip.write_u16::<LittleEndian>(0x0000)?;
/// zip.write_u16::<LittleEndian>(0xbeef)?;
/// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
/// zip.write_all(extra_data)?;
/// zip.end_extra_data()?;
@ -454,13 +473,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
}
let file = self.files.last_mut().unwrap();
// Ensure extra data fits into extra field.
if file.extra_field.len() > 0xFFFF {
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::InvalidData,
"Extra data exceeds extra field",
)));
}
validate_extra_data(&file)?;
if !self.writing_to_central_extra_field_only {
let writer = self.inner.get_plain();
@ -474,8 +487,10 @@ impl<W: Write + io::Seek> ZipWriter<W> {
file.data_start = header_end;
// Update extra field length in local file header.
let extra_field_length =
if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16;
writer.seek(io::SeekFrom::Start(file.header_start + 28))?;
writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
writer.write_u16::<LittleEndian>(extra_field_length)?;
writer.seek(io::SeekFrom::Start(header_end))?;
self.inner.switch_to(file.compression_method)?;
@ -549,13 +564,50 @@ impl<W: Write + io::Seek> ZipWriter<W> {
}
let central_size = writer.seek(io::SeekFrom::Current(0))? - central_start;
if self.files.len() > 0xFFFF || central_size > 0xFFFFFFFF || central_start > 0xFFFFFFFF
{
let zip64_footer = spec::Zip64CentralDirectoryEnd {
version_made_by: DEFAULT_VERSION as u16,
version_needed_to_extract: DEFAULT_VERSION as u16,
disk_number: 0,
disk_with_central_directory: 0,
number_of_files_on_this_disk: self.files.len() as u64,
number_of_files: self.files.len() as u64,
central_directory_size: central_size,
central_directory_offset: central_start,
};
zip64_footer.write(writer)?;
let zip64_footer = spec::Zip64CentralDirectoryEndLocator {
disk_with_central_directory: 0,
end_of_central_directory_offset: central_start + central_size,
number_of_disks: 1,
};
zip64_footer.write(writer)?;
}
let number_of_files = if self.files.len() > 0xFFFF {
0xFFFF
} else {
self.files.len() as u16
};
let footer = spec::CentralDirectoryEnd {
disk_number: 0,
disk_with_central_directory: 0,
number_of_files_on_this_disk: self.files.len() as u16,
number_of_files: self.files.len() as u16,
central_directory_size: central_size as u32,
central_directory_offset: central_start as u32,
number_of_files_on_this_disk: number_of_files,
number_of_files,
central_directory_size: if central_size > 0xFFFFFFFF {
0xFFFFFFFF
} else {
central_size as u32
},
central_directory_offset: if central_start > 0xFFFFFFFF {
0xFFFFFFFF
} else {
central_start as u32
},
zip_file_comment: self.comment.as_bytes().to_vec(),
};
@ -708,15 +760,28 @@ fn write_local_file_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipR
// crc-32
writer.write_u32::<LittleEndian>(file.crc32)?;
// compressed size
writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
0xFFFFFFFF
} else {
file.compressed_size as u32
})?;
// uncompressed size
writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
0xFFFFFFFF
} else {
file.uncompressed_size as u32
})?;
// file name length
writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?;
// extra field length
writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
let extra_field_length = if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16;
writer.write_u16::<LittleEndian>(extra_field_length)?;
// file name
writer.write_all(file.file_name.as_bytes())?;
// zip64 extra field
if file.large_file {
write_local_zip64_extra_field(writer, &file)?;
}
Ok(())
}
@ -728,12 +793,37 @@ fn update_local_file_header<T: Write + io::Seek>(
const CRC32_OFFSET: u64 = 14;
writer.seek(io::SeekFrom::Start(file.header_start + CRC32_OFFSET))?;
writer.write_u32::<LittleEndian>(file.crc32)?;
writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
if file.large_file {
0xFFFFFFFF
} else {
// compressed size can be slightly larger than uncompressed size
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::Other,
"Large file option has not been set",
)));
}
} else {
file.compressed_size as u32
})?;
writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
// uncompressed size is checked on write to catch it as soon as possible
0xFFFFFFFF
} else {
file.uncompressed_size as u32
})?;
if file.large_file {
update_local_zip64_extra_field(writer, file)?;
}
Ok(())
}
fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
// buffer zip64 extra field to determine its variable length
let mut zip64_extra_field = [0; 28];
let zip64_extra_field_length =
write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?;
// central file header signature
writer.write_u32::<LittleEndian>(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?;
// version made by
@ -757,13 +847,21 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
// crc-32
writer.write_u32::<LittleEndian>(file.crc32)?;
// compressed size
writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
0xFFFFFFFF
} else {
file.compressed_size as u32
})?;
// uncompressed size
writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
0xFFFFFFFF
} else {
file.uncompressed_size as u32
})?;
// file name length
writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?;
// extra field length
writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
writer.write_u16::<LittleEndian>(zip64_extra_field_length + file.extra_field.len() as u16)?;
// file comment length
writer.write_u16::<LittleEndian>(0)?;
// disk number start
@ -773,9 +871,15 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
// external file attributes
writer.write_u32::<LittleEndian>(file.external_attributes)?;
// relative offset of local header
writer.write_u32::<LittleEndian>(file.header_start as u32)?;
writer.write_u32::<LittleEndian>(if file.header_start > 0xFFFFFFFF {
0xFFFFFFFF
} else {
file.header_start as u32
})?;
// file name
writer.write_all(file.file_name.as_bytes())?;
// zip64 extra field
writer.write_all(&zip64_extra_field[..zip64_extra_field_length as usize])?;
// extra field
writer.write_all(&file.extra_field)?;
// file comment
@ -784,6 +888,125 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
Ok(())
}
fn validate_extra_data(file: &ZipFileData) -> ZipResult<()> {
let mut data = file.extra_field.as_slice();
if data.len() > 0xFFFF {
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::InvalidData,
"Extra data exceeds extra field",
)));
}
while data.len() > 0 {
let left = data.len();
if left < 4 {
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::Other,
"Incomplete extra data header",
)));
}
let kind = data.read_u16::<LittleEndian>()?;
let size = data.read_u16::<LittleEndian>()? as usize;
let left = left - 4;
if kind == 0x0001 {
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::Other,
"No custom ZIP64 extra data allowed",
)));
}
#[cfg(not(feature = "unreserved"))]
{
if kind <= 31
|| [0x0021, 0x0022, 0x0023, 0x0065, 0x0066, 0x4690]
.iter()
.any(|&reserved| reserved == kind)
{
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::Other,
"Reserved extra data header ID",
)));
}
}
if size > left {
return Err(ZipError::Io(io::Error::new(
io::ErrorKind::Other,
"Extra data size exceeds extra field",
)));
}
data = &data[size..];
}
Ok(())
}
/// Emits the ZIP64 extra data record of a local file header.
///
/// The record is always 20 bytes long: header ID `0x0001`, a data size of 16, then
/// `uncompressed_size` followed by `compressed_size` as little-endian `u64` values.
fn write_local_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
    // This entry in the Local header MUST include BOTH original
    // and compressed file size fields.
    let sizes = [file.uncompressed_size, file.compressed_size];

    writer.write_u16::<LittleEndian>(0x0001)?;
    writer.write_u16::<LittleEndian>(16)?;
    for &size in sizes.iter() {
        writer.write_u64::<LittleEndian>(size)?;
    }
    // Excluded fields:
    // u32: disk start number
    Ok(())
}
/// Rewrites the two sizes inside the ZIP64 extra data record of an already written local
/// file header.
///
/// The record is expected directly after the file name, which is where
/// `write_local_file_header` places it. This function seeks past the record's 4-byte
/// header and overwrites `uncompressed_size` and `compressed_size` (little-endian `u64`
/// each). The writer is left positioned right after the record.
///
/// # Errors
///
/// Returns the first seek or write error.
fn update_local_zip64_extra_field<T: Write + io::Seek>(
    writer: &mut T,
    file: &ZipFileData,
) -> ZipResult<()> {
    // Use the same name bytes the local header writer emitted (`file_name.as_bytes()`),
    // not `file_name_raw`, so the offset always matches what is on disk.
    let file_name_length = file.file_name.as_bytes().len() as u64;
    // 30 bytes of fixed-size local header fields precede the file name.
    let zip64_extra_field = file.header_start + 30 + file_name_length;
    // Skip the record's header ID and data size (2 bytes each).
    writer.seek(io::SeekFrom::Start(zip64_extra_field + 4))?;
    writer.write_u64::<LittleEndian>(file.uncompressed_size)?;
    writer.write_u64::<LittleEndian>(file.compressed_size)?;
    // Excluded fields:
    // u32: disk start number
    Ok(())
}
/// Emits the ZIP64 extra data record of a central directory header, if one is needed.
///
/// Only the values greater than `0xFFFFFFFF` are written, always in the order
/// `uncompressed_size`, `compressed_size`, `header_start`, each as a little-endian `u64`
/// after the record's header ID `0x0001` and data size.
///
/// Returns the number of bytes written: 0 when no value needs the record, otherwise
/// 4 plus 8 per written value.
fn write_central_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<u16> {
    // The order of the fields in the zip64 extended
    // information record is fixed, but the fields MUST
    // only appear if the corresponding Local or Central
    // directory record field is set to 0xFFFF or 0xFFFFFFFF.
    const U32_LIMIT: u64 = 0xFFFFFFFF;

    let candidates = [file.uncompressed_size, file.compressed_size, file.header_start];
    let payload = candidates.iter().filter(|&&value| value > U32_LIMIT).count() as u16 * 8;
    if payload == 0 {
        return Ok(0);
    }

    writer.write_u16::<LittleEndian>(0x0001)?;
    writer.write_u16::<LittleEndian>(payload)?;
    for &value in candidates.iter().filter(|&&value| value > U32_LIMIT) {
        writer.write_u64::<LittleEndian>(value)?;
    }
    // Excluded fields:
    // u32: disk start number

    // Account for the header ID and data size written in front of the payload.
    Ok(payload + 4)
}
fn path_to_string(path: &std::path::Path) -> String {
let mut path_str = String::new();
for component in path.components() {
@ -852,6 +1075,7 @@ mod test {
compression_method: CompressionMethod::Stored,
last_modified_time: DateTime::default(),
permissions: Some(33188),
large_file: false,
};
writer.start_file("mimetype", options).unwrap();
writer

View file

@ -30,7 +30,7 @@ fn write_to_zip_file(file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<()> {
zip.write_all(b"Hello, World!\n")?;
zip.start_file_with_extra_data("test_with_extra_data/🐢.txt", options)?;
zip.write_u16::<LittleEndian>(0)?;
zip.write_u16::<LittleEndian>(0xbeef)?;
zip.write_u16::<LittleEndian>(EXTRA_DATA.len() as u16)?;
zip.write_all(EXTRA_DATA)?;
zip.end_extra_data()?;
@ -59,7 +59,7 @@ fn read_zip_file(zip_file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<Strin
{
let file_with_extra_data = archive.by_name("test_with_extra_data/🐢.txt")?;
let mut extra_data = Vec::new();
extra_data.write_u16::<LittleEndian>(0)?;
extra_data.write_u16::<LittleEndian>(0xbeef)?;
extra_data.write_u16::<LittleEndian>(EXTRA_DATA.len() as u16)?;
extra_data.write_all(EXTRA_DATA)?;
assert_eq!(file_with_extra_data.extra_data(), extra_data.as_slice());