From 9397773a3240f24803a26afb8ef6778dec38291d Mon Sep 17 00:00:00 2001 From: Rouven Spreckels <rs@qu1x.dev> Date: Sat, 3 Oct 2020 11:05:23 +0200 Subject: [PATCH] Add ZIP64 write support. --- Cargo.toml | 1 + src/read.rs | 4 + src/spec.rs | 22 ++++ src/types.rs | 15 ++- src/write.rs | 278 +++++++++++++++++++++++++++++++++++++++----- tests/end_to_end.rs | 4 +- 6 files changed, 293 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index abc33f7a..3712b7db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ walkdir = "2" deflate = ["flate2/rust_backend"] deflate-miniz = ["flate2/default"] deflate-zlib = ["flate2/zlib"] +unreserved = [] default = ["bzip2", "deflate", "time"] [[bench]] diff --git a/src/read.rs b/src/read.rs index 240bccba..ed9c5451 100644 --- a/src/read.rs +++ b/src/read.rs @@ -508,6 +508,7 @@ fn central_header_to_zip_file<R: Read + io::Seek>( central_header_start, data_start: 0, external_attributes: external_file_attributes, + large_file: false, }; match parse_extra_field(&mut result) { @@ -530,6 +531,8 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { let mut len_left = len as i64; // Zip64 extended information extra field if kind == 0x0001 { + file.large_file = true; + if file.uncompressed_size == 0xFFFFFFFF { file.uncompressed_size = reader.read_u64::<LittleEndian>()?; len_left -= 8; @@ -778,6 +781,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>( // We set this to zero, which should be valid as the docs state 'If input came // from standard input, this field is set to zero.' 
external_attributes: 0, + large_file: false, }; match parse_extra_field(&mut result) { diff --git a/src/spec.rs b/src/spec.rs index 8fa8c5c1..2e25c400 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -120,6 +120,14 @@ impl Zip64CentralDirectoryEndLocator { number_of_disks, }) } + + pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> { + writer.write_u32::<LittleEndian>(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?; + writer.write_u32::<LittleEndian>(self.disk_with_central_directory)?; + writer.write_u64::<LittleEndian>(self.end_of_central_directory_offset)?; + writer.write_u32::<LittleEndian>(self.number_of_disks)?; + Ok(()) + } } pub struct Zip64CentralDirectoryEnd { @@ -182,4 +190,18 @@ impl Zip64CentralDirectoryEnd { "Could not find ZIP64 central directory end", )) } + + pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> { + writer.write_u32::<LittleEndian>(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?; + writer.write_u64::<LittleEndian>(44)?; // record size + writer.write_u16::<LittleEndian>(self.version_made_by)?; + writer.write_u16::<LittleEndian>(self.version_needed_to_extract)?; + writer.write_u32::<LittleEndian>(self.disk_number)?; + writer.write_u32::<LittleEndian>(self.disk_with_central_directory)?; + writer.write_u64::<LittleEndian>(self.number_of_files_on_this_disk)?; + writer.write_u64::<LittleEndian>(self.number_of_files)?; + writer.write_u64::<LittleEndian>(self.central_directory_size)?; + writer.write_u64::<LittleEndian>(self.central_directory_offset)?; + Ok(()) + } } diff --git a/src/types.rs b/src/types.rs index 1f4c13eb..c753fb43 100644 --- a/src/types.rs +++ b/src/types.rs @@ -244,6 +244,8 @@ pub struct ZipFileData { pub data_start: u64, /// External file attributes pub external_attributes: u32, + /// Reserve local ZIP64 extra field + pub large_file: bool, } impl ZipFileData { @@ -277,10 +279,18 @@ impl ZipFileData { }) } + pub fn zip64_extension(&self) -> bool { + self.uncompressed_size > 0xFFFFFFFF + || self.compressed_size > 
0xFFFFFFFF + || self.header_start > 0xFFFFFFFF + } + pub fn version_needed(&self) -> u16 { - match self.compression_method { + // higher versions matched first + match (self.zip64_extension(), self.compression_method) { #[cfg(feature = "bzip2")] - crate::compression::CompressionMethod::Bzip2 => 46, + (_, crate::compression::CompressionMethod::Bzip2) => 46, + (true, _) => 45, _ => 20, } } @@ -318,6 +328,7 @@ mod test { data_start: 0, central_header_start: 0, external_attributes: 0, + large_file: false, }; assert_eq!( data.file_name_sanitized(), diff --git a/src/write.rs b/src/write.rs index 72bc63ea..9db89f04 100644 --- a/src/write.rs +++ b/src/write.rs @@ -4,7 +4,7 @@ use crate::compression::CompressionMethod; use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION}; -use byteorder::{LittleEndian, WriteBytesExt}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use crc32fast::Hasher; use std::default::Default; use std::io; @@ -85,6 +85,7 @@ pub struct FileOptions { compression_method: CompressionMethod, last_modified_time: DateTime, permissions: Option<u32>, + large_file: bool, } impl FileOptions { @@ -108,6 +109,7 @@ impl FileOptions { #[cfg(not(feature = "time"))] last_modified_time: DateTime::default(), permissions: None, + large_file: false, } } @@ -115,7 +117,6 @@ impl FileOptions { /// /// The default is `CompressionMethod::Deflated`. If the deflate compression feature is /// disabled, `CompressionMethod::Stored` becomes the default. - /// otherwise. pub fn compression_method(mut self, method: CompressionMethod) -> FileOptions { self.compression_method = method; self @@ -139,6 +140,15 @@ impl FileOptions { self.permissions = Some(mode & 0o777); self } + + /// Set whether the new file's compressed or uncompressed size may be 4 GiB or larger. + /// + /// If set to `false` and the file exceeds the limit, an I/O error is thrown. 
If set to `true` + /// and the file does not exceed the limit, 20 B are wasted. The default is `false`. + pub fn large_file(mut self, large: bool) -> FileOptions { + self.large_file = large; + self + } } impl Default for FileOptions { @@ -163,6 +173,14 @@ impl<W: Write + io::Seek> Write for ZipWriter<W> { let write_result = w.write(buf); if let Ok(count) = write_result { self.stats.update(&buf[0..count]); + if self.stats.bytes_written > 0xFFFFFFFF + && !self.files.last_mut().unwrap().large_file + { + return Err(io::Error::new( + io::ErrorKind::Other, + "Large file option has not been set", + )); + } } write_result } @@ -247,6 +265,7 @@ impl<W: Write + io::Seek> ZipWriter<W> { data_start: 0, central_header_start: 0, external_attributes: permissions << 16, + large_file: options.large_file, }; write_local_file_header(writer, &file)?; @@ -376,7 +395,7 @@ impl<W: Write + io::Seek> ZipWriter<W> { /// /// zip.start_file_with_extra_data("identical_extra_data.txt", options)?; /// let extra_data = b"local and central extra data"; - /// zip.write_u16::<LittleEndian>(0x0000)?; + /// zip.write_u16::<LittleEndian>(0xbeef)?; /// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?; /// zip.write_all(extra_data)?; /// zip.end_extra_data()?; @@ -384,20 +403,20 @@ impl<W: Write + io::Seek> ZipWriter<W> { /// /// let data_start = zip.start_file_with_extra_data("different_extra_data.txt", options)?; /// let extra_data = b"local extra data"; - /// zip.write_u16::<LittleEndian>(0x0000)?; + /// zip.write_u16::<LittleEndian>(0xbeef)?; /// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?; /// zip.write_all(extra_data)?; /// let data_start = data_start as usize + 4 + extra_data.len() + 4; /// let align = 64; /// let pad_length = (align - data_start % align) % align; /// assert_eq!(pad_length, 19); - /// zip.write_u16::<LittleEndian>(0x0000)?; + /// zip.write_u16::<LittleEndian>(0xdead)?; /// zip.write_u16::<LittleEndian>(pad_length as u16)?; /// zip.write_all(&vec![0; 
pad_length])?; /// let data_start = zip.end_local_start_central_extra_data()?; /// assert_eq!(data_start as usize % align, 0); /// let extra_data = b"central extra data"; - /// zip.write_u16::<LittleEndian>(0x0000)?; + /// zip.write_u16::<LittleEndian>(0xbeef)?; /// zip.write_u16::<LittleEndian>(extra_data.len() as u16)?; /// zip.write_all(extra_data)?; /// zip.end_extra_data()?; @@ -454,13 +473,7 @@ impl<W: Write + io::Seek> ZipWriter<W> { } let file = self.files.last_mut().unwrap(); - // Ensure extra data fits into extra field. - if file.extra_field.len() > 0xFFFF { - return Err(ZipError::Io(io::Error::new( - io::ErrorKind::InvalidData, - "Extra data exceeds extra field", - ))); - } + validate_extra_data(&file)?; if !self.writing_to_central_extra_field_only { let writer = self.inner.get_plain(); @@ -474,8 +487,10 @@ impl<W: Write + io::Seek> ZipWriter<W> { file.data_start = header_end; // Update extra field length in local file header. + let extra_field_length = + if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16; writer.seek(io::SeekFrom::Start(file.header_start + 28))?; - writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?; + writer.write_u16::<LittleEndian>(extra_field_length)?; writer.seek(io::SeekFrom::Start(header_end))?; self.inner.switch_to(file.compression_method)?; @@ -549,13 +564,50 @@ impl<W: Write + io::Seek> ZipWriter<W> { } let central_size = writer.seek(io::SeekFrom::Current(0))? 
- central_start; + if self.files.len() > 0xFFFF || central_size > 0xFFFFFFFF || central_start > 0xFFFFFFFF + { + let zip64_footer = spec::Zip64CentralDirectoryEnd { + version_made_by: DEFAULT_VERSION as u16, + version_needed_to_extract: DEFAULT_VERSION as u16, + disk_number: 0, + disk_with_central_directory: 0, + number_of_files_on_this_disk: self.files.len() as u64, + number_of_files: self.files.len() as u64, + central_directory_size: central_size, + central_directory_offset: central_start, + }; + + zip64_footer.write(writer)?; + + let zip64_footer = spec::Zip64CentralDirectoryEndLocator { + disk_with_central_directory: 0, + end_of_central_directory_offset: central_start + central_size, + number_of_disks: 1, + }; + + zip64_footer.write(writer)?; + } + + let number_of_files = if self.files.len() > 0xFFFF { + 0xFFFF + } else { + self.files.len() as u16 + }; let footer = spec::CentralDirectoryEnd { disk_number: 0, disk_with_central_directory: 0, - number_of_files_on_this_disk: self.files.len() as u16, - number_of_files: self.files.len() as u16, - central_directory_size: central_size as u32, - central_directory_offset: central_start as u32, + number_of_files_on_this_disk: number_of_files, + number_of_files, + central_directory_size: if central_size > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + central_size as u32 + }, + central_directory_offset: if central_start > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + central_start as u32 + }, zip_file_comment: self.comment.as_bytes().to_vec(), }; @@ -708,15 +760,28 @@ fn write_local_file_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipR // crc-32 writer.write_u32::<LittleEndian>(file.crc32)?; // compressed size - writer.write_u32::<LittleEndian>(file.compressed_size as u32)?; + writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + file.compressed_size as u32 + })?; // uncompressed size - writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?; + 
writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + file.uncompressed_size as u32 + })?; // file name length writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?; // extra field length - writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?; + let extra_field_length = if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16; + writer.write_u16::<LittleEndian>(extra_field_length)?; // file name writer.write_all(file.file_name.as_bytes())?; + // zip64 extra field + if file.large_file { + write_local_zip64_extra_field(writer, &file)?; + } Ok(()) } @@ -728,12 +793,37 @@ fn update_local_file_header<T: Write + io::Seek>( const CRC32_OFFSET: u64 = 14; writer.seek(io::SeekFrom::Start(file.header_start + CRC32_OFFSET))?; writer.write_u32::<LittleEndian>(file.crc32)?; - writer.write_u32::<LittleEndian>(file.compressed_size as u32)?; - writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?; + writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF { + if file.large_file { + 0xFFFFFFFF + } else { + // compressed size can be slightly larger than uncompressed size + return Err(ZipError::Io(io::Error::new( + io::ErrorKind::Other, + "Large file option has not been set", + ))); + } + } else { + file.compressed_size as u32 + })?; + writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF { + // uncompressed size is checked on write to catch it as soon as possible + 0xFFFFFFFF + } else { + file.uncompressed_size as u32 + })?; + if file.large_file { + update_local_zip64_extra_field(writer, file)?; + } Ok(()) } fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { + // buffer zip64 extra field to determine its variable length + let mut zip64_extra_field = [0; 28]; + let zip64_extra_field_length = + write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?; + // central file header signature 
writer.write_u32::<LittleEndian>(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?; // version made by @@ -757,13 +847,21 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) // crc-32 writer.write_u32::<LittleEndian>(file.crc32)?; // compressed size - writer.write_u32::<LittleEndian>(file.compressed_size as u32)?; + writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + file.compressed_size as u32 + })?; // uncompressed size - writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?; + writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + file.uncompressed_size as u32 + })?; // file name length writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?; // extra field length - writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?; + writer.write_u16::<LittleEndian>(zip64_extra_field_length + file.extra_field.len() as u16)?; // file comment length writer.write_u16::<LittleEndian>(0)?; // disk number start @@ -773,9 +871,15 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) // external file attributes writer.write_u32::<LittleEndian>(file.external_attributes)?; // relative offset of local header - writer.write_u32::<LittleEndian>(file.header_start as u32)?; + writer.write_u32::<LittleEndian>(if file.header_start > 0xFFFFFFFF { + 0xFFFFFFFF + } else { + file.header_start as u32 + })?; // file name writer.write_all(file.file_name.as_bytes())?; + // zip64 extra field + writer.write_all(&zip64_extra_field[..zip64_extra_field_length as usize])?; // extra field writer.write_all(&file.extra_field)?; // file comment @@ -784,6 +888,125 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) Ok(()) } +fn validate_extra_data(file: &ZipFileData) -> ZipResult<()> { + let mut data = file.extra_field.as_slice(); + + if data.len() > 0xFFFF { + return 
Err(ZipError::Io(io::Error::new( + io::ErrorKind::InvalidData, + "Extra data exceeds extra field", + ))); + } + + while data.len() > 0 { + let left = data.len(); + if left < 4 { + return Err(ZipError::Io(io::Error::new( + io::ErrorKind::Other, + "Incomplete extra data header", + ))); + } + let kind = data.read_u16::<LittleEndian>()?; + let size = data.read_u16::<LittleEndian>()? as usize; + let left = left - 4; + + if kind == 0x0001 { + return Err(ZipError::Io(io::Error::new( + io::ErrorKind::Other, + "No custom ZIP64 extra data allowed", + ))); + } + + #[cfg(not(feature = "unreserved"))] + { + if kind <= 31 + || [0x0021, 0x0022, 0x0023, 0x0065, 0x0066, 0x4690] + .iter() + .any(|&reserved| reserved == kind) + { + return Err(ZipError::Io(io::Error::new( + io::ErrorKind::Other, + "Reserved extra data header ID", + ))); + } + } + + if size > left { + return Err(ZipError::Io(io::Error::new( + io::ErrorKind::Other, + "Extra data size exceeds extra field", + ))); + } + + data = &data[size..]; + } + + Ok(()) +} + +fn write_local_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { + // This entry in the Local header MUST include BOTH original + // and compressed file size fields. 
+ writer.write_u16::<LittleEndian>(0x0001)?; + writer.write_u16::<LittleEndian>(16)?; + writer.write_u64::<LittleEndian>(file.uncompressed_size)?; + writer.write_u64::<LittleEndian>(file.compressed_size)?; + // Excluded fields: + // u64: relative header offset, u32: disk start number + Ok(()) +} + +fn update_local_zip64_extra_field<T: Write + io::Seek>( + writer: &mut T, + file: &ZipFileData, +) -> ZipResult<()> { + let zip64_extra_field = file.header_start + 30 + file.file_name_raw.len() as u64; + writer.seek(io::SeekFrom::Start(zip64_extra_field + 4))?; + writer.write_u64::<LittleEndian>(file.uncompressed_size)?; + writer.write_u64::<LittleEndian>(file.compressed_size)?; + // Excluded fields: + // u64: relative header offset, u32: disk start number + Ok(()) +} + +fn write_central_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<u16> { + // The order of the fields in the zip64 extended + // information record is fixed, but the fields MUST + // only appear if the corresponding Local or Central + // directory record field is set to 0xFFFF or 0xFFFFFFFF. 
+ let mut size = 0; + let uncompressed_size = file.uncompressed_size > 0xFFFFFFFF; + let compressed_size = file.compressed_size > 0xFFFFFFFF; + let header_start = file.header_start > 0xFFFFFFFF; + if uncompressed_size { + size += 8; + } + if compressed_size { + size += 8; + } + if header_start { + size += 8; + } + if size > 0 { + writer.write_u16::<LittleEndian>(0x0001)?; + writer.write_u16::<LittleEndian>(size)?; + size += 4; + + if uncompressed_size { + writer.write_u64::<LittleEndian>(file.uncompressed_size)?; + } + if compressed_size { + writer.write_u64::<LittleEndian>(file.compressed_size)?; + } + if header_start { + writer.write_u64::<LittleEndian>(file.header_start)?; + } + // Excluded fields: + // u32: disk start number + } + Ok(size) +} + fn path_to_string(path: &std::path::Path) -> String { let mut path_str = String::new(); for component in path.components() { @@ -852,6 +1075,7 @@ mod test { compression_method: CompressionMethod::Stored, last_modified_time: DateTime::default(), permissions: Some(33188), + large_file: false, }; writer.start_file("mimetype", options).unwrap(); writer diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 0821433f..1b19d118 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -30,7 +30,7 @@ fn write_to_zip_file(file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<()> { zip.write_all(b"Hello, World!\n")?; zip.start_file_with_extra_data("test_with_extra_data/🐢.txt", options)?; - zip.write_u16::<LittleEndian>(0)?; + zip.write_u16::<LittleEndian>(0xbeef)?; zip.write_u16::<LittleEndian>(EXTRA_DATA.len() as u16)?; zip.write_all(EXTRA_DATA)?; zip.end_extra_data()?; @@ -59,7 +59,7 @@ fn read_zip_file(zip_file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<Strin { let file_with_extra_data = archive.by_name("test_with_extra_data/🐢.txt")?; let mut extra_data = Vec::new(); - extra_data.write_u16::<LittleEndian>(0)?; + extra_data.write_u16::<LittleEndian>(0xbeef)?; extra_data.write_u16::<LittleEndian>(EXTRA_DATA.len() 
as u16)?; extra_data.write_all(EXTRA_DATA)?; assert_eq!(file_with_extra_data.extra_data(), extra_data.as_slice());