From 9397773a3240f24803a26afb8ef6778dec38291d Mon Sep 17 00:00:00 2001
From: Rouven Spreckels <rs@qu1x.dev>
Date: Sat, 3 Oct 2020 11:05:23 +0200
Subject: [PATCH] Add ZIP64 write support.

---
 Cargo.toml          |   1 +
 src/read.rs         |   4 +
 src/spec.rs         |  22 ++++
 src/types.rs        |  15 ++-
 src/write.rs        | 278 +++++++++++++++++++++++++++++++++++++++-----
 tests/end_to_end.rs |   4 +-
 6 files changed, 293 insertions(+), 31 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index abc33f7a..3712b7db 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ walkdir = "2"
 deflate = ["flate2/rust_backend"]
 deflate-miniz = ["flate2/default"]
 deflate-zlib = ["flate2/zlib"]
+unreserved = []
 default = ["bzip2", "deflate", "time"]
 
 [[bench]]
diff --git a/src/read.rs b/src/read.rs
index 240bccba..ed9c5451 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -508,6 +508,7 @@ fn central_header_to_zip_file<R: Read + io::Seek>(
         central_header_start,
         data_start: 0,
         external_attributes: external_file_attributes,
+        large_file: false,
     };
 
     match parse_extra_field(&mut result) {
@@ -530,6 +531,8 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
         let mut len_left = len as i64;
         // Zip64 extended information extra field
         if kind == 0x0001 {
+            file.large_file = true;
+
             if file.uncompressed_size == 0xFFFFFFFF {
                 file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
                 len_left -= 8;
@@ -778,6 +781,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
         // We set this to zero, which should be valid as the docs state 'If input came
         // from standard input, this field is set to zero.'
         external_attributes: 0,
+        large_file: false,
     };
 
     match parse_extra_field(&mut result) {
diff --git a/src/spec.rs b/src/spec.rs
index 8fa8c5c1..2e25c400 100644
--- a/src/spec.rs
+++ b/src/spec.rs
@@ -120,6 +120,14 @@ impl Zip64CentralDirectoryEndLocator {
             number_of_disks,
         })
     }
+
+    pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
+        writer.write_u32::<LittleEndian>(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?;
+        writer.write_u32::<LittleEndian>(self.disk_with_central_directory)?;
+        writer.write_u64::<LittleEndian>(self.end_of_central_directory_offset)?;
+        writer.write_u32::<LittleEndian>(self.number_of_disks)?;
+        Ok(())
+    }
 }
 
 pub struct Zip64CentralDirectoryEnd {
@@ -182,4 +190,18 @@ impl Zip64CentralDirectoryEnd {
             "Could not find ZIP64 central directory end",
         ))
     }
+
+    pub fn write<T: Write>(&self, writer: &mut T) -> ZipResult<()> {
+        writer.write_u32::<LittleEndian>(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?;
+        writer.write_u64::<LittleEndian>(44)?; // record size
+        writer.write_u16::<LittleEndian>(self.version_made_by)?;
+        writer.write_u16::<LittleEndian>(self.version_needed_to_extract)?;
+        writer.write_u32::<LittleEndian>(self.disk_number)?;
+        writer.write_u32::<LittleEndian>(self.disk_with_central_directory)?;
+        writer.write_u64::<LittleEndian>(self.number_of_files_on_this_disk)?;
+        writer.write_u64::<LittleEndian>(self.number_of_files)?;
+        writer.write_u64::<LittleEndian>(self.central_directory_size)?;
+        writer.write_u64::<LittleEndian>(self.central_directory_offset)?;
+        Ok(())
+    }
 }
diff --git a/src/types.rs b/src/types.rs
index 1f4c13eb..c753fb43 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -244,6 +244,8 @@ pub struct ZipFileData {
     pub data_start: u64,
     /// External file attributes
     pub external_attributes: u32,
+    /// Reserve local ZIP64 extra field
+    pub large_file: bool,
 }
 
 impl ZipFileData {
@@ -277,10 +279,18 @@ impl ZipFileData {
             })
     }
 
+    pub fn zip64_extension(&self) -> bool {
+        self.uncompressed_size > 0xFFFFFFFF
+            || self.compressed_size > 0xFFFFFFFF
+            || self.header_start > 0xFFFFFFFF
+    }
+
     pub fn version_needed(&self) -> u16 {
-        match self.compression_method {
+        // higher versions matched first
+        match (self.zip64_extension(), self.compression_method) {
             #[cfg(feature = "bzip2")]
-            crate::compression::CompressionMethod::Bzip2 => 46,
+            (_, crate::compression::CompressionMethod::Bzip2) => 46,
+            (true, _) => 45,
             _ => 20,
         }
     }
@@ -318,6 +328,7 @@ mod test {
             data_start: 0,
             central_header_start: 0,
             external_attributes: 0,
+            large_file: false,
         };
         assert_eq!(
             data.file_name_sanitized(),
diff --git a/src/write.rs b/src/write.rs
index 72bc63ea..9db89f04 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -4,7 +4,7 @@ use crate::compression::CompressionMethod;
 use crate::result::{ZipError, ZipResult};
 use crate::spec;
 use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION};
-use byteorder::{LittleEndian, WriteBytesExt};
+use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
 use crc32fast::Hasher;
 use std::default::Default;
 use std::io;
@@ -85,6 +85,7 @@ pub struct FileOptions {
     compression_method: CompressionMethod,
     last_modified_time: DateTime,
     permissions: Option<u32>,
+    large_file: bool,
 }
 
 impl FileOptions {
@@ -108,6 +109,7 @@ impl FileOptions {
             #[cfg(not(feature = "time"))]
             last_modified_time: DateTime::default(),
             permissions: None,
+            large_file: false,
         }
     }
 
@@ -115,7 +117,6 @@ impl FileOptions {
     ///
     /// The default is `CompressionMethod::Deflated`. If the deflate compression feature is
     /// disabled, `CompressionMethod::Stored` becomes the default.
-    /// otherwise.
     pub fn compression_method(mut self, method: CompressionMethod) -> FileOptions {
         self.compression_method = method;
         self
@@ -139,6 +140,15 @@ impl FileOptions {
         self.permissions = Some(mode & 0o777);
         self
     }
+
+    /// Set whether the new file's compressed or uncompressed size may reach 4 GiB or more.
+    ///
+    /// If set to `false` and the file reaches the limit, writing fails with an I/O error. If set
+    /// to `true` and the file stays below the limit, 20 B are wasted. The default is `false`.
+    pub fn large_file(mut self, large: bool) -> FileOptions {
+        self.large_file = large;
+        self
+    }
 }
 
 impl Default for FileOptions {
@@ -163,6 +173,14 @@ impl<W: Write + io::Seek> Write for ZipWriter<W> {
                     let write_result = w.write(buf);
                     if let Ok(count) = write_result {
                         self.stats.update(&buf[0..count]);
+                        if self.stats.bytes_written > 0xFFFFFFFF
+                            && !self.files.last_mut().unwrap().large_file
+                        {
+                            return Err(io::Error::new(
+                                io::ErrorKind::Other,
+                                "Large file option has not been set",
+                            ));
+                        }
                     }
                     write_result
                 }
@@ -247,6 +265,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
                 data_start: 0,
                 central_header_start: 0,
                 external_attributes: permissions << 16,
+                large_file: options.large_file,
             };
             write_local_file_header(writer, &file)?;
 
@@ -376,7 +395,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
     ///
     ///     zip.start_file_with_extra_data("identical_extra_data.txt", options)?;
     ///     let extra_data = b"local and central extra data";
-    ///     zip.write_u16::<LittleEndian>(0x0000)?;
+    ///     zip.write_u16::<LittleEndian>(0xbeef)?;
     ///     zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
     ///     zip.write_all(extra_data)?;
     ///     zip.end_extra_data()?;
@@ -384,20 +403,20 @@ impl<W: Write + io::Seek> ZipWriter<W> {
     ///
     ///     let data_start = zip.start_file_with_extra_data("different_extra_data.txt", options)?;
     ///     let extra_data = b"local extra data";
-    ///     zip.write_u16::<LittleEndian>(0x0000)?;
+    ///     zip.write_u16::<LittleEndian>(0xbeef)?;
     ///     zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
     ///     zip.write_all(extra_data)?;
     ///     let data_start = data_start as usize + 4 + extra_data.len() + 4;
     ///     let align = 64;
     ///     let pad_length = (align - data_start % align) % align;
     ///     assert_eq!(pad_length, 19);
-    ///     zip.write_u16::<LittleEndian>(0x0000)?;
+    ///     zip.write_u16::<LittleEndian>(0xdead)?;
     ///     zip.write_u16::<LittleEndian>(pad_length as u16)?;
     ///     zip.write_all(&vec![0; pad_length])?;
     ///     let data_start = zip.end_local_start_central_extra_data()?;
     ///     assert_eq!(data_start as usize % align, 0);
     ///     let extra_data = b"central extra data";
-    ///     zip.write_u16::<LittleEndian>(0x0000)?;
+    ///     zip.write_u16::<LittleEndian>(0xbeef)?;
     ///     zip.write_u16::<LittleEndian>(extra_data.len() as u16)?;
     ///     zip.write_all(extra_data)?;
     ///     zip.end_extra_data()?;
@@ -454,13 +473,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
         }
         let file = self.files.last_mut().unwrap();
 
-        // Ensure extra data fits into extra field.
-        if file.extra_field.len() > 0xFFFF {
-            return Err(ZipError::Io(io::Error::new(
-                io::ErrorKind::InvalidData,
-                "Extra data exceeds extra field",
-            )));
-        }
+        validate_extra_data(&file)?;
 
         if !self.writing_to_central_extra_field_only {
             let writer = self.inner.get_plain();
@@ -474,8 +487,10 @@ impl<W: Write + io::Seek> ZipWriter<W> {
             file.data_start = header_end;
 
             // Update extra field length in local file header.
+            let extra_field_length =
+                if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16;
             writer.seek(io::SeekFrom::Start(file.header_start + 28))?;
-            writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
+            writer.write_u16::<LittleEndian>(extra_field_length)?;
             writer.seek(io::SeekFrom::Start(header_end))?;
 
             self.inner.switch_to(file.compression_method)?;
@@ -549,13 +564,50 @@ impl<W: Write + io::Seek> ZipWriter<W> {
             }
             let central_size = writer.seek(io::SeekFrom::Current(0))? - central_start;
 
+            if self.files.len() > 0xFFFF || central_size > 0xFFFFFFFF || central_start > 0xFFFFFFFF
+            {
+                let zip64_footer = spec::Zip64CentralDirectoryEnd {
+                    version_made_by: DEFAULT_VERSION as u16,
+                    version_needed_to_extract: DEFAULT_VERSION as u16,
+                    disk_number: 0,
+                    disk_with_central_directory: 0,
+                    number_of_files_on_this_disk: self.files.len() as u64,
+                    number_of_files: self.files.len() as u64,
+                    central_directory_size: central_size,
+                    central_directory_offset: central_start,
+                };
+
+                zip64_footer.write(writer)?;
+
+                let zip64_footer = spec::Zip64CentralDirectoryEndLocator {
+                    disk_with_central_directory: 0,
+                    end_of_central_directory_offset: central_start + central_size,
+                    number_of_disks: 1,
+                };
+
+                zip64_footer.write(writer)?;
+            }
+
+            let number_of_files = if self.files.len() > 0xFFFF {
+                0xFFFF
+            } else {
+                self.files.len() as u16
+            };
             let footer = spec::CentralDirectoryEnd {
                 disk_number: 0,
                 disk_with_central_directory: 0,
-                number_of_files_on_this_disk: self.files.len() as u16,
-                number_of_files: self.files.len() as u16,
-                central_directory_size: central_size as u32,
-                central_directory_offset: central_start as u32,
+                number_of_files_on_this_disk: number_of_files,
+                number_of_files,
+                central_directory_size: if central_size > 0xFFFFFFFF {
+                    0xFFFFFFFF
+                } else {
+                    central_size as u32
+                },
+                central_directory_offset: if central_start > 0xFFFFFFFF {
+                    0xFFFFFFFF
+                } else {
+                    central_start as u32
+                },
                 zip_file_comment: self.comment.as_bytes().to_vec(),
             };
 
@@ -708,15 +760,28 @@ fn write_local_file_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipR
     // crc-32
     writer.write_u32::<LittleEndian>(file.crc32)?;
     // compressed size
-    writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
+    writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
+        0xFFFFFFFF
+    } else {
+        file.compressed_size as u32
+    })?;
     // uncompressed size
-    writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
+    writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
+        0xFFFFFFFF
+    } else {
+        file.uncompressed_size as u32
+    })?;
     // file name length
     writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?;
     // extra field length
-    writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
+    let extra_field_length = if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16;
+    writer.write_u16::<LittleEndian>(extra_field_length)?;
     // file name
     writer.write_all(file.file_name.as_bytes())?;
+    // zip64 extra field
+    if file.large_file {
+        write_local_zip64_extra_field(writer, &file)?;
+    }
 
     Ok(())
 }
@@ -728,12 +793,37 @@ fn update_local_file_header<T: Write + io::Seek>(
     const CRC32_OFFSET: u64 = 14;
     writer.seek(io::SeekFrom::Start(file.header_start + CRC32_OFFSET))?;
     writer.write_u32::<LittleEndian>(file.crc32)?;
-    writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
-    writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
+    writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
+        if file.large_file {
+            0xFFFFFFFF
+        } else {
+            // compressed size can be slightly larger than uncompressed size
+            return Err(ZipError::Io(io::Error::new(
+                io::ErrorKind::Other,
+                "Large file option has not been set",
+            )));
+        }
+    } else {
+        file.compressed_size as u32
+    })?;
+    writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
+        // uncompressed size is checked on write to catch it as soon as possible
+        0xFFFFFFFF
+    } else {
+        file.uncompressed_size as u32
+    })?;
+    if file.large_file {
+        update_local_zip64_extra_field(writer, file)?;
+    }
     Ok(())
 }
 
 fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
+    // buffer zip64 extra field to determine its variable length
+    let mut zip64_extra_field = [0; 28];
+    let zip64_extra_field_length =
+        write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?;
+
     // central file header signature
     writer.write_u32::<LittleEndian>(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?;
     // version made by
@@ -757,13 +847,21 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
     // crc-32
     writer.write_u32::<LittleEndian>(file.crc32)?;
     // compressed size
-    writer.write_u32::<LittleEndian>(file.compressed_size as u32)?;
+    writer.write_u32::<LittleEndian>(if file.compressed_size > 0xFFFFFFFF {
+        0xFFFFFFFF
+    } else {
+        file.compressed_size as u32
+    })?;
     // uncompressed size
-    writer.write_u32::<LittleEndian>(file.uncompressed_size as u32)?;
+    writer.write_u32::<LittleEndian>(if file.uncompressed_size > 0xFFFFFFFF {
+        0xFFFFFFFF
+    } else {
+        file.uncompressed_size as u32
+    })?;
     // file name length
     writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?;
     // extra field length
-    writer.write_u16::<LittleEndian>(file.extra_field.len() as u16)?;
+    writer.write_u16::<LittleEndian>(zip64_extra_field_length + file.extra_field.len() as u16)?;
     // file comment length
     writer.write_u16::<LittleEndian>(0)?;
     // disk number start
@@ -773,9 +871,15 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
     // external file attributes
     writer.write_u32::<LittleEndian>(file.external_attributes)?;
     // relative offset of local header
-    writer.write_u32::<LittleEndian>(file.header_start as u32)?;
+    writer.write_u32::<LittleEndian>(if file.header_start > 0xFFFFFFFF {
+        0xFFFFFFFF
+    } else {
+        file.header_start as u32
+    })?;
     // file name
     writer.write_all(file.file_name.as_bytes())?;
+    // zip64 extra field
+    writer.write_all(&zip64_extra_field[..zip64_extra_field_length as usize])?;
     // extra field
     writer.write_all(&file.extra_field)?;
     // file comment
@@ -784,6 +888,125 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
     Ok(())
 }
 
+fn validate_extra_data(file: &ZipFileData) -> ZipResult<()> {
+    let mut data = file.extra_field.as_slice();
+
+    if data.len() > 0xFFFF {
+        return Err(ZipError::Io(io::Error::new(
+            io::ErrorKind::InvalidData,
+            "Extra data exceeds extra field",
+        )));
+    }
+
+    while data.len() > 0 {
+        let left = data.len();
+        if left < 4 {
+            return Err(ZipError::Io(io::Error::new(
+                io::ErrorKind::Other,
+                "Incomplete extra data header",
+            )));
+        }
+        let kind = data.read_u16::<LittleEndian>()?;
+        let size = data.read_u16::<LittleEndian>()? as usize;
+        let left = left - 4;
+
+        if kind == 0x0001 {
+            return Err(ZipError::Io(io::Error::new(
+                io::ErrorKind::Other,
+                "No custom ZIP64 extra data allowed",
+            )));
+        }
+
+        #[cfg(not(feature = "unreserved"))]
+        {
+            if kind <= 31
+                || [0x0021, 0x0022, 0x0023, 0x0065, 0x0066, 0x4690]
+                    .iter()
+                    .any(|&reserved| reserved == kind)
+            {
+                return Err(ZipError::Io(io::Error::new(
+                    io::ErrorKind::Other,
+                    "Reserved extra data header ID",
+                )));
+            }
+        }
+
+        if size > left {
+            return Err(ZipError::Io(io::Error::new(
+                io::ErrorKind::Other,
+                "Extra data size exceeds extra field",
+            )));
+        }
+
+        data = &data[size..];
+    }
+
+    Ok(())
+}
+
+fn write_local_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
+    // This entry in the Local header MUST include BOTH original
+    // and compressed file size fields.
+    writer.write_u16::<LittleEndian>(0x0001)?;
+    writer.write_u16::<LittleEndian>(16)?;
+    writer.write_u64::<LittleEndian>(file.uncompressed_size)?;
+    writer.write_u64::<LittleEndian>(file.compressed_size)?;
+    // Excluded fields:
+    // u32: disk start number
+    Ok(())
+}
+
+fn update_local_zip64_extra_field<T: Write + io::Seek>(
+    writer: &mut T,
+    file: &ZipFileData,
+) -> ZipResult<()> {
+    // Field follows the 30 B fixed header and the name bytes `write_local_file_header` wrote.
+    let zip64_extra_field = file.header_start + 30 + file.file_name.as_bytes().len() as u64;
+    writer.seek(io::SeekFrom::Start(zip64_extra_field + 4))?;
+    writer.write_u64::<LittleEndian>(file.uncompressed_size)?;
+    writer.write_u64::<LittleEndian>(file.compressed_size)?;
+    // Excluded field: u32 disk start number.
+    Ok(())
+}
+
+fn write_central_zip64_extra_field<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipResult<u16> {
+    // The order of the fields in the zip64 extended
+    // information record is fixed, but the fields MUST
+    // only appear if the corresponding Local or Central
+    // directory record field is set to 0xFFFF or 0xFFFFFFFF.
+    let mut size = 0;
+    let uncompressed_size = file.uncompressed_size > 0xFFFFFFFF;
+    let compressed_size = file.compressed_size > 0xFFFFFFFF;
+    let header_start = file.header_start > 0xFFFFFFFF;
+    if uncompressed_size {
+        size += 8;
+    }
+    if compressed_size {
+        size += 8;
+    }
+    if header_start {
+        size += 8;
+    }
+    if size > 0 {
+        writer.write_u16::<LittleEndian>(0x0001)?;
+        writer.write_u16::<LittleEndian>(size)?;
+        size += 4;
+
+        if uncompressed_size {
+            writer.write_u64::<LittleEndian>(file.uncompressed_size)?;
+        }
+        if compressed_size {
+            writer.write_u64::<LittleEndian>(file.compressed_size)?;
+        }
+        if header_start {
+            writer.write_u64::<LittleEndian>(file.header_start)?;
+        }
+        // Excluded fields:
+        // u32: disk start number
+    }
+    Ok(size)
+}
+
 fn path_to_string(path: &std::path::Path) -> String {
     let mut path_str = String::new();
     for component in path.components() {
@@ -852,6 +1075,7 @@ mod test {
             compression_method: CompressionMethod::Stored,
             last_modified_time: DateTime::default(),
             permissions: Some(33188),
+            large_file: false,
         };
         writer.start_file("mimetype", options).unwrap();
         writer
diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs
index 0821433f..1b19d118 100644
--- a/tests/end_to_end.rs
+++ b/tests/end_to_end.rs
@@ -30,7 +30,7 @@ fn write_to_zip_file(file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<()> {
     zip.write_all(b"Hello, World!\n")?;
 
     zip.start_file_with_extra_data("test_with_extra_data/🐢.txt", options)?;
-    zip.write_u16::<LittleEndian>(0)?;
+    zip.write_u16::<LittleEndian>(0xbeef)?;
     zip.write_u16::<LittleEndian>(EXTRA_DATA.len() as u16)?;
     zip.write_all(EXTRA_DATA)?;
     zip.end_extra_data()?;
@@ -59,7 +59,7 @@ fn read_zip_file(zip_file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<Strin
     {
         let file_with_extra_data = archive.by_name("test_with_extra_data/🐢.txt")?;
         let mut extra_data = Vec::new();
-        extra_data.write_u16::<LittleEndian>(0)?;
+        extra_data.write_u16::<LittleEndian>(0xbeef)?;
         extra_data.write_u16::<LittleEndian>(EXTRA_DATA.len() as u16)?;
         extra_data.write_all(EXTRA_DATA)?;
         assert_eq!(file_with_extra_data.extra_data(), extra_data.as_slice());