From 10da026ff68f5b0eeff54fc0d1f900368a301034 Mon Sep 17 00:00:00 2001 From: zhanghar Date: Wed, 23 Dec 2020 14:37:09 -0500 Subject: [PATCH 1/4] feat: support append to an existing archive --- src/read.rs | 5 +++-- src/write.rs | 42 +++++++++++++++++++++++++++++++++++++++++- tests/end_to_end.rs | 19 +++++++++++++++++++ 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/read.rs b/src/read.rs index 3aac00f6..0ce37c0a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -209,7 +209,7 @@ fn make_reader<'a>( impl ZipArchive { /// Get the directory start offset and number of files. This is done in a /// separate function to ease the control flow design. - fn get_directory_counts( + pub fn get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, @@ -514,7 +514,8 @@ fn unsupported_zip_error(detail: &'static str) -> ZipResult { Err(ZipError::UnsupportedArchive(detail)) } -fn central_header_to_zip_file( +/// Parse a central directory entry to collect the information for the file. +pub fn central_header_to_zip_file( reader: &mut R, archive_offset: u64, ) -> ZipResult { diff --git a/src/write.rs b/src/write.rs index bc688172..2994b40a 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1,7 +1,7 @@ //! Types for creating ZIP archives use crate::compression::CompressionMethod; -use crate::read::ZipFile; +use crate::read::{central_header_to_zip_file, ZipArchive, ZipFile}; use crate::result::{ZipError, ZipResult}; use crate::spec; use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION}; @@ -194,6 +194,46 @@ impl ZipWriterStats { } } +impl ZipWriter { + /// Initializes the archive from an existing ZIP archive, making it ready for append. + pub fn new_append(mut readwriter: A) -> ZipResult> { + let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?; + + if footer.disk_number != footer.disk_with_central_directory { + return Err(ZipError::UnsupportedArchive( + "Support for multi-disk files is not implemented", + )); + } + + let (archive_offset, directory_start, number_of_files) = + ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?; + + let mut files = Vec::new(); + + if let Err(_) = readwriter.seek(io::SeekFrom::Start(directory_start)) { + return Err(ZipError::InvalidArchive( + "Could not seek to start of central directory", + )); + } + + for _ in 0..number_of_files { + let file = central_header_to_zip_file(&mut readwriter, archive_offset)?; + files.push(file); + } + + let _ = readwriter.seek(io::SeekFrom::Start(directory_start)); // seek directory_start to overwrite it + + Ok(ZipWriter { + inner: GenericZipWriter::Storer(readwriter), + files, + stats: Default::default(), + writing_to_file: false, + comment: String::new(), + writing_raw: true, // avoid recomputing the last file's header + }) + } +} + impl ZipWriter { /// Initializes the archive. /// diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index b826f548..c658a6f5 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -46,6 +46,25 @@ fn copy() { check_zip_file_contents(&mut tgt_archive, COPY_ENTRY_NAME); } +// This test asserts that after appending to a `ZipWriter`, then reading its contents back out, +// both the prior data and the appended data will be exactly the same as their originals. +#[test] +fn append() { + let mut file = &mut Cursor::new(Vec::new()); + write_to_zip(file).expect("file written"); + + { + let mut zip = zip::ZipWriter::new_append(&mut file).unwrap(); + zip.start_file(COPY_ENTRY_NAME, Default::default()).unwrap(); + zip.write_all(LOREM_IPSUM).unwrap(); + zip.finish().unwrap(); + } + + let mut zip = zip::ZipArchive::new(&mut file).unwrap(); + check_zip_file_contents(&mut zip, ENTRY_NAME); + check_zip_file_contents(&mut zip, COPY_ENTRY_NAME); +} + fn write_to_zip(file: &mut Cursor>) -> zip::result::ZipResult<()> { let mut zip = zip::ZipWriter::new(file); From 4ce57911647eae9dd46c786fad15a612fcf4eb8c Mon Sep 17 00:00:00 2001 From: zhanghar Date: Thu, 25 Feb 2021 23:50:31 -0500 Subject: [PATCH 2/4] chore: use pub(crate) for header parsing used internally by the appendable ZipWriter --- src/read.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 0ce37c0a..4f103522 100644 --- a/src/read.rs +++ b/src/read.rs @@ -209,7 +209,7 @@ fn make_reader<'a>( impl ZipArchive { /// Get the directory start offset and number of files. This is done in a /// separate function to ease the control flow design. - pub fn get_directory_counts( + pub(crate) fn get_directory_counts( reader: &mut R, footer: &spec::CentralDirectoryEnd, cde_start_pos: u64, @@ -515,7 +515,7 @@ fn unsupported_zip_error(detail: &'static str) -> ZipResult { } /// Parse a central directory entry to collect the information for the file. -pub fn central_header_to_zip_file( +pub(crate) fn central_header_to_zip_file( reader: &mut R, archive_offset: u64, ) -> ZipResult { From 32b2f5b4832ae95965eaf05f50eaf78dabea4d9f Mon Sep 17 00:00:00 2001 From: zhanghar Date: Thu, 25 Feb 2021 23:54:20 -0500 Subject: [PATCH 3/4] refactor: use iterator for file list init --- src/write.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/write.rs b/src/write.rs index 2994b40a..68ae1147 100644 --- a/src/write.rs +++ b/src/write.rs @@ -208,18 +208,15 @@ impl ZipWriter { let (archive_offset, directory_start, number_of_files) = ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?; - let mut files = Vec::new(); - if let Err(_) = readwriter.seek(io::SeekFrom::Start(directory_start)) { return Err(ZipError::InvalidArchive( "Could not seek to start of central directory", )); } - for _ in 0..number_of_files { - let file = central_header_to_zip_file(&mut readwriter, archive_offset)?; - files.push(file); - } + let files = (0..number_of_files) + .map(|_| central_header_to_zip_file(&mut readwriter, archive_offset)) + .collect::, _>>()?; let _ = readwriter.seek(io::SeekFrom::Start(directory_start)); // seek directory_start to overwrite it From 44352aa34ba25db0fe153169ef19bdef5c1e9cbc Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Mon, 19 Apr 2021 11:58:45 +0100 Subject: [PATCH 4/4] fix: allow non-utf8 comments in ZipWriter --- src/write.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/write.rs b/src/write.rs index 68ae1147..ca2e3fb1 100644 --- a/src/write.rs +++ b/src/write.rs @@ -68,7 +68,7 @@ pub struct ZipWriter { files: Vec, stats: ZipWriterStats, writing_to_file: bool, - comment: String, + comment: Vec, writing_raw: bool, } @@ -225,7 +225,7 @@ impl ZipWriter { files, stats: Default::default(), writing_to_file: false, - comment: String::new(), + comment: footer.zip_file_comment, writing_raw: true, // avoid recomputing the last file's header }) } @@ -241,7 +241,7 @@ impl ZipWriter { files: Vec::new(), stats: Default::default(), writing_to_file: false, - comment: String::new(), + comment: Vec::new(), writing_raw: false, } } @@ -251,7 +251,15 @@ impl ZipWriter { where S: Into, { - self.comment = comment.into(); + self.set_raw_comment(comment.into().into()) + } + + /// Set ZIP archive comment. + /// + /// This sets the raw bytes of the comment. The comment + /// is typically expected to be encoded in UTF-8 + pub fn set_raw_comment(&mut self, comment: Vec) { + self.comment = comment; } /// Start a new file for with the requested options. @@ -522,7 +530,7 @@ impl ZipWriter { number_of_files: self.files.len() as u16, central_directory_size: central_size as u32, central_directory_offset: central_start as u32, - zip_file_comment: self.comment.as_bytes().to_vec(), + zip_file_comment: self.comment.clone(), }; footer.write(writer)?;