Add deep-copy method, and include copying in end-to-end tests

This commit is contained in:
Chris Hennick 2023-04-24 10:44:36 -07:00
parent d0300bc6e6
commit 36e7b19969
No known key found for this signature in database
GPG key ID: 25653935CC8B6C74
2 changed files with 124 additions and 32 deletions

View file

@ -1,7 +1,7 @@
//! Types for creating ZIP archives //! Types for creating ZIP archives
use crate::compression::CompressionMethod; use crate::compression::CompressionMethod;
use crate::read::{central_header_to_zip_file, ZipArchive, ZipFile}; use crate::read::{central_header_to_zip_file, find_content, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult}; use crate::result::{ZipError, ZipResult};
use crate::spec; use crate::spec;
use crate::types::{AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION}; use crate::types::{AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION};
@ -11,6 +11,7 @@ use std::convert::TryInto;
use std::default::Default; use std::default::Default;
use std::io; use std::io;
use std::io::prelude::*; use std::io::prelude::*;
use std::io::{BufReader, SeekFrom};
use std::mem; use std::mem;
#[cfg(any( #[cfg(any(
@ -268,10 +269,7 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
let (archive_offset, directory_start, number_of_files) = let (archive_offset, directory_start, number_of_files) =
ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?; ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?;
if readwriter if readwriter.seek(SeekFrom::Start(directory_start)).is_err() {
.seek(io::SeekFrom::Start(directory_start))
.is_err()
{
return Err(ZipError::InvalidArchive( return Err(ZipError::InvalidArchive(
"Could not seek to start of central directory", "Could not seek to start of central directory",
)); ));
@ -281,7 +279,7 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
.map(|_| central_header_to_zip_file(&mut readwriter, archive_offset)) .map(|_| central_header_to_zip_file(&mut readwriter, archive_offset))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
let _ = readwriter.seek(io::SeekFrom::Start(directory_start)); // seek directory_start to overwrite it let _ = readwriter.seek(SeekFrom::Start(directory_start)); // seek directory_start to overwrite it
Ok(ZipWriter { Ok(ZipWriter {
inner: GenericZipWriter::Storer(readwriter), inner: GenericZipWriter::Storer(readwriter),
@ -296,6 +294,45 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
} }
} }
impl<A: Read + Write + Seek> ZipWriter<A> {
/// Adds another copy of a file already in this archive. This will produce a larger but more
/// widely-compatible archive compared to [shallow_copy_file].
pub fn deep_copy_file(&mut self, src_name: &str, dest_name: &str) -> ZipResult<()> {
self.finish_file()?;
let write_position = self.inner.get_plain().stream_position()?;
let src_data = self.data_by_name(src_name)?.to_owned();
let data_start = src_data.data_start.load();
let real_size = src_data.compressed_size.max(write_position - data_start);
let mut options = FileOptions::default()
.large_file(real_size > spec::ZIP64_BYTES_THR)
.last_modified_time(src_data.last_modified_time)
.compression_method(src_data.compression_method);
if let Some(perms) = src_data.unix_mode() {
options = options.unix_permissions(perms);
}
let raw_values = ZipRawValues {
crc32: src_data.crc32,
compressed_size: real_size,
uncompressed_size: src_data.uncompressed_size,
};
let reader = self.inner.get_plain();
let mut reader = BufReader::new(ZipFileReader::Raw(find_content(&src_data, reader)?));
let mut copy = Vec::with_capacity(real_size as usize);
reader.read_to_end(&mut copy)?;
drop(reader);
self.inner
.get_plain()
.seek(SeekFrom::Start(write_position))?;
self.start_entry(dest_name, options, Some(raw_values))?;
self.writing_raw = true;
self.writing_to_file = true;
self.write_all(&copy)?;
Ok(())
}
}
impl<W: Write + Seek> ZipWriter<W> { impl<W: Write + Seek> ZipWriter<W> {
/// Initializes the archive. /// Initializes the archive.
/// ///
@ -409,7 +446,7 @@ impl<W: Write + Seek> ZipWriter<W> {
file.compressed_size = file_end - self.stats.start; file.compressed_size = file_end - self.stats.start;
update_local_file_header(writer, file)?; update_local_file_header(writer, file)?;
writer.seek(io::SeekFrom::Start(file_end))?; writer.seek(SeekFrom::Start(file_end))?;
} }
self.writing_to_file = false; self.writing_to_file = false;
@ -603,9 +640,9 @@ impl<W: Write + Seek> ZipWriter<W> {
// Update extra field length in local file header. // Update extra field length in local file header.
let extra_field_length = let extra_field_length =
if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16; if file.large_file { 20 } else { 0 } + file.extra_field.len() as u16;
writer.seek(io::SeekFrom::Start(file.header_start + 28))?; writer.seek(SeekFrom::Start(file.header_start + 28))?;
writer.write_u16::<LittleEndian>(extra_field_length)?; writer.write_u16::<LittleEndian>(extra_field_length)?;
writer.seek(io::SeekFrom::Start(header_end))?; writer.seek(SeekFrom::Start(header_end))?;
self.inner self.inner
.switch_to(file.compression_method, file.compression_level)?; .switch_to(file.compression_method, file.compression_level)?;
@ -852,9 +889,9 @@ impl<W: Write + Seek> ZipWriter<W> {
/// Adds another entry to the central directory referring to the same content as an existing /// Adds another entry to the central directory referring to the same content as an existing
/// entry. The file's local-file header will still refer to it by its original name, so /// entry. The file's local-file header will still refer to it by its original name, so
/// unzipping the file will technically be unspecified behavior. However, both [ZipArchive] and /// unzipping the file will technically be unspecified behavior. [ZipArchive] ignores the
/// OpenJDK ignore the filename in the local-file header and treat the central directory as /// filename in the local-file header and treats the central directory as authoritative. However,
/// authoritative. /// some other software (e.g. Minecraft) will refuse to extract a file copied this way.
pub fn shallow_copy_file(&mut self, src_name: &str, dest_name: &str) -> ZipResult<()> { pub fn shallow_copy_file(&mut self, src_name: &str, dest_name: &str) -> ZipResult<()> {
self.finish_file()?; self.finish_file()?;
let src_data = self.data_by_name(src_name)?; let src_data = self.data_by_name(src_name)?;
@ -1117,7 +1154,7 @@ fn write_local_file_header<T: Write>(writer: &mut T, file: &ZipFileData) -> ZipR
fn update_local_file_header<T: Write + Seek>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { fn update_local_file_header<T: Write + Seek>(writer: &mut T, file: &ZipFileData) -> ZipResult<()> {
const CRC32_OFFSET: u64 = 14; const CRC32_OFFSET: u64 = 14;
writer.seek(io::SeekFrom::Start(file.header_start + CRC32_OFFSET))?; writer.seek(SeekFrom::Start(file.header_start + CRC32_OFFSET))?;
writer.write_u32::<LittleEndian>(file.crc32)?; writer.write_u32::<LittleEndian>(file.crc32)?;
if file.large_file { if file.large_file {
update_local_zip64_extra_field(writer, file)?; update_local_zip64_extra_field(writer, file)?;
@ -1265,7 +1302,7 @@ fn update_local_zip64_extra_field<T: Write + Seek>(
file: &ZipFileData, file: &ZipFileData,
) -> ZipResult<()> { ) -> ZipResult<()> {
let zip64_extra_field = file.header_start + 30 + file.file_name.as_bytes().len() as u64; let zip64_extra_field = file.header_start + 30 + file.file_name.as_bytes().len() as u64;
writer.seek(io::SeekFrom::Start(zip64_extra_field + 4))?; writer.seek(SeekFrom::Start(zip64_extra_field + 4))?;
writer.write_u64::<LittleEndian>(file.uncompressed_size)?; writer.write_u64::<LittleEndian>(file.uncompressed_size)?;
writer.write_u64::<LittleEndian>(file.compressed_size)?; writer.write_u64::<LittleEndian>(file.compressed_size)?;
// Excluded fields: // Excluded fields:
@ -1513,6 +1550,44 @@ mod test {
assert_eq!(second_file_content, RT_TEST_TEXT); assert_eq!(second_file_content, RT_TEST_TEXT);
} }
/// Writes one deflated entry, deep-copies it under a second name, and checks that the finished
/// archive lists both names and yields the original text for each of them.
#[test]
fn test_deep_copy() {
    // 33188 is 0o100644 in octal.
    let options = FileOptions {
        compression_method: CompressionMethod::Deflated,
        compression_level: Some(9),
        last_modified_time: DateTime::default(),
        permissions: Some(33188),
        large_file: false,
    };

    // Build the archive in memory.
    let mut writer = ZipWriter::new(io::Cursor::new(Vec::new()));
    writer.start_file(RT_TEST_FILENAME, options).unwrap();
    writer.write_all(RT_TEST_TEXT.as_ref()).unwrap();
    writer
        .deep_copy_file(RT_TEST_FILENAME, SECOND_FILENAME)
        .unwrap();
    let zip = writer.finish().unwrap();

    // Both entries must be listed; compare in sorted order.
    let mut reader = ZipArchive::new(zip).unwrap();
    let mut actual_names: Vec<&str> = reader.file_names().collect();
    actual_names.sort();
    let mut wanted_names = vec![RT_TEST_FILENAME, SECOND_FILENAME];
    wanted_names.sort();
    assert_eq!(actual_names, wanted_names);

    // The original first, then the copy: each must read back as the text that was written.
    for name in [RT_TEST_FILENAME, SECOND_FILENAME] {
        let mut content = String::new();
        reader
            .by_name(name)
            .unwrap()
            .read_to_string(&mut content)
            .unwrap();
        assert_eq!(content, RT_TEST_TEXT);
    }
}
#[test] #[test]
fn path_to_string() { fn path_to_string() {
let mut path = std::path::PathBuf::new(); let mut path = std::path::PathBuf::new();

View file

@ -15,10 +15,11 @@ fn end_to_end() {
let file = &mut Cursor::new(Vec::new()); let file = &mut Cursor::new(Vec::new());
println!("Writing file with {method} compression"); println!("Writing file with {method} compression");
write_test_archive(file, method).expect("Couldn't write test zip archive"); write_test_archive(file, method, true).expect("Couldn't write test zip archive");
println!("Checking file contents"); println!("Checking file contents");
check_archive_file(file, ENTRY_NAME, Some(method), LOREM_IPSUM); check_archive_file(file, ENTRY_NAME, Some(method), LOREM_IPSUM);
check_archive_file(file, INTERNAL_COPY_ENTRY_NAME, Some(method), LOREM_IPSUM);
} }
} }
@ -28,7 +29,7 @@ fn end_to_end() {
fn copy() { fn copy() {
for &method in SUPPORTED_COMPRESSION_METHODS { for &method in SUPPORTED_COMPRESSION_METHODS {
let src_file = &mut Cursor::new(Vec::new()); let src_file = &mut Cursor::new(Vec::new());
write_test_archive(src_file, method).expect("Couldn't write to test file"); write_test_archive(src_file, method, false).expect("Couldn't write to test file");
let mut tgt_file = &mut Cursor::new(Vec::new()); let mut tgt_file = &mut Cursor::new(Vec::new());
@ -66,28 +67,35 @@ fn copy() {
#[test] #[test]
fn append() { fn append() {
for &method in SUPPORTED_COMPRESSION_METHODS { for &method in SUPPORTED_COMPRESSION_METHODS {
let mut file = &mut Cursor::new(Vec::new()); for shallow_copy in vec![false, true] {
write_test_archive(file, method).expect("Couldn't write to test file"); let mut file = &mut Cursor::new(Vec::new());
write_test_archive(file, method, shallow_copy).expect("Couldn't write to test file");
{ {
let mut zip = ZipWriter::new_append(&mut file).unwrap(); let mut zip = ZipWriter::new_append(&mut file).unwrap();
zip.start_file( zip.start_file(
COPY_ENTRY_NAME, COPY_ENTRY_NAME,
FileOptions::default().compression_method(method), FileOptions::default().compression_method(method),
) )
.unwrap(); .unwrap();
zip.write_all(LOREM_IPSUM).unwrap(); zip.write_all(LOREM_IPSUM).unwrap();
zip.finish().unwrap(); zip.finish().unwrap();
}
let mut zip = zip_next::ZipArchive::new(&mut file).unwrap();
check_archive_file_contents(&mut zip, ENTRY_NAME, LOREM_IPSUM);
check_archive_file_contents(&mut zip, COPY_ENTRY_NAME, LOREM_IPSUM);
check_archive_file_contents(&mut zip, INTERNAL_COPY_ENTRY_NAME, LOREM_IPSUM);
} }
let mut zip = zip_next::ZipArchive::new(&mut file).unwrap();
check_archive_file_contents(&mut zip, ENTRY_NAME, LOREM_IPSUM);
check_archive_file_contents(&mut zip, COPY_ENTRY_NAME, LOREM_IPSUM);
} }
} }
// Write a test zip archive to buffer. // Write a test zip archive to buffer.
fn write_test_archive(file: &mut Cursor<Vec<u8>>, method: CompressionMethod) -> ZipResult<()> { fn write_test_archive(
file: &mut Cursor<Vec<u8>>,
method: CompressionMethod,
shallow_copy: bool,
) -> ZipResult<()> {
let mut zip = ZipWriter::new(file); let mut zip = ZipWriter::new(file);
zip.add_directory("test/", Default::default())?; zip.add_directory("test/", Default::default())?;
@ -109,6 +117,12 @@ fn write_test_archive(file: &mut Cursor<Vec<u8>>, method: CompressionMethod) ->
zip.start_file(ENTRY_NAME, options)?; zip.start_file(ENTRY_NAME, options)?;
zip.write_all(LOREM_IPSUM)?; zip.write_all(LOREM_IPSUM)?;
if shallow_copy {
zip.shallow_copy_file(ENTRY_NAME, INTERNAL_COPY_ENTRY_NAME)?;
} else {
zip.deep_copy_file(ENTRY_NAME, INTERNAL_COPY_ENTRY_NAME)?;
}
zip.finish()?; zip.finish()?;
Ok(()) Ok(())
} }
@ -124,6 +138,7 @@ fn check_test_archive<R: Read + Seek>(zip_file: R) -> ZipResult<zip_next::ZipArc
"test/☃.txt", "test/☃.txt",
"test_with_extra_data/🐢.txt", "test_with_extra_data/🐢.txt",
ENTRY_NAME, ENTRY_NAME,
INTERNAL_COPY_ENTRY_NAME,
]; ];
let expected_file_names = HashSet::from_iter(expected_file_names.iter().copied()); let expected_file_names = HashSet::from_iter(expected_file_names.iter().copied());
let file_names = archive.file_names().collect::<HashSet<_>>(); let file_names = archive.file_names().collect::<HashSet<_>>();
@ -201,3 +216,5 @@ const EXTRA_DATA: &[u8] = b"Extra Data";
const ENTRY_NAME: &str = "test/lorem_ipsum.txt"; const ENTRY_NAME: &str = "test/lorem_ipsum.txt";
const COPY_ENTRY_NAME: &str = "test/lorem_ipsum_renamed.txt"; const COPY_ENTRY_NAME: &str = "test/lorem_ipsum_renamed.txt";
const INTERNAL_COPY_ENTRY_NAME: &str = "test/lorem_ipsum_copied.txt";