Add new APIs that allow copying zip file entries between zip files

The copy is done directly using the raw compressed data, avoiding
decompression and recompression.
This commit is contained in:
Robert Marcano 2020-08-31 16:57:16 -04:00
parent 9884c68315
commit 5843d17d4c
3 changed files with 250 additions and 52 deletions

View file

@ -80,6 +80,7 @@ impl<'a> CryptoReader<'a> {
enum ZipFileReader<'a> {
NoReader,
Raw(io::Take<&'a mut dyn io::Read>),
Stored(Crc32Reader<CryptoReader<'a>>),
#[cfg(any(
feature = "deflate",
@ -95,6 +96,7 @@ impl<'a> Read for ZipFileReader<'a> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match self {
ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
ZipFileReader::Raw(r) => r.read(buf),
ZipFileReader::Stored(r) => r.read(buf),
#[cfg(any(
feature = "deflate",
@ -113,6 +115,7 @@ impl<'a> ZipFileReader<'a> {
pub fn into_inner(self) -> io::Take<&'a mut dyn Read> {
match self {
ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"),
ZipFileReader::Raw(r) => r,
ZipFileReader::Stored(r) => r.into_inner().into_inner(),
#[cfg(any(
feature = "deflate",
@ -129,15 +132,23 @@ impl<'a> ZipFileReader<'a> {
/// A struct for reading a zip file
pub struct ZipFile<'a> {
    /// Metadata of the entry; either borrowed or owned, hence the `Cow`.
    data: Cow<'a, ZipFileData>,
    /// Reader over the entry's stored bytes that has not yet been turned into a
    /// `ZipFileReader`. It is `Some` while `reader` is still `ZipFileReader::NoReader`
    /// and is taken (left as `None`) when the actual reader is built.
    crypto_reader: Option<CryptoReader<'a>>,
    /// The reader used to serve data. Starts out as `ZipFileReader::NoReader` when the
    /// choice between a decoding reader and a raw reader is deferred to first use.
    reader: ZipFileReader<'a>,
}
fn make_reader<'a>(
fn make_crypto_reader<'a>(
compression_method: crate::compression::CompressionMethod,
crc32: u32,
reader: io::Take<&'a mut dyn io::Read>,
password: Option<&[u8]>,
) -> ZipResult<Result<ZipFileReader<'a>, InvalidPassword>> {
) -> ZipResult<Result<CryptoReader<'a>, InvalidPassword>> {
#[allow(deprecated)]
{
if let CompressionMethod::Unsupported(_) = compression_method {
return unsupported_zip_error("Compression method not supported");
}
}
let reader = match password {
None => CryptoReader::Plaintext(reader),
Some(password) => match ZipCryptoReader::new(reader, password).validate(crc32)? {
@ -145,9 +156,16 @@ fn make_reader<'a>(
Some(r) => CryptoReader::ZipCrypto(r),
},
};
Ok(Ok(reader))
}
fn make_reader<'a>(
compression_method: CompressionMethod,
crc32: u32,
reader: CryptoReader<'a>,
) -> ZipFileReader<'a> {
match compression_method {
CompressionMethod::Stored => Ok(Ok(ZipFileReader::Stored(Crc32Reader::new(reader, crc32)))),
CompressionMethod::Stored => ZipFileReader::Stored(Crc32Reader::new(reader, crc32)),
#[cfg(any(
feature = "deflate",
feature = "deflate-miniz",
@ -155,20 +173,14 @@ fn make_reader<'a>(
))]
CompressionMethod::Deflated => {
let deflate_reader = DeflateDecoder::new(reader);
Ok(Ok(ZipFileReader::Deflated(Crc32Reader::new(
deflate_reader,
crc32,
))))
ZipFileReader::Deflated(Crc32Reader::new(deflate_reader, crc32))
}
#[cfg(feature = "bzip2")]
CompressionMethod::Bzip2 => {
let bzip2_reader = BzDecoder::new(reader);
Ok(Ok(ZipFileReader::Bzip2(Crc32Reader::new(
bzip2_reader,
crc32,
))))
ZipFileReader::Bzip2(Crc32Reader::new(bzip2_reader, crc32))
}
_ => unsupported_zip_error("Compression method not supported"),
_ => panic!("Compression method not supported"),
}
}
@ -420,9 +432,10 @@ impl<R: Read + io::Seek> ZipArchive<R> {
self.reader.seek(io::SeekFrom::Start(data.data_start))?;
let limit_reader = (self.reader.by_ref() as &mut dyn Read).take(data.compressed_size);
match make_reader(data.compression_method, data.crc32, limit_reader, password) {
Ok(Ok(reader)) => Ok(Ok(ZipFile {
reader,
match make_crypto_reader(data.compression_method, data.crc32, limit_reader, password) {
Ok(Ok(crypto_reader)) => Ok(Ok(ZipFile {
crypto_reader: Some(crypto_reader),
reader: ZipFileReader::NoReader,
data: Cow::Borrowed(data),
})),
Err(e) => Err(e),
@ -555,6 +568,23 @@ fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> {
/// Methods for retrieving information on zip files
impl<'a> ZipFile<'a> {
/// Returns the decoding reader for this entry, building it on first use.
///
/// While `self.reader` is still `ZipFileReader::NoReader`, the pending
/// `crypto_reader` is taken and handed to `make_reader` together with the
/// entry's compression method and CRC-32. Later calls return the reader that
/// is already in place.
///
/// # Panics
///
/// Panics with "Invalid reader state" if no reader has been built yet and
/// `crypto_reader` is `None`.
fn get_reader(&mut self) -> &mut ZipFileReader<'a> {
    let uninitialised = match self.reader {
        ZipFileReader::NoReader => true,
        _ => false,
    };
    if uninitialised {
        let meta = &self.data;
        let pending = self.crypto_reader.take().expect("Invalid reader state");
        self.reader = make_reader(meta.compression_method, meta.crc32, pending);
    }
    &mut self.reader
}
/// Returns a reader over the entry's bytes exactly as stored in the archive.
///
/// While `self.reader` is still `ZipFileReader::NoReader`, the pending
/// `crypto_reader` is unwrapped to its inner reader and installed as
/// `ZipFileReader::Raw`. If a reader has already been installed (for example
/// by a previous read through the decoding path), that existing reader is
/// returned unchanged.
///
/// # Panics
///
/// Panics with "Invalid reader state" if no reader has been built yet and
/// `crypto_reader` is `None`.
pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read {
    let uninitialised = match self.reader {
        ZipFileReader::NoReader => true,
        _ => false,
    };
    if uninitialised {
        let undecoded = self
            .crypto_reader
            .take()
            .expect("Invalid reader state")
            .into_inner();
        self.reader = ZipFileReader::Raw(undecoded);
    }
    &mut self.reader
}
/// Get the version of the file
pub fn version_made_by(&self) -> (u8, u8) {
(
@ -669,7 +699,7 @@ impl<'a> ZipFile<'a> {
impl<'a> Read for ZipFile<'a> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.reader.read(buf)
self.get_reader().read(buf)
}
}
@ -681,8 +711,16 @@ impl<'a> Drop for ZipFile<'a> {
let mut buffer = [0; 1 << 16];
// Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped.
let innerreader = ::std::mem::replace(&mut self.reader, ZipFileReader::NoReader);
let mut reader: std::io::Take<&mut dyn std::io::Read> = innerreader.into_inner();
let mut reader: std::io::Take<&mut dyn std::io::Read> = match &mut self.reader {
ZipFileReader::NoReader => {
let innerreader = ::std::mem::replace(&mut self.crypto_reader, None);
innerreader.expect("Invalid reader state").into_inner()
}
reader => {
let innerreader = ::std::mem::replace(reader, ZipFileReader::NoReader);
innerreader.into_inner()
}
};
loop {
match reader.read(&mut buffer) {
@ -789,9 +827,13 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
let result_crc32 = result.crc32;
let result_compression_method = result.compression_method;
let crypto_reader =
make_crypto_reader(result_compression_method, result_crc32, limit_reader, None)?.unwrap();
Ok(Some(ZipFile {
data: Cow::Owned(result),
reader: make_reader(result_compression_method, result_crc32, limit_reader, None)?.unwrap(),
crypto_reader: None,
reader: make_reader(result_compression_method, result_crc32, crypto_reader),
}))
}

View file

@ -1,6 +1,7 @@
//! Types for creating ZIP archives
use crate::compression::CompressionMethod;
use crate::read::ZipFile;
use crate::result::{ZipError, ZipResult};
use crate::spec;
use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION};
@ -68,6 +69,7 @@ pub struct ZipWriter<W: Write + io::Seek> {
stats: ZipWriterStats,
writing_to_file: bool,
comment: String,
writing_raw: bool,
}
#[derive(Default)]
@ -77,6 +79,12 @@ struct ZipWriterStats {
bytes_written: u64,
}
/// Checksum and size values supplied up front for an entry whose data is
/// written as-is, instead of being computed while the entry is written.
struct ZipRawValues {
    /// CRC-32 recorded for the entry.
    crc32: u32,
    /// Size in bytes of the entry's data as stored in the archive.
    compressed_size: u64,
    /// Size in bytes of the entry's data once decompressed.
    uncompressed_size: u64,
}
/// Metadata for a file to be written
#[derive(Copy, Clone)]
pub struct FileOptions {
@ -197,6 +205,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
stats: Default::default(),
writing_to_file: false,
comment: String::new(),
writing_raw: false,
}
}
@ -209,30 +218,40 @@ impl<W: Write + io::Seek> ZipWriter<W> {
}
/// Start a new file for with the requested options.
fn start_entry<S>(&mut self, name: S, options: FileOptions) -> ZipResult<()>
fn start_entry<S>(
&mut self,
name: S,
options: FileOptions,
raw_values: Option<ZipRawValues>,
) -> ZipResult<()>
where
S: Into<String>,
{
self.finish_file()?;
let is_raw = raw_values.is_some();
let raw_values = raw_values.unwrap_or_else(|| ZipRawValues {
crc32: 0,
compressed_size: 0,
uncompressed_size: 0,
});
{
let writer = self.inner.get_plain();
let header_start = writer.seek(io::SeekFrom::Current(0))?;
let permissions = options.permissions.unwrap_or(0o100644);
let file_name = name.into();
let file_name_raw = file_name.clone().into_bytes();
let mut file = ZipFileData {
system: System::Unix,
version_made_by: DEFAULT_VERSION,
encrypted: false,
compression_method: options.compression_method,
last_modified_time: options.last_modified_time,
crc32: 0,
compressed_size: 0,
uncompressed_size: 0,
file_name,
file_name_raw,
crc32: raw_values.crc32,
compressed_size: raw_values.compressed_size,
uncompressed_size: raw_values.uncompressed_size,
file_name: name.into(),
file_name_raw: Vec::new(), // Never used for saving
file_comment: String::new(),
header_start,
data_start: 0,
@ -251,7 +270,12 @@ impl<W: Write + io::Seek> ZipWriter<W> {
self.files.push(file);
}
self.inner.switch_to(options.compression_method)?;
self.writing_raw = is_raw;
self.inner.switch_to(if is_raw {
CompressionMethod::Stored
} else {
options.compression_method
})?;
Ok(())
}
@ -260,20 +284,23 @@ impl<W: Write + io::Seek> ZipWriter<W> {
self.inner.switch_to(CompressionMethod::Stored)?;
let writer = self.inner.get_plain();
let file = match self.files.last_mut() {
None => return Ok(()),
Some(f) => f,
};
file.crc32 = self.stats.hasher.clone().finalize();
file.uncompressed_size = self.stats.bytes_written;
if !self.writing_raw {
let file = match self.files.last_mut() {
None => return Ok(()),
Some(f) => f,
};
file.crc32 = self.stats.hasher.clone().finalize();
file.uncompressed_size = self.stats.bytes_written;
let file_end = writer.seek(io::SeekFrom::Current(0))?;
file.compressed_size = file_end - self.stats.start;
let file_end = writer.seek(io::SeekFrom::Current(0))?;
file.compressed_size = file_end - self.stats.start;
update_local_file_header(writer, file)?;
writer.seek(io::SeekFrom::Start(file_end))?;
update_local_file_header(writer, file)?;
writer.seek(io::SeekFrom::Start(file_end))?;
}
self.writing_to_file = false;
self.writing_raw = false;
Ok(())
}
@ -288,7 +315,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
options.permissions = Some(0o644);
}
*options.permissions.as_mut().unwrap() |= 0o100000;
self.start_entry(name, options)?;
self.start_entry(name, options, None)?;
self.writing_to_file = true;
Ok(())
}
@ -309,6 +336,85 @@ impl<W: Write + io::Seek> ZipWriter<W> {
self.start_file(path_to_string(path), options)
}
/// Add a new file, under a new name, using the already compressed data from a ZIP file being
/// read. This allows faster copies of the `ZipFile` since there is no need to decompress and
/// compress it again. Any `ZipFile` metadata is copied and not checked, for example the file CRC.
///
/// The last modification time, the compression method and, when the source entry reports one,
/// the Unix mode are carried over to the new entry.
///
/// # Errors
///
/// Returns an error if the new entry cannot be started or if copying the raw data fails.
///
/// ```no_run
/// use std::fs::File;
/// use std::io::{Read, Seek, Write};
/// use zip::{ZipArchive, ZipWriter};
///
/// fn copy_rename<R, W>(
///     src: &mut ZipArchive<R>,
///     dst: &mut ZipWriter<W>,
/// ) -> zip::result::ZipResult<()>
/// where
///     R: Read + Seek,
///     W: Write + Seek,
/// {
///     // Retrieve file entry by name
///     let file = src.by_name("src_file.txt")?;
///
///     // Copy and rename the previously obtained file entry to the destination zip archive
///     dst.raw_copy_file_rename(file, "new_name.txt")?;
///
///     Ok(())
/// }
/// ```
pub fn raw_copy_file_rename<S>(&mut self, mut file: ZipFile, name: S) -> ZipResult<()>
where
    S: Into<String>,
{
    let mut options = FileOptions::default()
        .last_modified_time(file.last_modified())
        .compression_method(file.compression());
    if let Some(perms) = file.unix_mode() {
        // `FileOptions` is `Copy` and its setters return the updated value, so the result
        // has to be assigned back; calling the setter on its own silently drops the mode.
        options = options.unix_permissions(perms);
    }

    // The CRC and sizes are taken from the source entry instead of being recomputed.
    let raw_values = ZipRawValues {
        crc32: file.crc32(),
        compressed_size: file.compressed_size(),
        uncompressed_size: file.size(),
    };

    self.start_entry(name, options, Some(raw_values))?;
    self.writing_to_file = true;

    // NOTE(review): the raw reader bypasses decryption while the new entry is written with
    // `encrypted: false`; copying an encrypted entry probably needs extra handling — confirm.
    io::copy(file.get_raw_reader(), self)?;

    Ok(())
}
/// Copy an entry from a ZIP file being read into this archive, keeping its name and reusing
/// its already compressed data. Because nothing is decompressed or recompressed the copy is
/// faster than a read followed by a write. The `ZipFile` metadata, for example the file CRC,
/// is copied and not checked.
///
/// ```no_run
/// use std::fs::File;
/// use std::io::{Read, Seek, Write};
/// use zip::{ZipArchive, ZipWriter};
///
/// fn copy<R, W>(src: &mut ZipArchive<R>, dst: &mut ZipWriter<W>) -> zip::result::ZipResult<()>
/// where
///     R: Read + Seek,
///     W: Write + Seek,
/// {
///     // Retrieve file entry by name
///     let file = src.by_name("src_file.txt")?;
///
///     // Copy the previously obtained file entry to the destination zip archive
///     dst.raw_copy_file(file)?;
///
///     Ok(())
/// }
/// ```
pub fn raw_copy_file(&mut self, file: ZipFile) -> ZipResult<()> {
    // The name must be copied out before `file` is moved into the call below.
    let entry_name = String::from(file.name());
    self.raw_copy_file_rename(file, entry_name)
}
/// Add a directory entry.
///
/// You can't write data to the file afterwards.
@ -329,7 +435,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
_ => name_as_string + "/",
};
self.start_entry(name_with_slash, options)?;
self.start_entry(name_with_slash, options, None)?;
self.writing_to_file = false;
Ok(())
}

View file

@ -1,8 +1,9 @@
use std::collections::HashSet;
use std::io::prelude::*;
use std::io::Cursor;
use std::io::{Cursor, Seek};
use std::iter::FromIterator;
use zip::write::FileOptions;
use zip::CompressionMethod;
// This test asserts that after creating a zip file, then reading its contents back out,
// the extracted data will *always* be exactly the same as the original data.
@ -10,49 +11,98 @@ use zip::write::FileOptions;
fn end_to_end() {
let file = &mut Cursor::new(Vec::new());
write_to_zip_file(file).expect("file written");
write_to_zip(file).expect("file written");
let file_contents: String = read_zip_file(file).unwrap();
assert!(file_contents.as_bytes() == LOREM_IPSUM);
check_zip_contents(file, ENTRY_NAME);
}
fn write_to_zip_file(file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<()> {
// Raw-copies one entry of a freshly written archive into a second archive, once under its
// original name and once renamed, then checks that both copies read back as the original data.
#[test]
fn copy() {
    let source = &mut Cursor::new(Vec::new());
    write_to_zip(source).expect("file written");

    let target = &mut Cursor::new(Vec::new());
    {
        let mut reader = zip::ZipArchive::new(source).unwrap();
        let mut writer = zip::ZipWriter::new(&mut *target);

        // Copy keeping the entry name.
        writer
            .raw_copy_file(reader.by_name(ENTRY_NAME).expect("file found"))
            .unwrap();

        // Copy the same entry again under a different name.
        writer
            .raw_copy_file_rename(
                reader.by_name(ENTRY_NAME).expect("file found"),
                COPY_ENTRY_NAME,
            )
            .unwrap();

        // The writer is not finished explicitly; presumably dropping it at the end of this
        // scope finalises the archive — confirm against `ZipWriter`.
    }

    let mut copied = zip::ZipArchive::new(target).unwrap();
    check_zip_file_contents(&mut copied, ENTRY_NAME);
    check_zip_file_contents(&mut copied, COPY_ENTRY_NAME);
}
fn write_to_zip(file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<()> {
let mut zip = zip::ZipWriter::new(file);
zip.add_directory("test/", Default::default())?;
let options = FileOptions::default()
.compression_method(zip::CompressionMethod::Stored)
.compression_method(CompressionMethod::Stored)
.unix_permissions(0o755);
zip.start_file("test/☃.txt", options)?;
zip.write_all(b"Hello, World!\n")?;
zip.start_file("test/lorem_ipsum.txt", Default::default())?;
zip.start_file(ENTRY_NAME, Default::default())?;
zip.write_all(LOREM_IPSUM)?;
zip.finish()?;
Ok(())
}
fn read_zip_file(zip_file: &mut Cursor<Vec<u8>>) -> zip::result::ZipResult<String> {
let mut archive = zip::ZipArchive::new(zip_file).unwrap();
fn read_zip<R: Read + Seek>(zip_file: R) -> zip::result::ZipResult<zip::ZipArchive<R>> {
let archive = zip::ZipArchive::new(zip_file).unwrap();
let expected_file_names = ["test/", "test/☃.txt", "test/lorem_ipsum.txt"];
let expected_file_names = ["test/", "test/☃.txt", ENTRY_NAME];
let expected_file_names = HashSet::from_iter(expected_file_names.iter().map(|&v| v));
let file_names = archive.file_names().collect::<HashSet<_>>();
assert_eq!(file_names, expected_file_names);
let mut file = archive.by_name("test/lorem_ipsum.txt")?;
Ok(archive)
}
// Reads the entry called `name` from `archive` and returns its contents as a `String`.
// A missing entry is reported through the returned `ZipResult`; a failed read panics.
fn read_zip_file<R>(archive: &mut zip::ZipArchive<R>, name: &str) -> zip::result::ZipResult<String>
where
    R: Read + Seek,
{
    let mut entry = archive.by_name(name)?;

    let mut text = String::new();
    entry.read_to_string(&mut text).unwrap();

    Ok(text)
}
// Opens `zip_file` through `read_zip` and checks that the entry `name` holds the expected text.
fn check_zip_contents(zip_file: &mut Cursor<Vec<u8>>, name: &str) {
    check_zip_file_contents(&mut read_zip(zip_file).unwrap(), name);
}
// Asserts that the entry `name` of `archive` reads back byte-for-byte as `LOREM_IPSUM`.
fn check_zip_file_contents<R>(archive: &mut zip::ZipArchive<R>, name: &str)
where
    R: Read + Seek,
{
    let file_contents = read_zip_file(archive, name).unwrap();
    assert!(file_contents.as_bytes() == LOREM_IPSUM);
}
const LOREM_IPSUM : &'static [u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. In tellus elit, tristique vitae mattis egestas, ultricies vitae risus. Quisque sit amet quam ut urna aliquet
molestie. Proin blandit ornare dui, a tempor nisl accumsan in. Praesent a consequat felis. Morbi metus diam, auctor in auctor vel, feugiat id odio. Curabitur ex ex,
dictum quis auctor quis, suscipit id lorem. Aliquam vestibulum dolor nec enim vehicula, porta tristique augue tincidunt. Vivamus ut gravida est. Sed pellentesque, dolor
vitae tristique consectetur, neque lectus pulvinar dui, sed feugiat purus diam id lectus. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per
inceptos himenaeos. Maecenas feugiat velit in ex ultrices scelerisque id id neque.
";
// Name of the archive entry that holds `LOREM_IPSUM` in the archives written by these tests.
const ENTRY_NAME: &str = "test/lorem_ipsum.txt";
// Name given to the renamed copy of that entry in the `copy` test.
const COPY_ENTRY_NAME: &str = "test/lorem_ipsum_renamed.txt";