Merge branch 'master' into oldpr384
This commit is contained in:
commit
01809627d4
5 changed files with 341 additions and 33 deletions
|
@ -31,6 +31,7 @@ crc32fast = "1.4.0"
|
|||
displaydoc = { version = "0.2.4", default-features = false }
|
||||
flate2 = { version = "1.0.28", default-features = false, optional = true }
|
||||
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
|
||||
num_enum = "0.6.1"
|
||||
pbkdf2 = { version = "0.12.2", optional = true }
|
||||
sha1 = { version = "0.10.6", optional = true }
|
||||
thiserror = "1.0.48"
|
||||
|
@ -87,3 +88,7 @@ harness = false
|
|||
[[bench]]
|
||||
name = "read_metadata"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "merge_archive"
|
||||
harness = false
|
||||
|
|
120
benches/merge_archive.rs
Normal file
120
benches/merge_archive.rs
Normal file
|
@ -0,0 +1,120 @@
|
|||
use bencher::{benchmark_group, benchmark_main};
|
||||
|
||||
use std::io::{Cursor, Read, Seek, Write};
|
||||
|
||||
use bencher::Bencher;
|
||||
use getrandom::getrandom;
|
||||
use zip::{result::ZipResult, write::SimpleFileOptions, ZipArchive, ZipWriter};
|
||||
|
||||
fn generate_random_archive(
|
||||
num_entries: usize,
|
||||
entry_size: usize,
|
||||
options: SimpleFileOptions,
|
||||
) -> ZipResult<(usize, ZipArchive<Cursor<Vec<u8>>>)> {
|
||||
let buf = Cursor::new(Vec::new());
|
||||
let mut zip = ZipWriter::new(buf);
|
||||
|
||||
let mut bytes = vec![0u8; entry_size];
|
||||
for i in 0..num_entries {
|
||||
let name = format!("random{}.dat", i);
|
||||
zip.start_file(name, options)?;
|
||||
getrandom(&mut bytes).unwrap();
|
||||
zip.write_all(&bytes)?;
|
||||
}
|
||||
|
||||
let buf = zip.finish()?.into_inner();
|
||||
let len = buf.len();
|
||||
|
||||
Ok((len, ZipArchive::new(Cursor::new(buf))?))
|
||||
}
|
||||
|
||||
fn perform_merge<R: Read + Seek, W: Write + Seek>(
|
||||
src: ZipArchive<R>,
|
||||
mut target: ZipWriter<W>,
|
||||
) -> ZipResult<ZipWriter<W>> {
|
||||
target.merge_archive(src)?;
|
||||
Ok(target)
|
||||
}
|
||||
|
||||
fn perform_raw_copy_file<R: Read + Seek, W: Write + Seek>(
|
||||
mut src: ZipArchive<R>,
|
||||
mut target: ZipWriter<W>,
|
||||
) -> ZipResult<ZipWriter<W>> {
|
||||
for i in 0..src.len() {
|
||||
let entry = src.by_index(i)?;
|
||||
target.raw_copy_file(entry)?;
|
||||
}
|
||||
Ok(target)
|
||||
}
|
||||
|
||||
const NUM_ENTRIES: usize = 100;
|
||||
const ENTRY_SIZE: usize = 1024;
|
||||
|
||||
fn merge_archive_stored(bench: &mut Bencher) {
|
||||
let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
|
||||
let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap();
|
||||
|
||||
bench.bytes = len as u64;
|
||||
|
||||
bench.iter(|| {
|
||||
let buf = Cursor::new(Vec::new());
|
||||
let zip = ZipWriter::new(buf);
|
||||
let mut zip = perform_merge(src.clone(), zip).unwrap();
|
||||
let buf = zip.finish().unwrap().into_inner();
|
||||
assert_eq!(buf.len(), len);
|
||||
});
|
||||
}
|
||||
|
||||
fn merge_archive_compressed(bench: &mut Bencher) {
|
||||
let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);
|
||||
let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap();
|
||||
|
||||
bench.bytes = len as u64;
|
||||
|
||||
bench.iter(|| {
|
||||
let buf = Cursor::new(Vec::new());
|
||||
let zip = ZipWriter::new(buf);
|
||||
let mut zip = perform_merge(src.clone(), zip).unwrap();
|
||||
let buf = zip.finish().unwrap().into_inner();
|
||||
assert_eq!(buf.len(), len);
|
||||
});
|
||||
}
|
||||
|
||||
fn merge_archive_raw_copy_file_stored(bench: &mut Bencher) {
|
||||
let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Stored);
|
||||
let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap();
|
||||
|
||||
bench.bytes = len as u64;
|
||||
|
||||
bench.iter(|| {
|
||||
let buf = Cursor::new(Vec::new());
|
||||
let zip = ZipWriter::new(buf);
|
||||
let mut zip = perform_raw_copy_file(src.clone(), zip).unwrap();
|
||||
let buf = zip.finish().unwrap().into_inner();
|
||||
assert_eq!(buf.len(), len);
|
||||
});
|
||||
}
|
||||
|
||||
fn merge_archive_raw_copy_file_compressed(bench: &mut Bencher) {
|
||||
let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);
|
||||
let (len, src) = generate_random_archive(NUM_ENTRIES, ENTRY_SIZE, options).unwrap();
|
||||
|
||||
bench.bytes = len as u64;
|
||||
|
||||
bench.iter(|| {
|
||||
let buf = Cursor::new(Vec::new());
|
||||
let zip = ZipWriter::new(buf);
|
||||
let mut zip = perform_raw_copy_file(src.clone(), zip).unwrap();
|
||||
let buf = zip.finish().unwrap().into_inner();
|
||||
assert_eq!(buf.len(), len);
|
||||
});
|
||||
}
|
||||
|
||||
benchmark_group!(
|
||||
benches,
|
||||
merge_archive_stored,
|
||||
merge_archive_compressed,
|
||||
merge_archive_raw_copy_file_stored,
|
||||
merge_archive_raw_copy_file_compressed,
|
||||
);
|
||||
benchmark_main!(benches);
|
114
src/read.rs
114
src/read.rs
|
@ -52,7 +52,6 @@ pub(crate) mod zip_archive {
|
|||
pub(crate) names_map: super::HashMap<Box<str>, usize>,
|
||||
pub(super) offset: u64,
|
||||
pub(super) dir_start: u64,
|
||||
pub(super) dir_end: u64,
|
||||
}
|
||||
|
||||
/// ZIP archive reader
|
||||
|
@ -332,7 +331,101 @@ pub(crate) struct CentralDirectoryInfo {
|
|||
pub(crate) disk_with_central_directory: u32,
|
||||
}
|
||||
|
||||
impl<R> ZipArchive<R> {
|
||||
pub(crate) fn from_finalized_writer(
|
||||
files: Vec<ZipFileData>,
|
||||
comment: Vec<u8>,
|
||||
reader: R,
|
||||
central_start: u64,
|
||||
) -> ZipResult<Self> {
|
||||
if files.is_empty() {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"attempt to finalize empty zip writer into readable",
|
||||
));
|
||||
}
|
||||
/* This is where the whole file starts. */
|
||||
let initial_offset = files.first().unwrap().header_start;
|
||||
let names_map: HashMap<Box<str>, usize> = files
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, d)| (d.file_name.clone(), i))
|
||||
.collect();
|
||||
let shared = Arc::new(zip_archive::Shared {
|
||||
files: files.into_boxed_slice(),
|
||||
names_map,
|
||||
offset: initial_offset,
|
||||
dir_start: central_start,
|
||||
});
|
||||
Ok(Self {
|
||||
reader,
|
||||
shared,
|
||||
comment: comment.into_boxed_slice().into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
pub(crate) fn merge_contents<W: Write + io::Seek>(
|
||||
&mut self,
|
||||
mut w: W,
|
||||
) -> ZipResult<Vec<ZipFileData>> {
|
||||
let mut new_files = self.shared.files.clone();
|
||||
if new_files.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
/* The first file header will probably start at the beginning of the file, but zip doesn't
|
||||
* enforce that, and executable zips like PEX files will have a shebang line so will
|
||||
* definitely be greater than 0.
|
||||
*
|
||||
* assert_eq!(0, new_files[0].header_start); // Avoid this.
|
||||
*/
|
||||
|
||||
let new_initial_header_start = w.stream_position()?;
|
||||
/* Push back file header starts for all entries in the covered files. */
|
||||
new_files.iter_mut().try_for_each(|f| {
|
||||
/* This is probably the only really important thing to change. */
|
||||
f.header_start = f.header_start.checked_add(new_initial_header_start).ok_or(
|
||||
ZipError::InvalidArchive("new header start from merge would have been too large"),
|
||||
)?;
|
||||
/* This is only ever used internally to cache metadata lookups (it's not part of the
|
||||
* zip spec), and 0 is the sentinel value. */
|
||||
f.central_header_start = 0;
|
||||
/* This is an atomic variable so it can be updated from another thread in the
|
||||
* implementation (which is good!). */
|
||||
if let Some(old_data_start) = f.data_start.take() {
|
||||
let new_data_start = old_data_start.checked_add(new_initial_header_start).ok_or(
|
||||
ZipError::InvalidArchive("new data start from merge would have been too large"),
|
||||
)?;
|
||||
f.data_start.get_or_init(|| new_data_start);
|
||||
}
|
||||
Ok::<_, ZipError>(())
|
||||
})?;
|
||||
|
||||
/* Rewind to the beginning of the file.
|
||||
*
|
||||
* NB: we *could* decide to start copying from new_files[0].header_start instead, which
|
||||
* would avoid copying over e.g. any pex shebangs or other file contents that start before
|
||||
* the first zip file entry. However, zip files actually shouldn't care about garbage data
|
||||
* in *between* real entries, since the central directory header records the correct start
|
||||
* location of each, and keeping track of that math is more complicated logic that will only
|
||||
* rarely be used, since most zips that get merged together are likely to be produced
|
||||
* specifically for that purpose (and therefore are unlikely to have a shebang or other
|
||||
* preface). Finally, this preserves any data that might actually be useful.
|
||||
*/
|
||||
self.reader.rewind()?;
|
||||
/* Find the end of the file data. */
|
||||
let length_to_read = self.shared.dir_start;
|
||||
/* Produce a Read that reads bytes up until the start of the central directory header.
|
||||
* This "as &mut dyn Read" trick is used elsewhere to avoid having to clone the underlying
|
||||
* handle, which it really shouldn't need to anyway. */
|
||||
let mut limited_raw = (&mut self.reader as &mut dyn Read).take(length_to_read);
|
||||
/* Copy over file data from source archive directly. */
|
||||
io::copy(&mut limited_raw, &mut w)?;
|
||||
|
||||
/* Return the files we've just written to the data stream. */
|
||||
Ok(new_files.into_vec())
|
||||
}
|
||||
|
||||
fn get_directory_info_zip32(
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64,
|
||||
|
@ -483,11 +576,12 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
result.and_then(|dir_info| {
|
||||
// If the parsed number of files is greater than the offset then
|
||||
// something fishy is going on and we shouldn't trust number_of_files.
|
||||
let file_capacity = if dir_info.number_of_files > cde_start_pos as usize {
|
||||
0
|
||||
} else {
|
||||
dir_info.number_of_files
|
||||
};
|
||||
let file_capacity =
|
||||
if dir_info.number_of_files > dir_info.directory_start as usize {
|
||||
0
|
||||
} else {
|
||||
dir_info.number_of_files
|
||||
};
|
||||
let mut files = Vec::with_capacity(file_capacity);
|
||||
let mut names_map = HashMap::with_capacity(file_capacity);
|
||||
reader.seek(io::SeekFrom::Start(dir_info.directory_start))?;
|
||||
|
@ -496,7 +590,6 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
names_map.insert(file.file_name.clone(), files.len());
|
||||
files.push(file);
|
||||
}
|
||||
let dir_end = reader.seek(io::SeekFrom::Start(dir_info.directory_start))?;
|
||||
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
||||
unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
} else {
|
||||
|
@ -505,7 +598,6 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
names_map,
|
||||
offset: dir_info.archive_offset,
|
||||
dir_start: dir_info.directory_start,
|
||||
dir_end,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
@ -525,7 +617,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
}
|
||||
let shared = ok_results
|
||||
.into_iter()
|
||||
.max_by_key(|shared| shared.dir_end)
|
||||
.max_by_key(|shared| shared.dir_start)
|
||||
.unwrap();
|
||||
reader.seek(io::SeekFrom::Start(shared.dir_start))?;
|
||||
Ok(shared)
|
||||
|
@ -813,7 +905,7 @@ fn central_header_to_zip_file_inner<R: Read>(
|
|||
|
||||
// Construct the result
|
||||
let mut result = ZipFileData {
|
||||
system: System::from_u8((version_made_by >> 8) as u8),
|
||||
system: System::from((version_made_by >> 8) as u8),
|
||||
version_made_by: version_made_by as u8,
|
||||
encrypted,
|
||||
using_data_descriptor,
|
||||
|
@ -1197,7 +1289,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
|
|||
};
|
||||
|
||||
let mut result = ZipFileData {
|
||||
system: System::from_u8((version_made_by >> 8) as u8),
|
||||
system: System::from((version_made_by >> 8) as u8),
|
||||
version_made_by: version_made_by as u8,
|
||||
encrypted,
|
||||
using_data_descriptor,
|
||||
|
|
29
src/types.rs
29
src/types.rs
|
@ -1,4 +1,5 @@
|
|||
//! Types that specify what is contained in a ZIP.
|
||||
use num_enum::{FromPrimitive, IntoPrimitive};
|
||||
use path::{Component, Path, PathBuf};
|
||||
use std::path;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
@ -50,25 +51,15 @@ use crate::result::DateTimeRangeError;
|
|||
#[cfg(feature = "time")]
|
||||
use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, FromPrimitive, IntoPrimitive)]
|
||||
#[repr(u8)]
|
||||
pub enum System {
|
||||
Dos = 0,
|
||||
Unix = 3,
|
||||
#[num_enum(default)]
|
||||
Unknown,
|
||||
}
|
||||
|
||||
impl System {
|
||||
pub const fn from_u8(system: u8) -> System {
|
||||
use self::System::*;
|
||||
|
||||
match system {
|
||||
0 => Dos,
|
||||
3 => Unix,
|
||||
_ => Unknown,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Representation of a moment in time.
|
||||
///
|
||||
/// Zip files use an old format from DOS to store timestamps,
|
||||
|
@ -516,10 +507,14 @@ mod test {
|
|||
#[test]
|
||||
fn system() {
|
||||
use super::System;
|
||||
assert_eq!(System::Dos as u16, 0u16);
|
||||
assert_eq!(System::Unix as u16, 3u16);
|
||||
assert_eq!(System::from_u8(0), System::Dos);
|
||||
assert_eq!(System::from_u8(3), System::Unix);
|
||||
assert_eq!(u8::from(System::Dos), 0u8);
|
||||
assert_eq!(System::Dos as u8, 0u8);
|
||||
assert_eq!(System::Unix as u8, 3u8);
|
||||
assert_eq!(u8::from(System::Unix), 3u8);
|
||||
assert_eq!(System::from(0), System::Dos);
|
||||
assert_eq!(System::from(3), System::Unix);
|
||||
assert_eq!(u8::from(System::Unknown), 4u8);
|
||||
assert_eq!(System::Unknown as u8, 4u8);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
106
src/write.rs
106
src/write.rs
|
@ -597,6 +597,39 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
|
|||
) -> ZipResult<()> {
|
||||
self.deep_copy_file(&path_to_string(src_path), &path_to_string(dest_path))
|
||||
}
|
||||
|
||||
/// Write the zip file into the backing stream, then produce a readable archive of that data.
|
||||
///
|
||||
/// This method avoids parsing the central directory records at the end of the stream for
|
||||
/// a slight performance improvement over running [`ZipArchive::new()`] on the output of
|
||||
/// [`Self::finish()`].
|
||||
///
|
||||
///```
|
||||
/// # fn main() -> Result<(), zip::result::ZipError> {
|
||||
/// use std::io::{Cursor, prelude::*};
|
||||
/// use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions};
|
||||
///
|
||||
/// let buf = Cursor::new(Vec::new());
|
||||
/// let mut zip = ZipWriter::new(buf);
|
||||
/// let options = SimpleFileOptions::default();
|
||||
/// zip.start_file("a.txt", options)?;
|
||||
/// zip.write_all(b"hello\n")?;
|
||||
///
|
||||
/// let mut zip = zip.finish_into_readable()?;
|
||||
/// let mut s: String = String::new();
|
||||
/// zip.by_name("a.txt")?.read_to_string(&mut s)?;
|
||||
/// assert_eq!(s, "hello\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
///```
|
||||
pub fn finish_into_readable(mut self) -> ZipResult<ZipArchive<A>> {
|
||||
let central_start = self.finalize()?;
|
||||
let inner = mem::replace(&mut self.inner, Closed).unwrap();
|
||||
let comment = mem::take(&mut self.comment);
|
||||
let files = mem::take(&mut self.files);
|
||||
let archive = ZipArchive::from_finalized_writer(files, comment, inner, central_start)?;
|
||||
Ok(archive)
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write + Seek> ZipWriter<W> {
|
||||
|
@ -935,6 +968,68 @@ impl<W: Write + Seek> ZipWriter<W> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/* TODO: link to/use Self::finish_into_readable() from https://github.com/zip-rs/zip/pull/400 in
|
||||
* this docstring. */
|
||||
/// Copy over the entire contents of another archive verbatim.
|
||||
///
|
||||
/// This method extracts file metadata from the `source` archive, then simply performs a single
|
||||
/// big [`io::copy()`](io::copy) to transfer all the actual file contents without any
|
||||
/// decompression or decryption. This is more performant than the equivalent operation of
|
||||
/// calling [`Self::raw_copy_file()`] for each entry from the `source` archive in sequence.
|
||||
///
|
||||
///```
|
||||
/// # fn main() -> Result<(), zip::result::ZipError> {
|
||||
/// use std::io::{Cursor, prelude::*};
|
||||
/// use zip::{ZipArchive, ZipWriter, write::SimpleFileOptions};
|
||||
///
|
||||
/// let buf = Cursor::new(Vec::new());
|
||||
/// let mut zip = ZipWriter::new(buf);
|
||||
/// zip.start_file("a.txt", SimpleFileOptions::default())?;
|
||||
/// zip.write_all(b"hello\n")?;
|
||||
/// let src = ZipArchive::new(zip.finish()?)?;
|
||||
///
|
||||
/// let buf = Cursor::new(Vec::new());
|
||||
/// let mut zip = ZipWriter::new(buf);
|
||||
/// zip.start_file("b.txt", SimpleFileOptions::default())?;
|
||||
/// zip.write_all(b"hey\n")?;
|
||||
/// let src2 = ZipArchive::new(zip.finish()?)?;
|
||||
///
|
||||
/// let buf = Cursor::new(Vec::new());
|
||||
/// let mut zip = ZipWriter::new(buf);
|
||||
/// zip.merge_archive(src)?;
|
||||
/// zip.merge_archive(src2)?;
|
||||
/// let mut result = ZipArchive::new(zip.finish()?)?;
|
||||
///
|
||||
/// let mut s: String = String::new();
|
||||
/// result.by_name("a.txt")?.read_to_string(&mut s)?;
|
||||
/// assert_eq!(s, "hello\n");
|
||||
/// s.clear();
|
||||
/// result.by_name("b.txt")?.read_to_string(&mut s)?;
|
||||
/// assert_eq!(s, "hey\n");
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
///```
|
||||
pub fn merge_archive<R>(&mut self, mut source: ZipArchive<R>) -> ZipResult<()>
|
||||
where
|
||||
R: Read + io::Seek,
|
||||
{
|
||||
self.finish_file()?;
|
||||
|
||||
/* Ensure we accept the file contents on faith (and avoid overwriting the data).
|
||||
* See raw_copy_file_rename(). */
|
||||
self.writing_to_file = true;
|
||||
self.writing_raw = true;
|
||||
|
||||
let writer = self.inner.get_plain();
|
||||
/* Get the file entries from the source archive. */
|
||||
let new_files = source.merge_contents(writer)?;
|
||||
|
||||
/* These file entries are now ours! */
|
||||
self.files.extend(new_files);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn normalize_options<T: FileOptionExtension>(options: &mut FileOptions<T>) {
|
||||
if options.permissions.is_none() {
|
||||
options.permissions = Some(0o644);
|
||||
|
@ -1101,7 +1196,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
|||
/// This will return the writer, but one should normally not append any data to the end of the file.
|
||||
/// Note that the zipfile will also be finished on drop.
|
||||
pub fn finish(&mut self) -> ZipResult<W> {
|
||||
self.finalize()?;
|
||||
let _central_start = self.finalize()?;
|
||||
let inner = mem::replace(&mut self.inner, Closed);
|
||||
Ok(inner.unwrap())
|
||||
}
|
||||
|
@ -1161,10 +1256,10 @@ impl<W: Write + Seek> ZipWriter<W> {
|
|||
self.add_symlink(path_to_string(path), path_to_string(target), options)
|
||||
}
|
||||
|
||||
fn finalize(&mut self) -> ZipResult<()> {
|
||||
fn finalize(&mut self) -> ZipResult<u64> {
|
||||
self.finish_file()?;
|
||||
|
||||
{
|
||||
let central_start = {
|
||||
let central_start = self.write_central_and_footer()?;
|
||||
let writer = self.inner.get_plain();
|
||||
let footer_end = writer.stream_position()?;
|
||||
|
@ -1176,9 +1271,10 @@ impl<W: Write + Seek> ZipWriter<W> {
|
|||
writer.seek(SeekFrom::End(-(central_and_footer_size as i64)))?;
|
||||
self.write_central_and_footer()?;
|
||||
}
|
||||
}
|
||||
central_start
|
||||
};
|
||||
|
||||
Ok(())
|
||||
Ok(central_start)
|
||||
}
|
||||
|
||||
fn write_central_and_footer(&mut self) -> Result<u64, ZipError> {
|
||||
|
|
Loading…
Add table
Reference in a new issue