Make ZipArchive cheap to clone

This commit is contained in:
Benoît du Garreau 2022-02-05 15:58:31 +01:00
parent 478501289a
commit addfe01eb0
3 changed files with 82 additions and 34 deletions

View file

@ -7,13 +7,14 @@ use crate::cp437::FromCp437;
use crate::crc32::Crc32Reader;
use crate::result::{InvalidPassword, ZipError, ZipResult};
use crate::spec;
use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData};
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
use byteorder::{LittleEndian, ReadBytesExt};
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::{self, prelude::*};
use std::path::{Component, Path};
use std::sync::Arc;
#[cfg(any(
feature = "deflate",
@ -33,8 +34,19 @@ mod ffi {
pub const S_IFREG: u32 = 0o0100000;
}
/// Extract immutable data from `ZipArchive` to make it cheap to clone
#[derive(Debug)]
struct Shared {
    // Central-directory entries, in the order they were read from the archive.
    files: Vec<ZipFileData>,
    // Maps a file name to its index in `files`, for by-name lookup.
    names_map: HashMap<String, usize>,
    // Size of arbitrary data prepended to the archive; zero for a normal zip.
    offset: u64,
    // Raw bytes of the archive-level comment from the end-of-central-directory record.
    comment: Vec<u8>,
}
/// ZIP archive reader
///
/// This type is cheap to clone if this is the case for the reader it uses.
///
/// ```no_run
/// use std::io::prelude::*;
/// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
@ -52,10 +64,7 @@ mod ffi {
#[derive(Clone, Debug)]
pub struct ZipArchive<R> {
reader: R,
files: Vec<ZipFileData>,
names_map: HashMap<String, usize>,
offset: u64,
comment: Vec<u8>,
shared: Arc<Shared>,
}
#[allow(clippy::large_enum_variant)]
@ -171,7 +180,7 @@ pub struct ZipFile<'a> {
}
fn find_content<'a>(
data: &mut ZipFileData,
data: &ZipFileData,
reader: &'a mut (impl Read + Seek),
) -> ZipResult<io::Take<&'a mut dyn Read>> {
// Parse local header
@ -185,9 +194,10 @@ fn find_content<'a>(
let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
let magic_and_header = 4 + 22 + 2 + 2;
data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
let data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
data.data_start.store(data_start);
reader.seek(io::SeekFrom::Start(data.data_start))?;
reader.seek(io::SeekFrom::Start(data_start))?;
Ok((reader as &mut dyn Read).take(data.compressed_size))
}
@ -407,13 +417,14 @@ impl<R: Read + io::Seek> ZipArchive<R> {
files.push(file);
}
Ok(ZipArchive {
reader,
let shared = Arc::new(Shared {
files,
names_map,
offset: archive_offset,
comment: footer.zip_file_comment,
})
});
Ok(ZipArchive { reader, shared })
}
/// Extract a Zip archive into a directory, overwriting files if they
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
@ -456,7 +467,7 @@ impl<R: Read + io::Seek> ZipArchive<R> {
/// Number of files contained in this zip.
pub fn len(&self) -> usize {
self.files.len()
self.shared.files.len()
}
/// Whether this zip archive contains no files
@ -469,17 +480,17 @@ impl<R: Read + io::Seek> ZipArchive<R> {
/// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
/// of that prepended data.
pub fn offset(&self) -> u64 {
self.offset
self.shared.offset
}
/// Get the comment of the zip archive.
pub fn comment(&self) -> &[u8] {
&self.comment
&self.shared.comment
}
/// Returns an iterator over all the file and directory names in this archive.
pub fn file_names(&self) -> impl Iterator<Item = &str> {
self.names_map.keys().map(|s| s.as_str())
self.shared.names_map.keys().map(|s| s.as_str())
}
/// Search for a file entry by name, decrypt with given password
@ -501,7 +512,7 @@ impl<R: Read + io::Seek> ZipArchive<R> {
name: &str,
password: Option<&[u8]>,
) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
let index = match self.names_map.get(name) {
let index = match self.shared.names_map.get(name) {
Some(index) => *index,
None => {
return Err(ZipError::FileNotFound);
@ -529,8 +540,9 @@ impl<R: Read + io::Seek> ZipArchive<R> {
/// Get a contained file by index without decompressing it
pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
let reader = &mut self.reader;
self.files
.get_mut(file_number)
self.shared
.files
.get(file_number)
.ok_or(ZipError::FileNotFound)
.and_then(move |data| {
Ok(ZipFile {
@ -546,10 +558,11 @@ impl<R: Read + io::Seek> ZipArchive<R> {
file_number: usize,
mut password: Option<&[u8]>,
) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
if file_number >= self.files.len() {
return Err(ZipError::FileNotFound);
}
let data = &mut self.files[file_number];
let data = self
.shared
.files
.get(file_number)
.ok_or(ZipError::FileNotFound)?;
match (password, data.encrypted) {
(None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
@ -658,7 +671,7 @@ pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
file_comment,
header_start: offset,
central_header_start,
data_start: 0,
data_start: AtomicU64::new(0),
external_attributes: external_file_attributes,
large_file: false,
aes_mode: None,
@ -933,7 +946,7 @@ impl<'a> ZipFile<'a> {
/// Get the starting offset of the data of the compressed file
pub fn data_start(&self) -> u64 {
self.data.data_start
self.data.data_start.load()
}
/// Get the starting offset of the zip header for this file
@ -1054,7 +1067,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
// header_start and data start are not available, but also don't matter, since seeking is
// not available.
header_start: 0,
data_start: 0,
data_start: AtomicU64::new(0),
central_header_start: 0,
// The external_attributes field is only available in the central directory.
// We set this to zero, which should be valid as the docs state 'If input came

View file

@ -1,5 +1,7 @@
//! Types that specify what is contained in a ZIP.
use std::sync::atomic;
#[cfg(feature = "time")]
use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
@ -193,6 +195,37 @@ impl DateTime {
pub const DEFAULT_VERSION: u8 = 46;
/// A type like `AtomicU64` except it implements `Clone` and has predefined
/// ordering.
///
/// It uses `Relaxed` ordering because it is not used for synchronisation.
#[derive(Debug)]
pub struct AtomicU64(atomic::AtomicU64);

impl AtomicU64 {
    /// Creates a new atomic integer holding `v`.
    ///
    /// `const` so the type can be used in constants and statics, mirroring
    /// `std::sync::atomic::AtomicU64::new`.
    pub const fn new(v: u64) -> Self {
        Self(atomic::AtomicU64::new(v))
    }

    /// Loads the current value (`Relaxed` ordering).
    pub fn load(&self) -> u64 {
        self.0.load(atomic::Ordering::Relaxed)
    }

    /// Stores `val` (`Relaxed` ordering).
    pub fn store(&self, val: u64) {
        self.0.store(val, atomic::Ordering::Relaxed)
    }

    /// Returns a mutable reference to the underlying value.
    ///
    /// No atomic operation is needed: `&mut self` guarantees exclusive access.
    pub fn get_mut(&mut self) -> &mut u64 {
        self.0.get_mut()
    }
}

impl Clone for AtomicU64 {
    /// Clones by snapshotting the current value into a fresh atomic; the
    /// clone is independent of the original afterwards.
    fn clone(&self) -> Self {
        Self::new(self.load())
    }
}
/// Structure representing a ZIP file.
#[derive(Debug, Clone)]
pub struct ZipFileData {
@ -229,7 +262,7 @@ pub struct ZipFileData {
/// Note that when this is not known, it is set to 0
pub central_header_start: u64,
/// Specifies where the compressed data of the file starts
pub data_start: u64,
pub data_start: AtomicU64,
/// External file attributes
pub external_attributes: u32,
/// Reserve local ZIP64 extra field
@ -346,7 +379,7 @@ mod test {
extra_field: Vec::new(),
file_comment: String::new(),
header_start: 0,
data_start: 0,
data_start: AtomicU64::new(0),
central_header_start: 0,
external_attributes: 0,
large_file: false,

View file

@ -4,7 +4,7 @@ use crate::compression::CompressionMethod;
use crate::read::{central_header_to_zip_file, ZipArchive, ZipFile};
use crate::result::{ZipError, ZipResult};
use crate::spec;
use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION};
use crate::types::{AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use crc32fast::Hasher;
use std::default::Default;
@ -345,7 +345,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
extra_field: Vec::new(),
file_comment: String::new(),
header_start,
data_start: 0,
data_start: AtomicU64::new(0),
central_header_start: 0,
external_attributes: permissions << 16,
large_file: options.large_file,
@ -355,7 +355,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
let header_end = writer.seek(io::SeekFrom::Current(0))?;
self.stats.start = header_end;
file.data_start = header_end;
*file.data_start.get_mut() = header_end;
self.stats.bytes_written = 0;
self.stats.hasher = Hasher::new();
@ -534,7 +534,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
self.start_entry(name, options, None)?;
self.writing_to_file = true;
self.writing_to_extra_field = true;
Ok(self.files.last().unwrap().data_start)
Ok(self.files.last().unwrap().data_start.load())
}
/// End local and start central extra data. Requires [`ZipWriter::start_file_with_extra_data`].
@ -563,6 +563,8 @@ impl<W: Write + io::Seek> ZipWriter<W> {
validate_extra_data(file)?;
let data_start = file.data_start.get_mut();
if !self.writing_to_central_extra_field_only {
let writer = self.inner.get_plain();
@ -570,9 +572,9 @@ impl<W: Write + io::Seek> ZipWriter<W> {
writer.write_all(&file.extra_field)?;
// Update final `data_start`.
let header_end = file.data_start + file.extra_field.len() as u64;
let header_end = *data_start + file.extra_field.len() as u64;
self.stats.start = header_end;
file.data_start = header_end;
*data_start = header_end;
// Update extra field length in local file header.
let extra_field_length =
@ -586,7 +588,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
self.writing_to_extra_field = false;
self.writing_to_central_extra_field_only = false;
Ok(file.data_start)
Ok(*data_start)
}
/// Add a new file using the already compressed data from a ZIP file being read and renames it, this