Merge pull request #276 from a1phyr/cheap_clone

Make `ZipArchive` cheap to clone
Alexander Zaitsev 2022-02-07 13:53:46 +03:00 committed by GitHub
commit a9e1436655
Signed by: DevComp
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 34 deletions
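With this change, the parsed central-directory metadata sits behind an `Arc`, so cloning a `ZipArchive` copies only the reader and bumps a reference count. A minimal sketch of how a caller might use this, assuming an archive held in memory behind `Cursor<&[u8]>` (a reader that is itself cheap to clone); the `archive.zip` path is only a placeholder:

```rust
use std::io::Cursor;

fn main() -> zip::result::ZipResult<()> {
    // A reader that is itself cheap to clone: a cursor over a borrowed byte slice.
    // "archive.zip" is a placeholder path for this sketch.
    let bytes: &[u8] = include_bytes!("archive.zip");
    let mut archive = zip::ZipArchive::new(Cursor::new(bytes))?;

    // Cloning now bumps an `Arc` refcount for the parsed metadata and clones the
    // reader; the central directory is not re-read or re-parsed.
    let mut second_handle = archive.clone();

    println!("{} entries", archive.len());
    let first = second_handle.by_index(0)?;
    println!("first entry: {}", first.name());
    Ok(())
}
```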

src/read.rs

@@ -7,13 +7,14 @@ use crate::cp437::FromCp437;
 use crate::crc32::Crc32Reader;
 use crate::result::{InvalidPassword, ZipError, ZipResult};
 use crate::spec;
-use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
+use crate::types::{AesMode, AesVendorVersion, AtomicU64, DateTime, System, ZipFileData};
 use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
 use byteorder::{LittleEndian, ReadBytesExt};
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::io::{self, prelude::*};
 use std::path::{Component, Path};
+use std::sync::Arc;
 
 #[cfg(any(
     feature = "deflate",
@@ -33,8 +34,21 @@ mod ffi {
     pub const S_IFREG: u32 = 0o0100000;
 }
 
+/// Extract immutable data from `ZipArchive` to make it cheap to clone
+#[derive(Debug)]
+struct Shared {
+    files: Vec<ZipFileData>,
+    names_map: HashMap<String, usize>,
+    offset: u64,
+    comment: Vec<u8>,
+}
+
 /// ZIP archive reader
 ///
+/// At the moment, this type is cheap to clone if this is the case for the
+/// reader it uses. However, this is not guaranteed by this crate and it may
+/// change in the future.
+///
 /// ```no_run
 /// use std::io::prelude::*;
 /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
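The `Shared` struct introduced above is what makes the cheap clone possible: everything parsed from the central directory is moved behind an `Arc`, so cloning the archive copies only the reader plus a pointer. A small standalone sketch of that pattern, with a `Vec<String>` standing in for the real metadata (illustrative only, not crate code):

```rust
use std::io::Cursor;
use std::sync::Arc;

// Illustrative only: immutable metadata parsed once, then shared via `Arc`.
#[derive(Clone)]
struct Archive<R> {
    reader: R,
    shared: Arc<Vec<String>>, // stand-in for the crate's `Shared` struct
}

fn main() {
    let a = Archive {
        reader: Cursor::new(vec![0u8; 16]),
        shared: Arc::new(vec!["a.txt".to_string(), "b.txt".to_string()]),
    };
    let b = a.clone();

    // Both handles point at the same metadata allocation; only the refcount changed.
    assert!(Arc::ptr_eq(&a.shared, &b.shared));
    assert_eq!(Arc::strong_count(&b.shared), 2);
}
```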
@@ -52,10 +66,7 @@ mod ffi {
 #[derive(Clone, Debug)]
 pub struct ZipArchive<R> {
     reader: R,
-    files: Vec<ZipFileData>,
-    names_map: HashMap<String, usize>,
-    offset: u64,
-    comment: Vec<u8>,
+    shared: Arc<Shared>,
 }
 
 #[allow(clippy::large_enum_variant)]
@@ -171,7 +182,7 @@ pub struct ZipFile<'a> {
 }
 
 fn find_content<'a>(
-    data: &mut ZipFileData,
+    data: &ZipFileData,
     reader: &'a mut (impl Read + Seek),
 ) -> ZipResult<io::Take<&'a mut dyn Read>> {
     // Parse local header
@@ -185,9 +196,10 @@ fn find_content<'a>(
     let file_name_length = reader.read_u16::<LittleEndian>()? as u64;
     let extra_field_length = reader.read_u16::<LittleEndian>()? as u64;
     let magic_and_header = 4 + 22 + 2 + 2;
-    data.data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
+    let data_start = data.header_start + magic_and_header + file_name_length + extra_field_length;
+    data.data_start.store(data_start);
 
-    reader.seek(io::SeekFrom::Start(data.data_start))?;
+    reader.seek(io::SeekFrom::Start(data_start))?;
     Ok((reader as &mut dyn Read).take(data.compressed_size))
 }
@@ -407,13 +419,14 @@ impl<R: Read + io::Seek> ZipArchive<R> {
             files.push(file);
         }
 
-        Ok(ZipArchive {
-            reader,
+        let shared = Arc::new(Shared {
             files,
             names_map,
             offset: archive_offset,
             comment: footer.zip_file_comment,
-        })
+        });
+
+        Ok(ZipArchive { reader, shared })
     }
 
     /// Extract a Zip archive into a directory, overwriting files if they
     /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
@@ -456,7 +469,7 @@ impl<R: Read + io::Seek> ZipArchive<R> {
     /// Number of files contained in this zip.
     pub fn len(&self) -> usize {
-        self.files.len()
+        self.shared.files.len()
     }
 
     /// Whether this zip archive contains no files
@@ -469,17 +482,17 @@ impl<R: Read + io::Seek> ZipArchive<R> {
     /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
     /// of that prepended data.
     pub fn offset(&self) -> u64 {
-        self.offset
+        self.shared.offset
     }
 
     /// Get the comment of the zip archive.
     pub fn comment(&self) -> &[u8] {
-        &self.comment
+        &self.shared.comment
     }
 
     /// Returns an iterator over all the file and directory names in this archive.
     pub fn file_names(&self) -> impl Iterator<Item = &str> {
-        self.names_map.keys().map(|s| s.as_str())
+        self.shared.names_map.keys().map(|s| s.as_str())
     }
 
     /// Search for a file entry by name, decrypt with given password
@@ -501,7 +514,7 @@ impl<R: Read + io::Seek> ZipArchive<R> {
         name: &str,
         password: Option<&[u8]>,
     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
-        let index = match self.names_map.get(name) {
+        let index = match self.shared.names_map.get(name) {
             Some(index) => *index,
             None => {
                 return Err(ZipError::FileNotFound);
@@ -529,8 +542,9 @@ impl<R: Read + io::Seek> ZipArchive<R> {
     /// Get a contained file by index without decompressing it
     pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
         let reader = &mut self.reader;
-        self.files
-            .get_mut(file_number)
+        self.shared
+            .files
+            .get(file_number)
             .ok_or(ZipError::FileNotFound)
             .and_then(move |data| {
                 Ok(ZipFile {
@@ -546,10 +560,11 @@ impl<R: Read + io::Seek> ZipArchive<R> {
         file_number: usize,
         mut password: Option<&[u8]>,
     ) -> ZipResult<Result<ZipFile<'a>, InvalidPassword>> {
-        if file_number >= self.files.len() {
-            return Err(ZipError::FileNotFound);
-        }
-        let data = &mut self.files[file_number];
+        let data = self
+            .shared
+            .files
+            .get(file_number)
+            .ok_or(ZipError::FileNotFound)?;
 
         match (password, data.encrypted) {
             (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)),
@@ -658,7 +673,7 @@ pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
         file_comment,
         header_start: offset,
         central_header_start,
-        data_start: 0,
+        data_start: AtomicU64::new(0),
         external_attributes: external_file_attributes,
         large_file: false,
         aes_mode: None,
@@ -933,7 +948,7 @@ impl<'a> ZipFile<'a> {
     /// Get the starting offset of the data of the compressed file
     pub fn data_start(&self) -> u64 {
-        self.data.data_start
+        self.data.data_start.load()
     }
 
     /// Get the starting offset of the zip header for this file
@@ -1054,7 +1069,7 @@ pub fn read_zipfile_from_stream<'a, R: io::Read>(
         // header_start and data start are not available, but also don't matter, since seeking is
         // not available.
         header_start: 0,
-        data_start: 0,
+        data_start: AtomicU64::new(0),
         central_header_start: 0,
         // The external_attributes field is only available in the central directory.
        // We set this to zero, which should be valid as the docs state 'If input came
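Because `ZipFileData` now lives inside `Arc<Shared>`, `find_content` can no longer take `&mut ZipFileData`; the lazily computed `data_start` offset is instead recorded through interior mutability. A rough sketch of that idea using std's `AtomicU64` directly (the crate's own wrapper, added below in the types module, just fixes the ordering to `Relaxed` and adds `Clone`); the `Entry` type and the 30-byte offset are made up for illustration:

```rust
use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

// Illustrative entry: the data offset is cached lazily even though the entry
// is only reachable through a shared reference (e.g. via `Arc`).
struct Entry {
    header_start: u64,
    data_start: AtomicU64,
}

fn locate_data(entry: &Entry) -> u64 {
    // Pretend we parsed the local file header and the payload starts 30 bytes
    // past `header_start`; record it so later calls can reuse it.
    let computed = entry.header_start + 30;
    entry.data_start.store(computed, Ordering::Relaxed);
    computed
}

fn main() {
    let shared = Arc::new(Entry {
        header_start: 100,
        data_start: AtomicU64::new(0),
    });
    let handle = Arc::clone(&shared);

    assert_eq!(locate_data(&handle), 130);
    // The update is visible through every handle to the same entry.
    assert_eq!(shared.data_start.load(Ordering::Relaxed), 130);
}
```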

src/types.rs

@@ -1,5 +1,7 @@
 //! Types that specify what is contained in a ZIP.
+use std::sync::atomic;
+
 #[cfg(feature = "time")]
 use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time};
@@ -193,6 +195,37 @@ impl DateTime {
 pub const DEFAULT_VERSION: u8 = 46;
 
+/// A type like `AtomicU64` except it implements `Clone` and has predefined
+/// ordering.
+///
+/// It uses `Relaxed` ordering because it is not used for synchronisation.
+#[derive(Debug)]
+pub struct AtomicU64(atomic::AtomicU64);
+
+impl AtomicU64 {
+    pub fn new(v: u64) -> Self {
+        Self(atomic::AtomicU64::new(v))
+    }
+
+    pub fn load(&self) -> u64 {
+        self.0.load(atomic::Ordering::Relaxed)
+    }
+
+    pub fn store(&self, val: u64) {
+        self.0.store(val, atomic::Ordering::Relaxed)
+    }
+
+    pub fn get_mut(&mut self) -> &mut u64 {
+        self.0.get_mut()
+    }
+}
+
+impl Clone for AtomicU64 {
+    fn clone(&self) -> Self {
+        Self(atomic::AtomicU64::new(self.load()))
+    }
+}
+
 /// Structure representing a ZIP file.
 #[derive(Debug, Clone)]
 pub struct ZipFileData {
@@ -229,7 +262,7 @@ pub struct ZipFileData {
     /// Note that when this is not known, it is set to 0
     pub central_header_start: u64,
     /// Specifies where the compressed data of the file starts
-    pub data_start: u64,
+    pub data_start: AtomicU64,
     /// External file attributes
     pub external_attributes: u32,
     /// Reserve local ZIP64 extra field
@@ -346,7 +379,7 @@ mod test {
             extra_field: Vec::new(),
             file_comment: String::new(),
             header_start: 0,
-            data_start: 0,
+            data_start: AtomicU64::new(0),
             central_header_start: 0,
             external_attributes: 0,
             large_file: false,
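The `Clone` impl on the wrapper snapshots the current value, so a cloned `ZipFileData` starts with the same cached offset but the two copies update independently afterwards. A tiny demonstration of that snapshot semantics written against std's atomic (illustrative, not crate code):

```rust
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let original = AtomicU64::new(42);

    // What the wrapper's `clone` does: load with `Relaxed` and wrap in a fresh atomic.
    let snapshot = AtomicU64::new(original.load(Ordering::Relaxed));

    // Later writes to one copy do not affect the other.
    original.store(100, Ordering::Relaxed);
    assert_eq!(snapshot.load(Ordering::Relaxed), 42);
    assert_eq!(original.load(Ordering::Relaxed), 100);
}
```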

src/write.rs

@@ -4,7 +4,7 @@ use crate::compression::CompressionMethod;
 use crate::read::{central_header_to_zip_file, ZipArchive, ZipFile};
 use crate::result::{ZipError, ZipResult};
 use crate::spec;
-use crate::types::{DateTime, System, ZipFileData, DEFAULT_VERSION};
+use crate::types::{AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION};
 use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
 use crc32fast::Hasher;
 use std::default::Default;
@@ -345,7 +345,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
                 extra_field: Vec::new(),
                 file_comment: String::new(),
                 header_start,
-                data_start: 0,
+                data_start: AtomicU64::new(0),
                 central_header_start: 0,
                 external_attributes: permissions << 16,
                 large_file: options.large_file,
@@ -355,7 +355,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
             let header_end = writer.seek(io::SeekFrom::Current(0))?;
             self.stats.start = header_end;
-            file.data_start = header_end;
+            *file.data_start.get_mut() = header_end;
 
             self.stats.bytes_written = 0;
             self.stats.hasher = Hasher::new();
@@ -534,7 +534,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
         self.start_entry(name, options, None)?;
         self.writing_to_file = true;
         self.writing_to_extra_field = true;
-        Ok(self.files.last().unwrap().data_start)
+        Ok(self.files.last().unwrap().data_start.load())
     }
 
     /// End local and start central extra data. Requires [`ZipWriter::start_file_with_extra_data`].
@@ -563,6 +563,8 @@ impl<W: Write + io::Seek> ZipWriter<W> {
         validate_extra_data(file)?;
 
+        let data_start = file.data_start.get_mut();
+
         if !self.writing_to_central_extra_field_only {
             let writer = self.inner.get_plain();
@@ -570,9 +572,9 @@ impl<W: Write + io::Seek> ZipWriter<W> {
             writer.write_all(&file.extra_field)?;
 
             // Update final `data_start`.
-            let header_end = file.data_start + file.extra_field.len() as u64;
+            let header_end = *data_start + file.extra_field.len() as u64;
             self.stats.start = header_end;
-            file.data_start = header_end;
+            *data_start = header_end;
 
             // Update extra field length in local file header.
             let extra_field_length =
@@ -586,7 +588,7 @@ impl<W: Write + io::Seek> ZipWriter<W> {
         self.writing_to_extra_field = false;
         self.writing_to_central_extra_field_only = false;
 
-        Ok(file.data_start)
+        Ok(*data_start)
     }
 
     /// Add a new file using the already compressed data from a ZIP file being read and renames it, this
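On the writer side the entries are not behind an `Arc`, so the code above uses `get_mut()` to obtain a plain `&mut u64` and skip atomic operations entirely. A minimal illustration of that std API; the `header_end` value of 512 is made up:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    // Uniquely owned atomic, as in `ZipWriter`'s own `ZipFileData`.
    let mut data_start = AtomicU64::new(0);

    // `get_mut` requires `&mut self`, proving exclusive access, so no atomic
    // instruction is needed; this mirrors `*file.data_start.get_mut() = header_end;`.
    let header_end: u64 = 512;
    *data_start.get_mut() = header_end;

    assert_eq!(data_start.load(Ordering::Relaxed), 512);
}
```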