Merge pull request #91 from cosmicexplorer/oldpr396a
perf: use indexmap in read::zip_archive::Shared instead of a separate vec and hashmap
This commit is contained in:
commit
5ad3fe4acf
3 changed files with 45 additions and 61 deletions
|
@ -29,6 +29,7 @@ constant_time_eq = { version = "0.3.0", optional = true }
|
||||||
crc32fast = "1.4.0"
|
crc32fast = "1.4.0"
|
||||||
displaydoc = { version = "0.2.4", default-features = false }
|
displaydoc = { version = "0.2.4", default-features = false }
|
||||||
flate2 = { version = "1.0.28", default-features = false, optional = true }
|
flate2 = { version = "1.0.28", default-features = false, optional = true }
|
||||||
|
indexmap = "2"
|
||||||
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
|
hmac = { version = "0.12.1", optional = true, features = ["reset"] }
|
||||||
num_enum = "0.7.2"
|
num_enum = "0.7.2"
|
||||||
pbkdf2 = { version = "0.12.2", optional = true }
|
pbkdf2 = { version = "0.12.2", optional = true }
|
||||||
|
|
70
src/read.rs
70
src/read.rs
|
@ -11,8 +11,8 @@ use crate::result::{ZipError, ZipResult};
|
||||||
use crate::spec;
|
use crate::spec;
|
||||||
use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
|
use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData};
|
||||||
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
|
use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator};
|
||||||
use std::borrow::{Borrow, Cow};
|
use indexmap::IndexMap;
|
||||||
use std::collections::HashMap;
|
use std::borrow::Cow;
|
||||||
use std::io::{self, prelude::*};
|
use std::io::{self, prelude::*};
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
@ -47,8 +47,7 @@ pub(crate) mod zip_archive {
|
||||||
/// Extract immutable data from `ZipArchive` to make it cheap to clone
|
/// Extract immutable data from `ZipArchive` to make it cheap to clone
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub(crate) struct Shared {
|
pub(crate) struct Shared {
|
||||||
pub(crate) files: Box<[super::ZipFileData]>,
|
pub(crate) files: super::IndexMap<Box<str>, super::ZipFileData>,
|
||||||
pub(crate) names_map: super::HashMap<Box<str>, usize>,
|
|
||||||
pub(super) offset: u64,
|
pub(super) offset: u64,
|
||||||
pub(super) dir_start: u64,
|
pub(super) dir_start: u64,
|
||||||
}
|
}
|
||||||
|
@ -333,7 +332,7 @@ pub(crate) struct CentralDirectoryInfo {
|
||||||
|
|
||||||
impl<R> ZipArchive<R> {
|
impl<R> ZipArchive<R> {
|
||||||
pub(crate) fn from_finalized_writer(
|
pub(crate) fn from_finalized_writer(
|
||||||
files: Vec<ZipFileData>,
|
files: IndexMap<Box<str>, ZipFileData>,
|
||||||
comment: Vec<u8>,
|
comment: Vec<u8>,
|
||||||
reader: R,
|
reader: R,
|
||||||
central_start: u64,
|
central_start: u64,
|
||||||
|
@ -344,15 +343,10 @@ impl<R> ZipArchive<R> {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
/* This is where the whole file starts. */
|
/* This is where the whole file starts. */
|
||||||
let initial_offset = files.first().unwrap().header_start;
|
let (_, first_header) = files.first().unwrap();
|
||||||
let names_map: HashMap<Box<str>, usize> = files
|
let initial_offset = first_header.header_start;
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.map(|(i, d)| (d.file_name.clone(), i))
|
|
||||||
.collect();
|
|
||||||
let shared = Arc::new(zip_archive::Shared {
|
let shared = Arc::new(zip_archive::Shared {
|
||||||
files: files.into_boxed_slice(),
|
files,
|
||||||
names_map,
|
|
||||||
offset: initial_offset,
|
offset: initial_offset,
|
||||||
dir_start: central_start,
|
dir_start: central_start,
|
||||||
});
|
});
|
||||||
|
@ -368,10 +362,10 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
pub(crate) fn merge_contents<W: Write + io::Seek>(
|
pub(crate) fn merge_contents<W: Write + io::Seek>(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut w: W,
|
mut w: W,
|
||||||
) -> ZipResult<Vec<ZipFileData>> {
|
) -> ZipResult<IndexMap<Box<str>, ZipFileData>> {
|
||||||
let mut new_files = self.shared.files.clone();
|
let mut new_files = self.shared.files.clone();
|
||||||
if new_files.is_empty() {
|
if new_files.is_empty() {
|
||||||
return Ok(vec![]);
|
return Ok(IndexMap::new());
|
||||||
}
|
}
|
||||||
/* The first file header will probably start at the beginning of the file, but zip doesn't
|
/* The first file header will probably start at the beginning of the file, but zip doesn't
|
||||||
* enforce that, and executable zips like PEX files will have a shebang line so will
|
* enforce that, and executable zips like PEX files will have a shebang line so will
|
||||||
|
@ -382,7 +376,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
|
|
||||||
let new_initial_header_start = w.stream_position()?;
|
let new_initial_header_start = w.stream_position()?;
|
||||||
/* Push back file header starts for all entries in the covered files. */
|
/* Push back file header starts for all entries in the covered files. */
|
||||||
new_files.iter_mut().try_for_each(|f| {
|
new_files.values_mut().try_for_each(|f| {
|
||||||
/* This is probably the only really important thing to change. */
|
/* This is probably the only really important thing to change. */
|
||||||
f.header_start = f.header_start.checked_add(new_initial_header_start).ok_or(
|
f.header_start = f.header_start.checked_add(new_initial_header_start).ok_or(
|
||||||
ZipError::InvalidArchive("new header start from merge would have been too large"),
|
ZipError::InvalidArchive("new header start from merge would have been too large"),
|
||||||
|
@ -423,7 +417,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
io::copy(&mut limited_raw, &mut w)?;
|
io::copy(&mut limited_raw, &mut w)?;
|
||||||
|
|
||||||
/* Return the files we've just written to the data stream. */
|
/* Return the files we've just written to the data stream. */
|
||||||
Ok(new_files.into_vec())
|
Ok(new_files)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_directory_info_zip32(
|
fn get_directory_info_zip32(
|
||||||
|
@ -582,20 +576,17 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
} else {
|
} else {
|
||||||
dir_info.number_of_files
|
dir_info.number_of_files
|
||||||
};
|
};
|
||||||
let mut files = Vec::with_capacity(file_capacity);
|
let mut files = IndexMap::with_capacity(file_capacity);
|
||||||
let mut names_map = HashMap::with_capacity(file_capacity);
|
|
||||||
reader.seek(io::SeekFrom::Start(dir_info.directory_start))?;
|
reader.seek(io::SeekFrom::Start(dir_info.directory_start))?;
|
||||||
for _ in 0..dir_info.number_of_files {
|
for _ in 0..dir_info.number_of_files {
|
||||||
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
|
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
|
||||||
names_map.insert(file.file_name.clone(), files.len());
|
files.insert(file.file_name.clone(), file);
|
||||||
files.push(file);
|
|
||||||
}
|
}
|
||||||
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
||||||
unsupported_zip_error("Support for multi-disk files is not implemented")
|
unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||||
} else {
|
} else {
|
||||||
Ok(Shared {
|
Ok(Shared {
|
||||||
files: files.into(),
|
files,
|
||||||
names_map,
|
|
||||||
offset: dir_info.archive_offset,
|
offset: dir_info.archive_offset,
|
||||||
dir_start: dir_info.directory_start,
|
dir_start: dir_info.directory_start,
|
||||||
})
|
})
|
||||||
|
@ -699,7 +690,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
|
|
||||||
/// Returns an iterator over all the file and directory names in this archive.
|
/// Returns an iterator over all the file and directory names in this archive.
|
||||||
pub fn file_names(&self) -> impl Iterator<Item = &str> {
|
pub fn file_names(&self) -> impl Iterator<Item = &str> {
|
||||||
self.shared.names_map.keys().map(Box::borrow)
|
self.shared.files.keys().map(|s| s.as_ref())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Search for a file entry by name, decrypt with given password
|
/// Search for a file entry by name, decrypt with given password
|
||||||
|
@ -727,7 +718,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
/// Get the index of a file entry by name, if it's present.
|
/// Get the index of a file entry by name, if it's present.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn index_for_name(&self, name: &str) -> Option<usize> {
|
pub fn index_for_name(&self, name: &str) -> Option<usize> {
|
||||||
self.shared.names_map.get(name).copied()
|
self.shared.files.get_index_of(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the index of a file entry by path, if it's present.
|
/// Get the index of a file entry by path, if it's present.
|
||||||
|
@ -741,8 +732,8 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
pub fn name_for_index(&self, index: usize) -> Option<&str> {
|
pub fn name_for_index(&self, index: usize) -> Option<&str> {
|
||||||
self.shared
|
self.shared
|
||||||
.files
|
.files
|
||||||
.get(index)
|
.get_index(index)
|
||||||
.map(|file_data| &*file_data.file_name)
|
.map(|(name, _)| name.as_ref())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn by_name_with_optional_password<'a>(
|
fn by_name_with_optional_password<'a>(
|
||||||
|
@ -750,7 +741,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
name: &str,
|
name: &str,
|
||||||
password: Option<&[u8]>,
|
password: Option<&[u8]>,
|
||||||
) -> ZipResult<ZipFile<'a>> {
|
) -> ZipResult<ZipFile<'a>> {
|
||||||
let Some(index) = self.index_for_name(name) else {
|
let Some(index) = self.shared.files.get_index_of(name) else {
|
||||||
return Err(ZipError::FileNotFound);
|
return Err(ZipError::FileNotFound);
|
||||||
};
|
};
|
||||||
self.by_index_with_optional_password(index, password)
|
self.by_index_with_optional_password(index, password)
|
||||||
|
@ -785,17 +776,16 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
/// Get a contained file by index without decompressing it
|
/// Get a contained file by index without decompressing it
|
||||||
pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
|
pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult<ZipFile<'_>> {
|
||||||
let reader = &mut self.reader;
|
let reader = &mut self.reader;
|
||||||
self.shared
|
let (_, data) = self
|
||||||
|
.shared
|
||||||
.files
|
.files
|
||||||
.get(file_number)
|
.get_index(file_number)
|
||||||
.ok_or(ZipError::FileNotFound)
|
.ok_or(ZipError::FileNotFound)?;
|
||||||
.and_then(move |data| {
|
Ok(ZipFile {
|
||||||
Ok(ZipFile {
|
crypto_reader: None,
|
||||||
crypto_reader: None,
|
reader: ZipFileReader::Raw(find_content(data, reader)?),
|
||||||
reader: ZipFileReader::Raw(find_content(data, reader)?),
|
data: Cow::Borrowed(data),
|
||||||
data: Cow::Borrowed(data),
|
})
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn by_index_with_optional_password(
|
fn by_index_with_optional_password(
|
||||||
|
@ -803,10 +793,10 @@ impl<R: Read + Seek> ZipArchive<R> {
|
||||||
file_number: usize,
|
file_number: usize,
|
||||||
mut password: Option<&[u8]>,
|
mut password: Option<&[u8]>,
|
||||||
) -> ZipResult<ZipFile<'_>> {
|
) -> ZipResult<ZipFile<'_>> {
|
||||||
let data = self
|
let (_, data) = self
|
||||||
.shared
|
.shared
|
||||||
.files
|
.files
|
||||||
.get(file_number)
|
.get_index(file_number)
|
||||||
.ok_or(ZipError::FileNotFound)?;
|
.ok_or(ZipError::FileNotFound)?;
|
||||||
|
|
||||||
match (password, data.encrypted) {
|
match (password, data.encrypted) {
|
||||||
|
|
35
src/write.rs
35
src/write.rs
|
@ -8,7 +8,7 @@ use crate::types::{ffi, DateTime, System, ZipFileData, DEFAULT_VERSION};
|
||||||
#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
|
#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))]
|
||||||
use core::num::NonZeroU64;
|
use core::num::NonZeroU64;
|
||||||
use crc32fast::Hasher;
|
use crc32fast::Hasher;
|
||||||
use std::collections::HashMap;
|
use indexmap::IndexMap;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
|
@ -110,8 +110,7 @@ pub(crate) mod zip_writer {
|
||||||
/// ```
|
/// ```
|
||||||
pub struct ZipWriter<W: Write + Seek> {
|
pub struct ZipWriter<W: Write + Seek> {
|
||||||
pub(super) inner: GenericZipWriter<W>,
|
pub(super) inner: GenericZipWriter<W>,
|
||||||
pub(super) files: Vec<ZipFileData>,
|
pub(super) files: IndexMap<Box<str>, ZipFileData>,
|
||||||
pub(super) files_by_name: HashMap<Box<str>, usize>,
|
|
||||||
pub(super) stats: ZipWriterStats,
|
pub(super) stats: ZipWriterStats,
|
||||||
pub(super) writing_to_file: bool,
|
pub(super) writing_to_file: bool,
|
||||||
pub(super) writing_raw: bool,
|
pub(super) writing_raw: bool,
|
||||||
|
@ -435,7 +434,7 @@ impl<W: Write + Seek> Write for ZipWriter<W> {
|
||||||
if let Ok(count) = write_result {
|
if let Ok(count) = write_result {
|
||||||
self.stats.update(&buf[0..count]);
|
self.stats.update(&buf[0..count]);
|
||||||
if self.stats.bytes_written > spec::ZIP64_BYTES_THR
|
if self.stats.bytes_written > spec::ZIP64_BYTES_THR
|
||||||
&& !self.files.last_mut().unwrap().large_file
|
&& !self.files.last_mut().unwrap().1.large_file
|
||||||
{
|
{
|
||||||
self.abort_file().unwrap();
|
self.abort_file().unwrap();
|
||||||
return Err(io::Error::new(
|
return Err(io::Error::new(
|
||||||
|
@ -479,8 +478,7 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
|
||||||
|
|
||||||
Ok(ZipWriter {
|
Ok(ZipWriter {
|
||||||
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
||||||
files: metadata.files.into(),
|
files: metadata.files,
|
||||||
files_by_name: metadata.names_map,
|
|
||||||
stats: Default::default(),
|
stats: Default::default(),
|
||||||
writing_to_file: false,
|
writing_to_file: false,
|
||||||
comment: footer.zip_file_comment,
|
comment: footer.zip_file_comment,
|
||||||
|
@ -641,8 +639,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
pub fn new(inner: W) -> ZipWriter<W> {
|
pub fn new(inner: W) -> ZipWriter<W> {
|
||||||
ZipWriter {
|
ZipWriter {
|
||||||
inner: Storer(MaybeEncrypted::Unencrypted(inner)),
|
inner: Storer(MaybeEncrypted::Unencrypted(inner)),
|
||||||
files: Vec::new(),
|
files: IndexMap::new(),
|
||||||
files_by_name: HashMap::new(),
|
|
||||||
stats: Default::default(),
|
stats: Default::default(),
|
||||||
writing_to_file: false,
|
writing_to_file: false,
|
||||||
writing_raw: false,
|
writing_raw: false,
|
||||||
|
@ -842,15 +839,12 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn insert_file_data(&mut self, file: ZipFileData) -> ZipResult<usize> {
|
fn insert_file_data(&mut self, file: ZipFileData) -> ZipResult<usize> {
|
||||||
let name = &file.file_name;
|
if self.files.contains_key(&file.file_name) {
|
||||||
if self.files_by_name.contains_key(name) {
|
|
||||||
return Err(InvalidArchive("Duplicate filename"));
|
return Err(InvalidArchive("Duplicate filename"));
|
||||||
}
|
}
|
||||||
let name = name.to_owned();
|
let name = file.file_name.to_owned();
|
||||||
self.files.push(file);
|
self.files.insert(name.clone(), file);
|
||||||
let index = self.files.len() - 1;
|
Ok(self.files.get_index_of(&name).unwrap())
|
||||||
self.files_by_name.insert(name, index);
|
|
||||||
Ok(index)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish_file(&mut self) -> ZipResult<()> {
|
fn finish_file(&mut self) -> ZipResult<()> {
|
||||||
|
@ -871,7 +865,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
if !self.writing_raw {
|
if !self.writing_raw {
|
||||||
let file = match self.files.last_mut() {
|
let file = match self.files.last_mut() {
|
||||||
None => return Ok(()),
|
None => return Ok(()),
|
||||||
Some(f) => f,
|
Some((_, f)) => f,
|
||||||
};
|
};
|
||||||
file.crc32 = self.stats.hasher.clone().finalize();
|
file.crc32 = self.stats.hasher.clone().finalize();
|
||||||
file.uncompressed_size = self.stats.bytes_written;
|
file.uncompressed_size = self.stats.bytes_written;
|
||||||
|
@ -911,8 +905,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
/// Removes the file currently being written from the archive if there is one, or else removes
|
/// Removes the file currently being written from the archive if there is one, or else removes
|
||||||
/// the file most recently written.
|
/// the file most recently written.
|
||||||
pub fn abort_file(&mut self) -> ZipResult<()> {
|
pub fn abort_file(&mut self) -> ZipResult<()> {
|
||||||
let last_file = self.files.pop().ok_or(ZipError::FileNotFound)?;
|
let (_, last_file) = self.files.pop().ok_or(ZipError::FileNotFound)?;
|
||||||
self.files_by_name.remove(&last_file.file_name);
|
|
||||||
let make_plain_writer = self.inner.prepare_next_writer(
|
let make_plain_writer = self.inner.prepare_next_writer(
|
||||||
Stored,
|
Stored,
|
||||||
None,
|
None,
|
||||||
|
@ -925,7 +918,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
// overwrite a valid file and corrupt the archive
|
// overwrite a valid file and corrupt the archive
|
||||||
let rewind_safe: bool = match last_file.data_start.get() {
|
let rewind_safe: bool = match last_file.data_start.get() {
|
||||||
None => self.files.is_empty(),
|
None => self.files.is_empty(),
|
||||||
Some(last_file_start) => self.files.iter().all(|file| {
|
Some(last_file_start) => self.files.values().all(|file| {
|
||||||
file.data_start
|
file.data_start
|
||||||
.get()
|
.get()
|
||||||
.is_some_and(|start| start < last_file_start)
|
.is_some_and(|start| start < last_file_start)
|
||||||
|
@ -1281,7 +1274,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
let writer = self.inner.get_plain();
|
let writer = self.inner.get_plain();
|
||||||
|
|
||||||
let central_start = writer.stream_position()?;
|
let central_start = writer.stream_position()?;
|
||||||
for file in self.files.iter() {
|
for file in self.files.values() {
|
||||||
write_central_directory_header(writer, file)?;
|
write_central_directory_header(writer, file)?;
|
||||||
}
|
}
|
||||||
let central_size = writer.stream_position()? - central_start;
|
let central_size = writer.stream_position()? - central_start;
|
||||||
|
@ -1327,7 +1320,7 @@ impl<W: Write + Seek> ZipWriter<W> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn index_by_name(&self, name: &str) -> ZipResult<usize> {
|
fn index_by_name(&self, name: &str) -> ZipResult<usize> {
|
||||||
Ok(*self.files_by_name.get(name).ok_or(ZipError::FileNotFound)?)
|
self.files.get_index_of(name).ok_or(ZipError::FileNotFound)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Adds another entry to the central directory referring to the same content as an existing
|
/// Adds another entry to the central directory referring to the same content as an existing
|
||||||
|
|
Loading…
Add table
Reference in a new issue