Wrap extra-data field in an Option, to save Arc overhead when empty

This commit is contained in:
Chris Hennick 2023-05-15 20:23:46 -07:00
parent 7370d63f2d
commit 1df9186527
No known key found for this signature in database
GPG key ID: 25653935CC8B6C74
4 changed files with 136 additions and 96 deletions

View file

@ -158,4 +158,10 @@
### Added ### Added
- New method `with_alignment` on `FileOptions`. - New method `with_alignment` on `FileOptions`.
## [0.8.3]
### Changed
- Improved performance for files with no extra data (should mainly affect multithreaded Mutex-guarded access).

View file

@ -737,8 +737,8 @@ fn central_header_to_zip_file_inner<R: Read>(
uncompressed_size: uncompressed_size as u64, uncompressed_size: uncompressed_size as u64,
file_name, file_name,
file_name_raw, file_name_raw,
extra_field: Arc::new(extra_field), extra_field: Some(Arc::new(extra_field)),
central_extra_field: Arc::new(vec![]), central_extra_field: None,
file_comment, file_comment,
header_start: offset, header_start: offset,
central_header_start, central_header_start,
@ -770,69 +770,73 @@ fn central_header_to_zip_file_inner<R: Read>(
} }
fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
let mut reader = io::Cursor::new(file.extra_field.as_ref()); if let Some(extra_data) = &file.extra_field {
let mut reader = io::Cursor::new(extra_data.as_ref());
while (reader.position() as usize) < file.extra_field.len() { while (reader.position() as usize) < extra_data.len() {
let kind = reader.read_u16::<LittleEndian>()?; let kind = reader.read_u16::<LittleEndian>()?;
let len = reader.read_u16::<LittleEndian>()?; let len = reader.read_u16::<LittleEndian>()?;
let mut len_left = len as i64; let mut len_left = len as i64;
match kind { match kind {
// Zip64 extended information extra field // Zip64 extended information extra field
0x0001 => { 0x0001 => {
if file.uncompressed_size == spec::ZIP64_BYTES_THR { if file.uncompressed_size == spec::ZIP64_BYTES_THR {
file.large_file = true; file.large_file = true;
file.uncompressed_size = reader.read_u64::<LittleEndian>()?; file.uncompressed_size = reader.read_u64::<LittleEndian>()?;
len_left -= 8; len_left -= 8;
}
if file.compressed_size == spec::ZIP64_BYTES_THR {
file.large_file = true;
file.compressed_size = reader.read_u64::<LittleEndian>()?;
len_left -= 8;
}
if file.header_start == spec::ZIP64_BYTES_THR {
file.header_start = reader.read_u64::<LittleEndian>()?;
len_left -= 8;
}
} }
if file.compressed_size == spec::ZIP64_BYTES_THR { 0x9901 => {
file.large_file = true; // AES
file.compressed_size = reader.read_u64::<LittleEndian>()?; if len != 7 {
len_left -= 8; return Err(ZipError::UnsupportedArchive(
"AES extra data field has an unsupported length",
));
}
let vendor_version = reader.read_u16::<LittleEndian>()?;
let vendor_id = reader.read_u16::<LittleEndian>()?;
let aes_mode = reader.read_u8()?;
let compression_method = reader.read_u16::<LittleEndian>()?;
if vendor_id != 0x4541 {
return Err(ZipError::InvalidArchive("Invalid AES vendor"));
}
let vendor_version = match vendor_version {
0x0001 => AesVendorVersion::Ae1,
0x0002 => AesVendorVersion::Ae2,
_ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")),
};
match aes_mode {
0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version)),
0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version)),
0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version)),
_ => {
return Err(ZipError::InvalidArchive("Invalid AES encryption strength"))
}
};
file.compression_method = {
#[allow(deprecated)]
CompressionMethod::from_u16(compression_method)
};
} }
if file.header_start == spec::ZIP64_BYTES_THR { _ => {
file.header_start = reader.read_u64::<LittleEndian>()?; // Other fields are ignored
len_left -= 8;
} }
} }
0x9901 => {
// AES
if len != 7 {
return Err(ZipError::UnsupportedArchive(
"AES extra data field has an unsupported length",
));
}
let vendor_version = reader.read_u16::<LittleEndian>()?;
let vendor_id = reader.read_u16::<LittleEndian>()?;
let aes_mode = reader.read_u8()?;
let compression_method = reader.read_u16::<LittleEndian>()?;
if vendor_id != 0x4541 { // We could also check for < 0 to check for errors
return Err(ZipError::InvalidArchive("Invalid AES vendor")); if len_left > 0 {
} reader.seek(io::SeekFrom::Current(len_left))?;
let vendor_version = match vendor_version {
0x0001 => AesVendorVersion::Ae1,
0x0002 => AesVendorVersion::Ae2,
_ => return Err(ZipError::InvalidArchive("Invalid AES vendor version")),
};
match aes_mode {
0x01 => file.aes_mode = Some((AesMode::Aes128, vendor_version)),
0x02 => file.aes_mode = Some((AesMode::Aes192, vendor_version)),
0x03 => file.aes_mode = Some((AesMode::Aes256, vendor_version)),
_ => return Err(ZipError::InvalidArchive("Invalid AES encryption strength")),
};
file.compression_method = {
#[allow(deprecated)]
CompressionMethod::from_u16(compression_method)
};
} }
_ => {
// Other fields are ignored
}
}
// We could also check for < 0 to check for errors
if len_left > 0 {
reader.seek(io::SeekFrom::Current(len_left))?;
} }
} }
Ok(()) Ok(())
@ -979,7 +983,11 @@ impl<'a> ZipFile<'a> {
/// Get the extra data of the zip header for this file /// Get the extra data of the zip header for this file
pub fn extra_data(&self) -> &[u8] { pub fn extra_data(&self) -> &[u8] {
&self.data.extra_field if let Some(extra_data) = &self.data.extra_field {
extra_data
} else {
&[]
}
} }
/// Get the starting offset of the data of the compressed file /// Get the starting offset of the data of the compressed file
@ -1098,8 +1106,8 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
uncompressed_size: uncompressed_size as u64, uncompressed_size: uncompressed_size as u64,
file_name, file_name,
file_name_raw, file_name_raw,
extra_field: Arc::new(extra_field), extra_field: Some(Arc::new(extra_field)),
central_extra_field: Arc::new(vec![]), central_extra_field: None,
file_comment: String::new(), // file comment is only available in the central directory file_comment: String::new(), // file comment is only available in the central directory
// header_start and data start are not available, but also don't matter, since seeking is // header_start and data start are not available, but also don't matter, since seeking is
// not available. // not available.

View file

@ -362,9 +362,9 @@ pub struct ZipFileData {
/// Raw file name. To be used when file_name was incorrectly decoded. /// Raw file name. To be used when file_name was incorrectly decoded.
pub file_name_raw: Vec<u8>, pub file_name_raw: Vec<u8>,
/// Extra field usually used for storage expansion /// Extra field usually used for storage expansion
pub extra_field: Arc<Vec<u8>>, pub extra_field: Option<Arc<Vec<u8>>>,
/// Extra field only written to central directory /// Extra field only written to central directory
pub central_extra_field: Arc<Vec<u8>>, pub central_extra_field: Option<Arc<Vec<u8>>>,
/// File comment /// File comment
pub file_comment: String, pub file_comment: String,
/// Specifies where the local header of the file starts /// Specifies where the local header of the file starts
@ -531,8 +531,8 @@ mod test {
uncompressed_size: 0, uncompressed_size: 0,
file_name: file_name.clone(), file_name: file_name.clone(),
file_name_raw: file_name.into_bytes(), file_name_raw: file_name.into_bytes(),
extra_field: Arc::new(vec![]), extra_field: None,
central_extra_field: Arc::new(vec![]), central_extra_field: None,
file_comment: String::new(), file_comment: String::new(),
header_start: 0, header_start: 0,
data_start: AtomicU64::new(0), data_start: AtomicU64::new(0),

View file

@ -134,8 +134,8 @@ pub struct FileOptions {
pub(crate) permissions: Option<u32>, pub(crate) permissions: Option<u32>,
pub(crate) large_file: bool, pub(crate) large_file: bool,
encrypt_with: Option<ZipCryptoKeys>, encrypt_with: Option<ZipCryptoKeys>,
extra_data: Arc<Vec<u8>>, extra_data: Option<Arc<Vec<u8>>>,
central_extra_data: Arc<Vec<u8>>, central_extra_data: Option<Arc<Vec<u8>>>,
alignment: u16, alignment: u16,
} }
@ -149,8 +149,8 @@ impl arbitrary::Arbitrary<'_> for FileOptions {
permissions: Option::<u32>::arbitrary(u)?, permissions: Option::<u32>::arbitrary(u)?,
large_file: bool::arbitrary(u)?, large_file: bool::arbitrary(u)?,
encrypt_with: Option::<ZipCryptoKeys>::arbitrary(u)?, encrypt_with: Option::<ZipCryptoKeys>::arbitrary(u)?,
extra_data: Arc::new(vec![]), extra_data: None,
central_extra_data: Arc::new(vec![]), central_extra_data: None,
alignment: u16::arbitrary(u)?, alignment: u16::arbitrary(u)?,
}; };
u.arbitrary_loop(Some(0), Some((u16::MAX / 4) as u32), |u| { u.arbitrary_loop(Some(0), Some((u16::MAX / 4) as u32), |u| {
@ -242,7 +242,14 @@ impl FileOptions {
) -> ZipResult<()> { ) -> ZipResult<()> {
validate_extra_data(header_id, data)?; validate_extra_data(header_id, data)?;
let len = data.len() + 4; let len = data.len() + 4;
if self.extra_data.len() + self.central_extra_data.len() + len > u16::MAX as usize { if self.extra_data.as_ref().map_or(0, |data| data.len())
+ self
.central_extra_data
.as_ref()
.map_or(0, |data| data.len())
+ len
> u16::MAX as usize
{
Err(InvalidArchive( Err(InvalidArchive(
"Extra data field would be longer than allowed", "Extra data field would be longer than allowed",
)) ))
@ -252,12 +259,17 @@ impl FileOptions {
} else { } else {
&mut self.extra_data &mut self.extra_data
}; };
let vec = Arc::get_mut(field); let vec = match field {
let vec = match vec { Some(arc) => match Arc::get_mut(arc) {
Some(exclusive) => exclusive, Some(exclusive) => exclusive,
None => {
*field = Some(Arc::new(arc.to_vec()));
Arc::get_mut(field.as_mut().unwrap()).unwrap()
}
},
None => { None => {
*field = Arc::new(field.to_vec()); *field = Some(Arc::new(Vec::new()));
Arc::get_mut(field).unwrap() Arc::get_mut(field.as_mut().unwrap()).unwrap()
} }
}; };
vec.reserve_exact(data.len() + 4); vec.reserve_exact(data.len() + 4);
@ -271,11 +283,11 @@ impl FileOptions {
/// Removes the extra data fields. /// Removes the extra data fields.
#[must_use] #[must_use]
pub fn clear_extra_data(mut self) -> FileOptions { pub fn clear_extra_data(mut self) -> FileOptions {
if self.extra_data.len() > 0 { if self.extra_data.is_some() {
self.extra_data = Arc::new(vec![]); self.extra_data = None;
} }
if self.central_extra_data.len() > 0 { if self.central_extra_data.is_some() {
self.central_extra_data = Arc::new(vec![]); self.central_extra_data = None;
} }
self self
} }
@ -312,8 +324,8 @@ impl Default for FileOptions {
permissions: None, permissions: None,
large_file: false, large_file: false,
encrypt_with: None, encrypt_with: None,
extra_data: Arc::new(vec![]), extra_data: None,
central_extra_data: Arc::new(vec![]), central_extra_data: None,
alignment: 1, alignment: 1,
} }
} }
@ -582,11 +594,17 @@ impl<W: Write + Seek> ZipWriter<W> {
// file name length // file name length
writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?; writer.write_u16::<LittleEndian>(file.file_name.as_bytes().len() as u16)?;
// extra field length // extra field length
let mut extra_field_length = file.extra_field.len(); let mut extra_field_length = file.extra_field.as_ref().map_or(0, |data| data.len());
if file.large_file { if file.large_file {
extra_field_length += 20; extra_field_length += 20;
} }
if extra_field_length + file.central_extra_field.len() > u16::MAX as usize { if extra_field_length
+ file
.central_extra_field
.as_ref()
.map_or(0, |data| data.len())
> u16::MAX as usize
{
let _ = self.abort_file(); let _ = self.abort_file();
return Err(InvalidArchive("Extra data field is too large")); return Err(InvalidArchive("Extra data field is too large"));
} }
@ -598,7 +616,9 @@ impl<W: Write + Seek> ZipWriter<W> {
if file.large_file { if file.large_file {
write_local_zip64_extra_field(writer, file)?; write_local_zip64_extra_field(writer, file)?;
} }
writer.write_all(&file.extra_field)?; if let Some(extra_data) = &file.extra_field {
writer.write_all(extra_data)?;
}
let mut header_end = writer.stream_position()?; let mut header_end = writer.stream_position()?;
if options.alignment > 1 { if options.alignment > 1 {
let align = options.alignment as u64; let align = options.alignment as u64;
@ -1262,8 +1282,11 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
// extra field length // extra field length
writer.write_u16::<LittleEndian>( writer.write_u16::<LittleEndian>(
zip64_extra_field_length zip64_extra_field_length
+ file.extra_field.len() as u16 + file.extra_field.as_ref().map_or(0, |data| data.len()) as u16
+ file.central_extra_field.len() as u16, + file
.central_extra_field
.as_ref()
.map_or(0, |data| data.len()) as u16,
)?; )?;
// file comment length // file comment length
writer.write_u16::<LittleEndian>(0)?; writer.write_u16::<LittleEndian>(0)?;
@ -1280,8 +1303,12 @@ fn write_central_directory_header<T: Write>(writer: &mut T, file: &ZipFileData)
// zip64 extra field // zip64 extra field
writer.write_all(&zip64_extra_field[..zip64_extra_field_length as usize])?; writer.write_all(&zip64_extra_field[..zip64_extra_field_length as usize])?;
// extra field // extra field
writer.write_all(&file.extra_field)?; if let Some(extra_data) = &file.extra_field {
writer.write_all(&file.central_extra_field)?; writer.write_all(extra_data)?;
}
if let Some(extra_data) = &file.central_extra_field {
writer.write_all(extra_data)?;
}
// file comment // file comment
// <none> // <none>
@ -1406,7 +1433,6 @@ mod test {
use crate::ZipArchive; use crate::ZipArchive;
use std::io; use std::io;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::sync::Arc;
#[test] #[test]
fn write_empty_zip() { fn write_empty_zip() {
@ -1526,8 +1552,8 @@ mod test {
permissions: Some(33188), permissions: Some(33188),
large_file: false, large_file: false,
encrypt_with: None, encrypt_with: None,
extra_data: Arc::new(vec![]), extra_data: None,
central_extra_data: Arc::new(vec![]), central_extra_data: None,
alignment: 1, alignment: 1,
}; };
writer.start_file("mimetype", options).unwrap(); writer.start_file("mimetype", options).unwrap();
@ -1566,8 +1592,8 @@ mod test {
permissions: Some(33188), permissions: Some(33188),
large_file: false, large_file: false,
encrypt_with: None, encrypt_with: None,
extra_data: Arc::new(vec![]), extra_data: None,
central_extra_data: Arc::new(vec![]), central_extra_data: None,
alignment: 0, alignment: 0,
}; };
writer.start_file(RT_TEST_FILENAME, options).unwrap(); writer.start_file(RT_TEST_FILENAME, options).unwrap();
@ -1616,8 +1642,8 @@ mod test {
permissions: Some(33188), permissions: Some(33188),
large_file: false, large_file: false,
encrypt_with: None, encrypt_with: None,
extra_data: Arc::new(vec![]), extra_data: None,
central_extra_data: Arc::new(vec![]), central_extra_data: None,
alignment: 0, alignment: 0,
}; };
writer.start_file(RT_TEST_FILENAME, options).unwrap(); writer.start_file(RT_TEST_FILENAME, options).unwrap();