diff --git a/examples/extract.rs b/examples/extract.rs index 7359b53e..57cfba0d 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -19,7 +19,7 @@ fn real_main() -> i32 { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); let outpath = match file.enclosed_name() { - Some(path) => path.to_owned(), + Some(path) => path, None => continue, }; diff --git a/fuzz/corpus/fuzz_read/chinese.zip b/fuzz/corpus/fuzz_read/chinese.zip new file mode 100644 index 00000000..8fcd465d Binary files /dev/null and b/fuzz/corpus/fuzz_read/chinese.zip differ diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict index 2e7ad6a5..db54193c 100644 --- a/fuzz/fuzz.dict +++ b/fuzz/fuzz.dict @@ -17,7 +17,11 @@ compression_method_bzip2="\x0C\x00" compression_method_lzma="\x0E\x00" compression_method_zstd="]\x00" compression_method_aes="C\x00" -compression_method_unsupported="\xFF\x00" +extra_field_zip64="\x01\x00" +extra_field_aes="\x99\x01" +extra_field_extended_timestamp="\x55\x54" +extra_field_utf8_comment="\x75\x63" +extra_field_utf8_filename="\x75\x70" "\xFF\xFF" "/" "/./" diff --git a/src/extra_fields/mod.rs b/src/extra_fields/mod.rs index 145cfade..ee8defec 100644 --- a/src/extra_fields/mod.rs +++ b/src/extra_fields/mod.rs @@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {} impl ExtraFieldVersion for CentralHeaderVersion {} mod extended_timestamp; +mod zipinfo_utf8; pub use extended_timestamp::*; +pub use zipinfo_utf8::*; /// contains one extra field #[derive(Debug, Clone)] diff --git a/src/extra_fields/zipinfo_utf8.rs b/src/extra_fields/zipinfo_utf8.rs new file mode 100644 index 00000000..7119bbfb --- /dev/null +++ b/src/extra_fields/zipinfo_utf8.rs @@ -0,0 +1,42 @@ +use crate::result::{ZipError, ZipResult}; +use crate::unstable::LittleEndianReadExt; +use core::mem::size_of; +use std::io::Read; + +/// Info-ZIP Unicode Path Extra Field (0x7075) or Unicode Comment Extra Field (0x6375), as +/// specified in APPNOTE 4.6.8 and 4.6.9 +#[derive(Clone, Debug)] +pub struct UnicodeExtraField { + crc32: u32, + content: Box<[u8]>, +} + +impl UnicodeExtraField { + /// Verifies the checksum and returns the content. + pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult> { + let mut crc32 = crc32fast::Hasher::new(); + crc32.update(ascii_field); + let actual_crc32 = crc32.finalize(); + if self.crc32 != actual_crc32 { + return Err(ZipError::InvalidArchive( + "CRC32 checksum failed on Unicode extra field", + )); + } + Ok(self.content) + } +} + +impl UnicodeExtraField { + pub(crate) fn try_from_reader(reader: &mut R, len: u16) -> ZipResult { + // Read and discard version byte + reader.read_exact(&mut [0u8])?; + + let crc32 = reader.read_u32_le()?; + let content_len = (len as usize) + .checked_sub(size_of::() + size_of::()) + .ok_or(ZipError::InvalidArchive("Unicode extra field is too small"))?; + let mut content = vec![0u8; content_len].into_boxed_slice(); + reader.read_exact(&mut content)?; + Ok(Self { crc32, content }) + } +} diff --git a/src/read.rs b/src/read.rs index 15c1319e..7dc60a61 100644 --- a/src/read.rs +++ b/src/read.rs @@ -8,7 +8,7 @@ use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::Shared; use crate::result::{ZipError, ZipResult}; -use crate::spec::{self, Block}; +use crate::spec::{self, FixedSizeBlock}; use crate::types::{ AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, ZipLocalEntryBlock, @@ -91,6 +91,7 @@ pub(crate) mod zip_archive { #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; +use crate::extra_fields::UnicodeExtraField; #[cfg(feature = "lzma")] use crate::read::lzma::LzmaDecoder; use crate::result::ZipError::{InvalidPassword, UnsupportedArchive}; @@ -1156,6 +1157,7 @@ fn central_header_to_zip_file_inner( version_made_by: version_made_by as u8, encrypted, using_data_descriptor, + is_utf8, compression_method: CompressionMethod::parse_from_u16(compression_method), compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), @@ -1275,6 +1277,29 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { // the reader for ExtendedTimestamp consumes `len` bytes len_left = 0; } + 0x6375 => { + // Info-ZIP Unicode Comment Extra Field + // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt + if !file.is_utf8 { + file.file_comment = String::from_utf8( + UnicodeExtraField::try_from_reader(&mut reader, len)? + .unwrap_valid(file.file_comment.as_bytes())? + .into_vec(), + )? + .into(); + } + } + 0x7075 => { + // Info-ZIP Unicode Path Extra Field + // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt + if !file.is_utf8 { + file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)? + .unwrap_valid(&file.file_name_raw)?; + file.file_name = + String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str(); + file.is_utf8 = true; + } + } _ => { // Other fields are ignored } @@ -1516,7 +1541,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(ZipError::InvalidArchive("Invalid local file header")), } - let block = ZipLocalEntryBlock::interpret(block)?; + let block = ZipLocalEntryBlock::interpret(&block)?; let mut result = ZipFileData::from_local_block(block, reader)?; @@ -1759,4 +1784,13 @@ mod test { assert!(tempdir.path().join("bar").is_symlink()); Ok(()) } + + #[test] + #[cfg(feature = "_deflate-any")] + fn test_utf8_extra_field() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/chinese.zip")); + let mut reader = ZipArchive::new(Cursor::new(v)).unwrap(); + reader.by_name("七个房间.txt").unwrap(); + } } diff --git a/src/read/stream.rs b/src/read/stream.rs index d87e22fc..8f2ffa0c 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -6,7 +6,7 @@ use super::{ central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, ZipFile, ZipFileData, ZipResult, }; -use crate::spec::Block; +use crate::spec::FixedSizeBlock; /// Stream decoder for zip. #[derive(Debug)] diff --git a/src/result.rs b/src/result.rs index 7bd5cad5..ec8fbb13 100644 --- a/src/result.rs +++ b/src/result.rs @@ -9,6 +9,7 @@ use std::error::Error; use std::fmt; use std::io; use std::num::TryFromIntError; +use std::string::FromUtf8Error; /// Generic result type with ZipError as its error variant pub type ZipResult = Result; @@ -68,6 +69,12 @@ impl From for ZipError { } } +impl From for ZipError { + fn from(_: FromUtf8Error) -> Self { + ZipError::InvalidArchive("Invalid UTF-8") + } +} + /// Error type for time parsing #[derive(Debug)] pub struct DateTimeRangeError; diff --git a/src/spec.rs b/src/spec.rs index 8738a7bd..8a6feff0 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -97,35 +97,33 @@ impl ExtraFieldMagic { pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; -pub(crate) trait Block: Sized + Copy { +pub(crate) trait FixedSizeBlock: Sized + Copy { const MAGIC: Magic; fn magic(self) -> Magic; - const ERROR: ZipError; + const WRONG_MAGIC_ERROR: ZipError; /* TODO: use smallvec? */ - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); + fn interpret(bytes: &[u8]) -> ZipResult { + if bytes.len() != mem::size_of::() { + return Err(ZipError::InvalidArchive("Block is wrong size")); + } + let block_ptr: *const Self = bytes.as_ptr().cast(); + let block = unsafe { block_ptr.read() }.from_le(); if block.magic() != Self::MAGIC { - return Err(Self::ERROR); + return Err(Self::WRONG_MAGIC_ERROR); } Ok(block) } - fn deserialize(block: &[u8]) -> Self { - assert_eq!(block.len(), mem::size_of::()); - let block_ptr: *const Self = block.as_ptr().cast(); - unsafe { block_ptr.read() } - } - #[allow(clippy::wrong_self_convention)] fn from_le(self) -> Self; fn parse(reader: &mut T) -> ZipResult { let mut block = vec![0u8; mem::size_of::()].into_boxed_slice(); reader.read_exact(&mut block)?; - Self::interpret(block) + Self::interpret(&block) } fn encode(self) -> Box<[u8]> { @@ -212,7 +210,7 @@ pub(crate) struct Zip32CDEBlock { pub zip_file_comment_length: u16, } -impl Block for Zip32CDEBlock { +impl FixedSizeBlock for Zip32CDEBlock { const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE; #[inline(always)] @@ -220,7 +218,8 @@ impl Block for Zip32CDEBlock { self.magic } - const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); + const WRONG_MAGIC_ERROR: ZipError = + ZipError::InvalidArchive("Invalid digital signature header"); to_and_from_le![ (magic, Magic), @@ -391,7 +390,7 @@ pub(crate) struct Zip64CDELocatorBlock { pub number_of_disks: u32, } -impl Block for Zip64CDELocatorBlock { +impl FixedSizeBlock for Zip64CDELocatorBlock { const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE; #[inline(always)] @@ -399,7 +398,7 @@ impl Block for Zip64CDELocatorBlock { self.magic } - const ERROR: ZipError = + const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid zip64 locator digital signature header"); to_and_from_le![ @@ -467,14 +466,15 @@ pub(crate) struct Zip64CDEBlock { pub central_directory_offset: u64, } -impl Block for Zip64CDEBlock { +impl FixedSizeBlock for Zip64CDEBlock { const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE; fn magic(self) -> Magic { self.magic } - const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); + const WRONG_MAGIC_ERROR: ZipError = + ZipError::InvalidArchive("Invalid digital signature header"); to_and_from_le![ (magic, Magic), @@ -708,14 +708,14 @@ mod test { pub file_name_length: u16, } - impl Block for TestBlock { + impl FixedSizeBlock for TestBlock { const MAGIC: Magic = Magic::literal(0x01111); fn magic(self) -> Magic { self.magic } - const ERROR: ZipError = ZipError::InvalidArchive("unreachable"); + const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("unreachable"); to_and_from_le![(magic, Magic), (file_name_length, u16)]; } diff --git a/src/types.rs b/src/types.rs index f56b3c06..25e5df06 100644 --- a/src/types.rs +++ b/src/types.rs @@ -11,7 +11,7 @@ use std::sync::{Arc, OnceLock}; use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; use crate::result::{ZipError, ZipResult}; -use crate::spec::{self, Block}; +use crate::spec::{self, FixedSizeBlock}; pub(crate) mod ffi { pub const S_IFDIR: u32 = 0o0040000; @@ -415,6 +415,8 @@ pub struct ZipFileData { pub version_made_by: u8, /// True if the file is encrypted. pub encrypted: bool, + /// True if file_name and file_comment are UTF8 + pub is_utf8: bool, /// True if the file uses a data-descriptor section pub using_data_descriptor: bool, /// Compression method used to store the file @@ -612,6 +614,7 @@ impl ZipFileData { version_made_by: DEFAULT_VERSION, encrypted: options.encrypt_with.is_some(), using_data_descriptor: false, + is_utf8: !file_name.is_ascii(), compression_method, compression_level: options.compression_level, last_modified_time: Some(options.last_modified_time), @@ -695,6 +698,7 @@ impl ZipFileData { version_made_by: version_made_by as u8, encrypted, using_data_descriptor, + is_utf8, compression_method, compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), @@ -887,7 +891,7 @@ pub(crate) struct ZipCentralEntryBlock { pub offset: u32, } -impl Block for ZipCentralEntryBlock { +impl FixedSizeBlock for ZipCentralEntryBlock { const MAGIC: spec::Magic = spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE; #[inline(always)] @@ -895,7 +899,8 @@ impl Block for ZipCentralEntryBlock { self.magic } - const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header"); + const WRONG_MAGIC_ERROR: ZipError = + ZipError::InvalidArchive("Invalid Central Directory header"); to_and_from_le![ (magic, spec::Magic), @@ -934,7 +939,7 @@ pub(crate) struct ZipLocalEntryBlock { pub extra_field_length: u16, } -impl Block for ZipLocalEntryBlock { +impl FixedSizeBlock for ZipLocalEntryBlock { const MAGIC: spec::Magic = spec::Magic::LOCAL_FILE_HEADER_SIGNATURE; #[inline(always)] @@ -942,7 +947,7 @@ impl Block for ZipLocalEntryBlock { self.magic } - const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header"); + const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header"); to_and_from_le![ (magic, spec::Magic), @@ -1071,6 +1076,7 @@ mod test { version_made_by: 0, encrypted: false, using_data_descriptor: false, + is_utf8: true, compression_method: crate::compression::CompressionMethod::Stored, compression_level: None, last_modified_time: None, diff --git a/src/write.rs b/src/write.rs index 151258a8..acf9ac98 100644 --- a/src/write.rs +++ b/src/write.rs @@ -5,7 +5,7 @@ use crate::aes::AesWriter; use crate::compression::CompressionMethod; use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; -use crate::spec::{self, Block}; +use crate::spec::{self, FixedSizeBlock}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; use crate::types::{ diff --git a/tests/data/chinese.zip b/tests/data/chinese.zip new file mode 100644 index 00000000..8fcd465d Binary files /dev/null and b/tests/data/chinese.zip differ