From e3c81023a7ebedceaf287be98f3a10b5c1c18f8e Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Sun, 2 Jun 2024 11:52:20 -0700 Subject: [PATCH] fix: Decode Zip-Info UTF8 name and comment fields (#159) --- examples/extract.rs | 2 +- src/extra_fields/mod.rs | 2 ++ src/extra_fields/zipinfo_utf8.rs | 40 ++++++++++++++++++++++++++++++++ src/read.rs | 18 ++++++++++++++ src/result.rs | 7 ++++++ src/types.rs | 5 ++++ 6 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 src/extra_fields/zipinfo_utf8.rs diff --git a/examples/extract.rs b/examples/extract.rs index 7359b53e..57cfba0d 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -19,7 +19,7 @@ fn real_main() -> i32 { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); let outpath = match file.enclosed_name() { - Some(path) => path.to_owned(), + Some(path) => path, None => continue, }; diff --git a/src/extra_fields/mod.rs b/src/extra_fields/mod.rs index 145cfade..ee8defec 100644 --- a/src/extra_fields/mod.rs +++ b/src/extra_fields/mod.rs @@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {} impl ExtraFieldVersion for CentralHeaderVersion {} mod extended_timestamp; +mod zipinfo_utf8; pub use extended_timestamp::*; +pub use zipinfo_utf8::*; /// contains one extra field #[derive(Debug, Clone)] diff --git a/src/extra_fields/zipinfo_utf8.rs b/src/extra_fields/zipinfo_utf8.rs new file mode 100644 index 00000000..f8e360fa --- /dev/null +++ b/src/extra_fields/zipinfo_utf8.rs @@ -0,0 +1,40 @@ +use core::mem::size_of; +use std::io::Read; +use crate::result::{ZipError, ZipResult}; +use crate::unstable::LittleEndianReadExt; + +/// Info-ZIP Unicode Path Extra Field (0x7075) or Unicode Comment Extra Field (0x6375), as +/// specified in APPNOTE 4.6.8 and 4.6.9 +#[derive(Clone, Debug)] +pub struct UnicodeExtraField { + crc32: u32, + content: Box<[u8]> +} + +impl<'a> UnicodeExtraField { + /// Verifies the checksum and returns the content. + pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult> { + let mut crc32 = crc32fast::Hasher::new(); + crc32.update(ascii_field); + let actual_crc32 = crc32.finalize(); + if self.crc32 != actual_crc32 { + return Err(ZipError::InvalidArchive("CRC32 checksum failed on Unicode extra field")); + } + Ok(self.content) + } +} + +impl UnicodeExtraField { + pub(crate) fn try_from_reader(reader: &mut R, len: u16) -> ZipResult { + // Read and discard version byte + reader.read_exact(&mut [0u8])?; + + let crc32 = reader.read_u32_le()?; + let mut content = vec![0u8; len as usize - size_of::() - size_of::()].into_boxed_slice(); + reader.read_exact(&mut content)?; + Ok(Self { + crc32, + content + }) + } +} \ No newline at end of file diff --git a/src/read.rs b/src/read.rs index 94ed2366..6d9f441d 100644 --- a/src/read.rs +++ b/src/read.rs @@ -102,6 +102,7 @@ use crate::spec::{is_dir, path_to_string}; use crate::types::ffi::S_IFLNK; use crate::unstable::LittleEndianReadExt; pub use zip_archive::ZipArchive; +use crate::extra_fields::UnicodeExtraField; #[allow(clippy::large_enum_variant)] pub(crate) enum CryptoReader<'a> { @@ -1160,6 +1161,7 @@ fn central_header_to_zip_file_inner( version_made_by: version_made_by as u8, encrypted, using_data_descriptor, + is_utf8, compression_method: CompressionMethod::parse_from_u16(compression_method), compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), @@ -1279,6 +1281,22 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { // the reader for ExtendedTimestamp consumes `len` bytes len_left = 0; } + 0x6375 => { + // Info-ZIP Unicode Comment Extra Field + // APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt + if !file.is_utf8 { + file.file_comment = String::from_utf8( + UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(file.file_comment.as_bytes())?.into_vec())?.into(); + } + } + 0x7075 => { + // Info-ZIP Unicode Path Extra Field + // APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt + if !file.is_utf8 { + file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(&file.file_name_raw)?; + file.file_name = String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str(); + } + } _ => { // Other fields are ignored } diff --git a/src/result.rs b/src/result.rs index 7bd5cad5..ec8fbb13 100644 --- a/src/result.rs +++ b/src/result.rs @@ -9,6 +9,7 @@ use std::error::Error; use std::fmt; use std::io; use std::num::TryFromIntError; +use std::string::FromUtf8Error; /// Generic result type with ZipError as its error variant pub type ZipResult = Result; @@ -68,6 +69,12 @@ impl From for ZipError { } } +impl From for ZipError { + fn from(_: FromUtf8Error) -> Self { + ZipError::InvalidArchive("Invalid UTF-8") + } +} + /// Error type for time parsing #[derive(Debug)] pub struct DateTimeRangeError; diff --git a/src/types.rs b/src/types.rs index f56b3c06..181592e6 100644 --- a/src/types.rs +++ b/src/types.rs @@ -415,6 +415,8 @@ pub struct ZipFileData { pub version_made_by: u8, /// True if the file is encrypted. pub encrypted: bool, + /// True if file_name and file_comment are UTF8 + pub is_utf8: bool, /// True if the file uses a data-descriptor section pub using_data_descriptor: bool, /// Compression method used to store the file @@ -612,6 +614,7 @@ impl ZipFileData { version_made_by: DEFAULT_VERSION, encrypted: options.encrypt_with.is_some(), using_data_descriptor: false, + is_utf8: !file_name.is_ascii(), compression_method, compression_level: options.compression_level, last_modified_time: Some(options.last_modified_time), @@ -695,6 +698,7 @@ impl ZipFileData { version_made_by: version_made_by as u8, encrypted, using_data_descriptor, + is_utf8, compression_method, compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), @@ -1071,6 +1075,7 @@ mod test { version_made_by: 0, encrypted: false, using_data_descriptor: false, + is_utf8: true, compression_method: crate::compression::CompressionMethod::Stored, compression_level: None, last_modified_time: None,