fix: Decode Zip-Info UTF8 name and comment fields (#159)

This commit is contained in:
Chris Hennick 2024-06-02 11:52:20 -07:00
parent c74a811680
commit e3c81023a7
No known key found for this signature in database
GPG key ID: DA47AABA4961C509
6 changed files with 73 additions and 1 deletions

View file

@ -19,7 +19,7 @@ fn real_main() -> i32 {
for i in 0..archive.len() { for i in 0..archive.len() {
let mut file = archive.by_index(i).unwrap(); let mut file = archive.by_index(i).unwrap();
let outpath = match file.enclosed_name() { let outpath = match file.enclosed_name() {
Some(path) => path.to_owned(), Some(path) => path,
None => continue, None => continue,
}; };

View file

@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {}
impl ExtraFieldVersion for CentralHeaderVersion {} impl ExtraFieldVersion for CentralHeaderVersion {}
mod extended_timestamp; mod extended_timestamp;
mod zipinfo_utf8;
pub use extended_timestamp::*; pub use extended_timestamp::*;
pub use zipinfo_utf8::*;
/// contains one extra field /// contains one extra field
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View file

@ -0,0 +1,40 @@
use core::mem::size_of;
use std::io::Read;
use crate::result::{ZipError, ZipResult};
use crate::unstable::LittleEndianReadExt;
/// Payload of an Info-ZIP Unicode Path Extra Field (0x7075) or Unicode Comment Extra Field
/// (0x6375), as specified in APPNOTE 4.6.9 and 4.6.8 respectively.
#[derive(Clone, Debug)]
pub struct UnicodeExtraField {
    /// CRC-32 stored in the extra field; `unwrap_valid` compares it against the checksum of
    /// the bytes it is given.
    crc32: u32,
    /// Remaining bytes of the extra field after the version byte and the CRC-32.
    content: Box<[u8]>,
}
impl UnicodeExtraField {
    /// Verifies the checksum and returns the content.
    ///
    /// `ascii_field` is the raw header field (file name or comment) that this extra field
    /// is meant to replace. Its CRC-32 is computed and compared with the checksum stored in
    /// the extra field.
    ///
    /// # Errors
    ///
    /// Returns [`ZipError::InvalidArchive`] when the computed CRC-32 of `ascii_field` does
    /// not equal the stored checksum.
    pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult<Box<[u8]>> {
        if crc32fast::hash(ascii_field) != self.crc32 {
            return Err(ZipError::InvalidArchive(
                "CRC32 checksum failed on Unicode extra field",
            ));
        }
        Ok(self.content)
    }
}
impl UnicodeExtraField {
pub(crate) fn try_from_reader<R: Read>(reader: &mut R, len: u16) -> ZipResult<Self> {
// Read and discard version byte
reader.read_exact(&mut [0u8])?;
let crc32 = reader.read_u32_le()?;
let mut content = vec![0u8; len as usize - size_of::<u8>() - size_of::<u32>()].into_boxed_slice();
reader.read_exact(&mut content)?;
Ok(Self {
crc32,
content
})
}
}

View file

@ -102,6 +102,7 @@ use crate::spec::{is_dir, path_to_string};
use crate::types::ffi::S_IFLNK; use crate::types::ffi::S_IFLNK;
use crate::unstable::LittleEndianReadExt; use crate::unstable::LittleEndianReadExt;
pub use zip_archive::ZipArchive; pub use zip_archive::ZipArchive;
use crate::extra_fields::UnicodeExtraField;
#[allow(clippy::large_enum_variant)] #[allow(clippy::large_enum_variant)]
pub(crate) enum CryptoReader<'a> { pub(crate) enum CryptoReader<'a> {
@ -1160,6 +1161,7 @@ fn central_header_to_zip_file_inner<R: Read>(
version_made_by: version_made_by as u8, version_made_by: version_made_by as u8,
encrypted, encrypted,
using_data_descriptor, using_data_descriptor,
is_utf8,
compression_method: CompressionMethod::parse_from_u16(compression_method), compression_method: CompressionMethod::parse_from_u16(compression_method),
compression_level: None, compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
@ -1279,6 +1281,22 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
// the reader for ExtendedTimestamp consumes `len` bytes // the reader for ExtendedTimestamp consumes `len` bytes
len_left = 0; len_left = 0;
} }
0x6375 => {
// Info-ZIP Unicode Comment Extra Field
// APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_comment = String::from_utf8(
UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(file.file_comment.as_bytes())?.into_vec())?.into();
}
}
0x7075 => {
// Info-ZIP Unicode Path Extra Field
// APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(&file.file_name_raw)?;
file.file_name = String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
}
}
_ => { _ => {
// Other fields are ignored // Other fields are ignored
} }

View file

@ -9,6 +9,7 @@ use std::error::Error;
use std::fmt; use std::fmt;
use std::io; use std::io;
use std::num::TryFromIntError; use std::num::TryFromIntError;
use std::string::FromUtf8Error;
/// Generic result type with ZipError as its error variant /// Generic result type with ZipError as its error variant
pub type ZipResult<T> = Result<T, ZipError>; pub type ZipResult<T> = Result<T, ZipError>;
@ -68,6 +69,12 @@ impl From<DateTimeRangeError> for ZipError {
} }
} }
impl From<FromUtf8Error> for ZipError {
fn from(_: FromUtf8Error) -> Self {
ZipError::InvalidArchive("Invalid UTF-8")
}
}
/// Error type for time parsing /// Error type for time parsing
#[derive(Debug)] #[derive(Debug)]
pub struct DateTimeRangeError; pub struct DateTimeRangeError;

View file

@ -415,6 +415,8 @@ pub struct ZipFileData {
pub version_made_by: u8, pub version_made_by: u8,
/// True if the file is encrypted. /// True if the file is encrypted.
pub encrypted: bool, pub encrypted: bool,
/// True if `file_name` and `file_comment` are encoded as UTF-8
pub is_utf8: bool,
/// True if the file uses a data-descriptor section /// True if the file uses a data-descriptor section
pub using_data_descriptor: bool, pub using_data_descriptor: bool,
/// Compression method used to store the file /// Compression method used to store the file
@ -612,6 +614,7 @@ impl ZipFileData {
version_made_by: DEFAULT_VERSION, version_made_by: DEFAULT_VERSION,
encrypted: options.encrypt_with.is_some(), encrypted: options.encrypt_with.is_some(),
using_data_descriptor: false, using_data_descriptor: false,
is_utf8: !file_name.is_ascii(),
compression_method, compression_method,
compression_level: options.compression_level, compression_level: options.compression_level,
last_modified_time: Some(options.last_modified_time), last_modified_time: Some(options.last_modified_time),
@ -695,6 +698,7 @@ impl ZipFileData {
version_made_by: version_made_by as u8, version_made_by: version_made_by as u8,
encrypted, encrypted,
using_data_descriptor, using_data_descriptor,
is_utf8,
compression_method, compression_method,
compression_level: None, compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
@ -1071,6 +1075,7 @@ mod test {
version_made_by: 0, version_made_by: 0,
encrypted: false, encrypted: false,
using_data_descriptor: false, using_data_descriptor: false,
is_utf8: true,
compression_method: crate::compression::CompressionMethod::Stored, compression_method: crate::compression::CompressionMethod::Stored,
compression_level: None, compression_level: None,
last_modified_time: None, last_modified_time: None,