Merge pull request from zip-rs/utf8_extra_fields

fix: Decode Zip-Info UTF8 name and comment fields
This commit is contained in:
Chris Hennick 2024-06-03 10:45:45 +00:00 committed by GitHub
commit 9cc845d7fa
Signed by: DevComp
GPG key ID: B5690EEEBB952194
12 changed files with 126 additions and 31 deletions

View file

@ -19,7 +19,7 @@ fn real_main() -> i32 {
for i in 0..archive.len() {
let mut file = archive.by_index(i).unwrap();
let outpath = match file.enclosed_name() {
Some(path) => path.to_owned(),
Some(path) => path,
None => continue,
};

Binary file not shown.

View file

@ -17,7 +17,11 @@ compression_method_bzip2="\x0C\x00"
compression_method_lzma="\x0E\x00"
compression_method_zstd="]\x00"
compression_method_aes="C\x00"
compression_method_unsupported="\xFF\x00"
extra_field_zip64="\x01\x00"
extra_field_aes="\x99\x01"
extra_field_extended_timestamp="\x55\x54"
extra_field_utf8_comment="\x75\x63"
extra_field_utf8_filename="\x75\x70"
"\xFF\xFF"
"/"
"/./"

View file

@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {}
impl ExtraFieldVersion for CentralHeaderVersion {}
mod extended_timestamp;
mod zipinfo_utf8;
pub use extended_timestamp::*;
pub use zipinfo_utf8::*;
/// contains one extra field
#[derive(Debug, Clone)]

View file

@ -0,0 +1,42 @@
use crate::result::{ZipError, ZipResult};
use crate::unstable::LittleEndianReadExt;
use core::mem::size_of;
use std::io::Read;
/// Info-ZIP Unicode Path Extra Field (0x7075) or Unicode Comment Extra Field (0x6375), as
/// specified in APPNOTE 4.6.8 and 4.6.9
///
/// As parsed by `try_from_reader`, the field data consists of one version byte (read and
/// discarded), a little-endian CRC-32, and then the remaining bytes of content. The CRC-32
/// is only checked when the value is consumed through `unwrap_valid`.
#[derive(Clone, Debug)]
pub struct UnicodeExtraField {
// CRC-32 stored in the extra field; `unwrap_valid` compares it against the CRC-32 of the
// header bytes supplied by the caller.
crc32: u32,
// Raw bytes that follow the version byte and CRC-32. They are not validated as UTF-8 here;
// callers are expected to do that when converting to a string.
content: Box<[u8]>,
}
impl UnicodeExtraField {
    /// Consumes the field and hands back its content, provided the stored checksum matches.
    ///
    /// `ascii_field` is the byte string the stored CRC-32 is checked against (callers pass
    /// the name or comment bytes taken from the regular header field).
    ///
    /// # Errors
    ///
    /// Returns [`ZipError::InvalidArchive`] when the CRC-32 of `ascii_field` differs from the
    /// checksum stored in this extra field.
    pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult<Box<[u8]>> {
        let computed = {
            let mut hasher = crc32fast::Hasher::new();
            hasher.update(ascii_field);
            hasher.finalize()
        };
        if computed == self.crc32 {
            Ok(self.content)
        } else {
            Err(ZipError::InvalidArchive(
                "CRC32 checksum failed on Unicode extra field",
            ))
        }
    }
}
impl UnicodeExtraField {
    /// Parses an Info-ZIP Unicode extra field body from `reader`.
    ///
    /// `len` is the declared data size of the extra field. It must cover one version byte
    /// and a 4-byte little-endian CRC-32; every remaining byte is treated as content.
    /// The version byte is read and discarded without being inspected.
    ///
    /// # Errors
    ///
    /// * [`ZipError::InvalidArchive`] if `len` is too small to hold the version byte and
    ///   the CRC-32. This is checked before anything is read, so a malformed field never
    ///   causes bytes beyond its declared size to be consumed.
    /// * Any I/O error raised by `reader` (for example when it ends before `len` bytes
    ///   have been read).
    pub(crate) fn try_from_reader<R: Read>(reader: &mut R, len: u16) -> ZipResult<Self> {
        // Fixed-size prefix: version (u8) followed by the CRC-32 (u32).
        const PREFIX_LEN: usize = size_of::<u8>() + size_of::<u32>();

        // Validate the declared size first so that an undersized field is reported as
        // such instead of reading into whatever follows it.
        let content_len = usize::from(len)
            .checked_sub(PREFIX_LEN)
            .ok_or(ZipError::InvalidArchive("Unicode extra field is too small"))?;

        // Read and discard version byte
        reader.read_exact(&mut [0u8])?;
        let crc32 = reader.read_u32_le()?;

        let mut content = vec![0u8; content_len].into_boxed_slice();
        reader.read_exact(&mut content)?;
        Ok(Self { crc32, content })
    }
}

View file

@ -8,7 +8,7 @@ use crate::crc32::Crc32Reader;
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
use crate::read::zip_archive::Shared;
use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block};
use crate::spec::{self, FixedSizeBlock};
use crate::types::{
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
ZipLocalEntryBlock,
@ -91,6 +91,7 @@ pub(crate) mod zip_archive {
#[cfg(feature = "aes-crypto")]
use crate::aes::PWD_VERIFY_LENGTH;
use crate::extra_fields::UnicodeExtraField;
#[cfg(feature = "lzma")]
use crate::read::lzma::LzmaDecoder;
use crate::result::ZipError::{InvalidPassword, UnsupportedArchive};
@ -1156,6 +1157,7 @@ fn central_header_to_zip_file_inner<R: Read>(
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
is_utf8,
compression_method: CompressionMethod::parse_from_u16(compression_method),
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
@ -1275,6 +1277,29 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
// the reader for ExtendedTimestamp consumes `len` bytes
len_left = 0;
}
0x6375 => {
// Info-ZIP Unicode Comment Extra Field
// APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_comment = String::from_utf8(
UnicodeExtraField::try_from_reader(&mut reader, len)?
.unwrap_valid(file.file_comment.as_bytes())?
.into_vec(),
)?
.into();
}
}
0x7075 => {
// Info-ZIP Unicode Path Extra Field
// APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)?
.unwrap_valid(&file.file_name_raw)?;
file.file_name =
String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
file.is_utf8 = true;
}
}
_ => {
// Other fields are ignored
}
@ -1516,7 +1541,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
}
let block = ZipLocalEntryBlock::interpret(block)?;
let block = ZipLocalEntryBlock::interpret(&block)?;
let mut result = ZipFileData::from_local_block(block, reader)?;
@ -1759,4 +1784,13 @@ mod test {
assert!(tempdir.path().join("bar").is_symlink());
Ok(())
}
/// An entry whose UTF-8 name is carried by an Info-ZIP Unicode Path extra field must be
/// retrievable by that name.
#[test]
#[cfg(feature = "_deflate-any")]
fn test_utf8_extra_field() {
    let archive_bytes: Vec<u8> = include_bytes!("../tests/data/chinese.zip").to_vec();
    let mut archive = ZipArchive::new(Cursor::new(archive_bytes)).unwrap();
    archive.by_name("七个房间.txt").unwrap();
}
}

View file

@ -6,7 +6,7 @@ use super::{
central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError,
ZipFile, ZipFileData, ZipResult,
};
use crate::spec::Block;
use crate::spec::FixedSizeBlock;
/// Stream decoder for zip.
#[derive(Debug)]

View file

@ -9,6 +9,7 @@ use std::error::Error;
use std::fmt;
use std::io;
use std::num::TryFromIntError;
use std::string::FromUtf8Error;
/// Generic result type with ZipError as its error variant
pub type ZipResult<T> = Result<T, ZipError>;
@ -68,6 +69,12 @@ impl From<DateTimeRangeError> for ZipError {
}
}
impl From<FromUtf8Error> for ZipError {
fn from(_: FromUtf8Error) -> Self {
ZipError::InvalidArchive("Invalid UTF-8")
}
}
/// Error type for time parsing
#[derive(Debug)]
pub struct DateTimeRangeError;

View file

@ -97,35 +97,33 @@ impl ExtraFieldMagic {
pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64;
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;
pub(crate) trait Block: Sized + Copy {
pub(crate) trait FixedSizeBlock: Sized + Copy {
const MAGIC: Magic;
fn magic(self) -> Magic;
const ERROR: ZipError;
const WRONG_MAGIC_ERROR: ZipError;
/* TODO: use smallvec? */
fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
let block = Self::deserialize(&bytes).from_le();
fn interpret(bytes: &[u8]) -> ZipResult<Self> {
if bytes.len() != mem::size_of::<Self>() {
return Err(ZipError::InvalidArchive("Block is wrong size"));
}
let block_ptr: *const Self = bytes.as_ptr().cast();
let block = unsafe { block_ptr.read() }.from_le();
if block.magic() != Self::MAGIC {
return Err(Self::ERROR);
return Err(Self::WRONG_MAGIC_ERROR);
}
Ok(block)
}
fn deserialize(block: &[u8]) -> Self {
assert_eq!(block.len(), mem::size_of::<Self>());
let block_ptr: *const Self = block.as_ptr().cast();
unsafe { block_ptr.read() }
}
#[allow(clippy::wrong_self_convention)]
fn from_le(self) -> Self;
fn parse<T: Read>(reader: &mut T) -> ZipResult<Self> {
let mut block = vec![0u8; mem::size_of::<Self>()].into_boxed_slice();
reader.read_exact(&mut block)?;
Self::interpret(block)
Self::interpret(&block)
}
fn encode(self) -> Box<[u8]> {
@ -212,7 +210,7 @@ pub(crate) struct Zip32CDEBlock {
pub zip_file_comment_length: u16,
}
impl Block for Zip32CDEBlock {
impl FixedSizeBlock for Zip32CDEBlock {
const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE;
#[inline(always)]
@ -220,7 +218,8 @@ impl Block for Zip32CDEBlock {
self.magic
}
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
const WRONG_MAGIC_ERROR: ZipError =
ZipError::InvalidArchive("Invalid digital signature header");
to_and_from_le![
(magic, Magic),
@ -391,7 +390,7 @@ pub(crate) struct Zip64CDELocatorBlock {
pub number_of_disks: u32,
}
impl Block for Zip64CDELocatorBlock {
impl FixedSizeBlock for Zip64CDELocatorBlock {
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE;
#[inline(always)]
@ -399,7 +398,7 @@ impl Block for Zip64CDELocatorBlock {
self.magic
}
const ERROR: ZipError =
const WRONG_MAGIC_ERROR: ZipError =
ZipError::InvalidArchive("Invalid zip64 locator digital signature header");
to_and_from_le![
@ -467,14 +466,15 @@ pub(crate) struct Zip64CDEBlock {
pub central_directory_offset: u64,
}
impl Block for Zip64CDEBlock {
impl FixedSizeBlock for Zip64CDEBlock {
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE;
fn magic(self) -> Magic {
self.magic
}
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
const WRONG_MAGIC_ERROR: ZipError =
ZipError::InvalidArchive("Invalid digital signature header");
to_and_from_le![
(magic, Magic),
@ -708,14 +708,14 @@ mod test {
pub file_name_length: u16,
}
impl Block for TestBlock {
impl FixedSizeBlock for TestBlock {
const MAGIC: Magic = Magic::literal(0x01111);
fn magic(self) -> Magic {
self.magic
}
const ERROR: ZipError = ZipError::InvalidArchive("unreachable");
const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("unreachable");
to_and_from_le![(magic, Magic), (file_name_length, u16)];
}

View file

@ -11,7 +11,7 @@ use std::sync::{Arc, OnceLock};
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block};
use crate::spec::{self, FixedSizeBlock};
pub(crate) mod ffi {
pub const S_IFDIR: u32 = 0o0040000;
@ -415,6 +415,8 @@ pub struct ZipFileData {
pub version_made_by: u8,
/// True if the file is encrypted.
pub encrypted: bool,
/// True if file_name and file_comment are UTF8
pub is_utf8: bool,
/// True if the file uses a data-descriptor section
pub using_data_descriptor: bool,
/// Compression method used to store the file
@ -612,6 +614,7 @@ impl ZipFileData {
version_made_by: DEFAULT_VERSION,
encrypted: options.encrypt_with.is_some(),
using_data_descriptor: false,
is_utf8: !file_name.is_ascii(),
compression_method,
compression_level: options.compression_level,
last_modified_time: Some(options.last_modified_time),
@ -695,6 +698,7 @@ impl ZipFileData {
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
is_utf8,
compression_method,
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
@ -887,7 +891,7 @@ pub(crate) struct ZipCentralEntryBlock {
pub offset: u32,
}
impl Block for ZipCentralEntryBlock {
impl FixedSizeBlock for ZipCentralEntryBlock {
const MAGIC: spec::Magic = spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE;
#[inline(always)]
@ -895,7 +899,8 @@ impl Block for ZipCentralEntryBlock {
self.magic
}
const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header");
const WRONG_MAGIC_ERROR: ZipError =
ZipError::InvalidArchive("Invalid Central Directory header");
to_and_from_le![
(magic, spec::Magic),
@ -934,7 +939,7 @@ pub(crate) struct ZipLocalEntryBlock {
pub extra_field_length: u16,
}
impl Block for ZipLocalEntryBlock {
impl FixedSizeBlock for ZipLocalEntryBlock {
const MAGIC: spec::Magic = spec::Magic::LOCAL_FILE_HEADER_SIGNATURE;
#[inline(always)]
@ -942,7 +947,7 @@ impl Block for ZipLocalEntryBlock {
self.magic
}
const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header");
const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header");
to_and_from_le![
(magic, spec::Magic),
@ -1071,6 +1076,7 @@ mod test {
version_made_by: 0,
encrypted: false,
using_data_descriptor: false,
is_utf8: true,
compression_method: crate::compression::CompressionMethod::Stored,
compression_level: None,
last_modified_time: None,

View file

@ -5,7 +5,7 @@ use crate::aes::AesWriter;
use crate::compression::CompressionMethod;
use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
use crate::result::{ZipError, ZipResult};
use crate::spec::{self, Block};
use crate::spec::{self, FixedSizeBlock};
#[cfg(feature = "aes-crypto")]
use crate::types::AesMode;
use crate::types::{

BIN
tests/data/chinese.zip Normal file

Binary file not shown.