Merge pull request #182 from zip-rs/utf8_extra_fields
fix: Decode Zip-Info UTF8 name and comment fields (#159)
This commit is contained in:
commit
9cc845d7fa
12 changed files with 126 additions and 31 deletions
examples
fuzz
src
tests/data
|
@ -19,7 +19,7 @@ fn real_main() -> i32 {
|
|||
for i in 0..archive.len() {
|
||||
let mut file = archive.by_index(i).unwrap();
|
||||
let outpath = match file.enclosed_name() {
|
||||
Some(path) => path.to_owned(),
|
||||
Some(path) => path,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
|
|
BIN
fuzz/corpus/fuzz_read/chinese.zip
Normal file
BIN
fuzz/corpus/fuzz_read/chinese.zip
Normal file
Binary file not shown.
|
@ -17,7 +17,11 @@ compression_method_bzip2="\x0C\x00"
|
|||
compression_method_lzma="\x0E\x00"
|
||||
compression_method_zstd="]\x00"
|
||||
compression_method_aes="C\x00"
|
||||
compression_method_unsupported="\xFF\x00"
|
||||
extra_field_zip64="\x01\x00"
|
||||
extra_field_aes="\x99\x01"
|
||||
extra_field_extended_timestamp="\x55\x54"
|
||||
extra_field_utf8_comment="\x75\x63"
|
||||
extra_field_utf8_filename="\x75\x70"
|
||||
"\xFF\xFF"
|
||||
"/"
|
||||
"/./"
|
||||
|
|
|
@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {}
|
|||
impl ExtraFieldVersion for CentralHeaderVersion {}
|
||||
|
||||
mod extended_timestamp;
|
||||
mod zipinfo_utf8;
|
||||
|
||||
pub use extended_timestamp::*;
|
||||
pub use zipinfo_utf8::*;
|
||||
|
||||
/// contains one extra field
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
42
src/extra_fields/zipinfo_utf8.rs
Normal file
42
src/extra_fields/zipinfo_utf8.rs
Normal file
|
@ -0,0 +1,42 @@
|
|||
use crate::result::{ZipError, ZipResult};
|
||||
use crate::unstable::LittleEndianReadExt;
|
||||
use core::mem::size_of;
|
||||
use std::io::Read;
|
||||
|
||||
/// Info-ZIP Unicode Path Extra Field (0x7075) or Unicode Comment Extra Field (0x6375), as
|
||||
/// specified in APPNOTE 4.6.8 and 4.6.9
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UnicodeExtraField {
|
||||
crc32: u32,
|
||||
content: Box<[u8]>,
|
||||
}
|
||||
|
||||
impl UnicodeExtraField {
|
||||
/// Verifies the checksum and returns the content.
|
||||
pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult<Box<[u8]>> {
|
||||
let mut crc32 = crc32fast::Hasher::new();
|
||||
crc32.update(ascii_field);
|
||||
let actual_crc32 = crc32.finalize();
|
||||
if self.crc32 != actual_crc32 {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"CRC32 checksum failed on Unicode extra field",
|
||||
));
|
||||
}
|
||||
Ok(self.content)
|
||||
}
|
||||
}
|
||||
|
||||
impl UnicodeExtraField {
|
||||
pub(crate) fn try_from_reader<R: Read>(reader: &mut R, len: u16) -> ZipResult<Self> {
|
||||
// Read and discard version byte
|
||||
reader.read_exact(&mut [0u8])?;
|
||||
|
||||
let crc32 = reader.read_u32_le()?;
|
||||
let content_len = (len as usize)
|
||||
.checked_sub(size_of::<u8>() + size_of::<u32>())
|
||||
.ok_or(ZipError::InvalidArchive("Unicode extra field is too small"))?;
|
||||
let mut content = vec![0u8; content_len].into_boxed_slice();
|
||||
reader.read_exact(&mut content)?;
|
||||
Ok(Self { crc32, content })
|
||||
}
|
||||
}
|
38
src/read.rs
38
src/read.rs
|
@ -8,7 +8,7 @@ use crate::crc32::Crc32Reader;
|
|||
use crate::extra_fields::{ExtendedTimestamp, ExtraField};
|
||||
use crate::read::zip_archive::Shared;
|
||||
use crate::result::{ZipError, ZipResult};
|
||||
use crate::spec::{self, Block};
|
||||
use crate::spec::{self, FixedSizeBlock};
|
||||
use crate::types::{
|
||||
AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData,
|
||||
ZipLocalEntryBlock,
|
||||
|
@ -91,6 +91,7 @@ pub(crate) mod zip_archive {
|
|||
|
||||
#[cfg(feature = "aes-crypto")]
|
||||
use crate::aes::PWD_VERIFY_LENGTH;
|
||||
use crate::extra_fields::UnicodeExtraField;
|
||||
#[cfg(feature = "lzma")]
|
||||
use crate::read::lzma::LzmaDecoder;
|
||||
use crate::result::ZipError::{InvalidPassword, UnsupportedArchive};
|
||||
|
@ -1156,6 +1157,7 @@ fn central_header_to_zip_file_inner<R: Read>(
|
|||
version_made_by: version_made_by as u8,
|
||||
encrypted,
|
||||
using_data_descriptor,
|
||||
is_utf8,
|
||||
compression_method: CompressionMethod::parse_from_u16(compression_method),
|
||||
compression_level: None,
|
||||
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
||||
|
@ -1275,6 +1277,29 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
|
|||
// the reader for ExtendedTimestamp consumes `len` bytes
|
||||
len_left = 0;
|
||||
}
|
||||
0x6375 => {
|
||||
// Info-ZIP Unicode Comment Extra Field
|
||||
// APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
|
||||
if !file.is_utf8 {
|
||||
file.file_comment = String::from_utf8(
|
||||
UnicodeExtraField::try_from_reader(&mut reader, len)?
|
||||
.unwrap_valid(file.file_comment.as_bytes())?
|
||||
.into_vec(),
|
||||
)?
|
||||
.into();
|
||||
}
|
||||
}
|
||||
0x7075 => {
|
||||
// Info-ZIP Unicode Path Extra Field
|
||||
// APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
|
||||
if !file.is_utf8 {
|
||||
file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)?
|
||||
.unwrap_valid(&file.file_name_raw)?;
|
||||
file.file_name =
|
||||
String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
|
||||
file.is_utf8 = true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Other fields are ignored
|
||||
}
|
||||
|
@ -1516,7 +1541,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt
|
|||
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
|
||||
}
|
||||
|
||||
let block = ZipLocalEntryBlock::interpret(block)?;
|
||||
let block = ZipLocalEntryBlock::interpret(&block)?;
|
||||
|
||||
let mut result = ZipFileData::from_local_block(block, reader)?;
|
||||
|
||||
|
@ -1759,4 +1784,13 @@ mod test {
|
|||
assert!(tempdir.path().join("bar").is_symlink());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Opens the `chinese.zip` fixture and checks that an entry can be looked up by its
/// Unicode file name.
#[test]
#[cfg(feature = "_deflate-any")]
fn test_utf8_extra_field() {
    let archive_bytes = include_bytes!("../tests/data/chinese.zip").to_vec();
    let mut archive = ZipArchive::new(Cursor::new(archive_bytes)).unwrap();
    archive.by_name("七个房间.txt").unwrap();
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,7 @@ use super::{
|
|||
central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError,
|
||||
ZipFile, ZipFileData, ZipResult,
|
||||
};
|
||||
use crate::spec::Block;
|
||||
use crate::spec::FixedSizeBlock;
|
||||
|
||||
/// Stream decoder for zip.
|
||||
#[derive(Debug)]
|
||||
|
|
|
@ -9,6 +9,7 @@ use std::error::Error;
|
|||
use std::fmt;
|
||||
use std::io;
|
||||
use std::num::TryFromIntError;
|
||||
use std::string::FromUtf8Error;
|
||||
|
||||
/// Generic result type with ZipError as its error variant
|
||||
pub type ZipResult<T> = Result<T, ZipError>;
|
||||
|
@ -68,6 +69,12 @@ impl From<DateTimeRangeError> for ZipError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<FromUtf8Error> for ZipError {
|
||||
fn from(_: FromUtf8Error) -> Self {
|
||||
ZipError::InvalidArchive("Invalid UTF-8")
|
||||
}
|
||||
}
|
||||
|
||||
/// Error type for time parsing
|
||||
#[derive(Debug)]
|
||||
pub struct DateTimeRangeError;
|
||||
|
|
40
src/spec.rs
40
src/spec.rs
|
@ -97,35 +97,33 @@ impl ExtraFieldMagic {
|
|||
pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64;
|
||||
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;
|
||||
|
||||
pub(crate) trait Block: Sized + Copy {
|
||||
pub(crate) trait FixedSizeBlock: Sized + Copy {
|
||||
const MAGIC: Magic;
|
||||
|
||||
fn magic(self) -> Magic;
|
||||
|
||||
const ERROR: ZipError;
|
||||
const WRONG_MAGIC_ERROR: ZipError;
|
||||
|
||||
/* TODO: use smallvec? */
|
||||
fn interpret(bytes: Box<[u8]>) -> ZipResult<Self> {
|
||||
let block = Self::deserialize(&bytes).from_le();
|
||||
fn interpret(bytes: &[u8]) -> ZipResult<Self> {
|
||||
if bytes.len() != mem::size_of::<Self>() {
|
||||
return Err(ZipError::InvalidArchive("Block is wrong size"));
|
||||
}
|
||||
let block_ptr: *const Self = bytes.as_ptr().cast();
|
||||
let block = unsafe { block_ptr.read() }.from_le();
|
||||
if block.magic() != Self::MAGIC {
|
||||
return Err(Self::ERROR);
|
||||
return Err(Self::WRONG_MAGIC_ERROR);
|
||||
}
|
||||
Ok(block)
|
||||
}
|
||||
|
||||
fn deserialize(block: &[u8]) -> Self {
|
||||
assert_eq!(block.len(), mem::size_of::<Self>());
|
||||
let block_ptr: *const Self = block.as_ptr().cast();
|
||||
unsafe { block_ptr.read() }
|
||||
}
|
||||
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
fn from_le(self) -> Self;
|
||||
|
||||
fn parse<T: Read>(reader: &mut T) -> ZipResult<Self> {
|
||||
let mut block = vec![0u8; mem::size_of::<Self>()].into_boxed_slice();
|
||||
reader.read_exact(&mut block)?;
|
||||
Self::interpret(block)
|
||||
Self::interpret(&block)
|
||||
}
|
||||
|
||||
fn encode(self) -> Box<[u8]> {
|
||||
|
@ -212,7 +210,7 @@ pub(crate) struct Zip32CDEBlock {
|
|||
pub zip_file_comment_length: u16,
|
||||
}
|
||||
|
||||
impl Block for Zip32CDEBlock {
|
||||
impl FixedSizeBlock for Zip32CDEBlock {
|
||||
const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE;
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -220,7 +218,8 @@ impl Block for Zip32CDEBlock {
|
|||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
|
||||
const WRONG_MAGIC_ERROR: ZipError =
|
||||
ZipError::InvalidArchive("Invalid digital signature header");
|
||||
|
||||
to_and_from_le![
|
||||
(magic, Magic),
|
||||
|
@ -391,7 +390,7 @@ pub(crate) struct Zip64CDELocatorBlock {
|
|||
pub number_of_disks: u32,
|
||||
}
|
||||
|
||||
impl Block for Zip64CDELocatorBlock {
|
||||
impl FixedSizeBlock for Zip64CDELocatorBlock {
|
||||
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE;
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -399,7 +398,7 @@ impl Block for Zip64CDELocatorBlock {
|
|||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError =
|
||||
const WRONG_MAGIC_ERROR: ZipError =
|
||||
ZipError::InvalidArchive("Invalid zip64 locator digital signature header");
|
||||
|
||||
to_and_from_le![
|
||||
|
@ -467,14 +466,15 @@ pub(crate) struct Zip64CDEBlock {
|
|||
pub central_directory_offset: u64,
|
||||
}
|
||||
|
||||
impl Block for Zip64CDEBlock {
|
||||
impl FixedSizeBlock for Zip64CDEBlock {
|
||||
const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE;
|
||||
|
||||
fn magic(self) -> Magic {
|
||||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header");
|
||||
const WRONG_MAGIC_ERROR: ZipError =
|
||||
ZipError::InvalidArchive("Invalid digital signature header");
|
||||
|
||||
to_and_from_le![
|
||||
(magic, Magic),
|
||||
|
@ -708,14 +708,14 @@ mod test {
|
|||
pub file_name_length: u16,
|
||||
}
|
||||
|
||||
impl Block for TestBlock {
|
||||
impl FixedSizeBlock for TestBlock {
|
||||
const MAGIC: Magic = Magic::literal(0x01111);
|
||||
|
||||
fn magic(self) -> Magic {
|
||||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError = ZipError::InvalidArchive("unreachable");
|
||||
const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("unreachable");
|
||||
|
||||
to_and_from_le![(magic, Magic), (file_name_length, u16)];
|
||||
}
|
||||
|
|
16
src/types.rs
16
src/types.rs
|
@ -11,7 +11,7 @@ use std::sync::{Arc, OnceLock};
|
|||
use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike};
|
||||
|
||||
use crate::result::{ZipError, ZipResult};
|
||||
use crate::spec::{self, Block};
|
||||
use crate::spec::{self, FixedSizeBlock};
|
||||
|
||||
pub(crate) mod ffi {
|
||||
pub const S_IFDIR: u32 = 0o0040000;
|
||||
|
@ -415,6 +415,8 @@ pub struct ZipFileData {
|
|||
pub version_made_by: u8,
|
||||
/// True if the file is encrypted.
|
||||
pub encrypted: bool,
|
||||
/// True if file_name and file_comment are UTF8
|
||||
pub is_utf8: bool,
|
||||
/// True if the file uses a data-descriptor section
|
||||
pub using_data_descriptor: bool,
|
||||
/// Compression method used to store the file
|
||||
|
@ -612,6 +614,7 @@ impl ZipFileData {
|
|||
version_made_by: DEFAULT_VERSION,
|
||||
encrypted: options.encrypt_with.is_some(),
|
||||
using_data_descriptor: false,
|
||||
is_utf8: !file_name.is_ascii(),
|
||||
compression_method,
|
||||
compression_level: options.compression_level,
|
||||
last_modified_time: Some(options.last_modified_time),
|
||||
|
@ -695,6 +698,7 @@ impl ZipFileData {
|
|||
version_made_by: version_made_by as u8,
|
||||
encrypted,
|
||||
using_data_descriptor,
|
||||
is_utf8,
|
||||
compression_method,
|
||||
compression_level: None,
|
||||
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
|
||||
|
@ -887,7 +891,7 @@ pub(crate) struct ZipCentralEntryBlock {
|
|||
pub offset: u32,
|
||||
}
|
||||
|
||||
impl Block for ZipCentralEntryBlock {
|
||||
impl FixedSizeBlock for ZipCentralEntryBlock {
|
||||
const MAGIC: spec::Magic = spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE;
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -895,7 +899,8 @@ impl Block for ZipCentralEntryBlock {
|
|||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header");
|
||||
const WRONG_MAGIC_ERROR: ZipError =
|
||||
ZipError::InvalidArchive("Invalid Central Directory header");
|
||||
|
||||
to_and_from_le![
|
||||
(magic, spec::Magic),
|
||||
|
@ -934,7 +939,7 @@ pub(crate) struct ZipLocalEntryBlock {
|
|||
pub extra_field_length: u16,
|
||||
}
|
||||
|
||||
impl Block for ZipLocalEntryBlock {
|
||||
impl FixedSizeBlock for ZipLocalEntryBlock {
|
||||
const MAGIC: spec::Magic = spec::Magic::LOCAL_FILE_HEADER_SIGNATURE;
|
||||
|
||||
#[inline(always)]
|
||||
|
@ -942,7 +947,7 @@ impl Block for ZipLocalEntryBlock {
|
|||
self.magic
|
||||
}
|
||||
|
||||
const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header");
|
||||
const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header");
|
||||
|
||||
to_and_from_le![
|
||||
(magic, spec::Magic),
|
||||
|
@ -1071,6 +1076,7 @@ mod test {
|
|||
version_made_by: 0,
|
||||
encrypted: false,
|
||||
using_data_descriptor: false,
|
||||
is_utf8: true,
|
||||
compression_method: crate::compression::CompressionMethod::Stored,
|
||||
compression_level: None,
|
||||
last_modified_time: None,
|
||||
|
|
|
@ -5,7 +5,7 @@ use crate::aes::AesWriter;
|
|||
use crate::compression::CompressionMethod;
|
||||
use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader};
|
||||
use crate::result::{ZipError, ZipResult};
|
||||
use crate::spec::{self, Block};
|
||||
use crate::spec::{self, FixedSizeBlock};
|
||||
#[cfg(feature = "aes-crypto")]
|
||||
use crate::types::AesMode;
|
||||
use crate::types::{
|
||||
|
|
BIN
tests/data/chinese.zip
Normal file
BIN
tests/data/chinese.zip
Normal file
Binary file not shown.
Loading…
Add table
Reference in a new issue