Merge pull request #199 from yujincheng08/xz

feat: Support XZ decompression
This commit is contained in:
Chris Hennick 2024-07-05 15:52:38 +00:00 committed by GitHub
commit d45bdccc6a
Signed by: DevComp
GPG key ID: B5690EEEBB952194
10 changed files with 330 additions and 2 deletions

View file

@ -78,6 +78,7 @@ deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"]
deflate-zopfli = ["zopfli", "_deflate-any"]
lzma = ["lzma-rs/stream"]
unreserved = []
xz = ["lzma-rs/raw_decoder"]
default = [
"aes-crypto",
"bzip2",
@ -86,6 +87,7 @@ default = [
"lzma",
"time",
"zstd",
"xz",
]
[[bench]]

View file

@ -15,8 +15,11 @@ compression_method_deflate="\x07\x00"
compression_method_deflate64="\x09\x00"
compression_method_bzip2="\x0C\x00"
compression_method_lzma="\x0E\x00"
compression_method_xz="\x5F\x00"
compression_method_zstd="]\x00"
compression_method_aes="C\x00"
xz_header_magic="\xFD7zXZ\x00"
xz_footer_magic="YZ"
extra_field_zip64="\x01\x00"
extra_field_aes="\x99\x01"
extra_field_extended_timestamp="\x55\x54"
@ -25,4 +28,4 @@ extra_field_utf8_filename="\x75\x70"
"\xFF\xFF"
"/"
"/./"
"/../"
"/../"

View file

@ -38,6 +38,9 @@ pub enum CompressionMethod {
/// Compress the file using LZMA
#[cfg(feature = "lzma")]
Lzma,
/// Compress the file using XZ (this crate can only read XZ entries; writing them is not supported)
#[cfg(feature = "xz")]
Xz,
/// Unsupported compression method
#[cfg_attr(
not(fuzzing),
@ -80,6 +83,9 @@ impl CompressionMethod {
#[cfg(not(feature = "zstd"))]
pub const ZSTD: Self = CompressionMethod::Unsupported(93);
pub const MP3: Self = CompressionMethod::Unsupported(94);
#[cfg(feature = "xz")]
pub const XZ: Self = CompressionMethod::Xz;
#[cfg(not(feature = "xz"))]
pub const XZ: Self = CompressionMethod::Unsupported(95);
pub const JPEG: Self = CompressionMethod::Unsupported(96);
pub const WAVPACK: Self = CompressionMethod::Unsupported(97);
@ -101,6 +107,8 @@ impl CompressionMethod {
12 => CompressionMethod::Bzip2,
#[cfg(feature = "lzma")]
14 => CompressionMethod::Lzma,
#[cfg(feature = "xz")]
95 => CompressionMethod::Xz,
#[cfg(feature = "zstd")]
93 => CompressionMethod::Zstd,
#[cfg(feature = "aes-crypto")]
@ -134,6 +142,8 @@ impl CompressionMethod {
CompressionMethod::Zstd => 93,
#[cfg(feature = "lzma")]
CompressionMethod::Lzma => 14,
#[cfg(feature = "xz")]
CompressionMethod::Xz => 95,
#[allow(deprecated)]
CompressionMethod::Unsupported(v) => v,
}

View file

@ -21,6 +21,7 @@
//! | Deflate64 | ✅ | |
//! | Bzip2 | ✅ | ✅ |
//! | LZMA | ✅ | |
//! | XZ | ✅ | |
//! | AES encryption | ✅ | ✅ |
//! | ZipCrypto deprecated encryption | ✅ | ✅ |
//!

View file

@ -48,6 +48,9 @@ pub(crate) mod stream;
#[cfg(feature = "lzma")]
pub(crate) mod lzma;
#[cfg(feature = "xz")]
pub(crate) mod xz;
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
pub(crate) mod zip_archive {
use indexmap::IndexMap;
@ -122,6 +125,8 @@ use crate::aes::PWD_VERIFY_LENGTH;
use crate::extra_fields::UnicodeExtraField;
#[cfg(feature = "lzma")]
use crate::read::lzma::LzmaDecoder;
#[cfg(feature = "xz")]
use crate::read::xz::XzDecoder;
use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive};
use crate::spec::is_dir;
use crate::types::ffi::S_IFLNK;
@ -190,6 +195,8 @@ pub(crate) enum ZipFileReader<'a> {
Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
#[cfg(feature = "lzma")]
Lzma(Crc32Reader<Box<LzmaDecoder<CryptoReader<'a>>>>),
#[cfg(feature = "xz")]
Xz(Crc32Reader<XzDecoder<CryptoReader<'a>>>),
}
impl<'a> Read for ZipFileReader<'a> {
@ -208,6 +215,8 @@ impl<'a> Read for ZipFileReader<'a> {
ZipFileReader::Zstd(r) => r.read(buf),
#[cfg(feature = "lzma")]
ZipFileReader::Lzma(r) => r.read(buf),
#[cfg(feature = "xz")]
ZipFileReader::Xz(r) => r.read(buf),
}
}
}
@ -236,6 +245,8 @@ impl<'a> ZipFileReader<'a> {
}
return;
}
#[cfg(feature = "xz")]
ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(),
};
let _ = copy(&mut inner, &mut sink());
}
@ -396,6 +407,15 @@ pub(crate) fn make_reader(
ae2_encrypted,
)))
}
#[cfg(feature = "xz")]
CompressionMethod::Xz => {
let reader = XzDecoder::new(reader);
Ok(ZipFileReader::Xz(Crc32Reader::new(
reader,
crc32,
ae2_encrypted,
)))
}
_ => Err(UnsupportedArchive("Compression method not supported")),
}
}

267
src/read/xz.rs Normal file
View file

@ -0,0 +1,267 @@
use crc32fast::Hasher;
use lzma_rs::decompress::raw::Lzma2Decoder;
use std::{
collections::VecDeque,
io::{BufRead, BufReader, Error, Read, Result, Write},
};
/// Streaming decoder for XZ-compressed data read from `R`.
///
/// The decoder parses the XZ container itself and hands block payloads to
/// `lzma_rs`; decompressed bytes are exposed through this type's `Read`
/// implementation.
#[derive(Debug)]
pub struct XzDecoder<R> {
    /// Buffered view of the compressed input.
    compressed_reader: BufReader<R>,
    /// Bytes consumed from the current XZ stream; `0` means "at a stream boundary".
    stream_size: usize,
    /// Decompressed bytes that did not fit into the caller's buffer yet.
    buf: VecDeque<u8>,
    /// Size in bytes of the integrity-check field that follows each block.
    check_size: usize,
    /// One `(unpadded size, uncompressed size)` entry per block decoded so far;
    /// compared against the stream's index.
    records: Vec<(usize, usize)>,
    /// Stream flags taken from the stream header; compared against the footer.
    flags: [u8; 2],
}

impl<R: Read> XzDecoder<R> {
    /// Creates a decoder that reads XZ data from `inner`.
    ///
    /// Nothing is read from `inner` until the first call to `read`.
    pub fn new(inner: R) -> Self {
        let compressed_reader = BufReader::new(inner);
        Self {
            compressed_reader,
            stream_size: 0,
            buf: VecDeque::new(),
            check_size: 0,
            records: Vec::new(),
            flags: [0; 2],
        }
    }
}
/// Reader adapter that adds every byte consumed through it to an external counter.
struct CountReader<'a, R: BufRead> {
    /// The wrapped buffered reader.
    inner: &'a mut R,
    /// Running total of consumed bytes, owned by the caller.
    count: &'a mut usize,
}

impl<R: BufRead> Read for CountReader<'_, R> {
    /// Reads from the wrapped reader and adds the number of bytes read to the counter.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        self.inner.read(buf).map(|bytes_read| {
            *self.count += bytes_read;
            bytes_read
        })
    }
}

impl<R: BufRead> BufRead for CountReader<'_, R> {
    /// Exposes the wrapped reader's buffer; peeking does not change the counter.
    fn fill_buf(&mut self) -> Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Marks `amt` bytes as consumed and adds them to the counter.
    fn consume(&mut self, amt: usize) {
        *self.count += amt;
        self.inner.consume(amt);
    }
}
/// Writer that fills a fixed output slice first and spills everything after
/// that into a queue, while tracking how much went where.
struct BufWriter<'a> {
    /// Destination slice that is filled first.
    inner: &'a mut [u8],
    /// Number of bytes stored in `inner` so far.
    written: &'a mut usize,
    /// Number of bytes accepted overall (`inner` plus `rest`).
    total: &'a mut usize,
    /// Overflow queue for bytes that arrive once `inner` is full.
    rest: &'a mut VecDeque<u8>,
}

impl<'a> Write for BufWriter<'a> {
    /// Accepts as much of `buf` as fits into the remaining space of `inner`;
    /// once `inner` is full, accepts all of `buf` into the overflow queue.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        let filled = *self.written;
        let room = self.inner.len().saturating_sub(filled);
        let accepted = if room == 0 {
            // The slice is full: queue the whole input for a later read.
            self.rest.extend(buf.iter().copied());
            buf.len()
        } else {
            // Partial writes are fine; the caller retries with the remainder.
            let take = buf.len().min(room);
            self.inner[filled..filled + take].copy_from_slice(&buf[..take]);
            *self.written = filled + take;
            take
        };
        *self.total += accepted;
        Ok(accepted)
    }

    /// Nothing is buffered beyond the destinations themselves, so this is a no-op.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}
/// Builds an `Err` holding an `InvalidData` I/O error with the message `s`.
fn error<T>(s: &'static str) -> Result<T> {
    let kind = std::io::ErrorKind::InvalidData;
    Err(Error::new(kind, s))
}
/// Reads one variable-length integer from `input`.
///
/// Each byte contributes its low seven bits, least-significant group first;
/// a clear high bit marks the last byte. Every byte read is also fed to
/// `hasher`. At most nine bytes are read; if the ninth still has its high bit
/// set, an "Invalid multi-byte encoding" error is returned. I/O errors from
/// `input` are propagated.
fn get_multibyte<R: Read>(input: &mut R, hasher: &mut Hasher) -> Result<u64> {
    let mut value = 0u64;
    let mut shift = 0;
    // Nine groups of seven bits: shifts 0, 7, ..., 56.
    while shift < 63 {
        let mut byte = [0u8; 1];
        input.read_exact(&mut byte)?;
        hasher.update(&byte);
        let [byte] = byte;
        value |= u64::from(byte & 0x7F) << shift;
        if byte & 0x80 == 0 {
            return Ok(value);
        }
        shift += 7;
    }
    error("Invalid multi-byte encoding")
}
impl<R: Read> Read for XzDecoder<R> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
if !self.buf.is_empty() {
let len = std::cmp::min(buf.len(), self.buf.len());
buf[..len].copy_from_slice(&self.buf.as_slices().0[..len]);
self.buf.drain(..len);
return Ok(len);
}
let mut reader = CountReader {
inner: &mut self.compressed_reader,
count: &mut self.stream_size,
};
if *reader.count == 0 {
let mut b = [0u8; 12];
match reader.read(&mut b) {
Ok(0) => return Ok(0),
Err(e) => return Err(e),
_ => (),
}
if b[..6] != b"\xFD7zXZ\0"[..] {
return error("Invalid XZ header");
}
self.flags = [b[6], b[7]];
if self.flags[0] != 0 || self.flags[1] & 0xF0 != 0 {
return error("Invalid XZ stream flags");
}
match self.flags[1] & 0x0F {
0 => self.check_size = 0,
1 => self.check_size = 4,
_ => return error("Unsupported XZ stream flags"),
}
let mut digest = Hasher::new();
digest.update(&self.flags);
if digest.finalize().to_le_bytes() != b[8..] {
return error("Invalid XZ stream flags CRC32");
}
}
let block_begin = *reader.count;
let mut b = [0u8; 1];
reader.read_exact(&mut b)?;
let mut digest = Hasher::new();
digest.update(&b);
if b[0] == 0 {
// index
let num_records = get_multibyte(&mut reader, &mut digest)?;
if num_records != self.records.len() as u64 {
return error("Invalid XZ index record count");
}
for (unpadded_size, total) in &self.records {
if get_multibyte(&mut reader, &mut digest)? != *unpadded_size as u64 {
return error("Invalid XZ unpadded size");
}
if get_multibyte(&mut reader, &mut digest)? != *total as u64 {
return error("Invalid XZ uncompressed size");
}
}
let mut size = *reader.count - block_begin;
let mut b = vec![0u8; (4 - (size & 0x3)) & 0x3];
reader.read_exact(b.as_mut_slice())?;
if !b.iter().all(|&b| b == 0) {
return error("Invalid XZ index padding");
}
digest.update(b.as_slice());
size += b.len();
let mut b = [0u8; 16];
reader.read_exact(&mut b)?;
if digest.finalize().to_le_bytes() != b[..4] {
return error("Invalid XZ index CRC32");
}
let mut digest = Hasher::new();
digest.update(&b[8..14]);
if digest.finalize().to_le_bytes() != b[4..8] {
return error("Invalid XZ footer CRC32");
}
if b[8..12] != ((size >> 2) as u32).to_le_bytes() {
return error("Invalid XZ footer size");
}
if self.flags != b[12..14] {
return error("Invalid XZ footer flags");
}
if &b[14..16] != b"YZ" {
return error("Invalid XZ footer magic");
}
let mut b = vec![0u8; (4 - (*reader.count & 0x3)) & 0x3];
reader.read_exact(b.as_mut_slice())?;
if !b.iter().all(|&b| b == 0) {
return error("Invalid XZ footer padding");
}
*reader.count = 0;
return self.read(buf);
}
// block
let header_end = ((b[0] as usize) << 2) - 1 + *reader.count;
let mut b = [0u8; 1];
reader.read_exact(&mut b)?;
digest.update(&b);
let flags = b[0];
let num_filters = (flags & 0x03) + 1;
if flags & 0x3C != 0 {
return error("Invalid XZ block flags");
}
if flags & 0x40 != 0 {
get_multibyte(&mut reader, &mut digest)?;
}
if flags & 0x80 != 0 {
get_multibyte(&mut reader, &mut digest)?;
}
for _ in 0..num_filters {
let filter_id = get_multibyte(&mut reader, &mut digest)?;
if filter_id != 0x21 {
return error("Unsupported XZ filter ID");
}
let properties_size = get_multibyte(&mut reader, &mut digest)?;
if properties_size != 1 {
return error("Unsupported XZ filter properties size");
}
reader.read_exact(&mut b)?;
if b[0] & 0xC0 != 0 {
return error("Unsupported XZ filter properties");
}
digest.update(&b);
}
let mut b = vec![0u8; header_end - *reader.count];
reader.read_exact(b.as_mut_slice())?;
if !b.iter().all(|&b| b == 0) {
return error("Invalid XZ block header padding");
}
digest.update(b.as_slice());
let mut b = [0u8; 4];
reader.read_exact(&mut b)?;
if digest.finalize().to_le_bytes() != b {
return error("Invalid XZ block header CRC32");
}
let mut written = 0;
let mut total = 0;
Lzma2Decoder::new().decompress(
&mut reader,
&mut BufWriter {
inner: buf,
written: &mut written,
rest: &mut self.buf,
total: &mut total,
},
)?;
let unpadded_size = *reader.count - block_begin;
self.records.push((unpadded_size, total));
// ignore check here since zip itself will check it
let mut b = vec![0u8; ((4 - (unpadded_size & 0x3)) & 0x3) + self.check_size];
reader.read_exact(b.as_mut_slice())?;
if !b.as_slice()[..self.check_size].iter().all(|&b| b == 0) {
return error("Invalid XZ block padding");
}
Ok(written)
}
}
impl<R: Read> XzDecoder<R> {
    /// Consumes the decoder and returns the underlying reader.
    ///
    /// Input already pulled into the decoder's internal read buffer, and any
    /// decompressed bytes not yet handed out, are dropped along with the decoder.
    pub fn into_inner(self) -> R {
        let Self {
            compressed_reader, ..
        } = self;
        compressed_reader.into_inner()
    }
}

View file

@ -570,6 +570,8 @@ impl ZipFileData {
CompressionMethod::Deflate64 => 21,
#[cfg(feature = "lzma")]
CompressionMethod::Lzma => 63,
#[cfg(feature = "xz")]
CompressionMethod::Xz => 63,
// APPNOTE doesn't specify a version for Zstandard
_ => DEFAULT_VERSION as u16,
};

View file

@ -174,7 +174,7 @@ pub(crate) mod zip_writer {
#[doc(inline)]
pub use self::sealed::FileOptionExtension;
use crate::result::ZipError::InvalidArchive;
#[cfg(feature = "lzma")]
#[cfg(any(feature = "lzma", feature = "xz"))]
use crate::result::ZipError::UnsupportedArchive;
use crate::unstable::path_to_string;
use crate::unstable::LittleEndianWriteExt;
@ -1702,6 +1702,10 @@ impl<W: Write + Seek> GenericZipWriter<W> {
CompressionMethod::Lzma => {
Err(UnsupportedArchive("LZMA isn't supported for compression"))
}
#[cfg(feature = "xz")]
CompressionMethod::Xz => {
Err(UnsupportedArchive("XZ isn't supported for compression"))
}
CompressionMethod::Unsupported(..) => {
Err(ZipError::UnsupportedArchive("Unsupported compression"))
}

BIN
tests/data/xz.zip Normal file

Binary file not shown.

19
tests/xz.rs Normal file
View file

@ -0,0 +1,19 @@
#![cfg(feature = "xz")]
use std::io::{self, Read};
use zip::ZipArchive;
/// Opens the bundled `xz.zip` fixture and checks that its XZ-compressed
/// `hello.txt` entry decompresses to the expected text.
#[test]
fn decompress_xz() -> io::Result<()> {
    let bytes = include_bytes!("data/xz.zip").to_vec();
    let mut archive =
        ZipArchive::new(io::Cursor::new(bytes)).expect("couldn't open test zip file");

    let mut file = archive.by_name("hello.txt")?;
    assert_eq!("hello.txt", file.name());

    let mut content = Vec::new();
    file.read_to_end(&mut content)?;
    let text = String::from_utf8(content).unwrap();
    assert_eq!("Hello world\n", text);
    Ok(())
}