Merge pull request #199 from yujincheng08/xz
feat: Support XZ decompression
This commit is contained in:
commit
d45bdccc6a
10 changed files with 330 additions and 2 deletions
|
@ -78,6 +78,7 @@ deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"]
|
|||
deflate-zopfli = ["zopfli", "_deflate-any"]
|
||||
lzma = ["lzma-rs/stream"]
|
||||
unreserved = []
|
||||
xz = ["lzma-rs/raw_decoder"]
|
||||
default = [
|
||||
"aes-crypto",
|
||||
"bzip2",
|
||||
|
@ -86,6 +87,7 @@ default = [
|
|||
"lzma",
|
||||
"time",
|
||||
"zstd",
|
||||
"xz",
|
||||
]
|
||||
|
||||
[[bench]]
|
||||
|
|
|
@ -15,8 +15,11 @@ compression_method_deflate="\x07\x00"
|
|||
compression_method_deflate64="\x09\x00"
|
||||
compression_method_bzip2="\x0C\x00"
|
||||
compression_method_lzma="\x0E\x00"
|
||||
compression_method_xz="\x5F\x00"
|
||||
compression_method_zstd="]\x00"
|
||||
compression_method_aes="C\x00"
|
||||
xz_header_magic="\xFD7zXZ\x00"
|
||||
xz_footer_magic="YZ"
|
||||
extra_field_zip64="\x01\x00"
|
||||
extra_field_aes="\x99\x01"
|
||||
extra_field_extended_timestamp="\x55\x54"
|
||||
|
@ -25,4 +28,4 @@ extra_field_utf8_filename="\x75\x70"
|
|||
"\xFF\xFF"
|
||||
"/"
|
||||
"/./"
|
||||
"/../"
|
||||
"/../"
|
||||
|
|
|
@ -38,6 +38,9 @@ pub enum CompressionMethod {
|
|||
/// Compress the file using LZMA
|
||||
#[cfg(feature = "lzma")]
|
||||
Lzma,
|
||||
/// Compress the file using XZ
|
||||
#[cfg(feature = "xz")]
|
||||
Xz,
|
||||
/// Unsupported compression method
|
||||
#[cfg_attr(
|
||||
not(fuzzing),
|
||||
|
@ -80,6 +83,9 @@ impl CompressionMethod {
|
|||
#[cfg(not(feature = "zstd"))]
|
||||
pub const ZSTD: Self = CompressionMethod::Unsupported(93);
|
||||
pub const MP3: Self = CompressionMethod::Unsupported(94);
|
||||
#[cfg(feature = "xz")]
|
||||
pub const XZ: Self = CompressionMethod::Xz;
|
||||
#[cfg(not(feature = "xz"))]
|
||||
pub const XZ: Self = CompressionMethod::Unsupported(95);
|
||||
pub const JPEG: Self = CompressionMethod::Unsupported(96);
|
||||
pub const WAVPACK: Self = CompressionMethod::Unsupported(97);
|
||||
|
@ -101,6 +107,8 @@ impl CompressionMethod {
|
|||
12 => CompressionMethod::Bzip2,
|
||||
#[cfg(feature = "lzma")]
|
||||
14 => CompressionMethod::Lzma,
|
||||
#[cfg(feature = "xz")]
|
||||
95 => CompressionMethod::Xz,
|
||||
#[cfg(feature = "zstd")]
|
||||
93 => CompressionMethod::Zstd,
|
||||
#[cfg(feature = "aes-crypto")]
|
||||
|
@ -134,6 +142,8 @@ impl CompressionMethod {
|
|||
CompressionMethod::Zstd => 93,
|
||||
#[cfg(feature = "lzma")]
|
||||
CompressionMethod::Lzma => 14,
|
||||
#[cfg(feature = "xz")]
|
||||
CompressionMethod::Xz => 95,
|
||||
#[allow(deprecated)]
|
||||
CompressionMethod::Unsupported(v) => v,
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
//! | Deflate64 | ✅ | |
|
||||
//! | Bzip2 | ✅ | ✅ |
|
||||
//! | LZMA | ✅ | |
|
||||
//! | XZ | ✅ | |
|
||||
//! | AES encryption | ✅ | ✅ |
|
||||
//! | ZipCrypto deprecated encryption | ✅ | ✅ |
|
||||
//!
|
||||
|
|
20
src/read.rs
20
src/read.rs
|
@ -48,6 +48,9 @@ pub(crate) mod stream;
|
|||
#[cfg(feature = "lzma")]
|
||||
pub(crate) mod lzma;
|
||||
|
||||
#[cfg(feature = "xz")]
|
||||
pub(crate) mod xz;
|
||||
|
||||
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
|
||||
pub(crate) mod zip_archive {
|
||||
use indexmap::IndexMap;
|
||||
|
@ -122,6 +125,8 @@ use crate::aes::PWD_VERIFY_LENGTH;
|
|||
use crate::extra_fields::UnicodeExtraField;
|
||||
#[cfg(feature = "lzma")]
|
||||
use crate::read::lzma::LzmaDecoder;
|
||||
#[cfg(feature = "xz")]
|
||||
use crate::read::xz::XzDecoder;
|
||||
use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive};
|
||||
use crate::spec::is_dir;
|
||||
use crate::types::ffi::S_IFLNK;
|
||||
|
@ -190,6 +195,8 @@ pub(crate) enum ZipFileReader<'a> {
|
|||
Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
|
||||
#[cfg(feature = "lzma")]
|
||||
Lzma(Crc32Reader<Box<LzmaDecoder<CryptoReader<'a>>>>),
|
||||
#[cfg(feature = "xz")]
|
||||
Xz(Crc32Reader<XzDecoder<CryptoReader<'a>>>),
|
||||
}
|
||||
|
||||
impl<'a> Read for ZipFileReader<'a> {
|
||||
|
@ -208,6 +215,8 @@ impl<'a> Read for ZipFileReader<'a> {
|
|||
ZipFileReader::Zstd(r) => r.read(buf),
|
||||
#[cfg(feature = "lzma")]
|
||||
ZipFileReader::Lzma(r) => r.read(buf),
|
||||
#[cfg(feature = "xz")]
|
||||
ZipFileReader::Xz(r) => r.read(buf),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -236,6 +245,8 @@ impl<'a> ZipFileReader<'a> {
|
|||
}
|
||||
return;
|
||||
}
|
||||
#[cfg(feature = "xz")]
|
||||
ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(),
|
||||
};
|
||||
let _ = copy(&mut inner, &mut sink());
|
||||
}
|
||||
|
@ -396,6 +407,15 @@ pub(crate) fn make_reader(
|
|||
ae2_encrypted,
|
||||
)))
|
||||
}
|
||||
#[cfg(feature = "xz")]
|
||||
CompressionMethod::Xz => {
|
||||
let reader = XzDecoder::new(reader);
|
||||
Ok(ZipFileReader::Xz(Crc32Reader::new(
|
||||
reader,
|
||||
crc32,
|
||||
ae2_encrypted,
|
||||
)))
|
||||
}
|
||||
_ => Err(UnsupportedArchive("Compression method not supported")),
|
||||
}
|
||||
}
|
||||
|
|
267
src/read/xz.rs
Normal file
267
src/read/xz.rs
Normal file
|
@ -0,0 +1,267 @@
|
|||
use crc32fast::Hasher;
|
||||
use lzma_rs::decompress::raw::Lzma2Decoder;
|
||||
use std::{
|
||||
collections::VecDeque,
|
||||
io::{BufRead, BufReader, Error, Read, Result, Write},
|
||||
};
|
||||
|
||||
/// Streaming reader that decodes an XZ container wrapped around `R`.
#[derive(Debug)]
pub struct XzDecoder<R> {
    /// Buffered access to the compressed input.
    compressed_reader: BufReader<R>,
    /// Number of bytes consumed from the current XZ stream; zero means a
    /// stream header has not been read yet.
    stream_size: usize,
    /// Decompressed bytes that did not fit into the caller's buffer.
    buf: VecDeque<u8>,
    /// Size in bytes of the per-block check field announced by the stream flags.
    check_size: usize,
    /// One `(unpadded size, uncompressed size)` pair per decoded block,
    /// compared against the stream index once it is reached.
    records: Vec<(usize, usize)>,
    /// Stream flags taken from the stream header, compared against the footer.
    flags: [u8; 2],
}

impl<R: Read> XzDecoder<R> {
    /// Creates a decoder that reads XZ data from `inner`.
    ///
    /// No input is consumed until the first call to `read`.
    pub fn new(inner: R) -> Self {
        let compressed_reader = BufReader::new(inner);
        Self {
            compressed_reader,
            stream_size: 0,
            buf: VecDeque::new(),
            check_size: 0,
            records: Vec::new(),
            flags: [0; 2],
        }
    }
}
|
||||
|
||||
/// Adapter around a buffered reader that adds every consumed byte to an
/// external counter.
struct CountReader<'a, R: BufRead> {
    /// The reader that actually supplies the bytes.
    inner: &'a mut R,
    /// Running total of bytes consumed through this adapter.
    count: &'a mut usize,
}

impl<R: BufRead> Read for CountReader<'_, R> {
    /// Reads from the wrapped reader and adds the number of bytes read to
    /// the counter.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        let n = self.inner.read(buf)?;
        *self.count += n;
        Ok(n)
    }
}

impl<R: BufRead> BufRead for CountReader<'_, R> {
    /// Exposes the wrapped reader's buffer; peeking does not change the counter.
    fn fill_buf(&mut self) -> Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Marks `amt` bytes as consumed on the wrapped reader and counts them.
    fn consume(&mut self, amt: usize) {
        *self.count += amt;
        self.inner.consume(amt);
    }
}
|
||||
|
||||
/// Write sink that fills a fixed output slice first and spills everything
/// that no longer fits into an overflow queue.
struct BufWriter<'a> {
    /// Destination slice (the caller's read buffer).
    inner: &'a mut [u8],
    /// Number of bytes already placed into `inner`.
    written: &'a mut usize,
    /// Total number of bytes accepted, whether stored in `inner` or `rest`.
    total: &'a mut usize,
    /// Overflow queue used once `inner` is full.
    rest: &'a mut VecDeque<u8>,
}

impl<'a> Write for BufWriter<'a> {
    /// Accepts bytes from `buf`.
    ///
    /// While the slice still has room, only as many bytes as fit are taken
    /// (a partial write); once it is full, the whole input is appended to
    /// the overflow queue.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        let capacity = self.inner.len();
        let start = *self.written;
        if capacity <= start {
            // The slice is full: queue the complete input.
            self.rest.extend(buf.iter());
            *self.total += buf.len();
            return Ok(buf.len());
        }
        let taken = std::cmp::min(buf.len(), capacity - start);
        let end = start + taken;
        self.inner[start..end].copy_from_slice(&buf[..taken]);
        *self.written = end;
        *self.total += taken;
        Ok(taken)
    }

    /// Nothing is buffered internally, so flushing is a no-op.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}
|
||||
|
||||
/// Builds an `Err` holding an I/O error of kind `InvalidData` with message `s`.
fn error<T>(s: &'static str) -> Result<T> {
    let invalid_data = Error::new(std::io::ErrorKind::InvalidData, s);
    Err(invalid_data)
}
|
||||
|
||||
fn get_multibyte<R: Read>(input: &mut R, hasher: &mut Hasher) -> Result<u64> {
|
||||
let mut result = 0;
|
||||
for i in 0..9 {
|
||||
let mut b = [0u8; 1];
|
||||
input.read_exact(&mut b)?;
|
||||
hasher.update(&b);
|
||||
let b = b[0];
|
||||
result ^= ((b & 0x7F) as u64) << (i * 7);
|
||||
if (b & 0x80) == 0 {
|
||||
return Ok(result);
|
||||
}
|
||||
}
|
||||
error("Invalid multi-byte encoding")
|
||||
}
|
||||
|
||||
impl<R: Read> Read for XzDecoder<R> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
||||
if !self.buf.is_empty() {
|
||||
let len = std::cmp::min(buf.len(), self.buf.len());
|
||||
buf[..len].copy_from_slice(&self.buf.as_slices().0[..len]);
|
||||
self.buf.drain(..len);
|
||||
return Ok(len);
|
||||
}
|
||||
let mut reader = CountReader {
|
||||
inner: &mut self.compressed_reader,
|
||||
count: &mut self.stream_size,
|
||||
};
|
||||
if *reader.count == 0 {
|
||||
let mut b = [0u8; 12];
|
||||
match reader.read(&mut b) {
|
||||
Ok(0) => return Ok(0),
|
||||
Err(e) => return Err(e),
|
||||
_ => (),
|
||||
}
|
||||
if b[..6] != b"\xFD7zXZ\0"[..] {
|
||||
return error("Invalid XZ header");
|
||||
}
|
||||
self.flags = [b[6], b[7]];
|
||||
if self.flags[0] != 0 || self.flags[1] & 0xF0 != 0 {
|
||||
return error("Invalid XZ stream flags");
|
||||
}
|
||||
match self.flags[1] & 0x0F {
|
||||
0 => self.check_size = 0,
|
||||
1 => self.check_size = 4,
|
||||
_ => return error("Unsupported XZ stream flags"),
|
||||
}
|
||||
let mut digest = Hasher::new();
|
||||
digest.update(&self.flags);
|
||||
if digest.finalize().to_le_bytes() != b[8..] {
|
||||
return error("Invalid XZ stream flags CRC32");
|
||||
}
|
||||
}
|
||||
|
||||
let block_begin = *reader.count;
|
||||
let mut b = [0u8; 1];
|
||||
reader.read_exact(&mut b)?;
|
||||
|
||||
let mut digest = Hasher::new();
|
||||
digest.update(&b);
|
||||
if b[0] == 0 {
|
||||
// index
|
||||
let num_records = get_multibyte(&mut reader, &mut digest)?;
|
||||
if num_records != self.records.len() as u64 {
|
||||
return error("Invalid XZ index record count");
|
||||
}
|
||||
for (unpadded_size, total) in &self.records {
|
||||
if get_multibyte(&mut reader, &mut digest)? != *unpadded_size as u64 {
|
||||
return error("Invalid XZ unpadded size");
|
||||
}
|
||||
if get_multibyte(&mut reader, &mut digest)? != *total as u64 {
|
||||
return error("Invalid XZ uncompressed size");
|
||||
}
|
||||
}
|
||||
let mut size = *reader.count - block_begin;
|
||||
let mut b = vec![0u8; (4 - (size & 0x3)) & 0x3];
|
||||
reader.read_exact(b.as_mut_slice())?;
|
||||
if !b.iter().all(|&b| b == 0) {
|
||||
return error("Invalid XZ index padding");
|
||||
}
|
||||
digest.update(b.as_slice());
|
||||
size += b.len();
|
||||
let mut b = [0u8; 16];
|
||||
reader.read_exact(&mut b)?;
|
||||
if digest.finalize().to_le_bytes() != b[..4] {
|
||||
return error("Invalid XZ index CRC32");
|
||||
}
|
||||
let mut digest = Hasher::new();
|
||||
digest.update(&b[8..14]);
|
||||
if digest.finalize().to_le_bytes() != b[4..8] {
|
||||
return error("Invalid XZ footer CRC32");
|
||||
}
|
||||
if b[8..12] != ((size >> 2) as u32).to_le_bytes() {
|
||||
return error("Invalid XZ footer size");
|
||||
}
|
||||
if self.flags != b[12..14] {
|
||||
return error("Invalid XZ footer flags");
|
||||
}
|
||||
if &b[14..16] != b"YZ" {
|
||||
return error("Invalid XZ footer magic");
|
||||
}
|
||||
let mut b = vec![0u8; (4 - (*reader.count & 0x3)) & 0x3];
|
||||
reader.read_exact(b.as_mut_slice())?;
|
||||
if !b.iter().all(|&b| b == 0) {
|
||||
return error("Invalid XZ footer padding");
|
||||
}
|
||||
*reader.count = 0;
|
||||
return self.read(buf);
|
||||
}
|
||||
|
||||
// block
|
||||
let header_end = ((b[0] as usize) << 2) - 1 + *reader.count;
|
||||
let mut b = [0u8; 1];
|
||||
reader.read_exact(&mut b)?;
|
||||
digest.update(&b);
|
||||
let flags = b[0];
|
||||
let num_filters = (flags & 0x03) + 1;
|
||||
|
||||
if flags & 0x3C != 0 {
|
||||
return error("Invalid XZ block flags");
|
||||
}
|
||||
if flags & 0x40 != 0 {
|
||||
get_multibyte(&mut reader, &mut digest)?;
|
||||
}
|
||||
if flags & 0x80 != 0 {
|
||||
get_multibyte(&mut reader, &mut digest)?;
|
||||
}
|
||||
for _ in 0..num_filters {
|
||||
let filter_id = get_multibyte(&mut reader, &mut digest)?;
|
||||
if filter_id != 0x21 {
|
||||
return error("Unsupported XZ filter ID");
|
||||
}
|
||||
let properties_size = get_multibyte(&mut reader, &mut digest)?;
|
||||
if properties_size != 1 {
|
||||
return error("Unsupported XZ filter properties size");
|
||||
}
|
||||
reader.read_exact(&mut b)?;
|
||||
if b[0] & 0xC0 != 0 {
|
||||
return error("Unsupported XZ filter properties");
|
||||
}
|
||||
digest.update(&b);
|
||||
}
|
||||
let mut b = vec![0u8; header_end - *reader.count];
|
||||
reader.read_exact(b.as_mut_slice())?;
|
||||
if !b.iter().all(|&b| b == 0) {
|
||||
return error("Invalid XZ block header padding");
|
||||
}
|
||||
digest.update(b.as_slice());
|
||||
|
||||
let mut b = [0u8; 4];
|
||||
reader.read_exact(&mut b)?;
|
||||
if digest.finalize().to_le_bytes() != b {
|
||||
return error("Invalid XZ block header CRC32");
|
||||
}
|
||||
let mut written = 0;
|
||||
let mut total = 0;
|
||||
Lzma2Decoder::new().decompress(
|
||||
&mut reader,
|
||||
&mut BufWriter {
|
||||
inner: buf,
|
||||
written: &mut written,
|
||||
rest: &mut self.buf,
|
||||
total: &mut total,
|
||||
},
|
||||
)?;
|
||||
|
||||
let unpadded_size = *reader.count - block_begin;
|
||||
self.records.push((unpadded_size, total));
|
||||
// ignore check here since zip itself will check it
|
||||
let mut b = vec![0u8; ((4 - (unpadded_size & 0x3)) & 0x3) + self.check_size];
|
||||
reader.read_exact(b.as_mut_slice())?;
|
||||
if !b.as_slice()[..self.check_size].iter().all(|&b| b == 0) {
|
||||
return error("Invalid XZ block padding");
|
||||
}
|
||||
Ok(written)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> XzDecoder<R> {
|
||||
pub fn into_inner(self) -> R {
|
||||
self.compressed_reader.into_inner()
|
||||
}
|
||||
}
|
|
@ -570,6 +570,8 @@ impl ZipFileData {
|
|||
CompressionMethod::Deflate64 => 21,
|
||||
#[cfg(feature = "lzma")]
|
||||
CompressionMethod::Lzma => 63,
|
||||
#[cfg(feature = "xz")]
|
||||
CompressionMethod::Xz => 63,
|
||||
// APPNOTE doesn't specify a version for Zstandard
|
||||
_ => DEFAULT_VERSION as u16,
|
||||
};
|
||||
|
|
|
@ -174,7 +174,7 @@ pub(crate) mod zip_writer {
|
|||
#[doc(inline)]
|
||||
pub use self::sealed::FileOptionExtension;
|
||||
use crate::result::ZipError::InvalidArchive;
|
||||
#[cfg(feature = "lzma")]
|
||||
#[cfg(any(feature = "lzma", feature = "xz"))]
|
||||
use crate::result::ZipError::UnsupportedArchive;
|
||||
use crate::unstable::path_to_string;
|
||||
use crate::unstable::LittleEndianWriteExt;
|
||||
|
@ -1702,6 +1702,10 @@ impl<W: Write + Seek> GenericZipWriter<W> {
|
|||
CompressionMethod::Lzma => {
|
||||
Err(UnsupportedArchive("LZMA isn't supported for compression"))
|
||||
}
|
||||
#[cfg(feature = "xz")]
|
||||
CompressionMethod::Xz => {
|
||||
Err(UnsupportedArchive("XZ isn't supported for compression"))
|
||||
}
|
||||
CompressionMethod::Unsupported(..) => {
|
||||
Err(ZipError::UnsupportedArchive("Unsupported compression"))
|
||||
}
|
||||
|
|
BIN
tests/data/xz.zip
Normal file
BIN
tests/data/xz.zip
Normal file
Binary file not shown.
19
tests/xz.rs
Normal file
19
tests/xz.rs
Normal file
|
@ -0,0 +1,19 @@
|
|||
#![cfg(feature = "xz")]
|
||||
|
||||
use std::io::{self, Read};
|
||||
use zip::ZipArchive;
|
||||
|
||||
/// Opens the bundled XZ-compressed archive and checks that `hello.txt`
/// decompresses to the expected text.
#[test]
fn decompress_xz() -> io::Result<()> {
    let bytes: Vec<u8> = include_bytes!("data/xz.zip").to_vec();
    let cursor = io::Cursor::new(bytes);
    let mut archive = ZipArchive::new(cursor).expect("couldn't open test zip file");

    let mut file = archive.by_name("hello.txt")?;
    assert_eq!("hello.txt", file.name());

    let mut content = Vec::new();
    file.read_to_end(&mut content)?;
    let text = String::from_utf8(content).unwrap();
    assert_eq!("Hello world\n", text);
    Ok(())
}
|
Loading…
Add table
Reference in a new issue