diff --git a/Cargo.toml b/Cargo.toml
index 7b4b0335..b161f115 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -78,6 +78,7 @@ deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"]
 deflate-zopfli = ["zopfli", "_deflate-any"]
 lzma = ["lzma-rs/stream"]
 unreserved = []
+xz = ["lzma-rs/raw_decoder"]
 default = [
     "aes-crypto",
     "bzip2",
@@ -86,6 +87,7 @@ default = [
     "lzma",
     "time",
     "zstd",
+    "xz",
 ]
 
 [[bench]]
diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict
index db54193c..0b11aab6 100644
--- a/fuzz/fuzz.dict
+++ b/fuzz/fuzz.dict
@@ -15,8 +15,11 @@ compression_method_deflate="\x07\x00"
 compression_method_deflate64="\x09\x00"
 compression_method_bzip2="\x0C\x00"
 compression_method_lzma="\x0E\x00"
+compression_method_xz="\x5F\x00"
 compression_method_zstd="]\x00"
 compression_method_aes="C\x00"
+xz_header_magic="\xFD7zXZ\x00"
+xz_footer_magic="YZ"
 extra_field_zip64="\x01\x00"
 extra_field_aes="\x99\x01"
 extra_field_extended_timestamp="\x55\x54"
@@ -25,4 +28,4 @@ extra_field_utf8_filename="\x75\x70"
 "\xFF\xFF"
 "/"
 "/./"
-"/../"
\ No newline at end of file
+"/../"
diff --git a/src/compression.rs b/src/compression.rs
index 33693cf7..0dd21017 100644
--- a/src/compression.rs
+++ b/src/compression.rs
@@ -38,6 +38,9 @@ pub enum CompressionMethod {
     /// Compress the file using LZMA
     #[cfg(feature = "lzma")]
     Lzma,
+    /// Compress the file using XZ
+    #[cfg(feature = "xz")]
+    Xz,
     /// Unsupported compression method
     #[cfg_attr(
         not(fuzzing),
@@ -80,6 +83,9 @@ impl CompressionMethod {
     #[cfg(not(feature = "zstd"))]
     pub const ZSTD: Self = CompressionMethod::Unsupported(93);
     pub const MP3: Self = CompressionMethod::Unsupported(94);
+    #[cfg(feature = "xz")]
+    pub const XZ: Self = CompressionMethod::Xz;
+    #[cfg(not(feature = "xz"))]
     pub const XZ: Self = CompressionMethod::Unsupported(95);
     pub const JPEG: Self = CompressionMethod::Unsupported(96);
     pub const WAVPACK: Self = CompressionMethod::Unsupported(97);
@@ -101,6 +107,8 @@ impl CompressionMethod {
             12 => CompressionMethod::Bzip2,
             #[cfg(feature = "lzma")]
             14 => CompressionMethod::Lzma,
+            #[cfg(feature = "xz")]
+            95 => CompressionMethod::Xz,
             #[cfg(feature = "zstd")]
             93 => CompressionMethod::Zstd,
             #[cfg(feature = "aes-crypto")]
@@ -134,6 +142,8 @@ impl CompressionMethod {
             CompressionMethod::Zstd => 93,
             #[cfg(feature = "lzma")]
             CompressionMethod::Lzma => 14,
+            #[cfg(feature = "xz")]
+            CompressionMethod::Xz => 95,
             #[allow(deprecated)]
             CompressionMethod::Unsupported(v) => v,
         }
diff --git a/src/lib.rs b/src/lib.rs
index f6fbb89f..a78bb184 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,7 @@
 //! | Deflate64 | ✅ | |
 //! | Bzip2 | ✅ | ✅ |
 //! | LZMA | ✅ | |
+//! | XZ | ✅ | |
 //! | AES encryption | ✅ | ✅ |
 //! | ZipCrypto deprecated encryption | ✅ | ✅ |
 //!
diff --git a/src/read.rs b/src/read.rs
index 44da9881..98ffd2b6 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -48,6 +48,9 @@ pub(crate) mod stream;
 #[cfg(feature = "lzma")]
 pub(crate) mod lzma;
 
+#[cfg(feature = "xz")]
+pub(crate) mod xz;
+
 // Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
 pub(crate) mod zip_archive {
     use indexmap::IndexMap;
@@ -122,6 +125,8 @@ use crate::aes::PWD_VERIFY_LENGTH;
 use crate::extra_fields::UnicodeExtraField;
 #[cfg(feature = "lzma")]
 use crate::read::lzma::LzmaDecoder;
+#[cfg(feature = "xz")]
+use crate::read::xz::XzDecoder;
 use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive};
 use crate::spec::is_dir;
 use crate::types::ffi::S_IFLNK;
@@ -190,6 +195,8 @@ pub(crate) enum ZipFileReader<'a> {
     Zstd(Crc32Reader<ZstdDecoder<'a, io::BufReader<CryptoReader<'a>>>>),
     #[cfg(feature = "lzma")]
     Lzma(Crc32Reader<Box<LzmaDecoder<CryptoReader<'a>>>>),
+    #[cfg(feature = "xz")]
+    Xz(Crc32Reader<XzDecoder<CryptoReader<'a>>>),
 }
 
 impl<'a> Read for ZipFileReader<'a> {
@@ -208,6 +215,8 @@ impl<'a> Read for ZipFileReader<'a> {
             ZipFileReader::Zstd(r) => r.read(buf),
             #[cfg(feature = "lzma")]
             ZipFileReader::Lzma(r) => r.read(buf),
+            #[cfg(feature = "xz")]
+            ZipFileReader::Xz(r) => r.read(buf),
         }
     }
 }
@@ -236,6 +245,8 @@ impl<'a> ZipFileReader<'a> {
                 }
                 return;
             }
+            #[cfg(feature = "xz")]
+            ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(),
         };
         let _ = copy(&mut inner, &mut sink());
     }
@@ -396,6 +407,15 @@ pub(crate) fn make_reader(
                 ae2_encrypted,
             )))
         }
+        #[cfg(feature = "xz")]
+        CompressionMethod::Xz => {
+            let reader = XzDecoder::new(reader);
+            Ok(ZipFileReader::Xz(Crc32Reader::new(
+                reader,
+                crc32,
+                ae2_encrypted,
+            )))
+        }
         _ => Err(UnsupportedArchive("Compression method not supported")),
     }
 }
diff --git a/src/read/xz.rs b/src/read/xz.rs
new file mode 100644
index 00000000..50ee38d7
--- /dev/null
+++ b/src/read/xz.rs
@@ -0,0 +1,310 @@
+//! Streaming decoder for XZ-compressed ZIP entries (compression method 95).
+//!
+//! Only LZMA2-filtered blocks are accepted, and the stream check must be "none" or CRC32. Block
+//! checks are skipped rather than verified because the enclosing ZIP entry has its own CRC32.
+
+use crc32fast::Hasher;
+use lzma_rs::decompress::raw::Lzma2Decoder;
+use std::{
+    collections::VecDeque,
+    io::{BufRead, BufReader, Error, Read, Result, Write},
+};
+
+/// [`Read`] adaptor that decodes an XZ stream pulled from `R`.
+#[derive(Debug)]
+pub struct XzDecoder<R: Read> {
+    compressed_reader: BufReader<R>,
+    /// Bytes consumed from the current stream; zero means a stream header comes next.
+    stream_size: usize,
+    /// Decompressed bytes that did not fit into the caller's buffer.
+    buf: VecDeque<u8>,
+    /// Size in bytes of each block's check field, chosen by the stream flags.
+    check_size: usize,
+    /// `(unpadded size, uncompressed size)` of each block decoded from the current stream.
+    records: Vec<(usize, usize)>,
+    /// Stream flags from the header; the footer must repeat them.
+    flags: [u8; 2],
+}
+
+impl<R: Read> XzDecoder<R> {
+    /// Wraps `inner` in a buffered reader with all parsing state reset.
+    pub fn new(inner: R) -> Self {
+        XzDecoder {
+            compressed_reader: BufReader::new(inner),
+            stream_size: 0,
+            buf: VecDeque::new(),
+            check_size: 0,
+            records: vec![],
+            flags: [0, 0],
+        }
+    }
+}
+
+/// Forwards to `inner` and adds every byte consumed to `count`.
+struct CountReader<'a, R: BufRead> {
+    inner: &'a mut R,
+    count: &'a mut usize,
+}
+
+impl<R: BufRead> Read for CountReader<'_, R> {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let count = self.inner.read(buf)?;
+        *self.count += count;
+        Ok(count)
+    }
+}
+
+impl<R: BufRead> BufRead for CountReader<'_, R> {
+    fn fill_buf(&mut self) -> Result<&[u8]> {
+        self.inner.fill_buf()
+    }
+
+    fn consume(&mut self, amt: usize) {
+        self.inner.consume(amt);
+        *self.count += amt;
+    }
+}
+
+/// Sink for decompressed data: fills `inner` first, then queues the overflow in `rest`.
+struct BufWriter<'a> {
+    inner: &'a mut [u8],
+    /// Number of bytes of `inner` filled so far.
+    written: &'a mut usize,
+    /// Number of bytes accepted in total, including those queued in `rest`.
+    total: &'a mut usize,
+    rest: &'a mut VecDeque<u8>,
+}
+
+impl<'a> Write for BufWriter<'a> {
+    fn write(&mut self, buf: &[u8]) -> Result<usize> {
+        if self.inner.len() > *self.written {
+            let len = std::cmp::min(buf.len(), self.inner.len() - *self.written);
+            self.inner[*self.written..*self.written + len].copy_from_slice(&buf[..len]);
+            *self.written += len;
+            *self.total += len;
+            Ok(len)
+        } else {
+            self.rest.extend(buf.iter());
+            *self.total += buf.len();
+            Ok(buf.len())
+        }
+    }
+
+    fn flush(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Returns an `InvalidData` I/O error carrying the message `s`.
+fn error<T>(s: &'static str) -> Result<T> {
+    Err(Error::new(std::io::ErrorKind::InvalidData, s))
+}
+
+/// Reads a variable-length integer (seven bits per byte, low group first, at most nine bytes)
+/// and feeds every byte read into `hasher`.
+fn get_multibyte<R: Read>(input: &mut R, hasher: &mut Hasher) -> Result<u64> {
+    let mut result = 0;
+    for i in 0..9 {
+        let mut b = [0u8; 1];
+        input.read_exact(&mut b)?;
+        hasher.update(&b);
+        let b = b[0];
+        result ^= ((b & 0x7F) as u64) << (i * 7);
+        if (b & 0x80) == 0 {
+            return Ok(result);
+        }
+    }
+    error("Invalid multi-byte encoding")
+}
+
+impl<R: Read> Read for XzDecoder<R> {
+    /// Hands out queued bytes first; otherwise parses the next stream header, block or index.
+    ///
+    /// At most one block is decompressed per call; output that does not fit into `buf` is
+    /// queued for later calls. Format violations found here are `InvalidData` errors.
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        if !self.buf.is_empty() {
+            // A `VecDeque` can be stored as two slices. Serve only from the first one so the
+            // range below always stays in bounds; the caller reads again for the remainder.
+            let front = self.buf.as_slices().0;
+            let len = std::cmp::min(buf.len(), front.len());
+            buf[..len].copy_from_slice(&front[..len]);
+            self.buf.drain(..len);
+            return Ok(len);
+        }
+        let mut reader = CountReader {
+            inner: &mut self.compressed_reader,
+            count: &mut self.stream_size,
+        };
+        if *reader.count == 0 {
+            let mut b = [0u8; 12];
+            // No bytes at all where a stream header would start is the regular end of input.
+            let n = reader.read(&mut b)?;
+            if n == 0 {
+                return Ok(0);
+            }
+            // `read` may legally return fewer than 12 bytes; fetch the rest of the header.
+            reader.read_exact(&mut b[n..])?;
+            if b[..6] != b"\xFD7zXZ\0"[..] {
+                return error("Invalid XZ header");
+            }
+            self.flags = [b[6], b[7]];
+            if self.flags[0] != 0 || self.flags[1] & 0xF0 != 0 {
+                return error("Invalid XZ stream flags");
+            }
+            match self.flags[1] & 0x0F {
+                0 => self.check_size = 0,
+                1 => self.check_size = 4,
+                _ => return error("Unsupported XZ stream flags"),
+            }
+            let mut digest = Hasher::new();
+            digest.update(&self.flags);
+            if digest.finalize().to_le_bytes() != b[8..] {
+                return error("Invalid XZ stream flags CRC32");
+            }
+        }
+
+        let block_begin = *reader.count;
+        let mut b = [0u8; 1];
+        reader.read_exact(&mut b)?;
+
+        let mut digest = Hasher::new();
+        digest.update(&b);
+        if b[0] == 0 {
+            // index
+            let num_records = get_multibyte(&mut reader, &mut digest)?;
+            if num_records != self.records.len() as u64 {
+                return error("Invalid XZ index record count");
+            }
+            for (unpadded_size, total) in &self.records {
+                if get_multibyte(&mut reader, &mut digest)? != *unpadded_size as u64 {
+                    return error("Invalid XZ unpadded size");
+                }
+                if get_multibyte(&mut reader, &mut digest)? != *total as u64 {
+                    return error("Invalid XZ uncompressed size");
+                }
+            }
+            let mut size = *reader.count - block_begin;
+            let mut b = vec![0u8; (4 - (size & 0x3)) & 0x3];
+            reader.read_exact(b.as_mut_slice())?;
+            if !b.iter().all(|&b| b == 0) {
+                return error("Invalid XZ index padding");
+            }
+            digest.update(b.as_slice());
+            size += b.len();
+            // 4 bytes of index CRC32 followed by the 12-byte stream footer
+            let mut b = [0u8; 16];
+            reader.read_exact(&mut b)?;
+            if digest.finalize().to_le_bytes() != b[..4] {
+                return error("Invalid XZ index CRC32");
+            }
+            let mut digest = Hasher::new();
+            digest.update(&b[8..14]);
+            if digest.finalize().to_le_bytes() != b[4..8] {
+                return error("Invalid XZ footer CRC32");
+            }
+            if b[8..12] != ((size >> 2) as u32).to_le_bytes() {
+                return error("Invalid XZ footer size");
+            }
+            if self.flags != b[12..14] {
+                return error("Invalid XZ footer flags");
+            }
+            if &b[14..16] != b"YZ" {
+                return error("Invalid XZ footer magic");
+            }
+            let mut b = vec![0u8; (4 - (*reader.count & 0x3)) & 0x3];
+            reader.read_exact(b.as_mut_slice())?;
+            if !b.iter().all(|&b| b == 0) {
+                return error("Invalid XZ footer padding");
+            }
+            // This stream is finished: forget its blocks so that a following stream's index is
+            // validated against that stream's own blocks only.
+            self.records.clear();
+            *reader.count = 0;
+            return self.read(buf);
+        }
+
+        // block
+        let header_end = ((b[0] as usize) << 2) - 1 + *reader.count;
+        let mut b = [0u8; 1];
+        reader.read_exact(&mut b)?;
+        digest.update(&b);
+        let flags = b[0];
+        let num_filters = (flags & 0x03) + 1;
+
+        if flags & 0x3C != 0 {
+            return error("Invalid XZ block flags");
+        }
+        if flags & 0x40 != 0 {
+            get_multibyte(&mut reader, &mut digest)?;
+        }
+        if flags & 0x80 != 0 {
+            get_multibyte(&mut reader, &mut digest)?;
+        }
+        for _ in 0..num_filters {
+            let filter_id = get_multibyte(&mut reader, &mut digest)?;
+            if filter_id != 0x21 {
+                return error("Unsupported XZ filter ID");
+            }
+            let properties_size = get_multibyte(&mut reader, &mut digest)?;
+            if properties_size != 1 {
+                return error("Unsupported XZ filter properties size");
+            }
+            reader.read_exact(&mut b)?;
+            if b[0] & 0xC0 != 0 {
+                return error("Unsupported XZ filter properties");
+            }
+            digest.update(&b);
+        }
+        // The header fields may already extend past the declared header size; report that as
+        // corrupt input instead of letting the subtraction underflow.
+        let header_padding = match header_end.checked_sub(*reader.count) {
+            Some(len) => len,
+            None => return error("Invalid XZ block header size"),
+        };
+        let mut b = vec![0u8; header_padding];
+        reader.read_exact(b.as_mut_slice())?;
+        if !b.iter().all(|&b| b == 0) {
+            return error("Invalid XZ block header padding");
+        }
+        digest.update(b.as_slice());
+
+        let mut b = [0u8; 4];
+        reader.read_exact(&mut b)?;
+        if digest.finalize().to_le_bytes() != b {
+            return error("Invalid XZ block header CRC32");
+        }
+        let mut written = 0;
+        let mut total = 0;
+        Lzma2Decoder::new().decompress(
+            &mut reader,
+            &mut BufWriter {
+                inner: buf,
+                written: &mut written,
+                rest: &mut self.buf,
+                total: &mut total,
+            },
+        )?;
+
+        // Header plus compressed data; block padding rounds this up to a multiple of four.
+        let unpadded_size = *reader.count - block_begin;
+        let padding_len = (4 - (unpadded_size & 0x3)) & 0x3;
+        // The index's unpadded size also counts the check field (but not the padding).
+        self.records.push((unpadded_size + self.check_size, total));
+        // ignore check here since zip itself will check it
+        let mut b = vec![0u8; padding_len + self.check_size];
+        reader.read_exact(b.as_mut_slice())?;
+        // The padding precedes the check, so only the first `padding_len` bytes must be zero.
+        if !b[..padding_len].iter().all(|&b| b == 0) {
+            return error("Invalid XZ block padding");
+        }
+        Ok(written)
+    }
+}
+
+impl<R: Read> XzDecoder<R> {
+    /// Unwraps the decoder, returning the underlying reader (unread buffered data is lost).
+    pub fn into_inner(self) -> R {
+        self.compressed_reader.into_inner()
+    }
+}
diff --git a/src/types.rs b/src/types.rs
index f1921cb4..91031a08 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -570,6 +570,8 @@ impl ZipFileData {
             CompressionMethod::Deflate64 => 21,
             #[cfg(feature = "lzma")]
             CompressionMethod::Lzma => 63,
+            #[cfg(feature = "xz")]
+            CompressionMethod::Xz => 63,
             // APPNOTE doesn't specify a version for Zstandard
             _ => DEFAULT_VERSION as u16,
         };
diff --git a/src/write.rs b/src/write.rs
index 373a20a0..a8a31a53 100644
--- a/src/write.rs
+++ b/src/write.rs
@@ -174,7 +174,7 @@ pub(crate) mod zip_writer {
 #[doc(inline)]
 pub use self::sealed::FileOptionExtension;
 use crate::result::ZipError::InvalidArchive;
-#[cfg(feature = "lzma")]
+#[cfg(any(feature = "lzma", feature = "xz"))]
 use crate::result::ZipError::UnsupportedArchive;
 use crate::unstable::path_to_string;
 use crate::unstable::LittleEndianWriteExt;
@@ -1702,6 +1702,10 @@ impl<W: Write + Seek> GenericZipWriter<W> {
                 CompressionMethod::Lzma => {
                     Err(UnsupportedArchive("LZMA isn't supported for compression"))
                 }
+                #[cfg(feature = "xz")]
+                CompressionMethod::Xz => {
+                    Err(UnsupportedArchive("XZ isn't supported for compression"))
+                }
                 CompressionMethod::Unsupported(..) => {
                     Err(ZipError::UnsupportedArchive("Unsupported compression"))
                 }
diff --git a/tests/data/xz.zip b/tests/data/xz.zip
new file mode 100644
index 00000000..fcb7f330
Binary files /dev/null and b/tests/data/xz.zip differ
diff --git a/tests/xz.rs b/tests/xz.rs
new file mode 100644
index 00000000..110b4085
--- /dev/null
+++ b/tests/xz.rs
@@ -0,0 +1,19 @@
+#![cfg(feature = "xz")]
+
+use std::io::{self, Read};
+use zip::ZipArchive;
+
+#[test]
+fn decompress_xz() -> io::Result<()> {
+    let mut v = Vec::new();
+    v.extend_from_slice(include_bytes!("data/xz.zip"));
+    let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file");
+
+    let mut file = archive.by_name("hello.txt")?;
+    assert_eq!("hello.txt", file.name());
+
+    let mut content = Vec::new();
+    file.read_to_end(&mut content)?;
+    assert_eq!("Hello world\n", String::from_utf8(content).unwrap());
+    Ok(())
+}