From 421e1dd8fba9b19b945d85adfb5f779cd496db56 Mon Sep 17 00:00:00 2001 From: LoveSy Date: Sat, 22 Jun 2024 00:45:37 +0800 Subject: [PATCH 1/4] feat: support XZ decompression --- Cargo.toml | 2 + fuzz/fuzz.dict | 5 +- src/compression.rs | 10 ++ src/lib.rs | 1 + src/read.rs | 20 ++++ src/read/xz.rs | 267 +++++++++++++++++++++++++++++++++++++++++++++ src/types.rs | 2 + src/write.rs | 6 +- tests/data/xz.zip | Bin 0 -> 196 bytes tests/xz.rs | 19 ++++ 10 files changed, 330 insertions(+), 2 deletions(-) create mode 100644 src/read/xz.rs create mode 100644 tests/data/xz.zip create mode 100644 tests/xz.rs diff --git a/Cargo.toml b/Cargo.toml index 7b4b0335..b161f115 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -78,6 +78,7 @@ deflate-zlib-ng = ["flate2/zlib-ng", "deflate-flate2"] deflate-zopfli = ["zopfli", "_deflate-any"] lzma = ["lzma-rs/stream"] unreserved = [] +xz = ["lzma-rs/raw_decoder"] default = [ "aes-crypto", "bzip2", @@ -86,6 +87,7 @@ default = [ "lzma", "time", "zstd", + "xz", ] [[bench]] diff --git a/fuzz/fuzz.dict b/fuzz/fuzz.dict index db54193c..0b11aab6 100644 --- a/fuzz/fuzz.dict +++ b/fuzz/fuzz.dict @@ -15,8 +15,11 @@ compression_method_deflate="\x07\x00" compression_method_deflate64="\x09\x00" compression_method_bzip2="\x0C\x00" compression_method_lzma="\x0E\x00" +compression_method_xz="\x5F\x00" compression_method_zstd="]\x00" compression_method_aes="C\x00" +xz_header_magic="\xFD7zXZ\x00" +xz_footer_magic="YZ" extra_field_zip64="\x01\x00" extra_field_aes="\x99\x01" extra_field_extended_timestamp="\x55\x54" @@ -25,4 +28,4 @@ extra_field_utf8_filename="\x75\x70" "\xFF\xFF" "/" "/./" -"/../" \ No newline at end of file +"/../" diff --git a/src/compression.rs b/src/compression.rs index 33693cf7..0dd21017 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -38,6 +38,9 @@ pub enum CompressionMethod { /// Compress the file using LZMA #[cfg(feature = "lzma")] Lzma, + /// Compress the file using XZ + #[cfg(feature = "xz")] + Xz, /// Unsupported compression 
method #[cfg_attr( not(fuzzing), @@ -80,6 +83,9 @@ impl CompressionMethod { #[cfg(not(feature = "zstd"))] pub const ZSTD: Self = CompressionMethod::Unsupported(93); pub const MP3: Self = CompressionMethod::Unsupported(94); + #[cfg(feature = "xz")] + pub const XZ: Self = CompressionMethod::Xz; + #[cfg(not(feature = "xz"))] pub const XZ: Self = CompressionMethod::Unsupported(95); pub const JPEG: Self = CompressionMethod::Unsupported(96); pub const WAVPACK: Self = CompressionMethod::Unsupported(97); @@ -101,6 +107,8 @@ impl CompressionMethod { 12 => CompressionMethod::Bzip2, #[cfg(feature = "lzma")] 14 => CompressionMethod::Lzma, + #[cfg(feature = "xz")] + 95 => CompressionMethod::Xz, #[cfg(feature = "zstd")] 93 => CompressionMethod::Zstd, #[cfg(feature = "aes-crypto")] @@ -134,6 +142,8 @@ impl CompressionMethod { CompressionMethod::Zstd => 93, #[cfg(feature = "lzma")] CompressionMethod::Lzma => 14, + #[cfg(feature = "xz")] + CompressionMethod::Xz => 95, #[allow(deprecated)] CompressionMethod::Unsupported(v) => v, } diff --git a/src/lib.rs b/src/lib.rs index f6fbb89f..a78bb184 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ //! | Deflate64 | ✅ | | //! | Bzip2 | ✅ | ✅ | //! | LZMA | ✅ | | +//! | XZ | ✅ | | //! | AES encryption | ✅ | ✅ | //! | ZipCrypto deprecated encryption | ✅ | ✅ | //! 
diff --git a/src/read.rs b/src/read.rs index 7f9f5881..47a21310 100644 --- a/src/read.rs +++ b/src/read.rs @@ -48,6 +48,9 @@ pub(crate) mod stream; #[cfg(feature = "lzma")] pub(crate) mod lzma; +#[cfg(feature = "xz")] +pub(crate) mod xz; + // Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely pub(crate) mod zip_archive { use indexmap::IndexMap; @@ -123,6 +126,8 @@ use crate::aes::PWD_VERIFY_LENGTH; use crate::extra_fields::UnicodeExtraField; #[cfg(feature = "lzma")] use crate::read::lzma::LzmaDecoder; +#[cfg(feature = "xz")] +use crate::read::xz::XzDecoder; use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; use crate::spec::is_dir; use crate::types::ffi::S_IFLNK; @@ -191,6 +196,8 @@ pub(crate) enum ZipFileReader<'a> { Zstd(Crc32Reader>>>), #[cfg(feature = "lzma")] Lzma(Crc32Reader>>>), + #[cfg(feature = "xz")] + Xz(Crc32Reader>>), } impl<'a> Read for ZipFileReader<'a> { @@ -209,6 +216,8 @@ impl<'a> Read for ZipFileReader<'a> { ZipFileReader::Zstd(r) => r.read(buf), #[cfg(feature = "lzma")] ZipFileReader::Lzma(r) => r.read(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read(buf), } } } @@ -237,6 +246,8 @@ impl<'a> ZipFileReader<'a> { } return; } + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(), }; let _ = copy(&mut inner, &mut sink()); } @@ -397,6 +408,15 @@ pub(crate) fn make_reader( ae2_encrypted, ))) } + #[cfg(feature = "xz")] + CompressionMethod::Xz => { + let reader = XzDecoder::new(reader); + Ok(ZipFileReader::Xz(Crc32Reader::new( + reader, + crc32, + ae2_encrypted, + ))) + } _ => Err(UnsupportedArchive("Compression method not supported")), } } diff --git a/src/read/xz.rs b/src/read/xz.rs new file mode 100644 index 00000000..50ee38d7 --- /dev/null +++ b/src/read/xz.rs @@ -0,0 +1,267 @@ +use crc32fast::Hasher; +use lzma_rs::decompress::raw::Lzma2Decoder; +use std::{ + collections::VecDeque, + io::{BufRead, BufReader, Error, Read, 
Result, Write},
+};
+
+/// Streaming decoder for the `.xz` container.
+///
+/// Only the LZMA2 filter (id `0x21`) and the "none" / CRC32 check types are
+/// accepted. The per-block check value is skipped rather than verified.
+#[derive(Debug)]
+pub struct XzDecoder<R: Read> {
+    /// Buffered view of the compressed input.
+    compressed_reader: BufReader<R>,
+    /// Bytes consumed from the current stream; `0` means a stream header is expected next.
+    stream_size: usize,
+    /// Decompressed bytes that did not fit into the caller's buffer.
+    buf: VecDeque<u8>,
+    /// Size in bytes of the per-block check field announced by the stream flags.
+    check_size: usize,
+    /// `(unpadded size, uncompressed size)` of every block decoded in the current stream.
+    records: Vec<(usize, usize)>,
+    /// Stream flags from the stream header; the footer must repeat them.
+    flags: [u8; 2],
+}
+
+impl<R: Read> XzDecoder<R> {
+    /// Wraps `inner`. Nothing is read until the first call to `read`.
+    pub fn new(inner: R) -> Self {
+        XzDecoder {
+            compressed_reader: BufReader::new(inner),
+            stream_size: 0,
+            buf: VecDeque::new(),
+            check_size: 0,
+            records: vec![],
+            flags: [0, 0],
+        }
+    }
+}
+
+/// Adapter that adds every byte consumed from `inner` to `count`.
+struct CountReader<'a, R: BufRead> {
+    inner: &'a mut R,
+    count: &'a mut usize,
+}
+
+impl<R: BufRead> Read for CountReader<'_, R> {
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        let count = self.inner.read(buf)?;
+        *self.count += count;
+        Ok(count)
+    }
+}
+
+impl<R: BufRead> BufRead for CountReader<'_, R> {
+    fn fill_buf(&mut self) -> Result<&[u8]> {
+        self.inner.fill_buf()
+    }
+
+    fn consume(&mut self, amt: usize) {
+        self.inner.consume(amt);
+        *self.count += amt;
+    }
+}
+
+/// Sink that fills the caller's slice first and spills the remainder into `rest`.
+struct BufWriter<'a> {
+    /// Caller-provided output slice.
+    inner: &'a mut [u8],
+    /// Number of bytes of `inner` already filled.
+    written: &'a mut usize,
+    /// Total number of bytes accepted (slice plus spill).
+    total: &'a mut usize,
+    /// Overflow storage handed back on later `read` calls.
+    rest: &'a mut VecDeque<u8>,
+}
+
+impl<'a> Write for BufWriter<'a> {
+    fn write(&mut self, buf: &[u8]) -> Result<usize> {
+        let room = self.inner.len().saturating_sub(*self.written);
+        let accepted = if room > 0 {
+            // Partial writes are fine: callers using `write_all` come back for the rest.
+            let len = std::cmp::min(buf.len(), room);
+            self.inner[*self.written..*self.written + len].copy_from_slice(&buf[..len]);
+            *self.written += len;
+            len
+        } else {
+            self.rest.extend(buf.iter());
+            buf.len()
+        };
+        *self.total += accepted;
+        Ok(accepted)
+    }
+
+    fn flush(&mut self) -> Result<()> {
+        Ok(())
+    }
+}
+
+/// Builds an `InvalidData` I/O error carrying `s`.
+fn error<T>(s: &'static str) -> Result<T> {
+    Err(Error::new(std::io::ErrorKind::InvalidData, s))
+}
+
+/// Number of zero bytes needed to round `size` up to a multiple of four.
+fn padding_len(size: usize) -> usize {
+    (4 - (size & 0x3)) & 0x3
+}
+
+/// Reads one variable-length integer (7 bits per byte, at most 9 bytes),
+/// feeding every byte read into `hasher`.
+fn get_multibyte<R: Read>(input: &mut R, hasher: &mut Hasher) -> Result<u64> {
+    let mut result = 0;
+    for i in 0..9 {
+        let mut b = [0u8; 1];
+        input.read_exact(&mut b)?;
+        hasher.update(&b);
+        let b = b[0];
+        // A zero continuation byte is a non-minimal encoding, which the format forbids.
+        if b == 0 && i > 0 {
+            return error("Invalid multi-byte encoding");
+        }
+        result ^= ((b & 0x7F) as u64) << (i * 7);
+        if (b & 0x80) == 0 {
+            return Ok(result);
+        }
+    }
+    error("Invalid multi-byte encoding")
+}
+
+/// Outcome of decoding one structural unit of the input.
+enum Step {
+    /// The input ended cleanly where a stream header was expected.
+    Eof,
+    /// A block was decoded; this many bytes went straight into the caller's buffer.
+    Block(usize),
+    /// The index and footer of the current stream were validated.
+    StreamEnd,
+}
+
+impl<R: Read> XzDecoder<R> {
+    /// Decodes the next unit of the input: an optional stream header followed by
+    /// either one block or the index plus stream footer.
+    ///
+    /// Decompressed bytes go to `buf` first; whatever does not fit is queued in
+    /// `self.buf`.
+    fn decode_next(&mut self, buf: &mut [u8]) -> Result<Step> {
+        let mut reader = CountReader {
+            inner: &mut self.compressed_reader,
+            count: &mut self.stream_size,
+        };
+        if *reader.count == 0 {
+            let mut header = [0u8; 12];
+            // A single `read` may return fewer than 12 bytes; only a zero-length
+            // read means end of input, anything else must be completed.
+            let first = reader.read(&mut header)?;
+            if first == 0 {
+                return Ok(Step::Eof);
+            }
+            reader.read_exact(&mut header[first..])?;
+            if header[..6] != b"\xFD7zXZ\0"[..] {
+                return error("Invalid XZ header");
+            }
+            self.flags = [header[6], header[7]];
+            if self.flags[0] != 0 || self.flags[1] & 0xF0 != 0 {
+                return error("Invalid XZ stream flags");
+            }
+            self.check_size = match self.flags[1] & 0x0F {
+                0 => 0,
+                1 => 4,
+                _ => return error("Unsupported XZ stream flags"),
+            };
+            let mut digest = Hasher::new();
+            digest.update(&self.flags);
+            if digest.finalize().to_le_bytes() != header[8..] {
+                return error("Invalid XZ stream flags CRC32");
+            }
+            // Index records are per stream.
+            self.records.clear();
+        }
+
+        let block_begin = *reader.count;
+        let mut first = [0u8; 1];
+        reader.read_exact(&mut first)?;
+
+        let mut digest = Hasher::new();
+        digest.update(&first);
+        if first[0] == 0 {
+            // index
+            let num_records = get_multibyte(&mut reader, &mut digest)?;
+            if num_records != self.records.len() as u64 {
+                return error("Invalid XZ index record count");
+            }
+            for (unpadded_size, total) in &self.records {
+                if get_multibyte(&mut reader, &mut digest)? != *unpadded_size as u64 {
+                    return error("Invalid XZ unpadded size");
+                }
+                if get_multibyte(&mut reader, &mut digest)? != *total as u64 {
+                    return error("Invalid XZ uncompressed size");
+                }
+            }
+            let mut size = *reader.count - block_begin;
+            let mut padding = vec![0u8; padding_len(size)];
+            reader.read_exact(padding.as_mut_slice())?;
+            if !padding.iter().all(|&b| b == 0) {
+                return error("Invalid XZ index padding");
+            }
+            digest.update(padding.as_slice());
+            size += padding.len();
+
+            // 4 bytes index CRC32, then the 12-byte stream footer.
+            let mut b = [0u8; 16];
+            reader.read_exact(&mut b)?;
+            if digest.finalize().to_le_bytes() != b[..4] {
+                return error("Invalid XZ index CRC32");
+            }
+            let mut digest = Hasher::new();
+            digest.update(&b[8..14]);
+            if digest.finalize().to_le_bytes() != b[4..8] {
+                return error("Invalid XZ footer CRC32");
+            }
+            if b[8..12] != ((size >> 2) as u32).to_le_bytes() {
+                return error("Invalid XZ footer size");
+            }
+            if self.flags != b[12..14] {
+                return error("Invalid XZ footer flags");
+            }
+            if &b[14..16] != b"YZ" {
+                return error("Invalid XZ footer magic");
+            }
+            let mut padding = vec![0u8; padding_len(*reader.count)];
+            reader.read_exact(padding.as_mut_slice())?;
+            if !padding.iter().all(|&b| b == 0) {
+                return error("Invalid XZ footer padding");
+            }
+            // Expect a fresh stream header (or end of input) next.
+            *reader.count = 0;
+            self.records.clear();
+            return Ok(Step::StreamEnd);
+        }
+
+        // block
+        // The size byte encodes the header length in 4-byte units, counted from
+        // `block_begin` and not including the trailing CRC32.
+        let header_end = block_begin + ((first[0] as usize) << 2);
+        let mut b = [0u8; 1];
+        reader.read_exact(&mut b)?;
+        digest.update(&b);
+        let flags = b[0];
+        let num_filters = (flags & 0x03) + 1;
+
+        if flags & 0x3C != 0 {
+            return error("Invalid XZ block flags");
+        }
+        if flags & 0x40 != 0 {
+            get_multibyte(&mut reader, &mut digest)?;
+        }
+        if flags & 0x80 != 0 {
+            get_multibyte(&mut reader, &mut digest)?;
+        }
+        for _ in 0..num_filters {
+            let filter_id = get_multibyte(&mut reader, &mut digest)?;
+            if filter_id != 0x21 {
+                return error("Unsupported XZ filter ID");
+            }
+            let properties_size = get_multibyte(&mut reader, &mut digest)?;
+            if properties_size != 1 {
+                return error("Unsupported XZ filter properties size");
+            }
+            reader.read_exact(&mut b)?;
+            if b[0] & 0xC0 != 0 {
+                return error("Unsupported XZ filter properties");
+            }
+            digest.update(&b);
+        }
+        // A malformed header may declare fewer bytes than its fields occupy;
+        // reject it instead of underflowing.
+        let header_padding = match header_end.checked_sub(*reader.count) {
+            Some(len) => len,
+            None => return error("Invalid XZ block header size"),
+        };
+        let mut padding = vec![0u8; header_padding];
+        reader.read_exact(padding.as_mut_slice())?;
+        if !padding.iter().all(|&b| b == 0) {
+            return error("Invalid XZ block header padding");
+        }
+        digest.update(padding.as_slice());
+
+        let mut b = [0u8; 4];
+        reader.read_exact(&mut b)?;
+        if digest.finalize().to_le_bytes() != b {
+            return error("Invalid XZ block header CRC32");
+        }
+        let mut written = 0;
+        let mut total = 0;
+        Lzma2Decoder::new().decompress(
+            &mut reader,
+            &mut BufWriter {
+                inner: buf,
+                written: &mut written,
+                rest: &mut self.buf,
+                total: &mut total,
+            },
+        )?;
+
+        // Header plus compressed data; the index's "unpadded size" also counts
+        // the check field but not the block padding.
+        let data_size = *reader.count - block_begin;
+        let block_padding = padding_len(data_size);
+        self.records.push((data_size + self.check_size, total));
+        // ignore check here since zip itself will check it
+        let mut trailer = vec![0u8; block_padding + self.check_size];
+        reader.read_exact(trailer.as_mut_slice())?;
+        // Layout is padding first, then the check value: only the padding must be zero.
+        if !trailer[..block_padding].iter().all(|&b| b == 0) {
+            return error("Invalid XZ block padding");
+        }
+        Ok(Step::Block(written))
+    }
+}
+
+impl<R: Read> Read for XzDecoder<R> {
+    /// Returns decompressed bytes, serving previously buffered output first.
+    ///
+    /// `Ok(0)` is returned only for an empty `buf` or when the input ends at a
+    /// stream boundary; empty blocks and stream boundaries are skipped.
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
+        if buf.is_empty() {
+            return Ok(0);
+        }
+        loop {
+            if !self.buf.is_empty() {
+                let len = std::cmp::min(buf.len(), self.buf.len());
+                // Drain through the iterator: the deque's contents may be split
+                // across two slices, so copying from the first slice alone can panic.
+                for (dst, src) in buf.iter_mut().zip(self.buf.drain(..len)) {
+                    *dst = src;
+                }
+                return Ok(len);
+            }
+            match self.decode_next(buf)? {
+                Step::Eof => return Ok(0),
+                Step::Block(written) if written > 0 => return Ok(written),
+                // Empty block or end of a stream: keep going.
+                Step::Block(_) | Step::StreamEnd => {}
+            }
+        }
+    }
+}
+
+impl<R: Read> XzDecoder<R> {
+    /// Consumes the decoder and returns the wrapped reader.
+    ///
+    /// Compressed bytes already held by the internal `BufReader` are dropped.
+    pub fn into_inner(self) -> R {
+        self.compressed_reader.into_inner()
+    }
+}
diff --git a/src/types.rs b/src/types.rs index f1921cb4..91031a08 100644 --- a/src/types.rs +++ b/src/types.rs @@ -570,6 +570,8 @@ impl ZipFileData { CompressionMethod::Deflate64 => 21, #[cfg(feature = "lzma")] CompressionMethod::Lzma => 63, + #[cfg(feature = "xz")] + CompressionMethod::Xz => 63, // APPNOTE doesn't specify a version for Zstandard _ => DEFAULT_VERSION as u16, }; diff --git a/src/write.rs b/src/write.rs index 47fda291..4b354d87 100644 --- a/src/write.rs +++ b/src/write.rs @@ -174,7 +174,7 @@ pub(crate) mod zip_writer { #[doc(inline)] pub use self::sealed::FileOptionExtension; use crate::result::ZipError::InvalidArchive; -#[cfg(feature = "lzma")] +#[cfg(any(feature = "lzma", feature = "xz"))] use crate::result::ZipError::UnsupportedArchive; use crate::unstable::path_to_string; use crate::unstable::LittleEndianWriteExt; @@ -1703,6 +1703,10 @@ impl GenericZipWriter { 
CompressionMethod::Lzma => { Err(UnsupportedArchive("LZMA isn't supported for compression")) } + #[cfg(feature = "xz")] + CompressionMethod::Xz => { + Err(UnsupportedArchive("XZ isn't supported for compression")) + } CompressionMethod::Unsupported(..) => { Err(ZipError::UnsupportedArchive("Unsupported compression")) } diff --git a/tests/data/xz.zip b/tests/data/xz.zip new file mode 100644 index 0000000000000000000000000000000000000000..fcb7f33085febb049065a223c3e52021102d37bc GIT binary patch literal 196 zcmWIWW@h1HVBm;nc(LPZ#MK9u+ie&a7{504V^0 zzvfjDQ9$~?&`n1s21Q00AY0&k#2H2gZV!+?h4TEOoD`tpj50j2OWe=170s(+1ZiZ5 mj0*5(WD;S io::Result<()> { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/xz.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive.by_name("hello.txt")?; + assert_eq!("hello.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content)?; + assert_eq!("Hello world\n", String::from_utf8(content).unwrap()); + Ok(()) +} From 59630c00c6823713c50c1092928af3a318f29ec9 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 27 Jun 2024 15:37:45 -0400 Subject: [PATCH 2/4] fix dead code analysis warning --- src/read/stream.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/read/stream.rs b/src/read/stream.rs index 7bc91c9c..7fb76e70 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -220,6 +220,7 @@ mod test { } } + #[allow(dead_code)] #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { From 4facc3ab544192bd72504e6fe2f02016e6487829 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 4 Jul 2024 10:52:04 -0700 Subject: [PATCH 3/4] test(fuzz): Use jemalloc for fuzz tests --- fuzz/Cargo.toml | 1 + fuzz/fuzz_targets/fuzz_read.rs | 5 +++++ fuzz/fuzz_targets/fuzz_write.rs | 4 
++++ 3 files changed, 10 insertions(+) diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 33c8933b..39c1969f 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -12,6 +12,7 @@ cargo-fuzz = true libfuzzer-sys = "0.4" arbitrary = { version = "1.3.2", features = ["derive"] } replace_with = "0.1.7" +tikv-jemallocator = "0.5.4" [dependencies.zip] path = ".." diff --git a/fuzz/fuzz_targets/fuzz_read.rs b/fuzz/fuzz_targets/fuzz_read.rs index a5c5a3ff..78fe670e 100644 --- a/fuzz/fuzz_targets/fuzz_read.rs +++ b/fuzz/fuzz_targets/fuzz_read.rs @@ -1,10 +1,15 @@ #![no_main] + use libfuzzer_sys::fuzz_target; use std::io::{Read, Seek, SeekFrom}; +use tikv_jemallocator::Jemalloc; use zip::read::read_zipfile_from_stream; const MAX_BYTES_TO_READ: u64 = 1 << 24; +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + fn decompress_all(data: &[u8]) -> Result<(), Box> { let reader = std::io::Cursor::new(data); let mut zip = zip::ZipArchive::new(reader)?; diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs index 4fa9ce4f..414de08d 100755 --- a/fuzz/fuzz_targets/fuzz_write.rs +++ b/fuzz/fuzz_targets/fuzz_write.rs @@ -7,8 +7,12 @@ use libfuzzer_sys::fuzz_target; use replace_with::replace_with_or_abort; use std::io::{Cursor, Read, Seek, Write}; use std::path::PathBuf; +use tikv_jemallocator::Jemalloc; use zip::unstable::path_to_string; +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; + #[derive(Arbitrary, Clone)] pub enum BasicFileOperation<'k> { WriteNormalFile { From b61fd1198e76eed67a28e7ff33058e2a96a7df57 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Sat, 6 Jul 2024 11:59:33 -0700 Subject: [PATCH 4/4] doc: Commit signing is no longer required Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- pull_request_template.md | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pull_request_template.md b/pull_request_template.md index ad20cad4..329cb7f0 100644 
--- a/pull_request_template.md +++ b/pull_request_template.md @@ -32,21 +32,6 @@ These are our requirements for PRs, in addition to the usual functionality and r explaining what the exception applies to and why it's needed. - Commit messages and the PR title must conform to [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) and start with one of the types specified by the [Angular convention](https://github.com/angular/angular/blob/22b96b9/CONTRIBUTING.md#type). -- All commits must be signed and display a "Verified" badge; see - https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification. - If any of your commits don't have a "Verified" badge, here's how to fix this: - 1. Generate a GPG key if you don't already have one, by following - https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key. - 2. If you use GitHub's email privacy feature, associate the key with your users.noreply.github.com email address by following - https://docs.github.com/en/authentication/managing-commit-signature-verification/associating-an-email-with-your-gpg-key. - 3. Configure Git to use your signing key by following - https://docs.github.com/en/authentication/managing-commit-signature-verification/telling-git-about-your-signing-key - 4. Add the key to your GitHub account by following - https://docs.github.com/en/authentication/managing-commit-signature-verification/adding-a-gpg-key-to-your-github-account - 5. Enable commit signing by following - https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits - 6. Squash your PR into one commit or run `git commit --amend --no-edit`, because enabling commit signing isn't retroactive - even for unpushed commits. Thanks in advance for submitting a bug fix or proposed feature that meets these requirements! -->