From 09d1ef2e3b90d538373c5774dac1685d12133231 Mon Sep 17 00:00:00 2001
From: Chris Hennick
Date: Mon, 1 May 2023 14:59:04 -0700
Subject: [PATCH] Make large files more compressible

---
 .github/workflows/ci.yaml       |  2 +-
 fuzz/Cargo.toml                 |  1 +
 fuzz/fuzz_targets/fuzz_write.rs | 75 ++++++++++++++++++++-------------
 3 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f01ab679..5972802f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -109,4 +109,4 @@ jobs:
           cargo fuzz build fuzz_write
       - name: run fuzz
         run: |
-          cargo fuzz run fuzz_write -- -timeout=1s -runs=1000000 -max_len=15000000000
+          cargo fuzz run fuzz_write -- -timeout=2m
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index f63ff859..ce6b11fb 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -11,6 +11,7 @@ cargo-fuzz = true
 [dependencies]
 libfuzzer-sys = "0.4"
 arbitrary = { version = "1.3.0", features = ["derive"] }
+itertools = "0.10.5"
 
 [dependencies.zip_next]
 path = ".."
diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs
index 2c3bdc70..e56add51 100644
--- a/fuzz/fuzz_targets/fuzz_write.rs
+++ b/fuzz/fuzz_targets/fuzz_write.rs
@@ -1,10 +1,10 @@
 #![no_main]
 
 use libfuzzer_sys::fuzz_target;
-use arbitrary::{Arbitrary, Unstructured};
-use arbitrary::size_hint::and_all;
-use std::fmt::{Debug, Formatter};
+use arbitrary::{Arbitrary};
+use std::fmt::Debug;
 use std::io::{Cursor, Read, Seek, Write};
+use std::iter::{repeat, Flatten, Repeat, Take};
 
 #[derive(Arbitrary,Debug)]
 pub struct File {
@@ -14,37 +14,44 @@ pub struct File {
 
 const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
 
+#[derive(Arbitrary, Clone, Debug)]
+pub enum RepeatedBytes {
+    Once(Vec<u8>),
+    U8Times {
+        bytes: Vec<u8>,
+        repeats: u8,
+    },
+    U16Times {
+        bytes: Vec<u8>,
+        repeats: u16,
+    }
+}
+
+impl IntoIterator for RepeatedBytes {
+    type Item = u8;
+    type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;
+    fn into_iter(self) -> Self::IntoIter {
+        match self {
+            RepeatedBytes::Once(bytes) => {
+                repeat(bytes).take(1)
+            },
+            RepeatedBytes::U8Times {bytes, repeats} => {
+                repeat(bytes).take(repeats as usize + 2)
+            },
+            RepeatedBytes::U16Times {bytes, repeats} => {
+                repeat(bytes).take(repeats as usize + u8::MAX as usize + 2)
+            }
+        }.flatten()
+    }
+}
+
+#[derive(Arbitrary,Debug)]
 pub struct LargeFile {
     pub name: String,
-    pub large_contents: Vec<u8>,
+    pub large_contents: Vec<Vec<RepeatedBytes>>,
     pub extra_contents: Vec<Vec<u8>>
 }
 
-impl Arbitrary<'_> for LargeFile {
-    fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
-        Ok(LargeFile {
-            name: String::arbitrary(u)?,
-            large_contents: u.bytes(LARGE_FILE_BUF_SIZE)?.to_vec(),
-            extra_contents: Vec::arbitrary(u)?
-        })
-    }
-
-    fn size_hint(depth: usize) -> (usize, Option<usize>) {
-        and_all(&[<String as Arbitrary>::size_hint(depth),
-            <Vec<Vec<u8>> as Arbitrary>::size_hint(depth),
-            (LARGE_FILE_BUF_SIZE, Some(LARGE_FILE_BUF_SIZE))])
-    }
-}
-
-impl Debug for LargeFile {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("LargeFile")
-            .field("name", &self.name)
-            .field("extra_contents", &self.extra_contents)
-            .finish()
-    }
-}
-
 #[derive(Arbitrary,Debug)]
 pub enum FileOperation {
     Write {
@@ -92,7 +99,15 @@ fn do_operation<T>(writer: &mut zip_next::ZipWriter<T>,
         FileOperation::WriteLarge {file, mut options} => {
            options = options.large_file(true);
            writer.start_file(file.name.to_owned(), options)?;
-            writer.write_all(&*file.large_contents)?;
+            let mut written: usize = 0;
+            while written < LARGE_FILE_BUF_SIZE {
+                for chunk in &file.large_contents {
+                    let chunk: Vec<u8> = chunk.iter().cloned()
+                        .flat_map(RepeatedBytes::into_iter).collect();
+                    written += chunk.len();
+                    writer.write_all(chunk.as_slice())?;
+                }
+            }
            for chunk in &file.extra_contents {
                writer.write_all(chunk.as_slice())?;
            }
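
Note (illustration only, not part of the commit): the sketch below reproduces the RepeatedBytes expansion from the patch so its effect can be seen in isolation. The Arbitrary derive and the zip_next writer are left out so the snippet compiles on its own, and the example block and repeat count are made up.

    // Standalone sketch: how one RepeatedBytes value expands into many bytes.
    use std::iter::{repeat, Flatten, Repeat, Take};

    #[derive(Clone, Debug)]
    pub enum RepeatedBytes {
        Once(Vec<u8>),
        U8Times { bytes: Vec<u8>, repeats: u8 },
        U16Times { bytes: Vec<u8>, repeats: u16 },
    }

    impl IntoIterator for RepeatedBytes {
        type Item = u8;
        type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;
        fn into_iter(self) -> Self::IntoIter {
            // Same expansion rule as the patch: U8Times repeats the block at least
            // twice, U16Times at least u8::MAX + 2 times, so the two variants cover
            // different repetition ranges.
            match self {
                RepeatedBytes::Once(bytes) => repeat(bytes).take(1),
                RepeatedBytes::U8Times { bytes, repeats } => {
                    repeat(bytes).take(repeats as usize + 2)
                }
                RepeatedBytes::U16Times { bytes, repeats } => {
                    repeat(bytes).take(repeats as usize + u8::MAX as usize + 2)
                }
            }
            .flatten()
        }
    }

    fn main() {
        // A 4-byte block repeated u16::MAX + u8::MAX + 2 = 65,792 times describes
        // 257 KiB of highly compressible output from a few bytes of fuzz input.
        // The WriteLarge arm in the patch keeps cycling through such chunks until
        // more than u32::MAX bytes are written, exercising the ZIP64 large-file
        // path enabled by options.large_file(true).
        let chunk = RepeatedBytes::U16Times {
            bytes: vec![0xDE, 0xAD, 0xBE, 0xEF],
            repeats: u16::MAX,
        };
        let expanded: Vec<u8> = chunk.into_iter().collect();
        assert_eq!(expanded.len(), 4 * (u16::MAX as usize + u8::MAX as usize + 2));
        println!("expanded to {} bytes", expanded.len());
    }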