Make large files more compressible

This commit is contained in:
Chris Hennick 2023-05-01 14:59:04 -07:00
parent a9aaea306e
commit 09d1ef2e3b
No known key found for this signature in database
GPG key ID: 25653935CC8B6C74
3 changed files with 47 additions and 31 deletions

View file

@@ -109,4 +109,4 @@ jobs:
cargo fuzz build fuzz_write
- name: run fuzz
run: |
cargo fuzz run fuzz_write -- -timeout=1s -runs=1000000 -max_len=15000000000
cargo fuzz run fuzz_write -- -timeout=2m

View file

@@ -11,6 +11,7 @@ cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
arbitrary = { version = "1.3.0", features = ["derive"] }
itertools = "0.10.5"
[dependencies.zip_next]
path = ".."

View file

@@ -1,10 +1,10 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use arbitrary::{Arbitrary, Unstructured};
use arbitrary::size_hint::and_all;
use std::fmt::{Debug, Formatter};
use arbitrary::{Arbitrary};
use std::fmt::Debug;
use std::io::{Cursor, Read, Seek, Write};
use std::iter::{repeat, Flatten, Repeat, Take};
#[derive(Arbitrary,Debug)]
pub struct File {
@@ -14,37 +14,44 @@ pub struct File {
// One byte more than the largest value a 32-bit size field can hold (4 GiB).
// Writing at least this much is what makes the file "large" — presumably to
// force the ZIP64 large-file path (see `options.large_file(true)` below);
// confirm against the zip_next docs.
const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
/// One fuzzer-chosen byte sequence together with an encoded repeat count.
/// Repeating small sequences lets the fuzzer describe multi-gigabyte file
/// contents with very little input data, and keeps the written data
/// compressible.
#[derive(Arbitrary, Clone, Debug)]
pub enum RepeatedBytes {
    /// The bytes appear exactly once.
    Once(Vec<u8>),
    /// The bytes are repeated `repeats + 2` times (see `into_iter`),
    /// so this variant always yields at least two copies.
    U8Times {
        bytes: Vec<u8>,
        repeats: u8,
    },
    /// The bytes are repeated `repeats + u8::MAX + 2` times (see
    /// `into_iter`), i.e. always more copies than `U8Times` can express.
    U16Times {
        bytes: Vec<u8>,
        repeats: u16,
    }
}
impl IntoIterator for RepeatedBytes {
    type Item = u8;
    type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;

    /// Expands this run-length-encoded value into its byte stream.
    ///
    /// The repeat counts are offset so each variant covers a distinct
    /// minimum: `Once` yields one copy, `U8Times` at least two
    /// (`repeats + 2`), and `U16Times` more than any `U8Times` can
    /// produce (`repeats + u8::MAX + 2`).
    fn into_iter(self) -> Self::IntoIter {
        let (bytes, copies) = match self {
            RepeatedBytes::Once(bytes) => (bytes, 1),
            RepeatedBytes::U8Times { bytes, repeats } => (bytes, repeats as usize + 2),
            RepeatedBytes::U16Times { bytes, repeats } => {
                (bytes, repeats as usize + u8::MAX as usize + 2)
            }
        };
        repeat(bytes).take(copies).flatten()
    }
}
#[derive(Arbitrary,Debug)]
pub struct LargeFile {
pub name: String,
pub large_contents: Vec<u8>,
pub large_contents: Vec<Vec<RepeatedBytes>>,
pub extra_contents: Vec<Vec<u8>>
}
// Hand-written Arbitrary impl (rather than a derive) because
// `large_contents` must be exactly LARGE_FILE_BUF_SIZE bytes taken
// verbatim from the fuzzer input — a fixed-size requirement a derive
// cannot express.
impl Arbitrary<'_> for LargeFile {
    fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
        Ok(LargeFile {
            name: String::arbitrary(u)?,
            // Consumes a fixed LARGE_FILE_BUF_SIZE bytes of raw fuzzer data;
            // presumably errors when the input is shorter, which would
            // require multi-gigabyte inputs to reach this path — confirm
            // with the `Unstructured::bytes` docs.
            large_contents: u.bytes(LARGE_FILE_BUF_SIZE)?.to_vec(),
            extra_contents: Vec::arbitrary(u)?
        })
    }
    // Combined hint: the hints for `name` and `extra_contents`, plus the
    // exact LARGE_FILE_BUF_SIZE bytes that `arbitrary` always consumes.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        and_all(&[<String as Arbitrary>::size_hint(depth),
            <Vec<Vec<u8>> as Arbitrary>::size_hint(depth),
            (LARGE_FILE_BUF_SIZE, Some(LARGE_FILE_BUF_SIZE))])
    }
}
impl Debug for LargeFile {
    /// Formats the file for diagnostics, deliberately omitting
    /// `large_contents` — it expands to gigabytes of data and would make
    /// fuzzer crash reports unreadable.
    fn fmt(&self, out: &mut Formatter<'_>) -> std::fmt::Result {
        let mut repr = out.debug_struct("LargeFile");
        repr.field("name", &self.name);
        repr.field("extra_contents", &self.extra_contents);
        repr.finish()
    }
}
#[derive(Arbitrary,Debug)]
pub enum FileOperation {
Write {
@@ -92,7 +99,15 @@ fn do_operation<T>(writer: &mut zip_next::ZipWriter<T>,
FileOperation::WriteLarge {file, mut options} => {
    // Force the ZIP64/large-file mode for this entry.
    options = options.large_file(true);
    writer.start_file(file.name.to_owned(), options)?;
    // FIXME(review): `written` is not `mut` and is never updated, so
    // `written < LARGE_FILE_BUF_SIZE` is always true and this loop can
    // only exit via the `?` on a failed write — an infinite loop on the
    // success path. It presumably should accumulate the length of each
    // expanded chunk and stop once LARGE_FILE_BUF_SIZE bytes are written
    // (and guard against `large_contents` being empty, which would also
    // never make progress).
    let written: usize = 0;
    while written < LARGE_FILE_BUF_SIZE {
        for chunk in &file.large_contents {
            // Expand the run-length-encoded chunk into raw bytes before
            // writing. NOTE(review): this materializes the whole chunk in
            // memory; feeding the iterator to the writer incrementally
            // would avoid the allocation. Also, `chunk.iter()` yields
            // `&RepeatedBytes` while `RepeatedBytes::into_iter` consumes
            // by value — verify this compiles (a `.cloned()` may be
            // missing).
            let chunk: Vec<u8> = chunk.iter()
                .flat_map(RepeatedBytes::into_iter)
                .collect();
            writer.write_all(chunk.as_slice())?;
        }
    }
    // Trailing raw chunks written after the large repeated content.
    for chunk in &file.extra_contents {
        writer.write_all(chunk.as_slice())?;
    }