Make large files more compressible
This commit is contained in:
parent
a9aaea306e
commit
09d1ef2e3b
3 changed files with 47 additions and 31 deletions
2
.github/workflows/ci.yaml
vendored
2
.github/workflows/ci.yaml
vendored
|
@@ -109,4 +109,4 @@ jobs:
|
|||
cargo fuzz build fuzz_write
|
||||
- name: run fuzz
|
||||
run: |
|
||||
cargo fuzz run fuzz_write -- -timeout=1s -runs=1000000 -max_len=15000000000
|
||||
cargo fuzz run fuzz_write -- -timeout=2m
|
||||
|
|
|
@@ -11,6 +11,7 @@ cargo-fuzz = true
|
|||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||
itertools = "0.10.5"
|
||||
|
||||
[dependencies.zip_next]
|
||||
path = ".."
|
||||
|
|
|
@@ -1,10 +1,10 @@
|
|||
#![no_main]
|
||||
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use arbitrary::{Arbitrary, Unstructured};
|
||||
use arbitrary::size_hint::and_all;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use arbitrary::{Arbitrary};
|
||||
use std::fmt::Debug;
|
||||
use std::io::{Cursor, Read, Seek, Write};
|
||||
use std::iter::{repeat, Flatten, Repeat, Take};
|
||||
|
||||
#[derive(Arbitrary,Debug)]
|
||||
pub struct File {
|
||||
|
@@ -14,37 +14,44 @@ pub struct File {
|
|||
|
||||
const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
|
||||
|
||||
#[derive(Arbitrary, Clone, Debug)]
|
||||
pub enum RepeatedBytes {
|
||||
Once(Vec<u8>),
|
||||
U8Times {
|
||||
bytes: Vec<u8>,
|
||||
repeats: u8,
|
||||
},
|
||||
U16Times {
|
||||
bytes: Vec<u8>,
|
||||
repeats: u16,
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for RepeatedBytes {
|
||||
type Item = u8;
|
||||
type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
match self {
|
||||
RepeatedBytes::Once(bytes) => {
|
||||
repeat(bytes).take(1)
|
||||
},
|
||||
RepeatedBytes::U8Times {bytes, repeats} => {
|
||||
repeat(bytes).take(repeats as usize + 2)
|
||||
},
|
||||
RepeatedBytes::U16Times {bytes, repeats} => {
|
||||
repeat(bytes).take(repeats as usize + u8::MAX as usize + 2)
|
||||
}
|
||||
}.flatten()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Arbitrary,Debug)]
|
||||
pub struct LargeFile {
|
||||
pub name: String,
|
||||
pub large_contents: Vec<u8>,
|
||||
pub large_contents: Vec<Vec<RepeatedBytes>>,
|
||||
pub extra_contents: Vec<Vec<u8>>
|
||||
}
|
||||
|
||||
impl Arbitrary<'_> for LargeFile {
|
||||
fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
|
||||
Ok(LargeFile {
|
||||
name: String::arbitrary(u)?,
|
||||
large_contents: u.bytes(LARGE_FILE_BUF_SIZE)?.to_vec(),
|
||||
extra_contents: Vec::arbitrary(u)?
|
||||
})
|
||||
}
|
||||
|
||||
fn size_hint(depth: usize) -> (usize, Option<usize>) {
|
||||
and_all(&[<String as Arbitrary>::size_hint(depth),
|
||||
<Vec<Vec<u8>> as Arbitrary>::size_hint(depth),
|
||||
(LARGE_FILE_BUF_SIZE, Some(LARGE_FILE_BUF_SIZE))])
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for LargeFile {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("LargeFile")
|
||||
.field("name", &self.name)
|
||||
.field("extra_contents", &self.extra_contents)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Arbitrary,Debug)]
|
||||
pub enum FileOperation {
|
||||
Write {
|
||||
|
@@ -92,7 +99,15 @@ fn do_operation<T>(writer: &mut zip_next::ZipWriter<T>,
|
|||
FileOperation::WriteLarge {file, mut options} => {
|
||||
options = options.large_file(true);
|
||||
writer.start_file(file.name.to_owned(), options)?;
|
||||
writer.write_all(&*file.large_contents)?;
|
||||
let written: usize = 0;
|
||||
while written < LARGE_FILE_BUF_SIZE {
|
||||
for chunk in &file.large_contents {
|
||||
let chunk: Vec<u8> = chunk.iter()
|
||||
.flat_map(RepeatedBytes::into_iter)
|
||||
.collect();
|
||||
writer.write_all(chunk.as_slice())?;
|
||||
}
|
||||
}
|
||||
for chunk in &file.extra_contents {
|
||||
writer.write_all(chunk.as_slice())?;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue