Make large files more compressible
This commit is contained in:
parent
a9aaea306e
commit
09d1ef2e3b
3 changed files with 47 additions and 31 deletions
2
.github/workflows/ci.yaml
vendored
2
.github/workflows/ci.yaml
vendored
|
@ -109,4 +109,4 @@ jobs:
|
||||||
cargo fuzz build fuzz_write
|
cargo fuzz build fuzz_write
|
||||||
- name: run fuzz
|
- name: run fuzz
|
||||||
run: |
|
run: |
|
||||||
cargo fuzz run fuzz_write -- -timeout=1s -runs=1000000 -max_len=15000000000
|
cargo fuzz run fuzz_write -- -timeout=2m
|
||||||
|
|
|
@ -11,6 +11,7 @@ cargo-fuzz = true
|
||||||
[dependencies]
|
[dependencies]
|
||||||
libfuzzer-sys = "0.4"
|
libfuzzer-sys = "0.4"
|
||||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||||
|
itertools = "0.10.5"
|
||||||
|
|
||||||
[dependencies.zip_next]
|
[dependencies.zip_next]
|
||||||
path = ".."
|
path = ".."
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
#![no_main]
|
#![no_main]
|
||||||
|
|
||||||
use libfuzzer_sys::fuzz_target;
|
use libfuzzer_sys::fuzz_target;
|
||||||
use arbitrary::{Arbitrary, Unstructured};
|
use arbitrary::{Arbitrary};
|
||||||
use arbitrary::size_hint::and_all;
|
use std::fmt::Debug;
|
||||||
use std::fmt::{Debug, Formatter};
|
|
||||||
use std::io::{Cursor, Read, Seek, Write};
|
use std::io::{Cursor, Read, Seek, Write};
|
||||||
|
use std::iter::{repeat, Flatten, Repeat, Take};
|
||||||
|
|
||||||
#[derive(Arbitrary,Debug)]
|
#[derive(Arbitrary,Debug)]
|
||||||
pub struct File {
|
pub struct File {
|
||||||
|
@ -14,37 +14,44 @@ pub struct File {
|
||||||
|
|
||||||
const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
|
const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
|
||||||
|
|
||||||
|
/// A byte pattern together with a repetition count, generated by the fuzzer.
///
/// Converting a value into an iterator (see the `IntoIterator` impl in this
/// file) yields the pattern's bytes back to back, once per repetition.
#[derive(Arbitrary, Clone, Debug)]
|
||||||
|
pub enum RepeatedBytes {
|
||||||
|
/// The bytes are emitted exactly once.
Once(Vec<u8>),
|
||||||
|
/// `bytes` is emitted `repeats + 2` times (2 to 257 copies).
U8Times {
|
||||||
|
/// The pattern to repeat.
bytes: Vec<u8>,
|
||||||
|
/// Number of copies beyond the minimum of two.
repeats: u8,
|
||||||
|
},
|
||||||
|
/// `bytes` is emitted `repeats + u8::MAX + 2` times (257 to 65792 copies).
U16Times {
|
||||||
|
/// The pattern to repeat.
bytes: Vec<u8>,
|
||||||
|
/// Number of copies beyond the minimum of 257.
repeats: u16,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntoIterator for RepeatedBytes {
|
||||||
|
type Item = u8;
|
||||||
|
type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
match self {
|
||||||
|
RepeatedBytes::Once(bytes) => {
|
||||||
|
repeat(bytes).take(1)
|
||||||
|
},
|
||||||
|
RepeatedBytes::U8Times {bytes, repeats} => {
|
||||||
|
repeat(bytes).take(repeats as usize + 2)
|
||||||
|
},
|
||||||
|
RepeatedBytes::U16Times {bytes, repeats} => {
|
||||||
|
repeat(bytes).take(repeats as usize + u8::MAX as usize + 2)
|
||||||
|
}
|
||||||
|
}.flatten()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Arbitrary,Debug)]
|
||||||
pub struct LargeFile {
|
pub struct LargeFile {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub large_contents: Vec<u8>,
|
pub large_contents: Vec<Vec<RepeatedBytes>>,
|
||||||
pub extra_contents: Vec<Vec<u8>>
|
pub extra_contents: Vec<Vec<u8>>
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Arbitrary<'_> for LargeFile {
|
|
||||||
fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
|
|
||||||
Ok(LargeFile {
|
|
||||||
name: String::arbitrary(u)?,
|
|
||||||
large_contents: u.bytes(LARGE_FILE_BUF_SIZE)?.to_vec(),
|
|
||||||
extra_contents: Vec::arbitrary(u)?
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn size_hint(depth: usize) -> (usize, Option<usize>) {
|
|
||||||
and_all(&[<String as Arbitrary>::size_hint(depth),
|
|
||||||
<Vec<Vec<u8>> as Arbitrary>::size_hint(depth),
|
|
||||||
(LARGE_FILE_BUF_SIZE, Some(LARGE_FILE_BUF_SIZE))])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Debug for LargeFile {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
||||||
f.debug_struct("LargeFile")
|
|
||||||
.field("name", &self.name)
|
|
||||||
.field("extra_contents", &self.extra_contents)
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Arbitrary,Debug)]
|
#[derive(Arbitrary,Debug)]
|
||||||
pub enum FileOperation {
|
pub enum FileOperation {
|
||||||
Write {
|
Write {
|
||||||
|
@ -92,7 +99,15 @@ fn do_operation<T>(writer: &mut zip_next::ZipWriter<T>,
|
||||||
FileOperation::WriteLarge {file, mut options} => {
|
FileOperation::WriteLarge {file, mut options} => {
|
||||||
options = options.large_file(true);
|
options = options.large_file(true);
|
||||||
writer.start_file(file.name.to_owned(), options)?;
|
writer.start_file(file.name.to_owned(), options)?;
|
||||||
writer.write_all(&*file.large_contents)?;
|
// Keep cycling through the generated chunks until at least
// LARGE_FILE_BUF_SIZE bytes have been handed to the writer.
// `written` must be mutable and updated; otherwise the loop condition can
// never become false and the loop only ends on a write error.
let mut written: usize = 0;
while written < LARGE_FILE_BUF_SIZE {
    // Bytes produced by this pass over `large_contents`.
    let mut pass_len: usize = 0;
    for chunk in &file.large_contents {
        // The `IntoIterator` impl visible in this file consumes an owned
        // `RepeatedBytes`, so clone each borrowed pattern before expanding it.
        let chunk: Vec<u8> = chunk.iter()
            .cloned()
            .flat_map(RepeatedBytes::into_iter)
            .collect();
        writer.write_all(chunk.as_slice())?;
        pass_len = pass_len.saturating_add(chunk.len());
    }
    if pass_len == 0 {
        // The fuzzer supplied no bytes at all (no chunks, or only empty
        // patterns); repeating the pass could never reach the target size.
        break;
    }
    written = written.saturating_add(pass_len);
}
|
||||||
for chunk in &file.extra_contents {
|
for chunk in &file.extra_contents {
|
||||||
writer.write_all(chunk.as_slice())?;
|
writer.write_all(chunk.as_slice())?;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue