Use sparse files that are mostly the same byte pattern

This commit is contained in:
Chris Hennick 2023-05-01 16:01:38 -07:00
parent 5220222f84
commit c18fae1f77
No known key found for this signature in database
GPG key ID: 25653935CC8B6C74
2 changed files with 19 additions and 48 deletions

View file

@@ -109,4 +109,4 @@ jobs:
cargo fuzz build fuzz_write cargo fuzz build fuzz_write
- name: run fuzz - name: run fuzz
run: | run: |
cargo fuzz run fuzz_write -- -timeout=120 -runs=1000000 cargo fuzz run fuzz_write -- -timeout=300 -runs=1000000 -max_len=1000000

View file

@@ -4,7 +4,7 @@ use libfuzzer_sys::fuzz_target;
use arbitrary::{Arbitrary}; use arbitrary::{Arbitrary};
use std::fmt::Debug; use std::fmt::Debug;
use std::io::{Cursor, Read, Seek, Write}; use std::io::{Cursor, Read, Seek, Write};
use std::iter::{repeat, Flatten, Repeat, Take}; use std::iter::{repeat};
#[derive(Arbitrary,Debug)] #[derive(Arbitrary,Debug)]
pub struct File { pub struct File {
@@ -15,46 +15,18 @@ pub struct File {
const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1; const LARGE_FILE_BUF_SIZE: usize = u32::MAX as usize + 1;
#[derive(Arbitrary, Clone, Debug)] #[derive(Arbitrary, Clone, Debug)]
pub enum RepeatedBytes { pub struct SparseFilePart {
Once { pub start: u32,
min_bytes: [u8; 1024], pub contents: Vec<u8>
extra_bytes: Vec<u8>
},
U8Times {
bytes: Vec<u8>,
repeats: u8,
},
U16Times {
bytes: Vec<u8>,
repeats: u16,
}
}
impl IntoIterator for RepeatedBytes {
type Item = u8;
type IntoIter = Flatten<Take<Repeat<Vec<u8>>>>;
fn into_iter(self) -> Self::IntoIter {
match self {
RepeatedBytes::Once {min_bytes, extra_bytes} => {
let mut bytes = min_bytes.to_vec();
bytes.extend(extra_bytes);
repeat(bytes).take(1)
},
RepeatedBytes::U8Times {bytes, repeats} => {
repeat(bytes).take(repeats as usize + 2)
},
RepeatedBytes::U16Times {bytes, repeats} => {
repeat(bytes).take(repeats as usize + u8::MAX as usize + 2)
}
}.flatten()
}
} }
#[derive(Arbitrary,Debug)] #[derive(Arbitrary,Debug)]
pub struct LargeFile { pub struct LargeFile {
pub name: String, pub name: String,
pub large_contents: Vec<Vec<RepeatedBytes>>, pub default_pattern_first_byte: u8,
pub extra_contents: Vec<Vec<u8>> pub default_pattern_extra_bytes: Vec<u8>,
pub parts: Vec<SparseFilePart>,
pub min_extra_length: u32
} }
#[derive(Arbitrary,Debug)] #[derive(Arbitrary,Debug)]
@@ -104,19 +76,18 @@ fn do_operation<T>(writer: &mut zip_next::ZipWriter<T>,
FileOperation::WriteLarge {file, mut options} => { FileOperation::WriteLarge {file, mut options} => {
options = options.large_file(true); options = options.large_file(true);
writer.start_file(file.name.to_owned(), options)?; writer.start_file(file.name.to_owned(), options)?;
let mut written: usize = 0; let mut default_pattern = Vec::with_capacity(file.default_pattern_extra_bytes.len() + 1);
while written < LARGE_FILE_BUF_SIZE { default_pattern.push(file.default_pattern_first_byte);
for chunk in &file.large_contents { default_pattern.extend(&file.default_pattern_extra_bytes);
let chunk: Vec<u8> = chunk.to_owned().into_iter() let mut sparse_file: Vec<u8> =
.flat_map(RepeatedBytes::into_iter) repeat(default_pattern.into_iter()).flatten().take(LARGE_FILE_BUF_SIZE + file.min_extra_length as usize)
.collect(); .collect();
written += chunk.len(); for part in &file.parts {
writer.write_all(chunk.as_slice())?; for (index, byte) in part.contents.iter().enumerate() {
sparse_file[part.start as usize + index] = *byte;
} }
} }
for chunk in &file.extra_contents { writer.write_all(sparse_file.as_slice())?;
writer.write_all(chunk.as_slice())?;
}
} }
FileOperation::ShallowCopy {base, new_name} => { FileOperation::ShallowCopy {base, new_name} => {
do_operation(writer, base)?; do_operation(writer, base)?;