Merge pull request #69 from zip-rs/oldpr369

feat: Add by_name_seek() for Stored zips
Chris Hennick 2024-07-17 17:25:19 +00:00 committed by GitHub
commit 5632e7f25a
Signed by: DevComp
GPG key ID: B5690EEEBB952194
3 changed files with 259 additions and 81 deletions
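
For context, a minimal usage sketch of the new API (not part of the diff itself): the archive path archive.zip and entry name data.bin are placeholders, the entry is assumed to be at least 16 bytes long, and it must be written with CompressionMethod::Stored, since by_name_seek() rejects compressed entries in this change.

use std::fs::File;
use std::io::{Read, Seek, SeekFrom};

fn read_tail_of_stored_entry() -> zip::result::ZipResult<Vec<u8>> {
    // Placeholder archive and entry names; the entry must be Stored
    // (uncompressed), otherwise by_name_seek() returns
    // ZipError::UnsupportedArchive.
    let file = File::open("archive.zip")?;
    let mut archive = zip::ZipArchive::new(file)?;
    let mut entry = archive.by_name_seek("data.bin")?;

    // Jump straight to the last 16 bytes of the entry's data without
    // reading everything before them (assumes the entry is >= 16 bytes).
    entry.seek(SeekFrom::End(-16))?;
    let mut tail = vec![0u8; 16];
    entry.read_exact(&mut tail)?;
    Ok(tail)
}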


@@ -2,9 +2,9 @@
use arbitrary::Arbitrary;
use core::fmt::{Debug, Formatter};
use std::borrow::Cow;
use libfuzzer_sys::fuzz_target;
use replace_with::replace_with_or_abort;
use std::borrow::Cow;
use std::io::{Cursor, Read, Seek, Write};
use std::path::PathBuf;
use tikv_jemallocator::Jemalloc;
@@ -27,16 +27,16 @@ pub enum BasicFileOperation<'k> {
ShallowCopy(Box<FileOperation<'k>>),
DeepCopy(Box<FileOperation<'k>>),
MergeWithOtherFile {
operations: Box<[(FileOperation<'k>, bool)]>
operations: Box<[(FileOperation<'k>, bool)]>,
},
SetArchiveComment(Box<[u8]>)
SetArchiveComment(Box<[u8]>),
}
#[derive(Arbitrary, Clone, Debug, Eq, PartialEq)]
pub enum ReopenOption {
DoNotReopen,
ViaFinish,
ViaFinishIntoReadable
ViaFinishIntoReadable,
}
#[derive(Arbitrary, Clone)]
@@ -47,78 +47,105 @@ pub struct FileOperation<'k> {
// 'abort' flag is separate, to prevent trying to copy an aborted file
}
impl <'k> FileOperation<'k> {
impl<'k> FileOperation<'k> {
fn get_path(&self) -> Option<Cow<PathBuf>> {
match &self.basic {
BasicFileOperation::SetArchiveComment(_) => None,
BasicFileOperation::WriteDirectory(_) => Some(Cow::Owned(self.path.join("/"))),
BasicFileOperation::MergeWithOtherFile { operations } =>
operations.iter().flat_map(|(op, abort)| if !abort { op.get_path() } else { None }).next(),
_ => Some(Cow::Borrowed(&self.path))
BasicFileOperation::MergeWithOtherFile { operations } => operations
.iter()
.flat_map(|(op, abort)| if !abort { op.get_path() } else { None })
.next(),
_ => Some(Cow::Borrowed(&self.path)),
}
}
}
impl <'k> Debug for FileOperation<'k> {
impl<'k> Debug for FileOperation<'k> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match &self.basic {
BasicFileOperation::WriteNormalFile {contents, options} => {
f.write_fmt(format_args!("let options = {:?};\n\
writer.start_file_from_path({:?}, options)?;\n", options, self.path))?;
BasicFileOperation::WriteNormalFile { contents, options } => {
f.write_fmt(format_args!(
"let options = {:?};\n\
writer.start_file_from_path({:?}, options)?;\n",
options, self.path
))?;
for content_slice in contents {
f.write_fmt(format_args!("writer.write_all(&({:?}))?;\n", content_slice))?;
}
},
}
BasicFileOperation::WriteDirectory(options) => {
f.write_fmt(format_args!("let options = {:?};\n\
f.write_fmt(format_args!(
"let options = {:?};\n\
writer.add_directory_from_path({:?}, options)?;\n",
options, self.path))?;
},
BasicFileOperation::WriteSymlinkWithTarget {target, options} => {
f.write_fmt(format_args!("let options = {:?};\n\
options, self.path
))?;
}
BasicFileOperation::WriteSymlinkWithTarget { target, options } => {
f.write_fmt(format_args!(
"let options = {:?};\n\
writer.add_symlink_from_path({:?}, {:?}, options)?;\n",
options, self.path, target.to_owned()))?;
},
options,
self.path,
target.to_owned()
))?;
}
BasicFileOperation::ShallowCopy(base) => {
let Some(base_path) = base.get_path() else {
return Ok(())
return Ok(());
};
f.write_fmt(format_args!("{:?}writer.shallow_copy_file_from_path({:?}, {:?})?;\n", base, base_path, self.path))?;
},
f.write_fmt(format_args!(
"{:?}writer.shallow_copy_file_from_path({:?}, {:?})?;\n",
base, base_path, self.path
))?;
}
BasicFileOperation::DeepCopy(base) => {
let Some(base_path) = base.get_path() else {
return Ok(())
return Ok(());
};
f.write_fmt(format_args!("{:?}writer.deep_copy_file_from_path({:?}, {:?})?;\n", base, base_path, self.path))?;
},
BasicFileOperation::MergeWithOtherFile {operations} => {
f.write_str("let sub_writer = {\n\
f.write_fmt(format_args!(
"{:?}writer.deep_copy_file_from_path({:?}, {:?})?;\n",
base, base_path, self.path
))?;
}
BasicFileOperation::MergeWithOtherFile { operations } => {
f.write_str(
"let sub_writer = {\n\
let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\
writer.set_flush_on_finish_file(false);\n")?;
operations.iter().map(|op| {
f.write_fmt(format_args!("{:?}", op.0))?;
if op.1 {
f.write_str("writer.abort_file()?;\n")
} else {
Ok(())
}
}).collect::<Result<(), _>>()?;
f.write_str("writer\n\
writer.set_flush_on_finish_file(false);\n",
)?;
operations
.iter()
.map(|op| {
f.write_fmt(format_args!("{:?}", op.0))?;
if op.1 {
f.write_str("writer.abort_file()?;\n")
} else {
Ok(())
}
})
.collect::<Result<(), _>>()?;
f.write_str(
"writer\n\
};\n\
writer.merge_archive(sub_writer.finish_into_readable()?)?;\n")?;
},
writer.merge_archive(sub_writer.finish_into_readable()?)?;\n",
)?;
}
BasicFileOperation::SetArchiveComment(comment) => {
f.write_fmt(format_args!("writer.set_raw_comment({:?}.into());\n", comment))?;
f.write_fmt(format_args!(
"writer.set_raw_comment({:?}.into());\n",
comment
))?;
}
}
match &self.reopen {
ReopenOption::DoNotReopen => Ok(()),
ReopenOption::ViaFinish => {
f.write_str("writer = ZipWriter::new_append(writer.finish()?)?;\n")
},
ReopenOption::ViaFinishIntoReadable => {
f.write_str("writer = ZipWriter::new_append(writer.finish_into_readable()?.into_inner())?;\n")
}
ReopenOption::ViaFinishIntoReadable => f.write_str(
"writer = ZipWriter::new_append(writer.finish_into_readable()?.into_inner())?;\n",
),
}
}
}
@@ -129,19 +156,23 @@ pub struct FuzzTestCase<'k> {
flush_on_finish_file: bool,
}
impl <'k> Debug for FuzzTestCase<'k> {
impl<'k> Debug for FuzzTestCase<'k> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\
writer.set_flush_on_finish_file({:?});\n", self.flush_on_finish_file))?;
self.operations.iter().map(|op| {
f.write_fmt(format_args!("{:?}", op.0))?;
if op.1 {
f.write_str("writer.abort_file()?;\n")
} else {
Ok(())
}
})
writer.set_flush_on_finish_file({:?});\n",
self.flush_on_finish_file
))?;
self.operations
.iter()
.map(|op| {
f.write_fmt(format_args!("{:?}", op.0))?;
if op.1 {
f.write_str("writer.abort_file()?;\n")
} else {
Ok(())
}
})
.collect::<Result<(), _>>()?;
f.write_str("writer\n")
}
@@ -154,8 +185,8 @@ fn deduplicate_paths(copy: &mut Cow<PathBuf>, original: &PathBuf) {
let mut new_name = name.to_owned();
new_name.push("_copy");
copy.with_file_name(new_name)
},
None => copy.with_file_name("copy")
}
None => copy.with_file_name("copy"),
};
*copy = Cow::Owned(new_path);
}
@@ -166,7 +197,7 @@ fn do_operation<'k, T>(
operation: &FileOperation<'k>,
abort: bool,
flush_on_finish_file: bool,
files_added: &mut usize
files_added: &mut usize,
) -> Result<(), Box<dyn std::error::Error>>
where
T: Read + Write + Seek,
@@ -175,9 +206,7 @@ where
let mut path = Cow::Borrowed(&operation.path);
match &operation.basic {
BasicFileOperation::WriteNormalFile {
contents,
options,
..
contents, options, ..
} => {
let uncompressed_size = contents.iter().map(|chunk| chunk.len()).sum::<usize>();
let mut options = (*options).to_owned();
@@ -225,12 +254,12 @@ where
&operation,
*abort,
false,
&mut inner_files_added
&mut inner_files_added,
);
});
writer.merge_archive(other_writer.finish_into_readable()?)?;
*files_added += inner_files_added;
},
}
BasicFileOperation::SetArchiveComment(comment) => {
writer.set_raw_comment(comment.clone());
}
@@ -250,14 +279,15 @@ where
zip::ZipWriter::new_append(old_writer.finish().unwrap()).unwrap()
});
assert!(writer.get_raw_comment().starts_with(&old_comment));
},
}
ReopenOption::ViaFinishIntoReadable => {
let old_comment = writer.get_raw_comment().to_owned();
replace_with_or_abort(writer, |old_writer: zip::ZipWriter<T>| {
zip::ZipWriter::new_append(old_writer.finish_into_readable().unwrap().into_inner()).unwrap()
zip::ZipWriter::new_append(old_writer.finish_into_readable().unwrap().into_inner())
.unwrap()
});
assert!(writer.get_raw_comment().starts_with(&old_comment));
},
}
}
Ok(())
}
@@ -279,7 +309,7 @@ fuzz_target!(|test_case: FuzzTestCase| {
&operation,
*abort,
test_case.flush_on_finish_file,
&mut files_added
&mut files_added,
);
}
if final_reopen {


@@ -30,6 +30,7 @@
#![warn(missing_docs)]
#![allow(unexpected_cfgs)] // Needed for cfg(fuzzing) on nightly as of 2024-05-06
pub use crate::compression::{CompressionMethod, SUPPORTED_COMPRESSION_METHODS};
pub use crate::read::HasZipMetadata;
pub use crate::read::ZipArchive;
pub use crate::types::{AesMode, DateTime};
pub use crate::write::ZipWriter;


@@ -101,6 +101,7 @@ pub(crate) mod zip_archive {
/// ```no_run
/// use std::io::prelude::*;
/// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
/// use zip::HasZipMetadata;
/// let mut zip = zip::ZipArchive::new(reader)?;
///
/// for i in 0..zip.len() {
@@ -337,6 +338,70 @@ pub struct ZipFile<'a> {
pub(crate) reader: ZipFileReader<'a>,
}
/// A struct for reading and seeking a zip file
pub struct ZipFileSeek<'a, R> {
data: Cow<'a, ZipFileData>,
reader: ZipFileSeekReader<'a, R>,
}
enum ZipFileSeekReader<'a, R> {
Raw(SeekableTake<'a, R>),
}
struct SeekableTake<'a, R> {
inner: &'a mut R,
inner_starting_offset: u64,
length: u64,
current_offset: u64,
}
impl<'a, R: Seek> SeekableTake<'a, R> {
pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
let inner_starting_offset = inner.stream_position()?;
Ok(Self {
inner,
inner_starting_offset,
length,
current_offset: 0,
})
}
}
impl<'a, R: Seek> Seek for SeekableTake<'a, R> {
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
let offset = match pos {
SeekFrom::Start(offset) => Some(offset),
SeekFrom::End(offset) => self.length.checked_add_signed(offset),
SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
};
match offset {
None => Err(io::Error::new(
io::ErrorKind::InvalidInput,
"invalid seek to a negative or overflowing position",
)),
Some(offset) => {
let clamped_offset = std::cmp::min(self.length, offset);
let new_inner_offset = self
.inner
.seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
self.current_offset = new_inner_offset - self.inner_starting_offset;
Ok(new_inner_offset)
}
}
}
}
impl<'a, R: Read> Read for SeekableTake<'a, R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let written = self
.inner
.take(self.length - self.current_offset)
.read(buf)?;
self.current_offset += written as u64;
Ok(written)
}
}
pub(crate) fn find_content<'a>(
data: &ZipFileData,
reader: &'a mut (impl Read + Seek),
@@ -351,6 +416,18 @@ pub(crate) fn find_content<'a>(
Ok((reader as &mut dyn Read).take(data.compressed_size))
}
fn find_content_seek<'a, R: Read + Seek>(
data: &ZipFileData,
reader: &'a mut R,
) -> ZipResult<SeekableTake<'a, R>> {
// Parse local header
let data_start = find_data_start(data, reader)?;
reader.seek(io::SeekFrom::Start(data_start))?;
// Explicit Ok and ? are needed to convert io::Error to ZipError
Ok(SeekableTake::new(reader, data.compressed_size)?)
}
fn find_data_start(
data: &ZipFileData,
reader: &mut (impl Read + Seek + Sized),
@@ -1161,6 +1238,36 @@ impl<R: Read + Seek> ZipArchive<R> {
.map(|(name, _)| name.as_ref())
}
/// Search for a file entry by name and return a seekable object.
pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<R>> {
self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
}
/// Search for a file entry by index and return a seekable object.
pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<R>> {
let reader = &mut self.reader;
self.shared
.files
.get_index(index)
.ok_or(ZipError::FileNotFound)
.and_then(move |(_, data)| {
let seek_reader = match data.compression_method {
CompressionMethod::Stored => {
ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
}
_ => {
return Err(ZipError::UnsupportedArchive(
"Seekable compressed files are not yet supported",
))
}
};
Ok(ZipFileSeek {
reader: seek_reader,
data: Cow::Borrowed(data),
})
})
}
fn by_name_with_optional_password<'a>(
&'a mut self,
name: &str,
@@ -1532,6 +1639,12 @@ pub(crate) fn parse_single_extra_field<R: Read>(
Ok(false)
}
/// A trait for exposing file metadata inside the zip.
pub trait HasZipMetadata {
/// Get the file metadata
fn get_metadata(&self) -> &ZipFileData;
}
/// Methods for retrieving information on zip files
impl<'a> ZipFile<'a> {
fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> {
@@ -1554,8 +1667,8 @@ impl<'a> ZipFile<'a> {
/// Get the version of the file
pub fn version_made_by(&self) -> (u8, u8) {
(
self.data.version_made_by / 10,
self.data.version_made_by % 10,
self.get_metadata().version_made_by / 10,
self.get_metadata().version_made_by % 10,
)
}
@@ -1572,14 +1685,14 @@ impl<'a> ZipFile<'a> {
/// You can use the [`ZipFile::enclosed_name`] method to validate the name
/// as a safe path.
pub fn name(&self) -> &str {
&self.data.file_name
&self.get_metadata().file_name
}
/// Get the name of the file, in the raw (internal) byte representation.
///
/// The encoding of this data is currently undefined.
pub fn name_raw(&self) -> &[u8] {
&self.data.file_name_raw
&self.get_metadata().file_name_raw
}
/// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
@@ -1606,7 +1719,7 @@ impl<'a> ZipFile<'a> {
///
/// [`ParentDir`]: `Component::ParentDir`
pub fn mangled_name(&self) -> PathBuf {
self.data.file_name_sanitized()
self.get_metadata().file_name_sanitized()
}
/// Ensure the file path is safe to use as a [`Path`].
@@ -1620,27 +1733,27 @@ impl<'a> ZipFile<'a> {
/// to path-based exploits. It is recommended over
/// [`ZipFile::mangled_name`].
pub fn enclosed_name(&self) -> Option<PathBuf> {
self.data.enclosed_name()
self.get_metadata().enclosed_name()
}
/// Get the comment of the file
pub fn comment(&self) -> &str {
&self.data.file_comment
&self.get_metadata().file_comment
}
/// Get the compression method used to store the file
pub fn compression(&self) -> CompressionMethod {
self.data.compression_method
self.get_metadata().compression_method
}
/// Get the size of the file, in bytes, in the archive
pub fn compressed_size(&self) -> u64 {
self.data.compressed_size
self.get_metadata().compressed_size
}
/// Get the size of the file, in bytes, when uncompressed
pub fn size(&self) -> u64 {
self.data.uncompressed_size
self.get_metadata().uncompressed_size
}
/// Get the time the file was last modified
@@ -1665,17 +1778,20 @@ impl<'a> ZipFile<'a> {
/// Get unix mode for the file
pub fn unix_mode(&self) -> Option<u32> {
self.data.unix_mode()
self.get_metadata().unix_mode()
}
/// Get the CRC32 hash of the original file
pub fn crc32(&self) -> u32 {
self.data.crc32
self.get_metadata().crc32
}
/// Get the extra data of the zip header for this file
pub fn extra_data(&self) -> Option<&[u8]> {
self.data.extra_field.as_ref().map(|v| v.deref().deref())
self.get_metadata()
.extra_field
.as_ref()
.map(|v| v.deref().deref())
}
/// Get the starting offset of the data of the compressed file
@@ -1685,19 +1801,28 @@ impl<'a> ZipFile<'a> {
/// Get the starting offset of the zip header for this file
pub fn header_start(&self) -> u64 {
self.data.header_start
self.get_metadata().header_start
}
/// Get the starting offset of the zip header in the central directory for this file
pub fn central_header_start(&self) -> u64 {
self.data.central_header_start
self.get_metadata().central_header_start
}
}
/// Methods for retrieving information on zip files
impl<'a> ZipFile<'a> {
/// iterate through all extra fields
pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
self.data.extra_fields.iter()
}
}
impl<'a> HasZipMetadata for ZipFile<'a> {
fn get_metadata(&self) -> &ZipFileData {
self.data.as_ref()
}
}
impl<'a> Read for ZipFile<'a> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.get_reader()?.read(buf)
@@ -1716,6 +1841,28 @@ impl<'a> Read for ZipFile<'a> {
}
}
impl<'a, R: Read> Read for ZipFileSeek<'a, R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
match &mut self.reader {
ZipFileSeekReader::Raw(r) => r.read(buf),
}
}
}
impl<'a, R: Seek> Seek for ZipFileSeek<'a, R> {
fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
match &mut self.reader {
ZipFileSeekReader::Raw(r) => r.seek(pos),
}
}
}
impl<'a, R> HasZipMetadata for ZipFileSeek<'a, R> {
fn get_metadata(&self) -> &ZipFileData {
self.data.as_ref()
}
}
impl<'a> Drop for ZipFile<'a> {
fn drop(&mut self) {
// self.data is Owned, this reader is constructed by a streaming reader.
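
A closing note on the Seek implementation added above: SeekableTake clamps in-range seek targets to the entry's compressed length and reports seeks that would land before the start of the entry as io::ErrorKind::InvalidInput. The sketch below exercises that behaviour end to end in memory; it assumes the crate's existing writer API (ZipWriter, SimpleFileOptions) and uses illustrative names.

use std::io::{Cursor, Read, Seek, SeekFrom, Write};
use zip::write::SimpleFileOptions;
use zip::{CompressionMethod, ZipArchive, ZipWriter};

fn main() -> zip::result::ZipResult<()> {
    // Write a single Stored entry into an in-memory archive.
    let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
    let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
    writer.start_file("hello.txt", options)?;
    writer.write_all(b"hello, seekable world")?;
    let mut archive = ZipArchive::new(writer.finish()?)?;

    let mut entry = archive.by_name_seek("hello.txt")?;

    // Seeking past the end is clamped to the entry's length, so the next
    // read yields no bytes instead of spilling into the rest of the archive.
    entry.seek(SeekFrom::Start(10_000))?;
    let mut rest = Vec::new();
    entry.read_to_end(&mut rest)?;
    assert!(rest.is_empty());

    // A seek that would land before the start of the entry is an error
    // rather than being clamped to zero.
    let err = entry.seek(SeekFrom::Current(-10_000)).unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);

    // SeekFrom::End is relative to the entry's data, not the whole archive.
    entry.seek(SeekFrom::End(-5))?;
    let mut tail = String::new();
    entry.read_to_string(&mut tail)?;
    assert_eq!(tail, "world");
    Ok(())
}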