diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs
index 414de08d..fa281a4f 100755
--- a/fuzz/fuzz_targets/fuzz_write.rs
+++ b/fuzz/fuzz_targets/fuzz_write.rs
@@ -2,9 +2,9 @@
 
 use arbitrary::Arbitrary;
 use core::fmt::{Debug, Formatter};
-use std::borrow::Cow;
 use libfuzzer_sys::fuzz_target;
 use replace_with::replace_with_or_abort;
+use std::borrow::Cow;
 use std::io::{Cursor, Read, Seek, Write};
 use std::path::PathBuf;
 use tikv_jemallocator::Jemalloc;
@@ -27,16 +27,16 @@ pub enum BasicFileOperation<'k> {
     ShallowCopy(Box<FileOperation<'k>>),
     DeepCopy(Box<FileOperation<'k>>),
     MergeWithOtherFile {
-        operations: Box<[(FileOperation<'k>, bool)]>
+        operations: Box<[(FileOperation<'k>, bool)]>,
     },
-    SetArchiveComment(Box<[u8]>)
+    SetArchiveComment(Box<[u8]>),
 }
 
 #[derive(Arbitrary, Clone, Debug, Eq, PartialEq)]
 pub enum ReopenOption {
     DoNotReopen,
     ViaFinish,
-    ViaFinishIntoReadable
+    ViaFinishIntoReadable,
 }
 
 #[derive(Arbitrary, Clone)]
@@ -47,78 +47,105 @@ pub struct FileOperation<'k> {
     // 'abort' flag is separate, to prevent trying to copy an aborted file
 }
 
-impl <'k> FileOperation<'k> {
+impl<'k> FileOperation<'k> {
     fn get_path(&self) -> Option<Cow<PathBuf>> {
         match &self.basic {
             BasicFileOperation::SetArchiveComment(_) => None,
             BasicFileOperation::WriteDirectory(_) => Some(Cow::Owned(self.path.join("/"))),
-            BasicFileOperation::MergeWithOtherFile { operations } =>
-                operations.iter().flat_map(|(op, abort)| if !abort { op.get_path() } else { None }).next(),
-            _ => Some(Cow::Borrowed(&self.path))
+            BasicFileOperation::MergeWithOtherFile { operations } => operations
+                .iter()
+                .flat_map(|(op, abort)| if !abort { op.get_path() } else { None })
+                .next(),
+            _ => Some(Cow::Borrowed(&self.path)),
         }
     }
 }
 
-impl <'k> Debug for FileOperation<'k> {
+impl<'k> Debug for FileOperation<'k> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match &self.basic {
-            BasicFileOperation::WriteNormalFile {contents, options} => {
-                f.write_fmt(format_args!("let options = {:?};\n\
-                writer.start_file_from_path({:?}, options)?;\n", options, self.path))?;
+            BasicFileOperation::WriteNormalFile { contents, options } => {
+                f.write_fmt(format_args!(
+                    "let options = {:?};\n\
+                    writer.start_file_from_path({:?}, options)?;\n",
+                    options, self.path
+                ))?;
                 for content_slice in contents {
                     f.write_fmt(format_args!("writer.write_all(&({:?}))?;\n", content_slice))?;
                 }
-            },
+            }
             BasicFileOperation::WriteDirectory(options) => {
-                f.write_fmt(format_args!("let options = {:?};\n\
+                f.write_fmt(format_args!(
+                    "let options = {:?};\n\
                     writer.add_directory_from_path({:?}, options)?;\n",
-                    options, self.path))?;
-            },
-            BasicFileOperation::WriteSymlinkWithTarget {target, options} => {
-                f.write_fmt(format_args!("let options = {:?};\n\
+                    options, self.path
+                ))?;
+            }
+            BasicFileOperation::WriteSymlinkWithTarget { target, options } => {
+                f.write_fmt(format_args!(
+                    "let options = {:?};\n\
                     writer.add_symlink_from_path({:?}, {:?}, options)?;\n",
-                    options, self.path, target.to_owned()))?;
-            },
+                    options,
+                    self.path,
+                    target.to_owned()
+                ))?;
+            }
             BasicFileOperation::ShallowCopy(base) => {
                 let Some(base_path) = base.get_path() else {
-                    return Ok(())
+                    return Ok(());
                 };
-                f.write_fmt(format_args!("{:?}writer.shallow_copy_file_from_path({:?}, {:?})?;\n", base, base_path, self.path))?;
-            },
+                f.write_fmt(format_args!(
+                    "{:?}writer.shallow_copy_file_from_path({:?}, {:?})?;\n",
+                    base, base_path, self.path
+                ))?;
+            }
             BasicFileOperation::DeepCopy(base) => {
                 let Some(base_path) = base.get_path() else {
-                    return Ok(())
+                    return Ok(());
                 };
-                f.write_fmt(format_args!("{:?}writer.deep_copy_file_from_path({:?}, {:?})?;\n", base, base_path, self.path))?;
-            },
-            BasicFileOperation::MergeWithOtherFile {operations} => {
-                f.write_str("let sub_writer = {\n\
+                f.write_fmt(format_args!(
+                    "{:?}writer.deep_copy_file_from_path({:?}, {:?})?;\n",
+                    base, base_path, self.path
+                ))?;
+            }
+            BasicFileOperation::MergeWithOtherFile { operations } => {
+                f.write_str(
+                    "let sub_writer = {\n\
                     let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\
-                    writer.set_flush_on_finish_file(false);\n")?;
-                operations.iter().map(|op| {
-                    f.write_fmt(format_args!("{:?}", op.0))?;
-                    if op.1 {
-                        f.write_str("writer.abort_file()?;\n")
-                    } else {
-                        Ok(())
-                    }
-                }).collect::<Result<(), _>>()?;
-                f.write_str("writer\n\
+                    writer.set_flush_on_finish_file(false);\n",
+                )?;
+                operations
+                    .iter()
+                    .map(|op| {
+                        f.write_fmt(format_args!("{:?}", op.0))?;
+                        if op.1 {
+                            f.write_str("writer.abort_file()?;\n")
+                        } else {
+                            Ok(())
+                        }
+                    })
+                    .collect::<Result<(), _>>()?;
+                f.write_str(
+                    "writer\n\
                     };\n\
-                    writer.merge_archive(sub_writer.finish_into_readable()?)?;\n")?;
-            },
+                    writer.merge_archive(sub_writer.finish_into_readable()?)?;\n",
+                )?;
+            }
             BasicFileOperation::SetArchiveComment(comment) => {
-                f.write_fmt(format_args!("writer.set_raw_comment({:?}.into());\n", comment))?;
+                f.write_fmt(format_args!(
+                    "writer.set_raw_comment({:?}.into());\n",
+                    comment
+                ))?;
             }
         }
         match &self.reopen {
             ReopenOption::DoNotReopen => Ok(()),
             ReopenOption::ViaFinish => {
                 f.write_str("writer = ZipWriter::new_append(writer.finish()?)?;\n")
-            },
-            ReopenOption::ViaFinishIntoReadable => {
-                f.write_str("writer = ZipWriter::new_append(writer.finish_into_readable()?.into_inner())?;\n")
             }
+            ReopenOption::ViaFinishIntoReadable => f.write_str(
+                "writer = ZipWriter::new_append(writer.finish_into_readable()?.into_inner())?;\n",
+            ),
         }
     }
 }
@@ -129,19 +156,23 @@ pub struct FuzzTestCase<'k> {
     flush_on_finish_file: bool,
 }
 
-impl <'k> Debug for FuzzTestCase<'k> {
+impl<'k> Debug for FuzzTestCase<'k> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         f.write_fmt(format_args!(
             "let mut writer = ZipWriter::new(Cursor::new(Vec::new()));\n\
-            writer.set_flush_on_finish_file({:?});\n", self.flush_on_finish_file))?;
-        self.operations.iter().map(|op| {
-            f.write_fmt(format_args!("{:?}", op.0))?;
-            if op.1 {
-                f.write_str("writer.abort_file()?;\n")
-            } else {
-                Ok(())
-            }
-        })
+            writer.set_flush_on_finish_file({:?});\n",
+            self.flush_on_finish_file
+        ))?;
+        self.operations
+            .iter()
+            .map(|op| {
+                f.write_fmt(format_args!("{:?}", op.0))?;
+                if op.1 {
+                    f.write_str("writer.abort_file()?;\n")
+                } else {
+                    Ok(())
+                }
+            })
             .collect::<Result<(), _>>()?;
         f.write_str("writer\n")
     }
@@ -154,8 +185,8 @@ fn deduplicate_paths(copy: &mut Cow<PathBuf>, original: &PathBuf) {
             let mut new_name = name.to_owned();
             new_name.push("_copy");
             copy.with_file_name(new_name)
-        },
-        None => copy.with_file_name("copy")
+        }
+        None => copy.with_file_name("copy"),
     };
     *copy = Cow::Owned(new_path);
 }
@@ -166,7 +197,7 @@ fn do_operation<'k, T>(
     operation: &FileOperation<'k>,
     abort: bool,
     flush_on_finish_file: bool,
-    files_added: &mut usize
+    files_added: &mut usize,
 ) -> Result<(), Box<dyn std::error::Error>>
 where
     T: Read + Write + Seek,
@@ -175,9 +206,7 @@ where
     let mut path = Cow::Borrowed(&operation.path);
     match &operation.basic {
         BasicFileOperation::WriteNormalFile {
-            contents,
-            options,
-            ..
+            contents, options, ..
         } => {
             let uncompressed_size = contents.iter().map(|chunk| chunk.len()).sum::<usize>();
             let mut options = (*options).to_owned();
@@ -225,12 +254,12 @@ where
                     &operation,
                     *abort,
                     false,
-                    &mut inner_files_added
+                    &mut inner_files_added,
                 );
             });
             writer.merge_archive(other_writer.finish_into_readable()?)?;
             *files_added += inner_files_added;
-        },
+        }
         BasicFileOperation::SetArchiveComment(comment) => {
             writer.set_raw_comment(comment.clone());
         }
@@ -250,14 +279,15 @@ where
                 zip::ZipWriter::new_append(old_writer.finish().unwrap()).unwrap()
             });
             assert!(writer.get_raw_comment().starts_with(&old_comment));
-        },
+        }
         ReopenOption::ViaFinishIntoReadable => {
             let old_comment = writer.get_raw_comment().to_owned();
             replace_with_or_abort(writer, |old_writer: zip::ZipWriter<T>| {
-                zip::ZipWriter::new_append(old_writer.finish_into_readable().unwrap().into_inner()).unwrap()
+                zip::ZipWriter::new_append(old_writer.finish_into_readable().unwrap().into_inner())
+                    .unwrap()
             });
             assert!(writer.get_raw_comment().starts_with(&old_comment));
-        },
+        }
     }
     Ok(())
 }
@@ -279,7 +309,7 @@ fuzz_target!(|test_case: FuzzTestCase| {
             &operation,
             *abort,
             test_case.flush_on_finish_file,
-            &mut files_added
+            &mut files_added,
         );
     }
     if final_reopen {
diff --git a/src/lib.rs b/src/lib.rs
index a78bb184..a5f6b2bf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,6 +30,7 @@
 #![warn(missing_docs)]
 #![allow(unexpected_cfgs)] // Needed for cfg(fuzzing) on nightly as of 2024-05-06
 pub use crate::compression::{CompressionMethod, SUPPORTED_COMPRESSION_METHODS};
+pub use crate::read::HasZipMetadata;
 pub use crate::read::ZipArchive;
 pub use crate::types::{AesMode, DateTime};
 pub use crate::write::ZipWriter;
diff --git a/src/read.rs b/src/read.rs
index 963b5265..422e3f4e 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -101,6 +101,7 @@ pub(crate) mod zip_archive {
     /// ```no_run
     /// use std::io::prelude::*;
     /// fn list_zip_contents(reader: impl Read + Seek) -> zip::result::ZipResult<()> {
+    ///     use zip::HasZipMetadata;
     ///     let mut zip = zip::ZipArchive::new(reader)?;
     ///
     ///     for i in 0..zip.len() {
@@ -337,6 +338,70 @@ pub struct ZipFile<'a> {
     pub(crate) reader: ZipFileReader<'a>,
 }
 
+/// A struct for reading and seeking a zip file
+pub struct ZipFileSeek<'a, R> {
+    data: Cow<'a, ZipFileData>,
+    reader: ZipFileSeekReader<'a, R>,
+}
+
+enum ZipFileSeekReader<'a, R> {
+    Raw(SeekableTake<'a, R>),
+}
+
+struct SeekableTake<'a, R> {
+    inner: &'a mut R,
+    inner_starting_offset: u64,
+    length: u64,
+    current_offset: u64,
+}
+
+impl<'a, R: Seek> SeekableTake<'a, R> {
+    pub fn new(inner: &'a mut R, length: u64) -> io::Result<Self> {
+        let inner_starting_offset = inner.stream_position()?;
+        Ok(Self {
+            inner,
+            inner_starting_offset,
+            length,
+            current_offset: 0,
+        })
+    }
+}
+
+impl<'a, R: Seek> Seek for SeekableTake<'a, R> {
+    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
+        let offset = match pos {
+            SeekFrom::Start(offset) => Some(offset),
+            SeekFrom::End(offset) => self.length.checked_add_signed(offset),
+            SeekFrom::Current(offset) => self.current_offset.checked_add_signed(offset),
+        };
+        match offset {
+            None => Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "invalid seek to a negative or overflowing position",
+            )),
+            Some(offset) => {
+                let clamped_offset = std::cmp::min(self.length, offset);
+                let new_inner_offset = self
+                    .inner
+                    .seek(SeekFrom::Start(self.inner_starting_offset + clamped_offset))?;
+                self.current_offset = new_inner_offset - self.inner_starting_offset;
+                Ok(new_inner_offset)
+            }
+        }
+    }
+}
+
+impl<'a, R: Read> Read for SeekableTake<'a, R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let written = self
+            .inner
+            .take(self.length - self.current_offset)
+            .read(buf)?;
+        self.current_offset += written as u64;
+        Ok(written)
+    }
+}
+
 pub(crate) fn find_content<'a>(
     data: &ZipFileData,
     reader: &'a mut (impl Read + Seek),
@@ -351,6 +416,18 @@ pub(crate) fn find_content<'a>(
     Ok((reader as &mut dyn Read).take(data.compressed_size))
 }
 
+fn find_content_seek<'a, R: Read + Seek>(
+    data: &ZipFileData,
+    reader: &'a mut R,
+) -> ZipResult<SeekableTake<'a, R>> {
+    // Parse local header
+    let data_start = find_data_start(data, reader)?;
+    reader.seek(io::SeekFrom::Start(data_start))?;
+
+    // Explicit Ok and ? are needed to convert io::Error to ZipError
+    Ok(SeekableTake::new(reader, data.compressed_size)?)
+}
+
 fn find_data_start(
     data: &ZipFileData,
     reader: &mut (impl Read + Seek + Sized),
@@ -1161,6 +1238,36 @@ impl<R: Read + Seek> ZipArchive<R> {
             .map(|(name, _)| name.as_ref())
     }
 
+    /// Search for a file entry by name and return a seekable object.
+    pub fn by_name_seek(&mut self, name: &str) -> ZipResult<ZipFileSeek<R>> {
+        self.by_index_seek(self.index_for_name(name).ok_or(ZipError::FileNotFound)?)
+    }
+
+    /// Search for a file entry by index and return a seekable object.
+    pub fn by_index_seek(&mut self, index: usize) -> ZipResult<ZipFileSeek<R>> {
+        let reader = &mut self.reader;
+        self.shared
+            .files
+            .get_index(index)
+            .ok_or(ZipError::FileNotFound)
+            .and_then(move |(_, data)| {
+                let seek_reader = match data.compression_method {
+                    CompressionMethod::Stored => {
+                        ZipFileSeekReader::Raw(find_content_seek(data, reader)?)
+                    }
+                    _ => {
+                        return Err(ZipError::UnsupportedArchive(
+                            "Seekable compressed files are not yet supported",
+                        ))
+                    }
+                };
+                Ok(ZipFileSeek {
+                    reader: seek_reader,
+                    data: Cow::Borrowed(data),
+                })
+            })
+    }
+
     fn by_name_with_optional_password<'a>(
         &'a mut self,
         name: &str,
@@ -1532,6 +1639,12 @@ pub(crate) fn parse_single_extra_field(
     Ok(false)
 }
 
+/// A trait for exposing file metadata inside the zip.
+pub trait HasZipMetadata {
+    /// Get the file metadata
+    fn get_metadata(&self) -> &ZipFileData;
+}
+
 /// Methods for retrieving information on zip files
 impl<'a> ZipFile<'a> {
     fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> {
@@ -1554,8 +1667,8 @@ impl<'a> ZipFile<'a> {
     /// Get the version of the file
     pub fn version_made_by(&self) -> (u8, u8) {
         (
-            self.data.version_made_by / 10,
-            self.data.version_made_by % 10,
+            self.get_metadata().version_made_by / 10,
+            self.get_metadata().version_made_by % 10,
         )
     }
 
@@ -1572,14 +1685,14 @@ impl<'a> ZipFile<'a> {
     /// You can use the [`ZipFile::enclosed_name`] method to validate the name
     /// as a safe path.
     pub fn name(&self) -> &str {
-        &self.data.file_name
+        &self.get_metadata().file_name
     }
 
     /// Get the name of the file, in the raw (internal) byte representation.
     ///
     /// The encoding of this data is currently undefined.
     pub fn name_raw(&self) -> &[u8] {
-        &self.data.file_name_raw
+        &self.get_metadata().file_name_raw
     }
 
     /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte,
@@ -1606,7 +1719,7 @@ impl<'a> ZipFile<'a> {
     ///
     /// [`ParentDir`]: `Component::ParentDir`
     pub fn mangled_name(&self) -> PathBuf {
-        self.data.file_name_sanitized()
+        self.get_metadata().file_name_sanitized()
     }
 
     /// Ensure the file path is safe to use as a [`Path`].
@@ -1620,27 +1733,27 @@ impl<'a> ZipFile<'a> {
     /// to path-based exploits. It is recommended over
     /// [`ZipFile::mangled_name`].
     pub fn enclosed_name(&self) -> Option<PathBuf> {
-        self.data.enclosed_name()
+        self.get_metadata().enclosed_name()
     }
 
     /// Get the comment of the file
     pub fn comment(&self) -> &str {
-        &self.data.file_comment
+        &self.get_metadata().file_comment
     }
 
     /// Get the compression method used to store the file
    pub fn compression(&self) -> CompressionMethod {
-        self.data.compression_method
+        self.get_metadata().compression_method
     }
 
     /// Get the size of the file, in bytes, in the archive
     pub fn compressed_size(&self) -> u64 {
-        self.data.compressed_size
+        self.get_metadata().compressed_size
     }
 
     /// Get the size of the file, in bytes, when uncompressed
     pub fn size(&self) -> u64 {
-        self.data.uncompressed_size
+        self.get_metadata().uncompressed_size
     }
 
     /// Get the time the file was last modified
@@ -1665,17 +1778,20 @@ impl<'a> ZipFile<'a> {
 
     /// Get unix mode for the file
     pub fn unix_mode(&self) -> Option<u32> {
-        self.data.unix_mode()
+        self.get_metadata().unix_mode()
     }
 
     /// Get the CRC32 hash of the original file
     pub fn crc32(&self) -> u32 {
-        self.data.crc32
+        self.get_metadata().crc32
    }
 
     /// Get the extra data of the zip header for this file
     pub fn extra_data(&self) -> Option<&[u8]> {
-        self.data.extra_field.as_ref().map(|v| v.deref().deref())
+        self.get_metadata()
+            .extra_field
+            .as_ref()
+            .map(|v| v.deref().deref())
     }
 
     /// Get the starting offset of the data of the compressed file
@@ -1685,19 +1801,28 @@ impl<'a> ZipFile<'a> {
     /// Get the starting offset of the zip header for this file
     pub fn header_start(&self) -> u64 {
-        self.data.header_start
+        self.get_metadata().header_start
     }
     /// Get the starting offset of the zip header in the central directory for this file
     pub fn central_header_start(&self) -> u64 {
-        self.data.central_header_start
+        self.get_metadata().central_header_start
     }
+}
 
+/// Methods for retrieving information on zip files
+impl<'a> ZipFile<'a> {
     /// iterate through all extra fields
     pub fn extra_data_fields(&self) -> impl Iterator<Item = &ExtraField> {
         self.data.extra_fields.iter()
     }
 }
 
+impl<'a> HasZipMetadata for ZipFile<'a> {
+    fn get_metadata(&self) -> &ZipFileData {
+        self.data.as_ref()
+    }
+}
+
 impl<'a> Read for ZipFile<'a> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
         self.get_reader()?.read(buf)
     }
@@ -1716,6 +1841,28 @@ impl<'a> Read for ZipFile<'a> {
 }
 
+impl<'a, R: Read> Read for ZipFileSeek<'a, R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match &mut self.reader {
+            ZipFileSeekReader::Raw(r) => r.read(buf),
+        }
+    }
+}
+
+impl<'a, R: Seek> Seek for ZipFileSeek<'a, R> {
+    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
+        match &mut self.reader {
+            ZipFileSeekReader::Raw(r) => r.seek(pos),
+        }
+    }
+}
+
+impl<'a, R> HasZipMetadata for ZipFileSeek<'a, R> {
+    fn get_metadata(&self) -> &ZipFileData {
+        self.data.as_ref()
+    }
+}
+
 impl<'a> Drop for ZipFile<'a> {
     fn drop(&mut self) {
         // self.data is Owned, this reader is constructed by a streaming reader.