From 77e718864d6a7dcda3cc24d4e0d408d4d3d032d0 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Thu, 13 Jun 2024 13:49:27 -0700 Subject: [PATCH] fix: Incorrect behavior following a rare combination of `merge_archive`, `abort_file` and `deep_copy_file`. As well, we now return an error when a file is being copied to itself. --- fuzz/fuzz_targets/fuzz_write.rs | 30 +++++++++++--- src/read.rs | 69 ++++++++++++++++++--------------- src/read/stream.rs | 5 --- src/spec.rs | 52 ------------------------- src/types.rs | 5 +++ src/unstable.rs | 52 +++++++++++++++++++++++++ src/write.rs | 28 +++++++++---- 7 files changed, 140 insertions(+), 101 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs index 66ea9714..820098c8 100755 --- a/fuzz/fuzz_targets/fuzz_write.rs +++ b/fuzz/fuzz_targets/fuzz_write.rs @@ -2,10 +2,12 @@ use arbitrary::Arbitrary; use core::fmt::{Debug, Formatter}; +use std::borrow::Cow; use libfuzzer_sys::fuzz_target; use replace_with::replace_with_or_abort; use std::io::{Cursor, Read, Seek, Write}; use std::path::PathBuf; +use zip::unstable::path_to_string; #[derive(Arbitrary, Clone)] pub enum BasicFileOperation<'k> { @@ -125,6 +127,20 @@ impl <'k> Debug for FuzzTestCase<'k> { } } +fn deduplicate_paths(copy: &mut Cow, original: &PathBuf) { + if path_to_string(&**copy) == path_to_string(original) { + let new_path = match original.file_name() { + Some(name) => { + let mut new_name = name.to_owned(); + new_name.push("_copy"); + copy.with_file_name(new_name) + }, + None => copy.with_file_name("copy") + }; + *copy = Cow::Owned(new_path); + } +} + fn do_operation<'k, T>( writer: &mut zip::ZipWriter, operation: &FileOperation<'k>, @@ -136,7 +152,7 @@ where T: Read + Write + Seek, { writer.set_flush_on_finish_file(flush_on_finish_file); - let path = &operation.path; + let mut path = Cow::Borrowed(&operation.path); match &operation.basic { BasicFileOperation::WriteNormalFile { contents, @@ -148,28 +164,30 @@ where if uncompressed_size >= u32::MAX as usize { options = options.large_file(true); } - writer.start_file_from_path(path, options)?; + writer.start_file_from_path(&*path, options)?; for chunk in contents.iter() { writer.write_all(&chunk)?; } *files_added += 1; } BasicFileOperation::WriteDirectory(options) => { - writer.add_directory_from_path(path, options.to_owned())?; + writer.add_directory_from_path(&*path, options.to_owned())?; *files_added += 1; } BasicFileOperation::WriteSymlinkWithTarget { target, options } => { - writer.add_symlink_from_path(&path, target, options.to_owned())?; + writer.add_symlink_from_path(&*path, target, options.to_owned())?; *files_added += 1; } BasicFileOperation::ShallowCopy(base) => { + deduplicate_paths(&mut path, &base.path); do_operation(writer, &base, false, flush_on_finish_file, files_added)?; - writer.shallow_copy_file_from_path(&base.path, &path)?; + writer.shallow_copy_file_from_path(&base.path, &*path)?; *files_added += 1; } BasicFileOperation::DeepCopy(base) => { + deduplicate_paths(&mut path, &base.path); do_operation(writer, &base, false, flush_on_finish_file, files_added)?; - writer.deep_copy_file_from_path(&base.path, &path)?; + writer.deep_copy_file_from_path(&base.path, &*path)?; *files_added += 1; } BasicFileOperation::MergeWithOtherFile { operations } => { diff --git a/src/read.rs b/src/read.rs index dd4a9f87..5766e3f3 100644 --- a/src/read.rs +++ b/src/read.rs @@ -18,7 +18,7 @@ use indexmap::IndexMap; use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, copy, prelude::*, sink}; +use std::io::{self, copy, prelude::*, sink, SeekFrom}; use std::mem; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -95,9 +95,9 @@ use crate::extra_fields::UnicodeExtraField; #[cfg(feature = "lzma")] use crate::read::lzma::LzmaDecoder; use crate::result::ZipError::{InvalidPassword, UnsupportedArchive}; -use crate::spec::{is_dir, path_to_string}; +use crate::spec::is_dir; use crate::types::ffi::S_IFLNK; -use crate::unstable::LittleEndianReadExt; +use crate::unstable::{path_to_string, LittleEndianReadExt}; pub use zip_archive::ZipArchive; #[allow(clippy::large_enum_variant)] @@ -227,38 +227,42 @@ pub(crate) fn find_content<'a>( // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! let data_start = match data.data_start.get() { Some(data_start) => *data_start, - None => { - // Go to start of data. - reader.seek(io::SeekFrom::Start(data.header_start))?; - - // Parse static-sized fields and check the magic value. - let block = ZipLocalEntryBlock::parse(reader)?; - - // Calculate the end of the local header from the fields we just parsed. - let variable_fields_len = - // Each of these fields must be converted to u64 before adding, as the result may - // easily overflow a u16. - block.file_name_length as u64 + block.extra_field_length as u64; - let data_start = data.header_start - + mem::size_of::() as u64 - + variable_fields_len; - // Set the value so we don't have to read it again. - match data.data_start.set(data_start) { - Ok(()) => (), - // If the value was already set in the meantime, ensure it matches (this is probably - // unnecessary). - Err(_) => { - assert_eq!(*data.data_start.get().unwrap(), data_start); - } - } - data_start - } + None => find_data_start(data, reader)?, }; reader.seek(io::SeekFrom::Start(data_start))?; Ok((reader as &mut dyn Read).take(data.compressed_size)) } +fn find_data_start( + data: &ZipFileData, + reader: &mut (impl Read + Seek + Sized), +) -> Result { + // Go to start of data. + reader.seek(io::SeekFrom::Start(data.header_start))?; + + // Parse static-sized fields and check the magic value. + let block = ZipLocalEntryBlock::parse(reader)?; + + // Calculate the end of the local header from the fields we just parsed. + let variable_fields_len = + // Each of these fields must be converted to u64 before adding, as the result may + // easily overflow a u16. + block.file_name_length as u64 + block.extra_field_length as u64; + let data_start = + data.header_start + mem::size_of::() as u64 + variable_fields_len; + // Set the value so we don't have to read it again. + match data.data_start.set(data_start) { + Ok(()) => (), + // If the value was already set in the meantime, ensure it matches (this is probably + // unnecessary). + Err(_) => { + assert_eq!(*data.data_start.get().unwrap(), data_start); + } + } + Ok(data_start) +} + #[allow(clippy::too_many_arguments)] pub(crate) fn make_crypto_reader<'a>( compression_method: CompressionMethod, @@ -698,6 +702,9 @@ impl ZipArchive { reader.seek(io::SeekFrom::Start(dir_info.directory_start))?; for _ in 0..dir_info.number_of_files { let file = central_header_to_zip_file(reader, dir_info.archive_offset)?; + let central_end = reader.stream_position()?; + find_data_start(&file, reader)?; + reader.seek(SeekFrom::Start(central_end))?; files.insert(file.file_name.clone(), file); } if dir_info.disk_number != dir_info.disk_with_central_directory { @@ -1461,7 +1468,7 @@ impl<'a> ZipFile<'a> { /// Get the starting offset of the data of the compressed file pub fn data_start(&self) -> u64 { - *self.data.data_start.get().unwrap_or(&0) + *self.data.data_start.get().unwrap() } /// Get the starting offset of the zip header for this file @@ -1538,7 +1545,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult (), spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), - _ => return Err(ZipError::InvalidArchive("Invalid local file header")), + _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), } let block = ZipLocalEntryBlock::interpret(&block)?; diff --git a/src/read/stream.rs b/src/read/stream.rs index 8f2ffa0c..7bc91c9c 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -195,11 +195,6 @@ impl ZipStreamFileMetadata { &self.0.file_comment } - /// Get the starting offset of the data of the compressed file - pub fn data_start(&self) -> u64 { - *self.0.data_start.get().unwrap_or(&0) - } - /// Get unix mode for the file pub const fn unix_mode(&self) -> Option { self.0.unix_mode() diff --git a/src/spec.rs b/src/spec.rs index 9b02aa61..31354883 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -2,11 +2,9 @@ use crate::result::{ZipError, ZipResult}; use memchr::memmem::FinderRev; -use std::borrow::Cow; use std::io; use std::io::prelude::*; use std::mem; -use std::path::{Component, Path, MAIN_SEPARATOR}; /// "Magic" header values used in the zip spec to locate metadata records. /// @@ -649,56 +647,6 @@ pub(crate) fn is_dir(filename: &str) -> bool { .map_or(false, |c| c == '/' || c == '\\') } -/// Converts a path to the ZIP format (forward-slash-delimited and normalized). -pub(crate) fn path_to_string>(path: T) -> Box { - let mut maybe_original = None; - if let Some(original) = path.as_ref().to_str() { - if original.is_empty() { - return String::new().into_boxed_str(); - } - if (MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR)) - && !original.ends_with('.') - && !original.starts_with(['.', MAIN_SEPARATOR]) - && !original.starts_with(['.', '.', MAIN_SEPARATOR]) - && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) - && !original.contains([MAIN_SEPARATOR, '.', MAIN_SEPARATOR]) - && !original.contains([MAIN_SEPARATOR, '.', '.', MAIN_SEPARATOR]) - { - if original.starts_with(MAIN_SEPARATOR) { - maybe_original = Some(&original[1..]); - } else { - maybe_original = Some(original); - } - } - } - let mut recreate = maybe_original.is_none(); - let mut normalized_components = Vec::new(); - - for component in path.as_ref().components() { - match component { - Component::Normal(os_str) => match os_str.to_str() { - Some(valid_str) => normalized_components.push(Cow::Borrowed(valid_str)), - None => { - recreate = true; - normalized_components.push(os_str.to_string_lossy()); - } - }, - Component::ParentDir => { - recreate = true; - normalized_components.pop(); - } - _ => { - recreate = true; - } - } - } - if recreate { - normalized_components.join("/").into() - } else { - maybe_original.unwrap().into() - } -} - #[cfg(test)] mod test { use super::*; diff --git a/src/types.rs b/src/types.rs index acf07eb7..d81b4249 100644 --- a/src/types.rs +++ b/src/types.rs @@ -478,6 +478,11 @@ pub struct ZipFileData { } impl ZipFileData { + /// Get the starting offset of the data of the compressed file + pub fn data_start(&self) -> u64 { + *self.data_start.get().unwrap() + } + #[allow(dead_code)] pub fn is_dir(&self) -> bool { is_dir(&self.file_name) diff --git a/src/unstable.rs b/src/unstable.rs index 102c4336..7584f7f6 100644 --- a/src/unstable.rs +++ b/src/unstable.rs @@ -1,7 +1,9 @@ #![allow(missing_docs)] +use std::borrow::Cow; use std::io; use std::io::{Read, Write}; +use std::path::{Component, Path, MAIN_SEPARATOR}; /// Provides high level API for reading from a stream. pub mod stream { @@ -67,3 +69,53 @@ pub trait LittleEndianReadExt: Read { } impl LittleEndianReadExt for R {} + +/// Converts a path to the ZIP format (forward-slash-delimited and normalized). +pub fn path_to_string>(path: T) -> Box { + let mut maybe_original = None; + if let Some(original) = path.as_ref().to_str() { + if original.is_empty() { + return String::new().into_boxed_str(); + } + if (MAIN_SEPARATOR == '/' || !original[1..].contains(MAIN_SEPARATOR)) + && !original.ends_with('.') + && !original.starts_with(['.', MAIN_SEPARATOR]) + && !original.starts_with(['.', '.', MAIN_SEPARATOR]) + && !original.contains([MAIN_SEPARATOR, MAIN_SEPARATOR]) + && !original.contains([MAIN_SEPARATOR, '.', MAIN_SEPARATOR]) + && !original.contains([MAIN_SEPARATOR, '.', '.', MAIN_SEPARATOR]) + { + if original.starts_with(MAIN_SEPARATOR) { + maybe_original = Some(&original[1..]); + } else { + maybe_original = Some(original); + } + } + } + let mut recreate = maybe_original.is_none(); + let mut normalized_components = Vec::new(); + + for component in path.as_ref().components() { + match component { + Component::Normal(os_str) => match os_str.to_str() { + Some(valid_str) => normalized_components.push(Cow::Borrowed(valid_str)), + None => { + recreate = true; + normalized_components.push(os_str.to_string_lossy()); + } + }, + Component::ParentDir => { + recreate = true; + normalized_components.pop(); + } + _ => { + recreate = true; + } + } + } + if recreate { + normalized_components.join("/").into() + } else { + maybe_original.unwrap().into() + } +} diff --git a/src/write.rs b/src/write.rs index 3ae552a4..28d1a580 100644 --- a/src/write.rs +++ b/src/write.rs @@ -5,7 +5,7 @@ use crate::aes::AesWriter; use crate::compression::CompressionMethod; use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; -use crate::spec::{self, FixedSizeBlock}; +use crate::spec::{self, FixedSizeBlock, Magic}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; use crate::types::{ @@ -38,6 +38,7 @@ use zopfli::Options; #[cfg(feature = "deflate-zopfli")] use std::io::BufWriter; +use std::mem::size_of; use std::path::Path; #[cfg(feature = "zstd")] @@ -172,7 +173,7 @@ pub use self::sealed::FileOptionExtension; use crate::result::ZipError::InvalidArchive; #[cfg(feature = "lzma")] use crate::result::ZipError::UnsupportedArchive; -use crate::spec::path_to_string; +use crate::unstable::path_to_string; use crate::unstable::LittleEndianWriteExt; use crate::write::GenericZipWriter::{Closed, Storer}; use crate::zipcrypto::ZipCryptoKeys; @@ -598,12 +599,15 @@ impl ZipWriter { /// widely-compatible archive compared to [Self::shallow_copy_file]. Does not copy alignment. pub fn deep_copy_file(&mut self, src_name: &str, dest_name: &str) -> ZipResult<()> { self.finish_file()?; + if src_name == dest_name { + return Err(InvalidArchive("Trying to copy a file to itself")); + } let write_position = self.inner.get_plain().stream_position()?; let src_index = self.index_by_name(src_name)?; let src_data = &mut self.files[src_index]; - let data_start = *src_data.data_start.get().unwrap_or(&0); + let data_start = src_data.data_start(); let mut compressed_size = src_data.compressed_size; - if compressed_size > write_position - data_start { + if compressed_size > (write_position - data_start) { compressed_size = write_position - data_start; src_data.compressed_size = compressed_size; } @@ -1402,8 +1406,15 @@ impl ZipWriter { let footer_end = writer.stream_position()?; let file_end = writer.seek(SeekFrom::End(0))?; if footer_end < file_end { - // Data from an aborted file is past the end of the footer, so rewrite the footer at - // the actual end. + // Data from an aborted file is past the end of the footer. + + // Overwrite the magic so the footer is no longer valid. + writer.seek(SeekFrom::Start(central_start))?; + writer.write_u32_le(0)?; + writer.seek(SeekFrom::Start(footer_end - size_of::() as u64))?; + writer.write_u32_le(0)?; + + // Rewrite the footer at the actual end. let central_and_footer_size = footer_end - central_start; writer.seek(SeekFrom::End(-(central_and_footer_size as i64)))?; central_start = self.write_central_and_footer()?; @@ -1476,6 +1487,9 @@ impl ZipWriter { /// some other software (e.g. Minecraft) will refuse to extract a file copied this way. pub fn shallow_copy_file(&mut self, src_name: &str, dest_name: &str) -> ZipResult<()> { self.finish_file()?; + if src_name == dest_name { + return Err(InvalidArchive("Trying to copy a file to itself")); + } let src_index = self.index_by_name(src_name)?; let mut dest_data = self.files[src_index].to_owned(); dest_data.file_name = dest_name.to_string().into(); @@ -1926,7 +1940,7 @@ const EXTRA_FIELD_MAPPING: [u16; 49] = [ #[cfg(test)] mod test { - use super::{FileOptions, ZipWriter}; + use super::{ExtendedFileOptions, FileOptions, ZipWriter}; use crate::compression::CompressionMethod; use crate::result::ZipResult; use crate::types::DateTime;