diff --git a/Cargo.toml b/Cargo.toml index caf6a07f..5468919a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ keywords = ["zip", "archive"] description = """ Library to support the reading and writing of zip files. """ -edition = "2018" +edition = "2021" [dependencies] aes = { version = "0.7.5", optional = true } diff --git a/src/lib.rs b/src/lib.rs index 0fee99cc..7f3e7a01 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -42,3 +42,14 @@ mod spec; mod types; pub mod write; mod zipcrypto; + +/// Unstable APIs +/// +/// All APIs accessible by importing this module are unstable; they may be changed in patch releases. +/// You MUST use an exact version specifier in `Cargo.toml`, to indicate the version of this API you're using: +/// +/// ```toml +/// [dependencies] +/// zip = "=0.6.4" +/// ``` +pub mod unstable; diff --git a/src/read.rs b/src/read.rs index dad20c26..b702b4f2 100644 --- a/src/read.rs +++ b/src/read.rs @@ -13,7 +13,7 @@ use byteorder::{LittleEndian, ReadBytesExt}; use std::borrow::Cow; use std::collections::HashMap; use std::io::{self, prelude::*}; -use std::path::{Component, Path}; +use std::path::Path; use std::sync::Arc; #[cfg(any( @@ -29,10 +29,8 @@ use bzip2::read::BzDecoder; #[cfg(feature = "zstd")] use zstd::stream::read::Decoder as ZstdDecoder; -mod ffi { - pub const S_IFDIR: u32 = 0o0040000; - pub const S_IFREG: u32 = 0o0100000; -} +/// Provides high level API for reading from a stream. 
+pub(crate) mod stream; // Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely pub(crate) mod zip_archive { @@ -650,12 +648,22 @@ pub(crate) fn central_header_to_zip_file( archive_offset: u64, ) -> ZipResult { let central_header_start = reader.stream_position()?; + // Parse central header let signature = reader.read_u32::()?; if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid Central Directory header")); + Err(ZipError::InvalidArchive("Invalid Central Directory header")) + } else { + central_header_to_zip_file_inner(reader, archive_offset, central_header_start) } +} +/// Parse a central directory entry to collect the information for the file. +fn central_header_to_zip_file_inner( + reader: &mut R, + archive_offset: u64, + central_header_start: u64, +) -> ZipResult { let version_made_by = reader.read_u16::()?; let _version_to_extract = reader.read_u16::()?; let flags = reader.read_u16::()?; @@ -896,20 +904,7 @@ impl<'a> ZipFile<'a> { /// to path-based exploits. It is recommended over /// [`ZipFile::mangled_name`]. 
pub fn enclosed_name(&self) -> Option<&Path> { - if self.data.file_name.contains('\0') { - return None; - } - let path = Path::new(&self.data.file_name); - let mut depth = 0usize; - for component in path.components() { - match component { - Component::Prefix(_) | Component::RootDir => return None, - Component::ParentDir => depth = depth.checked_sub(1)?, - Component::Normal(_) => depth += 1, - Component::CurDir => (), - } - } - Some(path) + self.data.enclosed_name() } /// Get the comment of the file @@ -952,27 +947,7 @@ impl<'a> ZipFile<'a> { /// Get unix mode for the file pub fn unix_mode(&self) -> Option { - if self.data.external_attributes == 0 { - return None; - } - - match self.data.system { - System::Unix => Some(self.data.external_attributes >> 16), - System::Dos => { - // Interpret MS-DOS directory bit - let mut mode = if 0x10 == (self.data.external_attributes & 0x10) { - ffi::S_IFDIR | 0o0775 - } else { - ffi::S_IFREG | 0o0664 - }; - if 0x01 == (self.data.external_attributes & 0x01) { - // Read-only bit; strip write permissions - mode &= 0o0555; - } - Some(mode) - } - _ => None, - } + self.data.unix_mode() } /// Get the CRC32 hash of the original file @@ -1029,10 +1004,9 @@ impl<'a> Drop for ZipFile<'a> { match reader.read(&mut buffer) { Ok(0) => break, Ok(_) => (), - Err(e) => panic!( - "Could not consume all of the output of the current ZipFile: {:?}", - e - ), + Err(e) => { + panic!("Could not consume all of the output of the current ZipFile: {e:?}") + } } } } diff --git a/src/read/stream.rs b/src/read/stream.rs new file mode 100644 index 00000000..5a01b23f --- /dev/null +++ b/src/read/stream.rs @@ -0,0 +1,372 @@ +use std::fs; +use std::io::{self, Read}; +use std::path::Path; + +use super::{ + central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, + ZipFileData, ZipResult, +}; + +use byteorder::{LittleEndian, ReadBytesExt}; + +/// Stream decoder for zip. 
+#[derive(Debug)] +pub struct ZipStreamReader(R); + +impl ZipStreamReader { + /// Create a new ZipStreamReader + pub fn new(reader: R) -> Self { + Self(reader) + } +} + +impl ZipStreamReader { + fn parse_central_directory(&mut self) -> ZipResult> { + // Give archive_offset and central_header_start dummy value 0, since + // they are not used in the output. + let archive_offset = 0; + let central_header_start = 0; + + // Parse central header + let signature = self.0.read_u32::()?; + if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + Ok(None) + } else { + central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) + .map(ZipStreamFileMetadata) + .map(Some) + } + } + + /// Iterate over the stream and extract all files and their + /// metadata. + pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { + while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { + visitor.visit_file(&mut file)?; + } + + while let Some(metadata) = self.parse_central_directory()? { + visitor.visit_additional_metadata(&metadata)?; + } + + Ok(()) + } + + /// Extract a Zip archive into a directory, overwriting files if they + /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. + /// + /// Extraction is not atomic; if an error is encountered, some of the files + /// may be left on disk. 
+ pub fn extract>(self, directory: P) -> ZipResult<()> { + struct Extractor<'a>(&'a Path); + impl ZipStreamVisitor for Extractor<'_> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + let filepath = file + .enclosed_name() + .ok_or(ZipError::InvalidArchive("Invalid file path"))?; + + let outpath = self.0.join(filepath); + + if file.name().ends_with('/') { + fs::create_dir_all(&outpath)?; + } else { + if let Some(p) = outpath.parent() { + fs::create_dir_all(p)?; + } + let mut outfile = fs::File::create(&outpath)?; + io::copy(file, &mut outfile)?; + } + + Ok(()) + } + + #[allow(unused)] + fn visit_additional_metadata( + &mut self, + metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + #[cfg(unix)] + { + let filepath = metadata + .enclosed_name() + .ok_or(ZipError::InvalidArchive("Invalid file path"))?; + + let outpath = self.0.join(filepath); + + use std::os::unix::fs::PermissionsExt; + if let Some(mode) = metadata.unix_mode() { + fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?; + } + } + + Ok(()) + } + } + + self.visit(&mut Extractor(directory.as_ref())) + } +} + +/// Visitor for ZipStreamReader +pub trait ZipStreamVisitor { + /// * `file` - contains the content of the file and most of the metadata, + /// except: + /// - `comment`: set to an empty string + /// - `data_start`: set to 0 + /// - `external_attributes`: `unix_mode()`: will return None + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; + + /// This function is guaranteed to be called after all `visit_file`s. + /// + /// * `metadata` - Provides missing metadata in `visit_file`. + fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; +} + +/// Additional metadata for the file. +#[derive(Debug)] +pub struct ZipStreamFileMetadata(ZipFileData); + +impl ZipStreamFileMetadata { + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. 
+ /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.0.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.0.file_name_raw + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + /// + /// [`ParentDir`]: `Component::ParentDir` + pub fn mangled_name(&self) -> ::std::path::PathBuf { + self.0.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. 
+ pub fn enclosed_name(&self) -> Option<&Path> { + self.0.enclosed_name() + } + + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.name() + .chars() + .rev() + .next() + .map_or(false, |c| c == '/' || c == '\\') + } + + /// Returns whether the file is a regular file + pub fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.0.file_comment + } + + /// Get the starting offset of the data of the compressed file + pub fn data_start(&self) -> u64 { + self.0.data_start.load() + } + + /// Get unix mode for the file + pub fn unix_mode(&self) -> Option { + self.0.unix_mode() + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::collections::BTreeSet; + use std::io; + + struct DummyVisitor; + impl ZipStreamVisitor for DummyVisitor { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + Ok(()) + } + + fn visit_additional_metadata( + &mut self, + _metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + Ok(()) + } + } + + #[derive(Default, Debug, Eq, PartialEq)] + struct CounterVisitor(u64, u64); + impl ZipStreamVisitor for CounterVisitor { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + self.0 += 1; + Ok(()) + } + + fn visit_additional_metadata( + &mut self, + _metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + self.1 += 1; + Ok(()) + } + } + + #[test] + fn invalid_offset() { + ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/invalid_offset.zip" + ))) + .visit(&mut DummyVisitor) + .unwrap_err(); + } + + #[test] + fn invalid_offset2() { + ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/invalid_offset2.zip" + ))) + .visit(&mut DummyVisitor) + .unwrap_err(); + } + + #[test] + fn zip_read_streaming() { + let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/mimetype.zip" + ))); + + #[derive(Default)] + struct V { + 
filenames: BTreeSet>, + } + impl ZipStreamVisitor for V { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + if file.is_file() { + self.filenames.insert(file.name().into()); + } + + Ok(()) + } + fn visit_additional_metadata( + &mut self, + metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + if metadata.is_file() { + assert!( + self.filenames.contains(metadata.name()), + "{} is missing its file content", + metadata.name() + ); + } + + Ok(()) + } + } + + reader.visit(&mut V::default()).unwrap(); + } + + #[test] + fn file_and_dir_predicates() { + let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/files_and_dirs.zip" + ))); + + #[derive(Default)] + struct V { + filenames: BTreeSet>, + } + impl ZipStreamVisitor for V { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + let full_name = file.enclosed_name().unwrap(); + let file_name = full_name.file_name().unwrap().to_str().unwrap(); + assert!( + (file_name.starts_with("dir") && file.is_dir()) + || (file_name.starts_with("file") && file.is_file()) + ); + + if file.is_file() { + self.filenames.insert(file.name().into()); + } + + Ok(()) + } + fn visit_additional_metadata( + &mut self, + metadata: &ZipStreamFileMetadata, + ) -> ZipResult<()> { + if metadata.is_file() { + assert!( + self.filenames.contains(metadata.name()), + "{} is missing its file content", + metadata.name() + ); + } + + Ok(()) + } + } + + reader.visit(&mut V::default()).unwrap(); + } + + /// test case to ensure we don't preemptively over allocate based on the + /// declared number of files in the CDE of an invalid zip when the number of + /// files declared is more than the alleged offset in the CDE + #[test] + fn invalid_cde_number_of_files_allocation_smaller_offset() { + ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip" + ))) + .visit(&mut DummyVisitor) + .unwrap_err(); + } + + /// test case 
to ensure we don't preemptively over allocate based on the + /// declared number of files in the CDE of an invalid zip when the number of + /// files declared is less than the alleged offset in the CDE + #[test] + fn invalid_cde_number_of_files_allocation_greater_offset() { + ZipStreamReader::new(io::Cursor::new(include_bytes!( + "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip" + ))) + .visit(&mut DummyVisitor) + .unwrap_err(); + } +} diff --git a/src/types.rs b/src/types.rs index ad3a5700..c333d8fa 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,6 +1,6 @@ //! Types that specify what is contained in a ZIP. -#[cfg(feature = "time")] -use std::convert::{TryFrom, TryInto}; +use std::path; + #[cfg(not(any( all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", @@ -12,6 +12,11 @@ use std::time::SystemTime; #[cfg(doc)] use {crate::read::ZipFile, crate::write::FileOptions}; +mod ffi { + pub const S_IFDIR: u32 = 0o0040000; + pub const S_IFREG: u32 = 0o0100000; +} + #[cfg(any( all(target_arch = "arm", target_pointer_width = "32"), target_arch = "mips", @@ -375,6 +380,48 @@ impl ZipFileData { }) } + pub(crate) fn enclosed_name(&self) -> Option<&path::Path> { + if self.file_name.contains('\0') { + return None; + } + let path = path::Path::new(&self.file_name); + let mut depth = 0usize; + for component in path.components() { + match component { + path::Component::Prefix(_) | path::Component::RootDir => return None, + path::Component::ParentDir => depth = depth.checked_sub(1)?, + path::Component::Normal(_) => depth += 1, + path::Component::CurDir => (), + } + } + Some(path) + } + + /// Get unix mode for the file + pub(crate) fn unix_mode(&self) -> Option { + if self.external_attributes == 0 { + return None; + } + + match self.system { + System::Unix => Some(self.external_attributes >> 16), + System::Dos => { + // Interpret MS-DOS directory bit + let mut mode = if 0x10 == (self.external_attributes & 0x10) { + ffi::S_IFDIR | 
0o0775 + } else { + ffi::S_IFREG | 0o0664 + }; + if 0x01 == (self.external_attributes & 0x01) { + // Read-only bit; strip write permissions + mode &= 0o0555; + } + Some(mode) + } + _ => None, + } + } + pub fn zip64_extension(&self) -> bool { self.uncompressed_size > 0xFFFFFFFF || self.compressed_size > 0xFFFFFFFF diff --git a/src/unstable.rs b/src/unstable.rs new file mode 100644 index 00000000..2cbfa5bb --- /dev/null +++ b/src/unstable.rs @@ -0,0 +1,4 @@ +/// Provides high level API for reading from a stream. +pub mod stream { + pub use crate::read::stream::*; +} diff --git a/tests/issue_234.rs b/tests/issue_234.rs index bd01d1d0..f8c1d2c8 100644 --- a/tests/issue_234.rs +++ b/tests/issue_234.rs @@ -26,6 +26,6 @@ fn invalid_header() { let archive = zip::ZipArchive::new(reader); match archive { Err(ZipError::InvalidArchive(_)) => {} - value => panic!("Unexpected value: {:?}", value), + value => panic!("Unexpected value: {value:?}"), } }