Merge pull request from NobodyXu/streaming

Implement a high-level, easy-to-use streaming decoder that recovers all information from a zip
This commit is contained in:
Plecra 2023-02-01 18:15:29 +00:00 committed by GitHub
commit e32db515a2
Signed by: DevComp
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 457 additions and 49 deletions

View file

@ -8,7 +8,7 @@ keywords = ["zip", "archive"]
description = """
Library to support the reading and writing of zip files.
"""
edition = "2018"
edition = "2021"
[dependencies]
aes = { version = "0.7.5", optional = true }

View file

@ -42,3 +42,14 @@ mod spec;
mod types;
pub mod write;
mod zipcrypto;
/// Unstable APIs
///
/// All APIs accessible by importing this module are unstable; they may be changed in patch releases.
/// You MUST use an exact version specifier in `Cargo.toml`, to indicate the version of this API you're using:
///
/// ```toml
/// [dependencies]
/// zip = "=0.6.4"
/// ```
pub mod unstable;

View file

@ -13,7 +13,7 @@ use byteorder::{LittleEndian, ReadBytesExt};
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::{self, prelude::*};
use std::path::{Component, Path};
use std::path::Path;
use std::sync::Arc;
#[cfg(any(
@ -29,10 +29,8 @@ use bzip2::read::BzDecoder;
#[cfg(feature = "zstd")]
use zstd::stream::read::Decoder as ZstdDecoder;
/// Unix file-type bits used when a Unix mode has to be synthesised from
/// MS-DOS attributes.
mod ffi {
    /// File-type bits marking a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// File-type bits marking a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
/// Provides a high-level API for reading a zip archive from a stream.
pub(crate) mod stream;
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
pub(crate) mod zip_archive {
@ -650,12 +648,22 @@ pub(crate) fn central_header_to_zip_file<R: Read + io::Seek>(
archive_offset: u64,
) -> ZipResult<ZipFileData> {
let central_header_start = reader.stream_position()?;
// Parse central header
let signature = reader.read_u32::<LittleEndian>()?;
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
return Err(ZipError::InvalidArchive("Invalid Central Directory header"));
Err(ZipError::InvalidArchive("Invalid Central Directory header"))
} else {
central_header_to_zip_file_inner(reader, archive_offset, central_header_start)
}
}
/// Parse a central directory entry to collect the information for the file.
fn central_header_to_zip_file_inner<R: Read>(
reader: &mut R,
archive_offset: u64,
central_header_start: u64,
) -> ZipResult<ZipFileData> {
let version_made_by = reader.read_u16::<LittleEndian>()?;
let _version_to_extract = reader.read_u16::<LittleEndian>()?;
let flags = reader.read_u16::<LittleEndian>()?;
@ -896,20 +904,7 @@ impl<'a> ZipFile<'a> {
/// to path-based exploits. It is recommended over
/// [`ZipFile::mangled_name`].
pub fn enclosed_name(&self) -> Option<&Path> {
if self.data.file_name.contains('\0') {
return None;
}
let path = Path::new(&self.data.file_name);
let mut depth = 0usize;
for component in path.components() {
match component {
Component::Prefix(_) | Component::RootDir => return None,
Component::ParentDir => depth = depth.checked_sub(1)?,
Component::Normal(_) => depth += 1,
Component::CurDir => (),
}
}
Some(path)
self.data.enclosed_name()
}
/// Get the comment of the file
@ -952,27 +947,7 @@ impl<'a> ZipFile<'a> {
/// Get unix mode for the file
pub fn unix_mode(&self) -> Option<u32> {
if self.data.external_attributes == 0 {
return None;
}
match self.data.system {
System::Unix => Some(self.data.external_attributes >> 16),
System::Dos => {
// Interpret MS-DOS directory bit
let mut mode = if 0x10 == (self.data.external_attributes & 0x10) {
ffi::S_IFDIR | 0o0775
} else {
ffi::S_IFREG | 0o0664
};
if 0x01 == (self.data.external_attributes & 0x01) {
// Read-only bit; strip write permissions
mode &= 0o0555;
}
Some(mode)
}
_ => None,
}
self.data.unix_mode()
}
/// Get the CRC32 hash of the original file
@ -1029,10 +1004,9 @@ impl<'a> Drop for ZipFile<'a> {
match reader.read(&mut buffer) {
Ok(0) => break,
Ok(_) => (),
Err(e) => panic!(
"Could not consume all of the output of the current ZipFile: {:?}",
e
),
Err(e) => {
panic!("Could not consume all of the output of the current ZipFile: {e:?}")
}
}
}
}

372
src/read/stream.rs Normal file
View file

@ -0,0 +1,372 @@
use std::fs;
use std::io::{self, Read};
use std::path::Path;
use super::{
central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile,
ZipFileData, ZipResult,
};
use byteorder::{LittleEndian, ReadBytesExt};
/// Stream decoder for zip.
///
/// A thin wrapper around a reader `R`. The archive-parsing methods are only
/// available when `R` implements `Read`.
#[derive(Debug)]
pub struct ZipStreamReader<R>(R);

impl<R> ZipStreamReader<R> {
    /// Wrap `reader` in a new `ZipStreamReader`.
    ///
    /// The reader is stored as-is; nothing is read from it here.
    pub fn new(reader: R) -> Self {
        ZipStreamReader(reader)
    }
}
impl<R: Read> ZipStreamReader<R> {
fn parse_central_directory(&mut self) -> ZipResult<Option<ZipStreamFileMetadata>> {
// Give archive_offset and central_header_start dummy value 0, since
// they are not used in the output.
let archive_offset = 0;
let central_header_start = 0;
// Parse central header
let signature = self.0.read_u32::<LittleEndian>()?;
if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
Ok(None)
} else {
central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start)
.map(ZipStreamFileMetadata)
.map(Some)
}
}
/// Iteraate over the stream and extract all file and their
/// metadata.
pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
visitor.visit_file(&mut file)?;
}
while let Some(metadata) = self.parse_central_directory()? {
visitor.visit_additional_metadata(&metadata)?;
}
Ok(())
}
/// Extract a Zip archive into a directory, overwriting files if they
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
///
/// File contents and directories are written while the entries are being
/// visited. On Unix, the mode reported by each central directory record is
/// applied afterwards with `fs::set_permissions`; on other targets the
/// central directory records are ignored.
///
/// Extraction is not atomic; if an error is encountered, some of the files
/// may be left on disk.
///
/// # Errors
///
/// * `ZipError::InvalidArchive("Invalid file path")` if an entry name is
///   rejected by `enclosed_name`.
/// * Any error produced while reading the stream or touching the file
///   system.
pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
    /// Visitor that writes every entry below the wrapped base directory.
    struct Extractor<'a>(&'a Path);

    impl ZipStreamVisitor for Extractor<'_> {
        fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
            let filepath = file
                .enclosed_name()
                .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
            let outpath = self.0.join(filepath);

            // Ask the entry itself whether it is a directory instead of
            // re-testing for a trailing '/' by hand, so extraction agrees
            // with the `is_dir` predicate callers see.
            if file.is_dir() {
                fs::create_dir_all(&outpath)?;
            } else {
                // The archive is not required to list parent directories
                // before the files they contain.
                if let Some(p) = outpath.parent() {
                    fs::create_dir_all(p)?;
                }
                let mut outfile = fs::File::create(&outpath)?;
                io::copy(file, &mut outfile)?;
            }

            Ok(())
        }

        // `metadata` is only read inside the `cfg(unix)` block below; on
        // other targets it would otherwise be reported as unused.
        #[allow(unused)]
        fn visit_additional_metadata(
            &mut self,
            metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            #[cfg(unix)]
            {
                use std::os::unix::fs::PermissionsExt;

                let filepath = metadata
                    .enclosed_name()
                    .ok_or(ZipError::InvalidArchive("Invalid file path"))?;
                let outpath = self.0.join(filepath);

                if let Some(mode) = metadata.unix_mode() {
                    fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
                }
            }

            Ok(())
        }
    }

    self.visit(&mut Extractor(directory.as_ref()))
}
}
/// Visitor for ZipStreamReader
///
/// `ZipStreamReader::visit` drives an implementation of this trait: it calls
/// `visit_file` for each file entry first, then `visit_additional_metadata`
/// for each central directory record. An `Err` returned from either method
/// stops the walk and is returned from `visit`.
pub trait ZipStreamVisitor {
    /// Called once for every file entry read from the stream.
    ///
    /// * `file` - contains the content of the file and most of the metadata,
    ///   except:
    ///     - `comment`: set to an empty string
    ///     - `data_start`: set to 0
    ///     - `external_attributes`: `unix_mode()`: will return None
    fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;
    /// This function is guaranteed to be called after all `visit_file`s.
    ///
    /// * `metadata` - Provides missing metadata in `visit_file`.
    fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
}
/// Additional metadata for the file.
///
/// Wraps the `ZipFileData` parsed from one central directory record. This is
/// the value handed to [`ZipStreamVisitor::visit_additional_metadata`].
#[derive(Debug)]
pub struct ZipStreamFileMetadata(ZipFileData);

impl ZipStreamFileMetadata {
    /// Get the name of the file
    ///
    /// # Warnings
    ///
    /// It is dangerous to use this name directly when extracting an archive.
    /// It may contain an absolute path (`/etc/shadow`), or break out of the
    /// current directory (`../runtime`). Carelessly writing to these paths
    /// allows an attacker to craft a ZIP archive that will overwrite critical
    /// files.
    ///
    /// You can use the [`Self::enclosed_name`] method to validate the name
    /// as a safe path.
    pub fn name(&self) -> &str {
        &self.0.file_name
    }

    /// Get the name of the file, in the raw (internal) byte representation.
    ///
    /// The encoding of this data is currently undefined.
    pub fn name_raw(&self) -> &[u8] {
        &self.0.file_name_raw
    }

    /// Rewrite the path, ignoring any path components with special meaning.
    ///
    /// - Absolute paths are made relative
    /// - [`ParentDir`]s are ignored
    /// - Truncates the filename at a NULL byte
    ///
    /// This is appropriate if you need to be able to extract *something* from
    /// any archive, but will easily misrepresent trivial paths like
    /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
    /// [`Self::enclosed_name`] is the better option in most scenarios.
    ///
    /// [`ParentDir`]: `std::path::Component::ParentDir`
    pub fn mangled_name(&self) -> ::std::path::PathBuf {
        self.0.file_name_sanitized()
    }

    /// Ensure the file path is safe to use as a [`Path`].
    ///
    /// - It can't contain NULL bytes
    /// - It can't resolve to a path outside the current directory
    ///   > `foo/../bar` is fine, `foo/../../bar` is not.
    /// - It can't be an absolute path
    ///
    /// This will read well-formed ZIP files correctly, and is resistant
    /// to path-based exploits. It is recommended over
    /// [`Self::mangled_name`].
    pub fn enclosed_name(&self) -> Option<&Path> {
        self.0.enclosed_name()
    }

    /// Returns whether the file is actually a directory
    ///
    /// An entry counts as a directory when its name ends with `/` or `\`.
    /// An empty name is not a directory.
    pub fn is_dir(&self) -> bool {
        self.name().ends_with(['/', '\\'])
    }

    /// Returns whether the file is a regular file
    ///
    /// This is exactly the negation of [`Self::is_dir`].
    pub fn is_file(&self) -> bool {
        !self.is_dir()
    }

    /// Get the comment of the file
    pub fn comment(&self) -> &str {
        &self.0.file_comment
    }

    /// Get the starting offset of the data of the compressed file
    ///
    /// NOTE(review): this returns whatever the central directory parser
    /// stored in `data_start`; confirm that value is meaningful for archives
    /// read through the streaming API.
    pub fn data_start(&self) -> u64 {
        self.0.data_start.load()
    }

    /// Get unix mode for the file
    pub fn unix_mode(&self) -> Option<u32> {
        self.0.unix_mode()
    }
}
// Unit tests for the streaming reader. Every test feeds a fixture from
// `tests/data` through `ZipStreamReader::visit` with a purpose-built visitor.
#[cfg(test)]
mod test {
    use super::*;
    use std::collections::BTreeSet;
    use std::io;

    /// Visitor that accepts everything and records nothing.
    struct DummyVisitor;
    impl ZipStreamVisitor for DummyVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            Ok(())
        }
    }

    /// Visitor that counts calls: field 0 counts `visit_file`, field 1 counts
    /// `visit_additional_metadata`.
    ///
    /// NOTE(review): no test in this module currently uses this visitor.
    #[derive(Default, Debug, Eq, PartialEq)]
    struct CounterVisitor(u64, u64);
    impl ZipStreamVisitor for CounterVisitor {
        fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
            self.0 += 1;
            Ok(())
        }

        fn visit_additional_metadata(
            &mut self,
            _metadata: &ZipStreamFileMetadata,
        ) -> ZipResult<()> {
            self.1 += 1;
            Ok(())
        }
    }

    /// Visiting the `invalid_offset.zip` fixture must fail.
    #[test]
    fn invalid_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// Visiting the `invalid_offset2.zip` fixture must fail.
    #[test]
    fn invalid_offset2() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_offset2.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// Every regular file reported by the central directory of
    /// `mimetype.zip` must already have been seen as a file entry.
    #[test]
    fn zip_read_streaming() {
        let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/mimetype.zip"
        )));

        // Remembers the names of all regular files seen by `visit_file`.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        reader.visit(&mut V::default()).unwrap();
    }

    /// In `files_and_dirs.zip`, entries whose last path component starts
    /// with `dir` must report `is_dir()` and those starting with `file` must
    /// report `is_file()`; the metadata pass is cross-checked as in
    /// `zip_read_streaming`.
    #[test]
    fn file_and_dir_predicates() {
        let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/files_and_dirs.zip"
        )));

        // Remembers the names of all regular files seen by `visit_file`.
        #[derive(Default)]
        struct V {
            filenames: BTreeSet<Box<str>>,
        }
        impl ZipStreamVisitor for V {
            fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
                let full_name = file.enclosed_name().unwrap();
                let file_name = full_name.file_name().unwrap().to_str().unwrap();
                assert!(
                    (file_name.starts_with("dir") && file.is_dir())
                        || (file_name.starts_with("file") && file.is_file())
                );

                if file.is_file() {
                    self.filenames.insert(file.name().into());
                }

                Ok(())
            }
            fn visit_additional_metadata(
                &mut self,
                metadata: &ZipStreamFileMetadata,
            ) -> ZipResult<()> {
                if metadata.is_file() {
                    assert!(
                        self.filenames.contains(metadata.name()),
                        "{} is missing its file content",
                        metadata.name()
                    );
                }

                Ok(())
            }
        }

        reader.visit(&mut V::default()).unwrap();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is more than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_smaller_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }

    /// test case to ensure we don't preemptively over allocate based on the
    /// declared number of files in the CDE of an invalid zip when the number of
    /// files declared is less than the alleged offset in the CDE
    #[test]
    fn invalid_cde_number_of_files_allocation_greater_offset() {
        ZipStreamReader::new(io::Cursor::new(include_bytes!(
            "../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
        )))
        .visit(&mut DummyVisitor)
        .unwrap_err();
    }
}

View file

@ -1,6 +1,6 @@
//! Types that specify what is contained in a ZIP.
#[cfg(feature = "time")]
use std::convert::{TryFrom, TryInto};
use std::path;
#[cfg(not(any(
all(target_arch = "arm", target_pointer_width = "32"),
target_arch = "mips",
@ -12,6 +12,11 @@ use std::time::SystemTime;
#[cfg(doc)]
use {crate::read::ZipFile, crate::write::FileOptions};
/// Unix file-type bits used by `ZipFileData::unix_mode` when it has to
/// synthesise a mode from MS-DOS attributes.
mod ffi {
    /// File-type bits marking a directory.
    pub const S_IFDIR: u32 = 0o040_000;
    /// File-type bits marking a regular file.
    pub const S_IFREG: u32 = 0o100_000;
}
#[cfg(any(
all(target_arch = "arm", target_pointer_width = "32"),
target_arch = "mips",
@ -375,6 +380,48 @@ impl ZipFileData {
})
}
/// Return the entry name as a path, but only if it is safe to join onto an
/// extraction directory.
///
/// `None` is returned when the name
///
/// - contains a NUL byte,
/// - has a prefix or root component (i.e. is absolute), or
/// - uses `..` to climb above its own starting directory
///   (`foo/../bar` is accepted, `foo/../../bar` is not).
pub(crate) fn enclosed_name(&self) -> Option<&path::Path> {
    if self.file_name.contains('\0') {
        return None;
    }

    let candidate = path::Path::new(&self.file_name);

    // Walk the components while tracking how deep below the starting
    // directory we are; the fold bails out with `None` on the first
    // component that is absolute or would take the depth below zero.
    candidate
        .components()
        .try_fold(0usize, |depth, part| match part {
            path::Component::Prefix(_) | path::Component::RootDir => None,
            path::Component::ParentDir => depth.checked_sub(1),
            path::Component::Normal(_) => Some(depth + 1),
            path::Component::CurDir => Some(depth),
        })?;

    Some(candidate)
}
/// Get unix mode for the file
///
/// * Returns `None` when `external_attributes` is zero, or when `system`
///   is neither `System::Unix` nor `System::Dos`.
/// * For `System::Unix` the mode is the upper 16 bits of
///   `external_attributes`.
/// * For `System::Dos` a mode is synthesised: the directory attribute
///   (`0x10`) selects `S_IFDIR | 0o775` over `S_IFREG | 0o664`, and the
///   read-only attribute (`0x01`) removes the write bits.
pub(crate) fn unix_mode(&self) -> Option<u32> {
    if self.external_attributes == 0 {
        return None;
    }

    match self.system {
        System::Unix => Some(self.external_attributes >> 16),
        System::Dos => {
            // Interpret MS-DOS directory bit
            let mut mode = if 0x10 == (self.external_attributes & 0x10) {
                ffi::S_IFDIR | 0o0775
            } else {
                ffi::S_IFREG | 0o0664
            };
            if 0x01 == (self.external_attributes & 0x01) {
                // Read-only bit; strip write permissions. Only the three
                // write bits are cleared: masking with `0o0555` would also
                // wipe the file-type bits that were set just above.
                mode &= !0o0222;
            }
            Some(mode)
        }
        _ => None,
    }
}
pub fn zip64_extension(&self) -> bool {
self.uncompressed_size > 0xFFFFFFFF
|| self.compressed_size > 0xFFFFFFFF

4
src/unstable.rs Normal file
View file

@ -0,0 +1,4 @@
/// Provides high level API for reading from a stream.
///
/// Re-exports every public item of the crate-private `crate::read::stream`
/// module.
pub mod stream {
    pub use crate::read::stream::*;
}

View file

@ -26,6 +26,6 @@ fn invalid_header() {
let archive = zip::ZipArchive::new(reader);
match archive {
Err(ZipError::InvalidArchive(_)) => {}
value => panic!("Unexpected value: {:?}", value),
value => panic!("Unexpected value: {value:?}"),
}
}