363 lines
11 KiB
Rust
363 lines
11 KiB
Rust
use std::fs;
|
|
use std::io::{self, Read};
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use super::{
|
|
central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError,
|
|
ZipFile, ZipFileData, ZipResult,
|
|
};
|
|
use crate::spec::FixedSizeBlock;
|
|
|
|
/// Stream decoder for zip.
|
|
#[derive(Debug)]
|
|
pub struct ZipStreamReader<R>(R);
|
|
|
|
impl<R> ZipStreamReader<R> {
|
|
/// Create a new ZipStreamReader
|
|
pub const fn new(reader: R) -> Self {
|
|
Self(reader)
|
|
}
|
|
}
|
|
|
|
impl<R: Read> ZipStreamReader<R> {
|
|
fn parse_central_directory(&mut self) -> ZipResult<ZipStreamFileMetadata> {
|
|
// Give archive_offset and central_header_start dummy value 0, since
|
|
// they are not used in the output.
|
|
let archive_offset = 0;
|
|
let central_header_start = 0;
|
|
|
|
// Parse central header
|
|
let block = ZipCentralEntryBlock::parse(&mut self.0)?;
|
|
let file = central_header_to_zip_file_inner(
|
|
&mut self.0,
|
|
archive_offset,
|
|
central_header_start,
|
|
block,
|
|
)?;
|
|
Ok(ZipStreamFileMetadata(file))
|
|
}
|
|
|
|
/// Iterate over the stream and extract all file and their
|
|
/// metadata.
|
|
pub fn visit<V: ZipStreamVisitor>(mut self, visitor: &mut V) -> ZipResult<()> {
|
|
while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? {
|
|
visitor.visit_file(&mut file)?;
|
|
}
|
|
|
|
while let Ok(metadata) = self.parse_central_directory() {
|
|
visitor.visit_additional_metadata(&metadata)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Extract a Zip archive into a directory, overwriting files if they
|
|
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
|
|
///
|
|
/// Extraction is not atomic; If an error is encountered, some of the files
|
|
/// may be left on disk.
|
|
pub fn extract<P: AsRef<Path>>(self, directory: P) -> ZipResult<()> {
|
|
struct Extractor<'a>(&'a Path);
|
|
impl ZipStreamVisitor for Extractor<'_> {
|
|
fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
|
|
let filepath = file
|
|
.enclosed_name()
|
|
.ok_or(ZipError::InvalidArchive("Invalid file path"))?;
|
|
|
|
let outpath = self.0.join(filepath);
|
|
|
|
if file.is_dir() {
|
|
fs::create_dir_all(&outpath)?;
|
|
} else {
|
|
if let Some(p) = outpath.parent() {
|
|
fs::create_dir_all(p)?;
|
|
}
|
|
let mut outfile = fs::File::create(&outpath)?;
|
|
io::copy(file, &mut outfile)?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[allow(unused)]
|
|
fn visit_additional_metadata(
|
|
&mut self,
|
|
metadata: &ZipStreamFileMetadata,
|
|
) -> ZipResult<()> {
|
|
#[cfg(unix)]
|
|
{
|
|
let filepath = metadata
|
|
.enclosed_name()
|
|
.ok_or(ZipError::InvalidArchive("Invalid file path"))?;
|
|
|
|
let outpath = self.0.join(filepath);
|
|
|
|
use std::os::unix::fs::PermissionsExt;
|
|
if let Some(mode) = metadata.unix_mode() {
|
|
fs::set_permissions(outpath, fs::Permissions::from_mode(mode))?;
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
self.visit(&mut Extractor(directory.as_ref()))
|
|
}
|
|
}
|
|
|
|
/// Visitor for ZipStreamReader
|
|
pub trait ZipStreamVisitor {
|
|
/// * `file` - contains the content of the file and most of the metadata,
|
|
/// except:
|
|
/// - `comment`: set to an empty string
|
|
/// - `data_start`: set to 0
|
|
/// - `external_attributes`: `unix_mode()`: will return None
|
|
fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>;
|
|
|
|
/// This function is guranteed to be called after all `visit_file`s.
|
|
///
|
|
/// * `metadata` - Provides missing metadata in `visit_file`.
|
|
fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>;
|
|
}
|
|
|
|
/// Additional metadata for the file.
|
|
#[derive(Debug)]
|
|
pub struct ZipStreamFileMetadata(ZipFileData);
|
|
|
|
impl ZipStreamFileMetadata {
|
|
/// Get the name of the file
|
|
///
|
|
/// # Warnings
|
|
///
|
|
/// It is dangerous to use this name directly when extracting an archive.
|
|
/// It may contain an absolute path (`/etc/shadow`), or break out of the
|
|
/// current directory (`../runtime`). Carelessly writing to these paths
|
|
/// allows an attacker to craft a ZIP archive that will overwrite critical
|
|
/// files.
|
|
///
|
|
/// You can use the [`ZipFile::enclosed_name`] method to validate the name
|
|
/// as a safe path.
|
|
pub fn name(&self) -> &str {
|
|
&self.0.file_name
|
|
}
|
|
|
|
/// Get the name of the file, in the raw (internal) byte representation.
|
|
///
|
|
/// The encoding of this data is currently undefined.
|
|
pub fn name_raw(&self) -> &[u8] {
|
|
&self.0.file_name_raw
|
|
}
|
|
|
|
/// Rewrite the path, ignoring any path components with special meaning.
|
|
///
|
|
/// - Absolute paths are made relative
|
|
/// - [std::path::Component::ParentDir]s are ignored
|
|
/// - Truncates the filename at a NULL byte
|
|
///
|
|
/// This is appropriate if you need to be able to extract *something* from
|
|
/// any archive, but will easily misrepresent trivial paths like
|
|
/// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
|
|
/// [`ZipFile::enclosed_name`] is the better option in most scenarios.
|
|
pub fn mangled_name(&self) -> PathBuf {
|
|
self.0.file_name_sanitized()
|
|
}
|
|
|
|
/// Ensure the file path is safe to use as a [`Path`].
|
|
///
|
|
/// - It can't contain NULL bytes
|
|
/// - It can't resolve to a path outside the current directory
|
|
/// > `foo/../bar` is fine, `foo/../../bar` is not.
|
|
/// - It can't be an absolute path
|
|
///
|
|
/// This will read well-formed ZIP files correctly, and is resistant
|
|
/// to path-based exploits. It is recommended over
|
|
/// [`ZipFile::mangled_name`].
|
|
pub fn enclosed_name(&self) -> Option<PathBuf> {
|
|
self.0.enclosed_name()
|
|
}
|
|
|
|
/// Returns whether the file is actually a directory
|
|
pub fn is_dir(&self) -> bool {
|
|
self.name()
|
|
.chars()
|
|
.next_back()
|
|
.map_or(false, |c| c == '/' || c == '\\')
|
|
}
|
|
|
|
/// Returns whether the file is a regular file
|
|
pub fn is_file(&self) -> bool {
|
|
!self.is_dir()
|
|
}
|
|
|
|
/// Get the comment of the file
|
|
pub fn comment(&self) -> &str {
|
|
&self.0.file_comment
|
|
}
|
|
|
|
/// Get unix mode for the file
|
|
pub const fn unix_mode(&self) -> Option<u32> {
|
|
self.0.unix_mode()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::*;
|
|
use std::collections::BTreeSet;
|
|
|
|
struct DummyVisitor;
|
|
impl ZipStreamVisitor for DummyVisitor {
|
|
fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_additional_metadata(
|
|
&mut self,
|
|
_metadata: &ZipStreamFileMetadata,
|
|
) -> ZipResult<()> {
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[allow(dead_code)]
|
|
#[derive(Default, Debug, Eq, PartialEq)]
|
|
struct CounterVisitor(u64, u64);
|
|
impl ZipStreamVisitor for CounterVisitor {
|
|
fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> {
|
|
self.0 += 1;
|
|
Ok(())
|
|
}
|
|
|
|
fn visit_additional_metadata(
|
|
&mut self,
|
|
_metadata: &ZipStreamFileMetadata,
|
|
) -> ZipResult<()> {
|
|
self.1 += 1;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[test]
|
|
fn invalid_offset() {
|
|
ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/invalid_offset.zip"
|
|
)))
|
|
.visit(&mut DummyVisitor)
|
|
.unwrap_err();
|
|
}
|
|
|
|
#[test]
|
|
fn invalid_offset2() {
|
|
ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/invalid_offset2.zip"
|
|
)))
|
|
.visit(&mut DummyVisitor)
|
|
.unwrap_err();
|
|
}
|
|
|
|
#[test]
|
|
fn zip_read_streaming() {
|
|
let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/mimetype.zip"
|
|
)));
|
|
|
|
#[derive(Default)]
|
|
struct V {
|
|
filenames: BTreeSet<Box<str>>,
|
|
}
|
|
impl ZipStreamVisitor for V {
|
|
fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
|
|
if file.is_file() {
|
|
self.filenames.insert(file.name().into());
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
fn visit_additional_metadata(
|
|
&mut self,
|
|
metadata: &ZipStreamFileMetadata,
|
|
) -> ZipResult<()> {
|
|
if metadata.is_file() {
|
|
assert!(
|
|
self.filenames.contains(metadata.name()),
|
|
"{} is missing its file content",
|
|
metadata.name()
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
reader.visit(&mut V::default()).unwrap();
|
|
}
|
|
|
|
#[test]
|
|
fn file_and_dir_predicates() {
|
|
let reader = ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/files_and_dirs.zip"
|
|
)));
|
|
|
|
#[derive(Default)]
|
|
struct V {
|
|
filenames: BTreeSet<Box<str>>,
|
|
}
|
|
impl ZipStreamVisitor for V {
|
|
fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> {
|
|
let full_name = file.enclosed_name().unwrap();
|
|
let file_name = full_name.file_name().unwrap().to_str().unwrap();
|
|
assert!(
|
|
(file_name.starts_with("dir") && file.is_dir())
|
|
|| (file_name.starts_with("file") && file.is_file())
|
|
);
|
|
|
|
if file.is_file() {
|
|
self.filenames.insert(file.name().into());
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
fn visit_additional_metadata(
|
|
&mut self,
|
|
metadata: &ZipStreamFileMetadata,
|
|
) -> ZipResult<()> {
|
|
if metadata.is_file() {
|
|
assert!(
|
|
self.filenames.contains(metadata.name()),
|
|
"{} is missing its file content",
|
|
metadata.name()
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
reader.visit(&mut V::default()).unwrap();
|
|
}
|
|
|
|
/// test case to ensure we don't preemptively over allocate based on the
|
|
/// declared number of files in the CDE of an invalid zip when the number of
|
|
/// files declared is more than the alleged offset in the CDE
|
|
#[test]
|
|
fn invalid_cde_number_of_files_allocation_smaller_offset() {
|
|
ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/invalid_cde_number_of_files_allocation_smaller_offset.zip"
|
|
)))
|
|
.visit(&mut DummyVisitor)
|
|
.unwrap_err();
|
|
}
|
|
|
|
/// test case to ensure we don't preemptively over allocate based on the
|
|
/// declared number of files in the CDE of an invalid zip when the number of
|
|
/// files declared is less than the alleged offset in the CDE
|
|
#[test]
|
|
fn invalid_cde_number_of_files_allocation_greater_offset() {
|
|
ZipStreamReader::new(io::Cursor::new(include_bytes!(
|
|
"../../tests/data/invalid_cde_number_of_files_allocation_greater_offset.zip"
|
|
)))
|
|
.visit(&mut DummyVisitor)
|
|
.unwrap_err();
|
|
}
|
|
}
|