Overhaul read logic to perform *all* validations before accepting a central directory as the real one
This commit is contained in:
parent
be49def529
commit
8efd2339cf
2 changed files with 172 additions and 155 deletions
273
src/read.rs
273
src/read.rs
|
@ -14,6 +14,7 @@ use std::borrow::Cow;
|
|||
use std::collections::HashMap;
|
||||
use std::io::{self, prelude::*};
|
||||
use std::path::Path;
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[cfg(any(
|
||||
|
@ -38,13 +39,17 @@ pub(crate) mod stream;
|
|||
|
||||
// Put the struct declaration in a private module to convince rustdoc to display ZipArchive nicely
|
||||
pub(crate) mod zip_archive {
|
||||
use std::rc::Rc;
|
||||
|
||||
/// Extract immutable data from `ZipArchive` to make it cheap to clone
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Shared {
|
||||
pub(super) files: Vec<super::ZipFileData>,
|
||||
pub(super) names_map: super::HashMap<String, usize>,
|
||||
pub(crate) files: Vec<super::ZipFileData>,
|
||||
pub(crate) names_map: super::HashMap<String, usize>,
|
||||
pub(super) offset: u64,
|
||||
pub(super) comment: Vec<u8>,
|
||||
pub(super) comment: Rc<Vec<u8>>,
|
||||
pub(super) dir_start: u64,
|
||||
pub(super) dir_end: u64,
|
||||
}
|
||||
|
||||
/// ZIP archive reader
|
||||
|
@ -74,6 +79,7 @@ pub(crate) mod zip_archive {
|
|||
}
|
||||
}
|
||||
|
||||
use crate::read::zip_archive::Shared;
|
||||
pub use zip_archive::ZipArchive;
|
||||
|
||||
#[allow(clippy::large_enum_variant)]
|
||||
|
@ -311,7 +317,7 @@ pub(crate) fn make_reader(
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) struct DirectoryCounts {
|
||||
pub(crate) struct CentralDirectoryInfo {
|
||||
pub(crate) archive_offset: u64,
|
||||
pub(crate) directory_start: u64,
|
||||
pub(crate) number_of_files: usize,
|
||||
|
@ -320,10 +326,10 @@ pub(crate) struct DirectoryCounts {
|
|||
}
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
fn get_directory_counts_zip32(
|
||||
fn get_directory_info_zip32(
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64,
|
||||
) -> ZipResult<DirectoryCounts> {
|
||||
) -> ZipResult<CentralDirectoryInfo> {
|
||||
// Some zip files have data prepended to them, resulting in the
|
||||
// offsets all being too small. Get the amount of error by comparing
|
||||
// the actual file position we found the CDE at with the offset
|
||||
|
@ -337,7 +343,7 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
|
||||
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
||||
Ok(DirectoryCounts {
|
||||
Ok(CentralDirectoryInfo {
|
||||
archive_offset,
|
||||
directory_start,
|
||||
number_of_files,
|
||||
|
@ -346,11 +352,11 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
})
|
||||
}
|
||||
|
||||
fn get_directory_counts_zip64(
|
||||
fn get_directory_info_zip64(
|
||||
reader: &mut R,
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64,
|
||||
) -> ZipResult<DirectoryCounts> {
|
||||
) -> ZipResult<Vec<ZipResult<CentralDirectoryInfo>>> {
|
||||
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
||||
// have its signature 20 bytes in front of the standard footer. The
|
||||
// standard footer, in turn, is 22+N bytes large, where N is the
|
||||
|
@ -366,99 +372,158 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
// actual offset in the file, since there may be junk at its
|
||||
// beginning. Therefore we need to perform another search, as in
|
||||
// read::CentralDirectoryEnd::find_and_parse, except now we search
|
||||
// forward.
|
||||
// forward. There may be multiple results because of Zip64 central-directory signatures in
|
||||
// ZIP comment data.
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
let search_upper_bound = cde_start_pos
|
||||
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
|
||||
.ok_or(ZipError::InvalidArchive(
|
||||
"File cannot contain ZIP64 central directory end",
|
||||
))?;
|
||||
let (footer64, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
|
||||
while let Ok((footer64, archive_offset)) = spec::Zip64CentralDirectoryEnd::find_and_parse(
|
||||
reader,
|
||||
locator64.end_of_central_directory_offset,
|
||||
search_upper_bound,
|
||||
)?;
|
||||
|
||||
let directory_start = footer64
|
||||
.central_directory_offset
|
||||
.checked_add(archive_offset)
|
||||
.ok_or(ZipError::InvalidArchive(
|
||||
"Invalid central directory size or offset",
|
||||
))?;
|
||||
if directory_start > search_upper_bound {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"Invalid central directory size or offset",
|
||||
));
|
||||
) {
|
||||
results.push({
|
||||
let directory_start = footer64
|
||||
.central_directory_offset
|
||||
.checked_add(archive_offset)
|
||||
.ok_or(ZipError::InvalidArchive(
|
||||
"Invalid central directory size or offset",
|
||||
))?;
|
||||
if directory_start > search_upper_bound {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"Invalid central directory size or offset",
|
||||
));
|
||||
}
|
||||
if footer64.number_of_files_on_this_disk > footer64.number_of_files {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
||||
));
|
||||
}
|
||||
if footer64.version_needed_to_extract > footer64.version_made_by {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
||||
version that wrote it",
|
||||
));
|
||||
}
|
||||
Ok(CentralDirectoryInfo {
|
||||
archive_offset,
|
||||
directory_start,
|
||||
number_of_files: footer64.number_of_files as usize,
|
||||
disk_number: footer64.disk_number,
|
||||
disk_with_central_directory: footer64.disk_with_central_directory,
|
||||
})
|
||||
});
|
||||
}
|
||||
if footer64.number_of_files_on_this_disk > footer64.number_of_files {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"ZIP64 footer indicates more files on this disk than in the whole archive",
|
||||
));
|
||||
}
|
||||
if footer64.version_needed_to_extract > footer64.version_made_by {
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"ZIP64 footer indicates a new version is needed to extract this archive than the \
|
||||
version that wrote it",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(DirectoryCounts {
|
||||
archive_offset,
|
||||
directory_start,
|
||||
number_of_files: footer64.number_of_files as usize,
|
||||
disk_number: footer64.disk_number,
|
||||
disk_with_central_directory: footer64.disk_with_central_directory,
|
||||
})
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Get the directory start offset and number of files. This is done in a
|
||||
/// separate function to ease the control flow design.
|
||||
pub(crate) fn get_directory_counts(
|
||||
pub(crate) fn get_metadata(
|
||||
reader: &mut R,
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
footer: spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64,
|
||||
) -> ZipResult<DirectoryCounts> {
|
||||
) -> ZipResult<Shared> {
|
||||
// Check if file has a zip64 footer
|
||||
let counts_64 = Self::get_directory_counts_zip64(reader, footer, cde_start_pos);
|
||||
let counts_32 = Self::get_directory_counts_zip32(footer, cde_start_pos);
|
||||
match counts_64 {
|
||||
Err(_) => match counts_32 {
|
||||
Err(e) => Err(e),
|
||||
Ok(counts) => {
|
||||
if counts.disk_number != counts.disk_with_central_directory {
|
||||
let mut results = Self::get_directory_info_zip64(reader, &footer, cde_start_pos)
|
||||
.unwrap_or_else(|e| vec![Err(e)]);
|
||||
let zip32_result = Self::get_directory_info_zip32(&footer, cde_start_pos);
|
||||
let mut invalid_errors = Vec::new();
|
||||
let mut unsupported_errors = Vec::new();
|
||||
let mut ok_results = Vec::new();
|
||||
let comment = Rc::new(footer.zip_file_comment);
|
||||
results.iter_mut().for_each(|result| {
|
||||
if let Ok(central_dir) = result {
|
||||
if let Ok(zip32_central_dir) = &zip32_result {
|
||||
// Both zip32 and zip64 footers exist, so check if the zip64 footer is valid; if not, try zip32
|
||||
if central_dir.number_of_files != zip32_central_dir.number_of_files
|
||||
&& zip32_central_dir.number_of_files != u16::MAX as usize
|
||||
{
|
||||
*result = Err(ZipError::InvalidArchive(
|
||||
"ZIP32 and ZIP64 file counts don't match",
|
||||
));
|
||||
return;
|
||||
}
|
||||
if central_dir.disk_number != zip32_central_dir.disk_number
|
||||
&& zip32_central_dir.disk_number != u16::MAX as u32
|
||||
{
|
||||
*result = Err(ZipError::InvalidArchive(
|
||||
"ZIP32 and ZIP64 disk numbers don't match",
|
||||
));
|
||||
return;
|
||||
}
|
||||
if central_dir.disk_with_central_directory
|
||||
!= zip32_central_dir.disk_with_central_directory
|
||||
&& zip32_central_dir.disk_with_central_directory != u16::MAX as u32
|
||||
{
|
||||
*result = Err(ZipError::InvalidArchive(
|
||||
"ZIP32 and ZIP64 last-disk numbers don't match",
|
||||
));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
results.push(zip32_result);
|
||||
results
|
||||
.into_iter()
|
||||
.map(|result| {
|
||||
result.and_then(|dir_info| {
|
||||
// If the parsed number of files is greater than the offset then
|
||||
// something fishy is going on and we shouldn't trust number_of_files.
|
||||
let file_capacity = if dir_info.number_of_files > cde_start_pos as usize {
|
||||
0
|
||||
} else {
|
||||
dir_info.number_of_files
|
||||
};
|
||||
let mut files = Vec::with_capacity(file_capacity);
|
||||
let mut names_map = HashMap::with_capacity(file_capacity);
|
||||
|
||||
for _ in 0..dir_info.number_of_files {
|
||||
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
|
||||
names_map.insert(file.file_name.clone(), files.len());
|
||||
files.push(file);
|
||||
}
|
||||
let dir_end = reader.seek(io::SeekFrom::Start(dir_info.directory_start))?;
|
||||
if dir_info.disk_number != dir_info.disk_with_central_directory {
|
||||
return unsupported_zip_error(
|
||||
"Support for multi-disk files is not implemented",
|
||||
);
|
||||
}
|
||||
Ok(counts)
|
||||
Ok(Shared {
|
||||
files,
|
||||
names_map,
|
||||
offset: dir_info.archive_offset,
|
||||
comment: comment.clone(),
|
||||
dir_start: dir_info.directory_start,
|
||||
dir_end,
|
||||
})
|
||||
})
|
||||
})
|
||||
.for_each(|result| match result {
|
||||
Err(ZipError::UnsupportedArchive(e)) => {
|
||||
unsupported_errors.push(ZipError::UnsupportedArchive(e))
|
||||
}
|
||||
},
|
||||
Ok(counts_64) => {
|
||||
match counts_32 {
|
||||
Err(_) => Ok(counts_64),
|
||||
Ok(counts_32) => {
|
||||
// Both zip32 and zip64 footers exist, so check if the zip64 footer is valid; if not, try zip32
|
||||
if counts_64.number_of_files != counts_32.number_of_files
|
||||
&& counts_32.number_of_files != u16::MAX as usize
|
||||
{
|
||||
return Ok(counts_32);
|
||||
}
|
||||
if counts_64.disk_number != counts_32.disk_number
|
||||
&& counts_32.disk_number != u16::MAX as u32
|
||||
{
|
||||
return Ok(counts_32);
|
||||
}
|
||||
if counts_64.disk_with_central_directory
|
||||
!= counts_32.disk_with_central_directory
|
||||
&& counts_32.disk_with_central_directory != u16::MAX as u32
|
||||
{
|
||||
return Ok(counts_32);
|
||||
}
|
||||
Ok(counts_64)
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => invalid_errors.push(e),
|
||||
Ok(o) => ok_results.push(o),
|
||||
});
|
||||
if ok_results.is_empty() {
|
||||
return Err(unsupported_errors
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap_or_else(|| invalid_errors.into_iter().next().unwrap()));
|
||||
}
|
||||
let shared = ok_results
|
||||
.into_iter()
|
||||
.max_by_key(|shared| shared.dir_end)
|
||||
.unwrap();
|
||||
reader.seek(io::SeekFrom::Start(shared.dir_start))?;
|
||||
Ok(shared)
|
||||
}
|
||||
|
||||
/// Read a ZIP archive, collecting the files it contains
|
||||
|
@ -466,47 +531,11 @@ impl<R: Read + Seek> ZipArchive<R> {
|
|||
/// This uses the central directory record of the ZIP file, and ignores local file headers
|
||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?;
|
||||
|
||||
let counts = Self::get_directory_counts(&mut reader, &footer, cde_start_pos)?;
|
||||
|
||||
if counts.disk_number != counts.disk_with_central_directory {
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented");
|
||||
}
|
||||
|
||||
// If the parsed number of files is greater than the offset then
|
||||
// something fishy is going on and we shouldn't trust number_of_files.
|
||||
let file_capacity = if counts.number_of_files > cde_start_pos as usize {
|
||||
0
|
||||
} else {
|
||||
counts.number_of_files
|
||||
};
|
||||
|
||||
let mut files = Vec::with_capacity(file_capacity);
|
||||
let mut names_map = HashMap::with_capacity(file_capacity);
|
||||
|
||||
if reader
|
||||
.seek(io::SeekFrom::Start(counts.directory_start))
|
||||
.is_err()
|
||||
{
|
||||
return Err(ZipError::InvalidArchive(
|
||||
"Could not seek to start of central directory",
|
||||
));
|
||||
}
|
||||
|
||||
for _ in 0..counts.number_of_files {
|
||||
let file = central_header_to_zip_file(&mut reader, counts.archive_offset)?;
|
||||
names_map.insert(file.file_name.clone(), files.len());
|
||||
files.push(file);
|
||||
}
|
||||
|
||||
let shared = Arc::new(zip_archive::Shared {
|
||||
files,
|
||||
names_map,
|
||||
offset: counts.archive_offset,
|
||||
comment: footer.zip_file_comment,
|
||||
});
|
||||
|
||||
Ok(ZipArchive { reader, shared })
|
||||
let shared = Self::get_metadata(&mut reader, footer, cde_start_pos)?;
|
||||
Ok(ZipArchive {
|
||||
reader,
|
||||
shared: Arc::new(shared),
|
||||
})
|
||||
}
|
||||
/// Extract a Zip archive into a directory, overwriting files if they
|
||||
/// already exist. Paths are sanitized with [`ZipFile::enclosed_name`].
|
||||
|
@ -1378,9 +1407,7 @@ mod test {
|
|||
#[test]
|
||||
fn deflate64_not_enough_space() -> std::io::Result<()> {
|
||||
let mut v = Vec::new();
|
||||
v.extend_from_slice(include_bytes!(
|
||||
"../tests/data/deflate64_issue_25.zip"
|
||||
));
|
||||
v.extend_from_slice(include_bytes!("../tests/data/deflate64_issue_25.zip"));
|
||||
let mut reader = ZipArchive::new(Cursor::new(v))?;
|
||||
std::io::copy(&mut reader.by_index(0)?, &mut std::io::sink()).expect_err("Invalid file");
|
||||
Ok(())
|
||||
|
|
54
src/write.rs
54
src/write.rs
|
@ -1,7 +1,7 @@
|
|||
//! Types for creating ZIP archives
|
||||
|
||||
use crate::compression::CompressionMethod;
|
||||
use crate::read::{central_header_to_zip_file, find_content, ZipArchive, ZipFile, ZipFileReader};
|
||||
use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader};
|
||||
use crate::result::{ZipError, ZipResult};
|
||||
use crate::spec;
|
||||
use crate::types::{ffi, AtomicU64, DateTime, System, ZipFileData, DEFAULT_VERSION};
|
||||
|
@ -435,42 +435,16 @@ impl<A: Read + Write + Seek> ZipWriter<A> {
|
|||
/// Initializes the archive from an existing ZIP archive, making it ready for append.
|
||||
pub fn new_append(mut readwriter: A) -> ZipResult<ZipWriter<A>> {
|
||||
let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?;
|
||||
|
||||
let counts = ZipArchive::get_directory_counts(&mut readwriter, &footer, cde_start_pos)?;
|
||||
|
||||
if counts.disk_number != counts.disk_with_central_directory {
|
||||
return Err(ZipError::UnsupportedArchive(
|
||||
"Support for multi-disk files is not implemented",
|
||||
));
|
||||
}
|
||||
|
||||
if readwriter
|
||||
.seek(SeekFrom::Start(counts.directory_start))
|
||||
.is_err()
|
||||
{
|
||||
return Err(InvalidArchive(
|
||||
"Could not seek to start of central directory",
|
||||
));
|
||||
}
|
||||
|
||||
let files = (0..counts.number_of_files)
|
||||
.map(|_| central_header_to_zip_file(&mut readwriter, counts.archive_offset))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
let mut files_by_name = HashMap::new();
|
||||
for (index, file) in files.iter().enumerate() {
|
||||
files_by_name.insert(file.file_name.to_owned(), index);
|
||||
}
|
||||
|
||||
let _ = readwriter.seek(SeekFrom::Start(counts.directory_start)); // seek directory_start to overwrite it
|
||||
let comment = footer.zip_file_comment.to_owned();
|
||||
let metadata = ZipArchive::get_metadata(&mut readwriter, footer, cde_start_pos)?;
|
||||
|
||||
Ok(ZipWriter {
|
||||
inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),
|
||||
files,
|
||||
files_by_name,
|
||||
files: metadata.files,
|
||||
files_by_name: metadata.names_map,
|
||||
stats: Default::default(),
|
||||
writing_to_file: false,
|
||||
comment: footer.zip_file_comment,
|
||||
comment,
|
||||
writing_raw: true, // avoid recomputing the last file's header
|
||||
flush_on_finish_file: false,
|
||||
})
|
||||
|
@ -2077,4 +2051,20 @@ mod test {
|
|||
writer.write_all(&[]).unwrap();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn crash_with_no_features() -> ZipResult<()> {
|
||||
const ORIGINAL_FILE_NAME: &str = "PK\u{6}\u{6}\0\0\0\0\0\0\0\0\0\u{2}g\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\u{1}\0\0\0\0\0\0\0\0\0\0PK\u{6}\u{7}\0\0\0\0\0\0\0\0\0\0\0\0\u{7}\0\t'";
|
||||
let mut writer = ZipWriter::new(io::Cursor::new(Vec::new()));
|
||||
let mut options = FileOptions::default();
|
||||
options = options
|
||||
.with_alignment(3584)
|
||||
.compression_method(CompressionMethod::Stored);
|
||||
writer.start_file(ORIGINAL_FILE_NAME, options)?;
|
||||
let archive = writer.finish()?;
|
||||
let mut writer = ZipWriter::new_append(archive)?;
|
||||
writer.shallow_copy_file(ORIGINAL_FILE_NAME, "\u{6}\\")?;
|
||||
writer.finish()?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue