Merge branch 'pkgw-pr-basic-zip64'

This commit is contained in:
Mathijs van de Nes 2017-08-17 18:19:40 +02:00
commit 5b259dc12a
4 changed files with 197 additions and 25 deletions

View file

@ -20,7 +20,7 @@ Supported compression formats:
Currently unsupported zip extensions:
* ZIP64
* Most of ZIP64, although there is some support for archives with more than 65535 files
* Encryption
* Multi-disk

View file

@ -54,7 +54,7 @@ pub struct ZipArchive<R: Read + io::Seek>
reader: R,
files: Vec<ZipFileData>,
names_map: HashMap<String, usize>,
offset: u32,
offset: u64,
}
enum ZipFileReader<'a> {
@ -77,20 +77,86 @@ fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
impl<R: Read+io::Seek> ZipArchive<R>
{
/// Get the directory start offset and number of files. This is done in a
/// separate function to ease the control flow design.
///
/// `footer` is the classic end-of-central-directory record and
/// `cde_start_pos` the absolute position at which it was found. The relative
/// seek below assumes `reader` is still positioned directly behind that
/// record.
///
/// Returns `(archive_offset, directory_start, number_of_files)`, where
/// `archive_offset` is the number of bytes by which all offsets recorded in
/// the archive are too small (e.g. because data was prepended to the file).
///
/// # Errors
///
/// Returns `ZipError::InvalidArchive` when the recorded sizes and offsets do
/// not fit the file, the error produced by `unsupported_zip_error` for
/// multi-disk archives, and any other error raised while reading the ZIP64
/// structures.
fn get_directory_counts(reader: &mut R,
                        footer: &spec::CentralDirectoryEnd,
                        cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
    // See if there's a ZIP64 footer. The ZIP64 locator if present will
    // have its signature 20 bytes in front of the standard footer. The
    // standard footer, in turn, is 22+N bytes large, where N is the
    // comment length. Therefore:
    let locator_pos = io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64));
    let locator64 = if reader.seek(locator_pos).is_ok() {
        match spec::Zip64CentralDirectoryEndLocator::parse(reader) {
            Ok(loc) => Some(loc),
            // No ZIP64 header; that's actually fine.
            Err(ZipError::InvalidArchive(_)) => None,
            // Yikes, a real problem
            Err(e) => return Err(e),
        }
    } else {
        // Empty Zip files will have nothing else so this error might be
        // fine. If not, we'll find out soon.
        None
    };

    let locator64 = match locator64 {
        Some(loc) => loc,
        None => {
            // Plain (non-ZIP64) archive. The 32-bit footer fields are only
            // interpreted here, after the ZIP64 probe: a ZIP64 archive may
            // store placeholder values in them that must not be validated.
            //
            // Some zip files have data prepended to them, resulting in the
            // offsets all being too small. Get the amount of error by
            // comparing the actual file position we found the CDE at with
            // the offset recorded in the CDE.
            let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
                .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
                .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
            // Cannot overflow: the sum equals `cde_start_pos - central_directory_size`.
            let directory_start = footer.central_directory_offset as u64 + archive_offset;
            let number_of_files = footer.number_of_files_on_this_disk as usize;
            return Ok((archive_offset, directory_start, number_of_files));
        },
    };

    // If we got here, this is indeed a ZIP64 file.
    if footer.disk_number as u32 != locator64.disk_with_central_directory {
        return unsupported_zip_error("Support for multi-disk files is not implemented")
    }

    // We need to reassess `archive_offset`. We know where the ZIP64
    // central-directory-end structure *should* be, but unfortunately we
    // don't know how to precisely relate that location to our current
    // actual offset in the file, since there may be junk at its
    // beginning. Therefore we need to perform another search, as in
    // read::CentralDirectoryEnd::find_and_parse, except now we search
    // forward.
    let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
        .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
        .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
    let (footer64, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
        reader,
        locator64.end_of_central_directory_offset,
        search_upper_bound)?;

    if footer64.disk_number != footer64.disk_with_central_directory {
        return unsupported_zip_error("Support for multi-disk files is not implemented")
    }

    // Both operands are 64-bit values derived from file contents, so guard
    // the addition instead of letting it overflow.
    let directory_start = footer64.central_directory_offset.checked_add(archive_offset)
        .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
    Ok((archive_offset, directory_start, footer64.number_of_files as usize))
}
/// Opens a Zip archive and parses the central directory
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") }
if footer.disk_number != footer.disk_with_central_directory
{
return unsupported_zip_error("Support for multi-disk files is not implemented")
}
// Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of
// error by comparing the actual file position we found the CDE at with the offset recorded in the CDE.
let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size)
.and_then(|x| x.checked_sub(footer.central_directory_offset))
.ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
let directory_start = (footer.central_directory_offset + archive_offset) as u64;
let number_of_files = footer.number_of_files_on_this_disk as usize;
let (archive_offset, directory_start, number_of_files) =
try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
let mut files = Vec::with_capacity(number_of_files);
let mut names_map = HashMap::new();
@ -132,7 +198,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
///
/// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
/// of that prepended data.
pub fn offset(&self) -> u32 {
pub fn offset(&self) -> u64 {
self.offset
}
@ -198,7 +264,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
}
}
fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u32) -> ZipResult<ZipFileData>
fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>
{
// Parse central header
let signature = try!(reader.read_u32::<LittleEndian>());
@ -230,7 +296,7 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset:
let file_comment_raw = try!(ReadPodExt::read_exact(reader, file_comment_length));
// Account for shifted zip offsets.
offset += archive_offset as u64;
offset += archive_offset;
let file_name = match is_utf8
{
@ -399,4 +465,15 @@ mod test {
let reader = ZipArchive::new(io::Cursor::new(v));
assert!(reader.is_err());
}
/// Opens the bundled ZIP64 demo archive and checks that exactly one entry
/// is found. Per the test name, the fixture is expected to carry extra data
/// in front of the actual archive.
#[test]
fn zip64_with_leading_junk() {
    use std::io::Cursor;
    use super::ZipArchive;

    let bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
    let archive = ZipArchive::new(Cursor::new(bytes)).unwrap();
    assert!(archive.len() == 1);
}
}

View file

@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian};
// Magic numbers identifying the individual ZIP records. They are read as
// little-endian u32 values, so on disk each one appears as the bytes
// 'P', 'K' followed by the two record-specific bytes.

/// Signature of a local file header (`PK\x03\x04` on disk).
pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50;
/// Signature of a central directory file header (`PK\x01\x02` on disk).
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50;
/// Signature of the classic end-of-central-directory record (`PK\x05\x06` on disk).
const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50;
/// Signature of the ZIP64 end-of-central-directory record (`PK\x06\x06` on disk).
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50;
/// Signature of the ZIP64 end-of-central-directory locator (`PK\x06\x07` on disk).
const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50;
pub struct CentralDirectoryEnd
{
@ -48,29 +50,32 @@ impl CentralDirectoryEnd
})
}
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u32)>
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)>
{
let header_size = 22;
let bytes_between_magic_and_comment_size = header_size - 6;
let file_length = try!(reader.seek(io::SeekFrom::End(0))) as i64;
const HEADER_SIZE: u64 = 22;
const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6;
let file_length = try!(reader.seek(io::SeekFrom::End(0)));
let search_upper_bound = ::std::cmp::max(0, file_length - header_size - ::std::u16::MAX as i64);
let search_upper_bound = file_length.checked_sub(HEADER_SIZE + ::std::u16::MAX as u64).unwrap_or(0);
let mut pos = file_length - header_size;
let mut pos = file_length - HEADER_SIZE;
while pos >= search_upper_bound
{
try!(reader.seek(io::SeekFrom::Start(pos as u64)));
if try!(reader.read_u32::<LittleEndian>()) == CENTRAL_DIRECTORY_END_SIGNATURE
{
try!(reader.seek(io::SeekFrom::Current(bytes_between_magic_and_comment_size)));
let comment_length = try!(reader.read_u16::<LittleEndian>()) as i64;
if file_length - pos - header_size == comment_length
try!(reader.seek(io::SeekFrom::Current(BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64)));
let comment_length = try!(reader.read_u16::<LittleEndian>()) as u64;
if file_length - pos - HEADER_SIZE == comment_length
{
let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64))) as u32;
let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64)));
return CentralDirectoryEnd::parse(reader).map(|cde| (cde, cde_start_pos));
}
}
pos -= 1;
pos = match pos.checked_sub(1) {
Some(p) => p,
None => break,
};
}
Err(ZipError::InvalidArchive("Could not find central directory end"))
}
@ -89,3 +94,93 @@ impl CentralDirectoryEnd
Ok(())
}
}
/// The ZIP64 end-of-central-directory locator as produced by
/// `Zip64CentralDirectoryEndLocator::parse`. The fields are listed in the
/// order in which they follow the locator signature.
pub struct Zip64CentralDirectoryEndLocator
{
/// First `u32` after the signature; per the field name, the disk holding
/// the ZIP64 end-of-central-directory record.
pub disk_with_central_directory: u32,
/// `u64` following the disk number; the offset at which the ZIP64
/// end-of-central-directory record is recorded to start.
pub end_of_central_directory_offset: u64,
/// Final `u32` of the locator; per the field name, the total number of disks.
pub number_of_disks: u32,
}
impl Zip64CentralDirectoryEndLocator
{
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator>
{
let magic = try!(reader.read_u32::<LittleEndian>());
if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE
{
return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header"))
}
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
let end_of_central_directory_offset = try!(reader.read_u64::<LittleEndian>());
let number_of_disks = try!(reader.read_u32::<LittleEndian>());
Ok(Zip64CentralDirectoryEndLocator
{
disk_with_central_directory: disk_with_central_directory,
end_of_central_directory_offset: end_of_central_directory_offset,
number_of_disks: number_of_disks,
})
}
}
/// The fixed-size part of the ZIP64 end-of-central-directory record as
/// produced by `Zip64CentralDirectoryEnd::find_and_parse`. The fields are
/// listed in the order in which they are read from the record.
pub struct Zip64CentralDirectoryEnd
{
/// Per the field name, the version of the tool that wrote the archive.
pub version_made_by: u16,
/// Per the field name, the minimum version required to extract the archive.
pub version_needed_to_extract: u16,
/// Per the field name, the number of the disk this record is on.
pub disk_number: u32,
/// Per the field name, the number of the disk on which the central
/// directory is located.
pub disk_with_central_directory: u32,
/// Per the field name, the number of central directory entries on this disk.
pub number_of_files_on_this_disk: u64,
/// Per the field name, the total number of central directory entries.
pub number_of_files: u64,
/// Per the field name, the size of the central directory in bytes.
pub central_directory_size: u64,
/// Offset of the central directory as recorded in the archive.
pub central_directory_offset: u64,
// The "zip64 extensible data sector" is not stored; nothing uses it at the
// moment. A field for it could look like:
//pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
}
impl Zip64CentralDirectoryEnd
{
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T,
nominal_offset: u64,
search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)>
{
let mut pos = nominal_offset;
while pos <= search_upper_bound
{
reader.seek(io::SeekFrom::Start(pos))?;
if reader.read_u32::<LittleEndian>()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE
{
let archive_offset = pos - nominal_offset;
let _record_size = try!(reader.read_u64::<LittleEndian>());
// We would use this value if we did anything with the "zip64 extensible data sector".
let version_made_by = try!(reader.read_u16::<LittleEndian>());
let version_needed_to_extract = try!(reader.read_u16::<LittleEndian>());
let disk_number = try!(reader.read_u32::<LittleEndian>());
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
let number_of_files_on_this_disk = try!(reader.read_u64::<LittleEndian>());
let number_of_files = try!(reader.read_u64::<LittleEndian>());
let central_directory_size = try!(reader.read_u64::<LittleEndian>());
let central_directory_offset = try!(reader.read_u64::<LittleEndian>());
return Ok((Zip64CentralDirectoryEnd
{
version_made_by: version_made_by,
version_needed_to_extract: version_needed_to_extract,
disk_number: disk_number,
disk_with_central_directory: disk_with_central_directory,
number_of_files_on_this_disk: number_of_files_on_this_disk,
number_of_files: number_of_files,
central_directory_size: central_directory_size,
central_directory_offset: central_directory_offset,
}, archive_offset));
}
pos += 1;
}
Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end"))
}
}

BIN
tests/data/zip64_demo.zip Normal file

Binary file not shown.