Merge branch 'pkgw-pr-basic-zip64'
This commit is contained in:
commit
5b259dc12a
4 changed files with 197 additions and 25 deletions
|
@ -20,7 +20,7 @@ Supported compression formats:
|
|||
|
||||
Currently unsupported zip extensions:
|
||||
|
||||
* ZIP64
|
||||
* Most of ZIP64, although there is some support for archives with more than 65535 files
|
||||
* Encryption
|
||||
* Multi-disk
|
||||
|
||||
|
|
103
src/read.rs
103
src/read.rs
|
@ -54,7 +54,7 @@ pub struct ZipArchive<R: Read + io::Seek>
|
|||
reader: R,
|
||||
files: Vec<ZipFileData>,
|
||||
names_map: HashMap<String, usize>,
|
||||
offset: u32,
|
||||
offset: u64,
|
||||
}
|
||||
|
||||
enum ZipFileReader<'a> {
|
||||
|
@ -77,20 +77,86 @@ fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
|
|||
|
||||
impl<R: Read+io::Seek> ZipArchive<R>
|
||||
{
|
||||
/// Get the directory start offset and number of files. This is done in a
|
||||
/// separate function to ease the control flow design.
|
||||
fn get_directory_counts(mut reader: &mut R,
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
|
||||
// Some zip files have data prepended to them, resulting in the
|
||||
// offsets all being too small. Get the amount of error by comparing
|
||||
// the actual file position we found the CDE at with the offset
|
||||
// recorded in the CDE.
|
||||
let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
|
||||
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
|
||||
.ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
|
||||
|
||||
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
||||
|
||||
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
||||
// have its signature 20 bytes in front of the standard footer. The
|
||||
// standard footer, in turn, is 22+N bytes large, where N is the
|
||||
// comment length. Therefore:
|
||||
|
||||
if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) {
|
||||
// Empty Zip files will have nothing else so this error might be fine. If
|
||||
// not, we'll find out soon.
|
||||
return Ok((archive_offset, directory_start, number_of_files));
|
||||
}
|
||||
|
||||
let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) {
|
||||
Ok(loc) => loc,
|
||||
Err(ZipError::InvalidArchive(_)) => {
|
||||
// No ZIP64 header; that's actually fine. We're done here.
|
||||
return Ok((archive_offset, directory_start, number_of_files));
|
||||
},
|
||||
Err(e) => {
|
||||
// Yikes, a real problem
|
||||
return Err(e);
|
||||
},
|
||||
};
|
||||
|
||||
// If we got here, this is indeed a ZIP64 file.
|
||||
|
||||
if footer.disk_number as u32 != locator64.disk_with_central_directory {
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
// We need to reassess `archive_offset`. We know where the ZIP64
|
||||
// central-directory-end structure *should* be, but unfortunately we
|
||||
// don't know how to precisely relate that location to our current
|
||||
// actual offset in the file, since there may be junk at its
|
||||
// beginning. Therefore we need to perform another search, as in
|
||||
// read::CentralDirectoryEnd::find_and_parse, except now we search
|
||||
// forward.
|
||||
|
||||
let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
|
||||
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
|
||||
.ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
|
||||
let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
|
||||
&mut reader,
|
||||
locator64.end_of_central_directory_offset,
|
||||
search_upper_bound)?;
|
||||
|
||||
if footer.disk_number != footer.disk_with_central_directory {
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
let directory_start = footer.central_directory_offset + archive_offset;
|
||||
Ok((archive_offset, directory_start, footer.number_of_files as usize))
|
||||
}
|
||||
|
||||
/// Opens a Zip archive and parses the central directory
|
||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||
let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
|
||||
|
||||
if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") }
|
||||
if footer.disk_number != footer.disk_with_central_directory
|
||||
{
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
// Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of
|
||||
// error by comparing the actual file position we found the CDE at with the offset recorded in the CDE.
|
||||
let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size)
|
||||
.and_then(|x| x.checked_sub(footer.central_directory_offset))
|
||||
.ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
|
||||
|
||||
let directory_start = (footer.central_directory_offset + archive_offset) as u64;
|
||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
||||
let (archive_offset, directory_start, number_of_files) =
|
||||
try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
|
||||
|
||||
let mut files = Vec::with_capacity(number_of_files);
|
||||
let mut names_map = HashMap::new();
|
||||
|
@ -132,7 +198,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
|
|||
///
|
||||
/// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
|
||||
/// of that prepended data.
|
||||
pub fn offset(&self) -> u32 {
|
||||
pub fn offset(&self) -> u64 {
|
||||
self.offset
|
||||
}
|
||||
|
||||
|
@ -198,7 +264,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
|
|||
}
|
||||
}
|
||||
|
||||
fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u32) -> ZipResult<ZipFileData>
|
||||
fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>
|
||||
{
|
||||
// Parse central header
|
||||
let signature = try!(reader.read_u32::<LittleEndian>());
|
||||
|
@ -230,7 +296,7 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset:
|
|||
let file_comment_raw = try!(ReadPodExt::read_exact(reader, file_comment_length));
|
||||
|
||||
// Account for shifted zip offsets.
|
||||
offset += archive_offset as u64;
|
||||
offset += archive_offset;
|
||||
|
||||
let file_name = match is_utf8
|
||||
{
|
||||
|
@ -399,4 +465,15 @@ mod test {
|
|||
let reader = ZipArchive::new(io::Cursor::new(v));
|
||||
assert!(reader.is_err());
|
||||
}
|
||||
|
||||
/// Opens the bundled `zip64_demo.zip` fixture and checks that exactly one
/// entry is reported.
// NOTE(review): the name suggests the fixture itself carries leading junk;
// nothing is prepended here, so confirm against the fixture.
#[test]
fn zip64_with_leading_junk() {
    use std::io;
    use super::ZipArchive;

    let archive_bytes = include_bytes!("../tests/data/zip64_demo.zip").to_vec();
    let archive = ZipArchive::new(io::Cursor::new(archive_bytes)).unwrap();
    assert!(archive.len() == 1);
}
|
||||
}
|
||||
|
|
117
src/spec.rs
117
src/spec.rs
|
@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian};
|
|||
pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50;
|
||||
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50;
|
||||
const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50;
|
||||
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50;
|
||||
const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50;
|
||||
|
||||
pub struct CentralDirectoryEnd
|
||||
{
|
||||
|
@ -48,29 +50,32 @@ impl CentralDirectoryEnd
|
|||
})
|
||||
}
|
||||
|
||||
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u32)>
|
||||
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)>
|
||||
{
|
||||
let header_size = 22;
|
||||
let bytes_between_magic_and_comment_size = header_size - 6;
|
||||
let file_length = try!(reader.seek(io::SeekFrom::End(0))) as i64;
|
||||
const HEADER_SIZE: u64 = 22;
|
||||
const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6;
|
||||
let file_length = try!(reader.seek(io::SeekFrom::End(0)));
|
||||
|
||||
let search_upper_bound = ::std::cmp::max(0, file_length - header_size - ::std::u16::MAX as i64);
|
||||
let search_upper_bound = file_length.checked_sub(HEADER_SIZE + ::std::u16::MAX as u64).unwrap_or(0);
|
||||
|
||||
let mut pos = file_length - header_size;
|
||||
let mut pos = file_length - HEADER_SIZE;
|
||||
while pos >= search_upper_bound
|
||||
{
|
||||
try!(reader.seek(io::SeekFrom::Start(pos as u64)));
|
||||
if try!(reader.read_u32::<LittleEndian>()) == CENTRAL_DIRECTORY_END_SIGNATURE
|
||||
{
|
||||
try!(reader.seek(io::SeekFrom::Current(bytes_between_magic_and_comment_size)));
|
||||
let comment_length = try!(reader.read_u16::<LittleEndian>()) as i64;
|
||||
if file_length - pos - header_size == comment_length
|
||||
try!(reader.seek(io::SeekFrom::Current(BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64)));
|
||||
let comment_length = try!(reader.read_u16::<LittleEndian>()) as u64;
|
||||
if file_length - pos - HEADER_SIZE == comment_length
|
||||
{
|
||||
let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64))) as u32;
|
||||
let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64)));
|
||||
return CentralDirectoryEnd::parse(reader).map(|cde| (cde, cde_start_pos));
|
||||
}
|
||||
}
|
||||
pos -= 1;
|
||||
pos = match pos.checked_sub(1) {
|
||||
Some(p) => p,
|
||||
None => break,
|
||||
};
|
||||
}
|
||||
Err(ZipError::InvalidArchive("Could not find central directory end"))
|
||||
}
|
||||
|
@ -89,3 +94,93 @@ impl CentralDirectoryEnd
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Zip64CentralDirectoryEndLocator
|
||||
{
|
||||
pub disk_with_central_directory: u32,
|
||||
pub end_of_central_directory_offset: u64,
|
||||
pub number_of_disks: u32,
|
||||
}
|
||||
|
||||
impl Zip64CentralDirectoryEndLocator
|
||||
{
|
||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator>
|
||||
{
|
||||
let magic = try!(reader.read_u32::<LittleEndian>());
|
||||
if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE
|
||||
{
|
||||
return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header"))
|
||||
}
|
||||
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
|
||||
let end_of_central_directory_offset = try!(reader.read_u64::<LittleEndian>());
|
||||
let number_of_disks = try!(reader.read_u32::<LittleEndian>());
|
||||
|
||||
Ok(Zip64CentralDirectoryEndLocator
|
||||
{
|
||||
disk_with_central_directory: disk_with_central_directory,
|
||||
end_of_central_directory_offset: end_of_central_directory_offset,
|
||||
number_of_disks: number_of_disks,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Zip64CentralDirectoryEnd
|
||||
{
|
||||
pub version_made_by: u16,
|
||||
pub version_needed_to_extract: u16,
|
||||
pub disk_number: u32,
|
||||
pub disk_with_central_directory: u32,
|
||||
pub number_of_files_on_this_disk: u64,
|
||||
pub number_of_files: u64,
|
||||
pub central_directory_size: u64,
|
||||
pub central_directory_offset: u64,
|
||||
//pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
|
||||
}
|
||||
|
||||
impl Zip64CentralDirectoryEnd
|
||||
{
|
||||
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T,
|
||||
nominal_offset: u64,
|
||||
search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)>
|
||||
{
|
||||
let mut pos = nominal_offset;
|
||||
|
||||
while pos <= search_upper_bound
|
||||
{
|
||||
reader.seek(io::SeekFrom::Start(pos))?;
|
||||
|
||||
if reader.read_u32::<LittleEndian>()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE
|
||||
{
|
||||
let archive_offset = pos - nominal_offset;
|
||||
|
||||
let _record_size = try!(reader.read_u64::<LittleEndian>());
|
||||
// We would use this value if we did anything with the "zip64 extensible data sector".
|
||||
|
||||
let version_made_by = try!(reader.read_u16::<LittleEndian>());
|
||||
let version_needed_to_extract = try!(reader.read_u16::<LittleEndian>());
|
||||
let disk_number = try!(reader.read_u32::<LittleEndian>());
|
||||
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
|
||||
let number_of_files_on_this_disk = try!(reader.read_u64::<LittleEndian>());
|
||||
let number_of_files = try!(reader.read_u64::<LittleEndian>());
|
||||
let central_directory_size = try!(reader.read_u64::<LittleEndian>());
|
||||
let central_directory_offset = try!(reader.read_u64::<LittleEndian>());
|
||||
|
||||
return Ok((Zip64CentralDirectoryEnd
|
||||
{
|
||||
version_made_by: version_made_by,
|
||||
version_needed_to_extract: version_needed_to_extract,
|
||||
disk_number: disk_number,
|
||||
disk_with_central_directory: disk_with_central_directory,
|
||||
number_of_files_on_this_disk: number_of_files_on_this_disk,
|
||||
number_of_files: number_of_files,
|
||||
central_directory_size: central_directory_size,
|
||||
central_directory_offset: central_directory_offset,
|
||||
}, archive_offset));
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end"))
|
||||
}
|
||||
}
|
||||
|
|
BIN
tests/data/zip64_demo.zip
Normal file
BIN
tests/data/zip64_demo.zip
Normal file
Binary file not shown.
Loading…
Add table
Reference in a new issue