Implement support for reading ZIP64 central-directory-end records
This provides only very basic ZIP64 support, but it allows us to properly read archives with more than 65535 files, so long as none of the individual files need ZIP64 support to be read.
This commit is contained in:
parent
89f33c9348
commit
4a297c32a8
3 changed files with 167 additions and 9 deletions
|
@ -20,7 +20,7 @@ Supported compression formats:
|
|||
|
||||
Currently unsupported zip extensions:
|
||||
|
||||
* ZIP64
|
||||
* Most of ZIP64, although there is some support for archives with more than 65535 files
|
||||
* Encryption
|
||||
* Multi-disk
|
||||
|
||||
|
|
82
src/read.rs
82
src/read.rs
|
@ -77,14 +77,15 @@ fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
|
|||
|
||||
impl<R: Read+io::Seek> ZipArchive<R>
|
||||
{
|
||||
/// Opens a Zip archive and parses the central directory
|
||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||
let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
|
||||
|
||||
if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") }
|
||||
|
||||
// Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of
|
||||
// error by comparing the actual file position we found the CDE at with the offset recorded in the CDE.
|
||||
/// Get the directory start offset and number of files. This is done in a
|
||||
/// separate function to ease the control flow design.
|
||||
fn get_directory_counts(mut reader: &mut R,
|
||||
footer: &spec::CentralDirectoryEnd,
|
||||
cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
|
||||
// Some zip files have data prepended to them, resulting in the
|
||||
// offsets all being too small. Get the amount of error by comparing
|
||||
// the actual file position we found the CDE at with the offset
|
||||
// recorded in the CDE.
|
||||
let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
|
||||
.and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
|
||||
.ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
|
||||
|
@ -92,6 +93,71 @@ impl<R: Read+io::Seek> ZipArchive<R>
|
|||
let directory_start = footer.central_directory_offset as u64 + archive_offset;
|
||||
let number_of_files = footer.number_of_files_on_this_disk as usize;
|
||||
|
||||
// See if there's a ZIP64 footer. The ZIP64 locator if present will
|
||||
// have its signature 20 bytes in front of the standard footer. The
|
||||
// standard footer, in turn, is 22+N bytes large, where N is the
|
||||
// comment length. Therefore:
|
||||
|
||||
if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) {
|
||||
// Empty Zip files will have nothing else so this error might be fine. If
|
||||
// not, we'll find out soon.
|
||||
return Ok((archive_offset, directory_start, number_of_files));
|
||||
}
|
||||
|
||||
let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) {
|
||||
Ok(loc) => loc,
|
||||
Err(ZipError::InvalidArchive(_)) => {
|
||||
// No ZIP64 header; that's actually fine. We're done here.
|
||||
return Ok((archive_offset, directory_start, number_of_files));
|
||||
},
|
||||
Err(e) => {
|
||||
// Yikes, a real problem
|
||||
return Err(e);
|
||||
},
|
||||
};
|
||||
|
||||
// If we got here, this is indeed a ZIP64 file.
|
||||
|
||||
if footer.disk_number as u32 != locator64.disk_with_central_directory {
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
// We need to reassess `archive_offset`. We know where the ZIP64
|
||||
// central-directory-end structure *should* be, but unfortunately we
|
||||
// don't know how to precisely relate that location to our current
|
||||
// actual offset in the file, since there may be junk at its
|
||||
// beginning. Therefore we need to perform another search, as in
|
||||
// spec::CentralDirectoryEnd::find_and_parse, except now we search
|
||||
// forward.
|
||||
|
||||
let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
|
||||
.checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
|
||||
.ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
|
||||
let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
|
||||
&mut reader,
|
||||
locator64.end_of_central_directory_offset,
|
||||
search_upper_bound)?;
|
||||
|
||||
if footer.disk_number != footer.disk_with_central_directory {
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
let directory_start = footer.central_directory_offset + archive_offset;
|
||||
Ok((archive_offset, directory_start, footer.number_of_files as usize))
|
||||
}
|
||||
|
||||
/// Opens a Zip archive and parses the central directory
|
||||
pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
|
||||
let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
|
||||
|
||||
if footer.disk_number != footer.disk_with_central_directory
|
||||
{
|
||||
return unsupported_zip_error("Support for multi-disk files is not implemented")
|
||||
}
|
||||
|
||||
let (archive_offset, directory_start, number_of_files) =
|
||||
try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
|
||||
|
||||
let mut files = Vec::with_capacity(number_of_files);
|
||||
let mut names_map = HashMap::new();
|
||||
|
||||
|
|
92
src/spec.rs
92
src/spec.rs
|
@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian};
|
|||
pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50;
|
||||
pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50;
|
||||
const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50;
|
||||
pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50;
|
||||
const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50;
|
||||
|
||||
pub struct CentralDirectoryEnd
|
||||
{
|
||||
|
@ -92,3 +94,93 @@ impl CentralDirectoryEnd
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The "ZIP64 end of central directory locator" record.
///
/// This fixed-size record sits immediately before the classic
/// end-of-central-directory record and tells a reader where to find the
/// ZIP64 end-of-central-directory record. The fields are listed in the order
/// they appear on disk, after the four-byte signature.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Zip64CentralDirectoryEndLocator {
    /// Number of the disk on which the ZIP64 end-of-central-directory record starts.
    pub disk_with_central_directory: u32,
    /// Recorded offset of the ZIP64 end-of-central-directory record. The real
    /// file position may be larger if data was prepended to the archive.
    pub end_of_central_directory_offset: u64,
    /// Total number of disks in the archive.
    pub number_of_disks: u32,
}
|
||||
|
||||
impl Zip64CentralDirectoryEndLocator
|
||||
{
|
||||
pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator>
|
||||
{
|
||||
let magic = try!(reader.read_u32::<LittleEndian>());
|
||||
if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE
|
||||
{
|
||||
return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header"))
|
||||
}
|
||||
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
|
||||
let end_of_central_directory_offset = try!(reader.read_u64::<LittleEndian>());
|
||||
let number_of_disks = try!(reader.read_u32::<LittleEndian>());
|
||||
|
||||
Ok(Zip64CentralDirectoryEndLocator
|
||||
{
|
||||
disk_with_central_directory: disk_with_central_directory,
|
||||
end_of_central_directory_offset: end_of_central_directory_offset,
|
||||
number_of_disks: number_of_disks,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The "ZIP64 end of central directory" record.
///
/// This is the 64-bit counterpart of the classic end-of-central-directory
/// record. The fields are listed in the order they appear on disk, after the
/// signature and the record-size field.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Zip64CentralDirectoryEnd {
    /// Version of the software that wrote the archive.
    pub version_made_by: u16,
    /// Minimum version a reader needs in order to extract the archive.
    pub version_needed_to_extract: u16,
    /// Number of this disk.
    pub disk_number: u32,
    /// Number of the disk on which the central directory starts.
    pub disk_with_central_directory: u32,
    /// Number of central directory entries on this disk.
    pub number_of_files_on_this_disk: u64,
    /// Total number of central directory entries.
    pub number_of_files: u64,
    /// Size of the central directory in bytes.
    pub central_directory_size: u64,
    /// Recorded offset of the start of the central directory. The real file
    /// position may be larger if data was prepended to the archive.
    pub central_directory_offset: u64,
    //pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
}
|
||||
|
||||
impl Zip64CentralDirectoryEnd
|
||||
{
|
||||
pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T,
|
||||
nominal_offset: u64,
|
||||
search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)>
|
||||
{
|
||||
let mut pos = nominal_offset;
|
||||
|
||||
while pos <= search_upper_bound
|
||||
{
|
||||
reader.seek(io::SeekFrom::Start(pos))?;
|
||||
|
||||
if reader.read_u32::<LittleEndian>()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE
|
||||
{
|
||||
let archive_offset = pos - nominal_offset;
|
||||
|
||||
let _record_size = try!(reader.read_u64::<LittleEndian>());
|
||||
// We would use this value if we did anything with the "zip64 extensible data sector".
|
||||
|
||||
let version_made_by = try!(reader.read_u16::<LittleEndian>());
|
||||
let version_needed_to_extract = try!(reader.read_u16::<LittleEndian>());
|
||||
let disk_number = try!(reader.read_u32::<LittleEndian>());
|
||||
let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
|
||||
let number_of_files_on_this_disk = try!(reader.read_u64::<LittleEndian>());
|
||||
let number_of_files = try!(reader.read_u64::<LittleEndian>());
|
||||
let central_directory_size = try!(reader.read_u64::<LittleEndian>());
|
||||
let central_directory_offset = try!(reader.read_u64::<LittleEndian>());
|
||||
|
||||
return Ok((Zip64CentralDirectoryEnd
|
||||
{
|
||||
version_made_by: version_made_by,
|
||||
version_needed_to_extract: version_needed_to_extract,
|
||||
disk_number: disk_number,
|
||||
disk_with_central_directory: disk_with_central_directory,
|
||||
number_of_files_on_this_disk: number_of_files_on_this_disk,
|
||||
number_of_files: number_of_files,
|
||||
central_directory_size: central_directory_size,
|
||||
central_directory_offset: central_directory_offset,
|
||||
}, archive_offset));
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
}
|
||||
|
||||
Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end"))
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue