diff --git a/README.md b/README.md index 2ceeaca7..a594e33b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Supported compression formats: Currently unsupported zip extensions: -* ZIP64 +* Most of ZIP64, although there is some support for archives with more than 65535 files * Encryption * Multi-disk diff --git a/src/read.rs b/src/read.rs index c8980303..f8ac295f 100644 --- a/src/read.rs +++ b/src/read.rs @@ -77,14 +77,15 @@ fn unsupported_zip_error(detail: &'static str) -> ZipResult impl ZipArchive { - /// Opens a Zip archive and parses the central directory - pub fn new(mut reader: R) -> ZipResult> { - let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader)); - - if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") } - - // Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of - // error by comparing the actual file position we found the CDE at with the offset recorded in the CDE. + /// Get the directory start offset and number of files. This is done in a + /// separate function to ease the control flow design. + fn get_directory_counts(mut reader: &mut R, + footer: &spec::CentralDirectoryEnd, + cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> { + // Some zip files have data prepended to them, resulting in the + // offsets all being too small. Get the amount of error by comparing + // the actual file position we found the CDE at with the offset + // recorded in the CDE. 
let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64) .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?; @@ -92,6 +93,71 @@ impl ZipArchive let directory_start = footer.central_directory_offset as u64 + archive_offset; let number_of_files = footer.number_of_files_on_this_disk as usize; + // See if there's a ZIP64 footer. The ZIP64 locator if present will + // have its signature 20 bytes in front of the standard footer. The + // standard footer, in turn, is 22+N bytes large, where N is the + // comment length. Therefore: + + if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) { + // Empty Zip files will have nothing else so this error might be fine. If + // not, we'll find out soon. + return Ok((archive_offset, directory_start, number_of_files)); + } + + let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) { + Ok(loc) => loc, + Err(ZipError::InvalidArchive(_)) => { + // No ZIP64 header; that's actually fine. We're done here. + return Ok((archive_offset, directory_start, number_of_files)); + }, + Err(e) => { + // Yikes, a real problem + return Err(e); + }, + }; + + // If we got here, this is indeed a ZIP64 file. + + if footer.disk_number as u32 != locator64.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + // We need to reassess `archive_offset`. We know where the ZIP64 + // central-directory-end structure *should* be, but unfortunately we + // don't know how to precisely relate that location to our current + // actual offset in the file, since there may be junk at its + // beginning. Therefore we need to perform another search, as in + // spec::CentralDirectoryEnd::find_and_parse, except now we search + // forward. + + let search_upper_bound = reader.seek(io::SeekFrom::Current(0))? 
+ .checked_sub(60) // loose lower bound on the size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator (their fixed fields total 56 + 20 = 76 bytes) + .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?; + let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( + &mut reader, + locator64.end_of_central_directory_offset, + search_upper_bound)?; + + if footer.disk_number != footer.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + let directory_start = footer.central_directory_offset + archive_offset; + Ok((archive_offset, directory_start, footer.number_of_files as usize)) + } + + /// Opens a Zip archive and parses the central directory + pub fn new(mut reader: R) -> ZipResult> { + let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader)); + + if footer.disk_number != footer.disk_with_central_directory + { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + let (archive_offset, directory_start, number_of_files) = + try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos)); + let mut files = Vec::with_capacity(number_of_files); let mut names_map = HashMap::new(); diff --git a/src/spec.rs b/src/spec.rs index e0150375..56f5663f 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian}; pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50; pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50; const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50; +pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50; +const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50; pub struct CentralDirectoryEnd { @@ -92,3 +94,93 @@ impl CentralDirectoryEnd Ok(()) } } + +pub struct Zip64CentralDirectoryEndLocator +{ + pub disk_with_central_directory: u32, + pub end_of_central_directory_offset: u64, + pub number_of_disks: u32, +} + +impl 
Zip64CentralDirectoryEndLocator +{ + pub fn parse(reader: &mut T) -> ZipResult + { + let magic = try!(reader.read_u32::()); + if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE + { + return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header")) + } + let disk_with_central_directory = try!(reader.read_u32::()); + let end_of_central_directory_offset = try!(reader.read_u64::()); + let number_of_disks = try!(reader.read_u32::()); + + Ok(Zip64CentralDirectoryEndLocator + { + disk_with_central_directory: disk_with_central_directory, + end_of_central_directory_offset: end_of_central_directory_offset, + number_of_disks: number_of_disks, + }) + } +} + +pub struct Zip64CentralDirectoryEnd +{ + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub disk_number: u32, + pub disk_with_central_directory: u32, + pub number_of_files_on_this_disk: u64, + pub number_of_files: u64, + pub central_directory_size: u64, + pub central_directory_offset: u64, + //pub extensible_data_sector: Vec, <-- We don't do anything with this at the moment. +} + +impl Zip64CentralDirectoryEnd +{ + pub fn find_and_parse(reader: &mut T, + nominal_offset: u64, + search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)> + { + let mut pos = nominal_offset; + + while pos <= search_upper_bound + { + reader.seek(io::SeekFrom::Start(pos))?; + + if reader.read_u32::()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE + { + let archive_offset = pos - nominal_offset; + + let _record_size = try!(reader.read_u64::()); + // We would use this value if we did anything with the "zip64 extensible data sector". 
+ + let version_made_by = try!(reader.read_u16::()); + let version_needed_to_extract = try!(reader.read_u16::()); + let disk_number = try!(reader.read_u32::()); + let disk_with_central_directory = try!(reader.read_u32::()); + let number_of_files_on_this_disk = try!(reader.read_u64::()); + let number_of_files = try!(reader.read_u64::()); + let central_directory_size = try!(reader.read_u64::()); + let central_directory_offset = try!(reader.read_u64::()); + + return Ok((Zip64CentralDirectoryEnd + { + version_made_by: version_made_by, + version_needed_to_extract: version_needed_to_extract, + disk_number: disk_number, + disk_with_central_directory: disk_with_central_directory, + number_of_files_on_this_disk: number_of_files_on_this_disk, + number_of_files: number_of_files, + central_directory_size: central_directory_size, + central_directory_offset: central_directory_offset, + }, archive_offset)); + } + + pos += 1; + } + + Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end")) + } +}