From 89f33c93482d89f01dd7a97384da348f208c20b4 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sat, 12 Aug 2017 13:38:07 -0400 Subject: [PATCH 1/3] Use u64's for internal file offsets. These are better aligned with Rust's APIs, and lay the groundwork for more convenient support of Zip64 files. --- src/read.rs | 14 +++++++------- src/spec.rs | 25 ++++++++++++++----------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/read.rs b/src/read.rs index 752dce1d..c8980303 100644 --- a/src/read.rs +++ b/src/read.rs @@ -54,7 +54,7 @@ pub struct ZipArchive reader: R, files: Vec, names_map: HashMap, - offset: u32, + offset: u64, } enum ZipFileReader<'a> { @@ -85,11 +85,11 @@ impl ZipArchive // Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of // error by comparing the actual file position we found the CDE at with the offset recorded in the CDE. - let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size) - .and_then(|x| x.checked_sub(footer.central_directory_offset)) + let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64) + .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?; - let directory_start = (footer.central_directory_offset + archive_offset) as u64; + let directory_start = footer.central_directory_offset as u64 + archive_offset; let number_of_files = footer.number_of_files_on_this_disk as usize; let mut files = Vec::with_capacity(number_of_files); @@ -132,7 +132,7 @@ impl ZipArchive /// /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size /// of that prepended data. 
- pub fn offset(&self) -> u32 { + pub fn offset(&self) -> u64 { self.offset } @@ -198,7 +198,7 @@ impl ZipArchive } } -fn central_header_to_zip_file(reader: &mut R, archive_offset: u32) -> ZipResult +fn central_header_to_zip_file(reader: &mut R, archive_offset: u64) -> ZipResult { // Parse central header let signature = try!(reader.read_u32::()); @@ -230,7 +230,7 @@ fn central_header_to_zip_file(reader: &mut R, archive_offset: let file_comment_raw = try!(ReadPodExt::read_exact(reader, file_comment_length)); // Account for shifted zip offsets. - offset += archive_offset as u64; + offset += archive_offset; let file_name = match is_utf8 { diff --git a/src/spec.rs b/src/spec.rs index f70a3175..e0150375 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -48,29 +48,32 @@ impl CentralDirectoryEnd }) } - pub fn find_and_parse(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u32)> + pub fn find_and_parse(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)> { - let header_size = 22; - let bytes_between_magic_and_comment_size = header_size - 6; - let file_length = try!(reader.seek(io::SeekFrom::End(0))) as i64; + const HEADER_SIZE: u64 = 22; + const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6; + let file_length = try!(reader.seek(io::SeekFrom::End(0))); - let search_upper_bound = ::std::cmp::max(0, file_length - header_size - ::std::u16::MAX as i64); + let search_upper_bound = file_length.checked_sub(HEADER_SIZE + ::std::u16::MAX as u64).unwrap_or(0); - let mut pos = file_length - header_size; + let mut pos = file_length - HEADER_SIZE; while pos >= search_upper_bound { try!(reader.seek(io::SeekFrom::Start(pos as u64))); if try!(reader.read_u32::()) == CENTRAL_DIRECTORY_END_SIGNATURE { - try!(reader.seek(io::SeekFrom::Current(bytes_between_magic_and_comment_size))); - let comment_length = try!(reader.read_u16::()) as i64; - if file_length - pos - header_size == comment_length + try!(reader.seek(io::SeekFrom::Current(BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as 
i64))); + let comment_length = try!(reader.read_u16::()) as u64; + if file_length - pos - HEADER_SIZE == comment_length { - let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64))) as u32; + let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64))); return CentralDirectoryEnd::parse(reader).map(|cde| (cde, cde_start_pos)); } } - pos -= 1; + pos = match pos.checked_sub(1) { + Some(p) => p, + None => break, + }; } Err(ZipError::InvalidArchive("Could not find central directory end")) } From 4a297c32a8cb18af370b9ab09d168bff04a247e5 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sat, 12 Aug 2017 14:06:45 -0400 Subject: [PATCH 2/3] Implement support for reading ZIP64 central-directory-end records This provides only very basic ZIP64 support, but it allows us to properly read archives with more than 65535 files, so long as none of the individual files need ZIP64 support to be read. --- README.md | 2 +- src/read.rs | 82 ++++++++++++++++++++++++++++++++++++++++++----- src/spec.rs | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 167 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 2ceeaca7..a594e33b 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Supported compression formats: Currently unsupported zip extensions: -* ZIP64 +* Most of ZIP64, although there is some support for archives with more than 65535 files * Encryption * Multi-disk diff --git a/src/read.rs b/src/read.rs index c8980303..f8ac295f 100644 --- a/src/read.rs +++ b/src/read.rs @@ -77,14 +77,15 @@ fn unsupported_zip_error(detail: &'static str) -> ZipResult impl ZipArchive { - /// Opens a Zip archive and parses the central directory - pub fn new(mut reader: R) -> ZipResult> { - let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader)); - - if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") } - - // Some 
zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of - // error by comparing the actual file position we found the CDE at with the offset recorded in the CDE. + /// Get the directory start offset and number of files. This is done in a + /// separate function to ease the control flow design. + fn get_directory_counts(mut reader: &mut R, + footer: &spec::CentralDirectoryEnd, + cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> { + // Some zip files have data prepended to them, resulting in the + // offsets all being too small. Get the amount of error by comparing + // the actual file position we found the CDE at with the offset + // recorded in the CDE. let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64) .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?; @@ -92,6 +93,71 @@ impl ZipArchive let directory_start = footer.central_directory_offset as u64 + archive_offset; let number_of_files = footer.number_of_files_on_this_disk as usize; + // See if there's a ZIP64 footer. The ZIP64 locator if present will + // have its signature 20 bytes in front of the standard footer. The + // standard footer, in turn, is 22+N bytes large, where N is the + // comment length. Therefore: + + if let Err(_) = reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))) { + // Empty Zip files will have nothing else so this error might be fine. If + // not, we'll find out soon. + return Ok((archive_offset, directory_start, number_of_files)); + } + + let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) { + Ok(loc) => loc, + Err(ZipError::InvalidArchive(_)) => { + // No ZIP64 header; that's actually fine. We're done here. 
+ return Ok((archive_offset, directory_start, number_of_files)); + }, + Err(e) => { + // Yikes, a real problem + return Err(e); + }, + }; + + // If we got here, this is indeed a ZIP64 file. + + if footer.disk_number as u32 != locator64.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + // We need to reassess `archive_offset`. We know where the ZIP64 + // central-directory-end structure *should* be, but unfortunately we + // don't know how to precisely relate that location to our current + // actual offset in the file, since there may be junk at its + // beginning. Therefore we need to perform another search, as in + // spec::CentralDirectoryEnd::find_and_parse, except now we search + // forward. + + let search_upper_bound = reader.seek(io::SeekFrom::Current(0))? + .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?; + let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse( + &mut reader, + locator64.end_of_central_directory_offset, + search_upper_bound)?; + + if footer.disk_number != footer.disk_with_central_directory { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + let directory_start = footer.central_directory_offset + archive_offset; + Ok((archive_offset, directory_start, footer.number_of_files as usize)) + } + + /// Opens a Zip archive and parses the central directory + pub fn new(mut reader: R) -> ZipResult> { + let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader)); + + if footer.disk_number != footer.disk_with_central_directory + { + return unsupported_zip_error("Support for multi-disk files is not implemented") + } + + let (archive_offset, directory_start, number_of_files) = + try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos)); + let mut files
= Vec::with_capacity(number_of_files); let mut names_map = HashMap::new(); diff --git a/src/spec.rs b/src/spec.rs index e0150375..56f5663f 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian}; pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50; pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50; const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50; +pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50; +const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50; pub struct CentralDirectoryEnd { @@ -92,3 +94,93 @@ impl CentralDirectoryEnd Ok(()) } } + +pub struct Zip64CentralDirectoryEndLocator +{ + pub disk_with_central_directory: u32, + pub end_of_central_directory_offset: u64, + pub number_of_disks: u32, +} + +impl Zip64CentralDirectoryEndLocator +{ + pub fn parse(reader: &mut T) -> ZipResult + { + let magic = try!(reader.read_u32::()); + if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE + { + return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header")) + } + let disk_with_central_directory = try!(reader.read_u32::()); + let end_of_central_directory_offset = try!(reader.read_u64::()); + let number_of_disks = try!(reader.read_u32::()); + + Ok(Zip64CentralDirectoryEndLocator + { + disk_with_central_directory: disk_with_central_directory, + end_of_central_directory_offset: end_of_central_directory_offset, + number_of_disks: number_of_disks, + }) + } +} + +pub struct Zip64CentralDirectoryEnd +{ + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub disk_number: u32, + pub disk_with_central_directory: u32, + pub number_of_files_on_this_disk: u64, + pub number_of_files: u64, + pub central_directory_size: u64, + pub central_directory_offset: u64, + //pub extensible_data_sector: Vec, <-- We don't do anything with this at the moment. 
+} + +impl Zip64CentralDirectoryEnd +{ + pub fn find_and_parse(reader: &mut T, + nominal_offset: u64, + search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)> + { + let mut pos = nominal_offset; + + while pos <= search_upper_bound + { + reader.seek(io::SeekFrom::Start(pos))?; + + if reader.read_u32::()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE + { + let archive_offset = pos - nominal_offset; + + let _record_size = try!(reader.read_u64::()); + // We would use this value if we did anything with the "zip64 extensible data sector". + + let version_made_by = try!(reader.read_u16::()); + let version_needed_to_extract = try!(reader.read_u16::()); + let disk_number = try!(reader.read_u32::()); + let disk_with_central_directory = try!(reader.read_u32::()); + let number_of_files_on_this_disk = try!(reader.read_u64::()); + let number_of_files = try!(reader.read_u64::()); + let central_directory_size = try!(reader.read_u64::()); + let central_directory_offset = try!(reader.read_u64::()); + + return Ok((Zip64CentralDirectoryEnd + { + version_made_by: version_made_by, + version_needed_to_extract: version_needed_to_extract, + disk_number: disk_number, + disk_with_central_directory: disk_with_central_directory, + number_of_files_on_this_disk: number_of_files_on_this_disk, + number_of_files: number_of_files, + central_directory_size: central_directory_size, + central_directory_offset: central_directory_offset, + }, archive_offset)); + } + + pos += 1; + } + + Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end")) + } +} From f85b5cb7b873144692c4f84d6f8d89b3c097bfc6 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sat, 12 Aug 2017 14:30:37 -0400 Subject: [PATCH 3/3] Add a test for ZIP64 with leading junk. I constructed this file using a hack from the Zip manpage: if the input to a Zip compression command is streamed on standard input, the output is given in ZIP64 format since the tool doesn't know how big the input will be. 
I modified the resulting file by adding some leading junk text and editing the non-ZIP64 end-of-central-directory structure to have 0xFFFF for its "number of files" parameters, to help the test demonstrate that the ZIP64 data are being properly read. (0xFFFF is the value used in the non-ZIP64 structure if the archive actually has more than 65535 files.) --- src/read.rs | 11 +++++++++++ tests/data/zip64_demo.zip | Bin 0 -> 224 bytes 2 files changed, 11 insertions(+) create mode 100644 tests/data/zip64_demo.zip diff --git a/src/read.rs b/src/read.rs index f8ac295f..cb3f0725 100644 --- a/src/read.rs +++ b/src/read.rs @@ -465,4 +465,15 @@ mod test { let reader = ZipArchive::new(io::Cursor::new(v)); assert!(reader.is_err()); } + + #[test] + fn zip64_with_leading_junk() { + use std::io; + use super::ZipArchive; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/zip64_demo.zip")); + let reader = ZipArchive::new(io::Cursor::new(v)).unwrap(); + assert!(reader.len() == 1); + } } diff --git a/tests/data/zip64_demo.zip b/tests/data/zip64_demo.zip new file mode 100644 index 0000000000000000000000000000000000000000..f2ceee303ee32da77509bd153a914aaeae0ce1ad GIT binary patch literal 224 zcmeZCO-#wmOIOG$&CAx~3h-uT(PaRERfRm>63auY|3d*Ig9w8zBZB|~A6S|JPJ5*0 z