diff --git a/README.md b/README.md
index 2ceeaca7..a594e33b 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Supported compression formats:
 
 Currently unsupported zip extensions:
 
-* ZIP64
+* Most of ZIP64, although there is some support for archives with more than 65535 files
 * Encryption
 * Multi-disk
 
diff --git a/src/read.rs b/src/read.rs
index 752dce1d..cb3f0725 100644
--- a/src/read.rs
+++ b/src/read.rs
@@ -54,7 +54,7 @@ pub struct ZipArchive<R: Read + io::Seek>
     reader: R,
     files: Vec<ZipFileData>,
     names_map: HashMap<String, usize>,
-    offset: u32,
+    offset: u64,
 }
 
 enum ZipFileReader<'a> {
@@ -77,20 +77,91 @@ fn unsupported_zip_error<T>(detail: &'static str) -> ZipResult<T>
 
 impl<R: Read+io::Seek> ZipArchive<R>
 {
+    /// Get the directory start offset and number of files. This is done in a
+    /// separate function to ease the control flow design.
+    ///
+    /// Returns `(archive_offset, directory_start, number_of_files)`.
+    fn get_directory_counts(mut reader: &mut R,
+                            footer: &spec::CentralDirectoryEnd,
+                            cde_start_pos: u64) -> ZipResult<(u64, u64, usize)> {
+        // Some zip files have data prepended to them, resulting in the
+        // offsets all being too small. Get the amount of error by comparing
+        // the actual file position we found the CDE at with the offset
+        // recorded in the CDE.
+        let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size as u64)
+            .and_then(|x| x.checked_sub(footer.central_directory_offset as u64))
+            .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
+
+        let directory_start = footer.central_directory_offset as u64 + archive_offset;
+        let number_of_files = footer.number_of_files_on_this_disk as usize;
+
+        // See if there's a ZIP64 footer. The ZIP64 locator if present will
+        // have its signature 20 bytes in front of the standard footer. The
+        // standard footer, in turn, is 22+N bytes large, where N is the
+        // comment length. Therefore:
+
+        if reader.seek(io::SeekFrom::Current(-(20 + 22 + footer.zip_file_comment.len() as i64))).is_err() {
+            // Empty Zip files will have nothing else so this error might be fine. If
+            // not, we'll find out soon.
+            return Ok((archive_offset, directory_start, number_of_files));
+        }
+
+        let locator64 = match spec::Zip64CentralDirectoryEndLocator::parse(&mut reader) {
+            Ok(loc) => loc,
+            Err(ZipError::InvalidArchive(_)) => {
+                // No ZIP64 header; that's actually fine. We're done here.
+                return Ok((archive_offset, directory_start, number_of_files));
+            },
+            Err(e) => {
+                // Yikes, a real problem
+                return Err(e);
+            },
+        };
+
+        // If we got here, this is indeed a ZIP64 file.
+
+        if footer.disk_number as u32 != locator64.disk_with_central_directory {
+            return unsupported_zip_error("Support for multi-disk files is not implemented")
+        }
+
+        // We need to reassess `archive_offset`. We know where the ZIP64
+        // central-directory-end structure *should* be, but unfortunately we
+        // don't know how to precisely relate that location to our current
+        // actual offset in the file, since there may be junk at its
+        // beginning. Therefore we need to perform another search, as in
+        // read::CentralDirectoryEnd::find_and_parse, except now we search
+        // forward.
+
+        let search_upper_bound = reader.seek(io::SeekFrom::Current(0))?
+            .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator
+            .ok_or(ZipError::InvalidArchive("File cannot contain ZIP64 central directory end"))?;
+        let (footer, archive_offset) = spec::Zip64CentralDirectoryEnd::find_and_parse(
+            &mut reader,
+            locator64.end_of_central_directory_offset,
+            search_upper_bound)?;
+
+        if footer.disk_number != footer.disk_with_central_directory {
+            return unsupported_zip_error("Support for multi-disk files is not implemented")
+        }
+
+        // Both operands come from the archive itself, so guard against overflow.
+        let directory_start = footer.central_directory_offset
+            .checked_add(archive_offset)
+            .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
+        Ok((archive_offset, directory_start, footer.number_of_files as usize))
+    }
+
     /// Opens a Zip archive and parses the central directory
     pub fn new(mut reader: R) -> ZipResult<ZipArchive<R>> {
         let (footer, cde_start_pos) = try!(spec::CentralDirectoryEnd::find_and_parse(&mut reader));
 
-        if footer.disk_number != footer.disk_with_central_directory { return unsupported_zip_error("Support for multi-disk files is not implemented") }
+        if footer.disk_number != footer.disk_with_central_directory
+        {
+            return unsupported_zip_error("Support for multi-disk files is not implemented")
+        }
 
-        // Some zip files have data prepended to them, resulting in the offsets all being too small. Get the amount of
-        // error by comparing the actual file position we found the CDE at with the offset recorded in the CDE.
-        let archive_offset = cde_start_pos.checked_sub(footer.central_directory_size)
-            .and_then(|x| x.checked_sub(footer.central_directory_offset))
-            .ok_or(ZipError::InvalidArchive("Invalid central directory size or offset"))?;
-
-        let directory_start = (footer.central_directory_offset + archive_offset) as u64;
-        let number_of_files = footer.number_of_files_on_this_disk as usize;
+        let (archive_offset, directory_start, number_of_files) =
+            try!(Self::get_directory_counts(&mut reader, &footer, cde_start_pos));
 
         let mut files = Vec::with_capacity(number_of_files);
         let mut names_map = HashMap::new();
@@ -132,7 +203,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
     ///
     /// Normally this value is zero, but if the zip has arbitrary data prepended to it, then this value will be the size
     /// of that prepended data.
-    pub fn offset(&self) -> u32 {
+    pub fn offset(&self) -> u64 {
         self.offset
     }
 
@@ -198,7 +269,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
     }
 }
 
-fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u32) -> ZipResult<ZipFileData>
+fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset: u64) -> ZipResult<ZipFileData>
 {
     // Parse central header
     let signature = try!(reader.read_u32::<LittleEndian>());
@@ -230,7 +301,7 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset:
     let file_comment_raw = try!(ReadPodExt::read_exact(reader, file_comment_length));
 
     // Account for shifted zip offsets.
-    offset += archive_offset as u64;
+    offset += archive_offset;
 
     let file_name = match is_utf8
     {
@@ -399,4 +470,26 @@ mod test {
         let reader = ZipArchive::new(io::Cursor::new(v));
         assert!(reader.is_err());
     }
+
+    #[test]
+    fn zip64_with_leading_junk() {
+        use std::io;
+        use super::ZipArchive;
+
+        let mut v = Vec::new();
+        v.extend_from_slice(include_bytes!("../tests/data/zip64_demo.zip"));
+        let reader = ZipArchive::new(io::Cursor::new(v)).unwrap();
+        assert!(reader.len() == 1);
+    }
+
+    #[test]
+    fn file_shorter_than_footer() {
+        use std::io;
+        use super::ZipArchive;
+
+        // Fewer than 22 bytes cannot hold an end-of-central-directory record;
+        // this must be reported as an error rather than underflowing.
+        let reader = ZipArchive::new(io::Cursor::new(vec![0u8; 10]));
+        assert!(reader.is_err());
+    }
 }
diff --git a/src/spec.rs b/src/spec.rs
index f70a3175..56f5663f 100644
--- a/src/spec.rs
+++ b/src/spec.rs
@@ -6,6 +6,8 @@ use podio::{ReadPodExt, WritePodExt, LittleEndian};
 pub const LOCAL_FILE_HEADER_SIGNATURE : u32 = 0x04034b50;
 pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE : u32 = 0x02014b50;
 const CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06054b50;
+pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE : u32 = 0x06064b50;
+const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE : u32 = 0x07064b50;
 
 pub struct CentralDirectoryEnd
 {
@@ -48,29 +50,34 @@ impl CentralDirectoryEnd
            })
     }
 
-    pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u32)>
+    pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)>
     {
-        let header_size = 22;
-        let bytes_between_magic_and_comment_size = header_size - 6;
-        let file_length = try!(reader.seek(io::SeekFrom::End(0))) as i64;
+        const HEADER_SIZE: u64 = 22;
+        const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6;
+        let file_length = try!(reader.seek(io::SeekFrom::End(0)));
 
-        let search_upper_bound = ::std::cmp::max(0, file_length - header_size - ::std::u16::MAX as i64);
+        let search_upper_bound = file_length.checked_sub(HEADER_SIZE + ::std::u16::MAX as u64).unwrap_or(0);
 
-        let mut pos = file_length - header_size;
+        // A file shorter than the fixed part of the record cannot contain one.
+        let mut pos = file_length.checked_sub(HEADER_SIZE)
+            .ok_or(ZipError::InvalidArchive("Could not find central directory end"))?;
         while pos >= search_upper_bound
         {
             try!(reader.seek(io::SeekFrom::Start(pos as u64)));
             if try!(reader.read_u32::<LittleEndian>()) == CENTRAL_DIRECTORY_END_SIGNATURE
             {
-                try!(reader.seek(io::SeekFrom::Current(bytes_between_magic_and_comment_size)));
-                let comment_length = try!(reader.read_u16::<LittleEndian>()) as i64;
-                if file_length - pos - header_size == comment_length
+                try!(reader.seek(io::SeekFrom::Current(BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64)));
+                let comment_length = try!(reader.read_u16::<LittleEndian>()) as u64;
+                if file_length - pos - HEADER_SIZE == comment_length
                 {
-                    let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64))) as u32;
+                    let cde_start_pos = try!(reader.seek(io::SeekFrom::Start(pos as u64)));
                     return CentralDirectoryEnd::parse(reader).map(|cde| (cde, cde_start_pos));
                 }
             }
-            pos -= 1;
+            pos = match pos.checked_sub(1) {
+                Some(p) => p,
+                None => break,
+            };
         }
         Err(ZipError::InvalidArchive("Could not find central directory end"))
     }
@@ -89,3 +96,106 @@ impl CentralDirectoryEnd
         Ok(())
     }
 }
+
+/// The ZIP64 end-of-central-directory locator: a fixed-size record giving the
+/// nominal offset of the ZIP64 end-of-central-directory record.
+pub struct Zip64CentralDirectoryEndLocator
+{
+    pub disk_with_central_directory: u32,
+    pub end_of_central_directory_offset: u64,
+    pub number_of_disks: u32,
+}
+
+impl Zip64CentralDirectoryEndLocator
+{
+    /// Parse a locator at the reader's current position.
+    ///
+    /// Returns `ZipError::InvalidArchive` if the signature does not match.
+    pub fn parse<T: Read>(reader: &mut T) -> ZipResult<Zip64CentralDirectoryEndLocator>
+    {
+        let magic = try!(reader.read_u32::<LittleEndian>());
+        if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE
+        {
+            return Err(ZipError::InvalidArchive("Invalid zip64 locator digital signature header"))
+        }
+        let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
+        let end_of_central_directory_offset = try!(reader.read_u64::<LittleEndian>());
+        let number_of_disks = try!(reader.read_u32::<LittleEndian>());
+
+        Ok(Zip64CentralDirectoryEndLocator
+           {
+               disk_with_central_directory: disk_with_central_directory,
+               end_of_central_directory_offset: end_of_central_directory_offset,
+               number_of_disks: number_of_disks,
+           })
+    }
+}
+
+/// The ZIP64 end-of-central-directory record (fixed-size fields only).
+pub struct Zip64CentralDirectoryEnd
+{
+    pub version_made_by: u16,
+    pub version_needed_to_extract: u16,
+    pub disk_number: u32,
+    pub disk_with_central_directory: u32,
+    pub number_of_files_on_this_disk: u64,
+    pub number_of_files: u64,
+    pub central_directory_size: u64,
+    pub central_directory_offset: u64,
+    //pub extensible_data_sector: Vec<u8>, <-- We don't do anything with this at the moment.
+}
+
+impl Zip64CentralDirectoryEnd
+{
+    /// Scan forward from `nominal_offset` up to and including
+    /// `search_upper_bound` for the ZIP64 end-of-central-directory signature
+    /// and parse the record found there.
+    ///
+    /// Returns the record together with the distance between where it was
+    /// found and `nominal_offset`, i.e. the amount of data prepended to the
+    /// archive.
+    pub fn find_and_parse<T: Read+io::Seek>(reader: &mut T,
+                                            nominal_offset: u64,
+                                            search_upper_bound: u64) -> ZipResult<(Zip64CentralDirectoryEnd, u64)>
+    {
+        let mut pos = nominal_offset;
+
+        while pos <= search_upper_bound
+        {
+            reader.seek(io::SeekFrom::Start(pos))?;
+
+            if reader.read_u32::<LittleEndian>()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE
+            {
+                let archive_offset = pos - nominal_offset;
+
+                let _record_size = try!(reader.read_u64::<LittleEndian>());
+                // We would use this value if we did anything with the "zip64 extensible data sector".
+
+                let version_made_by = try!(reader.read_u16::<LittleEndian>());
+                let version_needed_to_extract = try!(reader.read_u16::<LittleEndian>());
+                let disk_number = try!(reader.read_u32::<LittleEndian>());
+                let disk_with_central_directory = try!(reader.read_u32::<LittleEndian>());
+                let number_of_files_on_this_disk = try!(reader.read_u64::<LittleEndian>());
+                let number_of_files = try!(reader.read_u64::<LittleEndian>());
+                let central_directory_size = try!(reader.read_u64::<LittleEndian>());
+                let central_directory_offset = try!(reader.read_u64::<LittleEndian>());
+
+                return Ok((Zip64CentralDirectoryEnd
+                           {
+                               version_made_by: version_made_by,
+                               version_needed_to_extract: version_needed_to_extract,
+                               disk_number: disk_number,
+                               disk_with_central_directory: disk_with_central_directory,
+                               number_of_files_on_this_disk: number_of_files_on_this_disk,
+                               number_of_files: number_of_files,
+                               central_directory_size: central_directory_size,
+                               central_directory_offset: central_directory_offset,
+                           }, archive_offset));
+            }
+
+            pos += 1;
+        }
+
+        Err(ZipError::InvalidArchive("Could not find ZIP64 central directory end"))
+    }
+}
diff --git a/tests/data/zip64_demo.zip b/tests/data/zip64_demo.zip
new file mode 100644
index 00000000..f2ceee30
Binary files /dev/null and b/tests/data/zip64_demo.zip differ