From 38d16998539f96ea9641d6cab6fbf7fdb2d0b07e Mon Sep 17 00:00:00 2001 From: Mathijs van de Nes Date: Sat, 16 Jun 2018 14:14:34 +0200 Subject: [PATCH] Improve reading from non-seekable streams You can now repeatedly call a function to iterate over all files in a zip. This may give some suboptimal results, but is useful when dealing with an incoming data stream. --- examples/stdin_info.rs | 26 ++++++++ src/read.rs | 136 +++++++++++++++++++++++++++++++++-------- src/types.rs | 2 +- 3 files changed, 138 insertions(+), 26 deletions(-) create mode 100644 examples/stdin_info.rs diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs new file mode 100644 index 00000000..2ebe4a53 --- /dev/null +++ b/examples/stdin_info.rs @@ -0,0 +1,26 @@ +extern crate zip; + +use std::io; + +fn main() { + std::process::exit(real_main()); +} + +fn real_main() -> i32 { + let stdin = io::stdin(); + let mut stdin_handle = stdin.lock(); + + loop { + match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + Ok(None) => break, + Ok(Some(file)) => { + println!("{}: {} bytes ({} bytes packed)", file.name(), file.size(), file.compressed_size()); + }, + Err(e) => { + println!("Error encountered while reading zip: {:?}", e); + return 1; + }, + } + } + return 0; +} diff --git a/src/read.rs b/src/read.rs index e4309b3d..1f4a4eb1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -7,6 +7,7 @@ use result::{ZipResult, ZipError}; use std::io; use std::io::prelude::*; use std::collections::HashMap; +use std::borrow::Cow; use podio::{ReadPodExt, LittleEndian}; use types::{ZipFileData, System}; @@ -86,7 +87,7 @@ enum ZipFileReader<'a> { /// A struct for reading a zip file pub struct ZipFile<'a> { - data: &'a ZipFileData, + data: Cow<'a, ZipFileData>, reader: ZipFileReader<'a>, } @@ -285,7 +286,7 @@ impl ZipArchive try!(self.reader.seek(io::SeekFrom::Start(pos))); let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size); - Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: data }) + Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: Cow::Borrowed(data) }) } /// Unwrap and return the inner reader object @@ -389,7 +390,7 @@ fn central_header_to_zip_file(reader: &mut R, archive_offset: Ok(result) } -fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> +fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> { let mut reader = io::Cursor::new(data); @@ -399,7 +400,14 @@ fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> let len = try!(reader.read_u16::()); match kind { - _ => try!(reader.seek(io::SeekFrom::Current(len as i64))), + // Zip64 extended information extra field + 0x0001 => { + file.uncompressed_size = try!(reader.read_u64::()); + file.compressed_size = try!(reader.read_u64::()); + try!(reader.read_u64::()); // relative header offset + try!(reader.read_u32::()); // disk start number + }, + _ => { try!(reader.seek(io::SeekFrom::Current(len as i64))); }, }; } Ok(()) @@ -459,6 +467,10 @@ impl<'a> ZipFile<'a> { } /// Get unix mode for the file pub fn unix_mode(&self) -> Option { + if self.data.external_attributes == 0 { + return None; + } + match self.data.system { System::Unix => { Some(self.data.external_attributes >> 16) @@ -496,33 +508,54 @@ impl<'a> Read for ZipFile<'a> { } } -pub struct ZipEntry<'a> { - pub name: String, - pub modified: ::time::Tm, - reader: ZipFileReader<'a>, -} - -impl<'a> ZipEntry<'a> { - pub fn get_reader(&mut self) -> &mut Read { - get_reader(&mut self.reader) +impl<'a> Drop for ZipFile<'a> { + fn drop(&mut self) { + // self.data is Owned, this reader is constructed by a streaming reader. + // In this case, we want to exhaust the reader so that the next file is accessible. + if let Cow::Owned(_) = self.data { + let mut buffer = [0; 1<<16]; + let reader = get_reader(&mut self.reader); + loop { + match reader.read(&mut buffer) { + Ok(0) => break, + Ok(_) => (), + Err(e) => panic!("Could not consume all of the output of the current ZipFile: {:?}", e), + } + } + } } } -pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult> { +/// Read ZipFile structures from a non-seekable reader. +/// +/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions +/// as some information will be missing when reading this manner. +/// +/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is +/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory +/// is encountered. No more files should be read after this. +/// +/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after +/// the structure is done. +/// +/// Missing fields are: +/// * `comment`: set to an empty string +/// * `data_start`: set to 0 +/// * `external_attributes`: `unix_mode()`: will return None +pub fn read_zipfile_from_stream<'a, R: io::Read>(reader: &'a mut R) -> ZipResult> { let signature = try!(reader.read_u32::()); - if signature != spec::LOCAL_FILE_HEADER_SIGNATURE - { - return if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - Err(ZipError::InvalidArchive("Invalid local file header")) - } else { - Ok(None) - }; + + match signature { + spec::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), + _ => return Err(ZipError::InvalidArchive("Invalid local file header")), } let version_made_by = try!(reader.read_u16::()); let flags = try!(reader.read_u16::()); let encrypted = flags & 1 == 1; let is_utf8 = flags & (1 << 11) != 0; + let using_data_descriptor = flags & (1 << 3) != 0; let compression_method = CompressionMethod::from_u16(try!(reader.read_u16::())); let last_mod_time = try!(reader.read_u16::()); let last_mod_date = try!(reader.read_u16::()); @@ -541,11 +574,48 @@ pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult file_name_raw.clone().from_cp437(), }; - let limit_reader = (reader as &'a mut io::Read).take(compressed_size as u64); + let mut result = ZipFileData + { + system: System::from_u8((version_made_by >> 8) as u8), + version_made_by: version_made_by as u8, + encrypted: encrypted, + compression_method: compression_method, + last_modified_time: ::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date)).unwrap_or(TM_1980_01_01), + crc32: crc32, + compressed_size: compressed_size as u64, + uncompressed_size: uncompressed_size as u64, + file_name: file_name, + file_name_raw: file_name_raw, + file_comment: String::new(), // file comment is only available in the central directory + // header_start and data start are not available, but also don't matter, since seeking is + // not available. + header_start: 0, + data_start: 0, + // The external_attributes field is only available in the central directory. + // We set this to zero, which should be valid as the docs state 'If input came + // from standard input, this field is set to zero.' + external_attributes: 0, + }; - Ok(Some(ZipEntry { name: file_name, - modified: try!(::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date))), - reader: try!(make_reader(compression_method, crc32, limit_reader)) + match parse_extra_field(&mut result, &extra_field) { + Ok(..) | Err(ZipError::Io(..)) => {}, + Err(e) => try!(Err(e)), + } + + if encrypted { + return unsupported_zip_error("Encrypted files are not supported") + } + if using_data_descriptor { + return unsupported_zip_error("The file length is not available in the local header"); + } + + let limit_reader = (reader as &'a mut io::Read).take(result.compressed_size as u64); + + let result_crc32 = result.crc32; + let result_compression_method = result.compression_method; + Ok(Some(ZipFile { + data: Cow::Owned(result), + reader: try!(make_reader(result_compression_method, result_crc32, limit_reader)) })) } @@ -583,4 +653,20 @@ mod test { let reader = ZipArchive::new(io::Cursor::new(v)).unwrap(); assert!(reader.comment == b"zip-rs"); } + + #[test] + fn zip_read_streaming() { + use std::io; + use super::read_zipfile_from_stream; + + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); + let mut reader = io::Cursor::new(v); + loop { + match read_zipfile_from_stream(&mut reader).unwrap() { + None => break, + _ => (), + } + } + } } diff --git a/src/types.rs b/src/types.rs index a3596ef7..c6b518d5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -28,7 +28,7 @@ impl System { pub const DEFAULT_VERSION: u8 = 46; /// Structure representing a ZIP file. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ZipFileData { /// Compatibility of the file attribute information