Improve reading from non-seekable streams

You can now repeatedly call a function to iterate over all files in a
zip. The results may be incomplete, since some metadata is only stored in
the central directory, but this is useful when dealing with an incoming
data stream.
This commit is contained in:
Mathijs van de Nes 2018-06-16 14:14:34 +02:00
parent 5b17e07086
commit 38d1699853
3 changed files with 138 additions and 26 deletions

26
examples/stdin_info.rs Normal file
View file

@ -0,0 +1,26 @@
extern crate zip;
use std::io;
/// Entry point: runs `real_main` and exits the process with the status code it returns.
fn main() {
    let exit_code = real_main();
    std::process::exit(exit_code);
}
/// Prints one summary line per zip entry read from standard input.
///
/// Entries are pulled one at a time with `zip::read::read_zipfile_from_stream`.
/// Returns 0 once the reader reports no further entries (`Ok(None)`), or 1 after
/// printing the error if reading an entry fails.
fn real_main() -> i32 {
    let stdin = io::stdin();
    let mut input = stdin.lock();

    loop {
        // Either obtain the next entry or leave the function with the final exit code.
        let entry = match zip::read::read_zipfile_from_stream(&mut input) {
            Ok(Some(entry)) => entry,
            Ok(None) => return 0,
            Err(err) => {
                println!("Error encountered while reading zip: {:?}", err);
                return 1;
            },
        };

        println!("{}: {} bytes ({} bytes packed)", entry.name(), entry.size(), entry.compressed_size());
    }
}

View file

@ -7,6 +7,7 @@ use result::{ZipResult, ZipError};
use std::io; use std::io;
use std::io::prelude::*; use std::io::prelude::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::borrow::Cow;
use podio::{ReadPodExt, LittleEndian}; use podio::{ReadPodExt, LittleEndian};
use types::{ZipFileData, System}; use types::{ZipFileData, System};
@ -86,7 +87,7 @@ enum ZipFileReader<'a> {
/// A struct for reading a zip file /// A struct for reading a zip file
pub struct ZipFile<'a> { pub struct ZipFile<'a> {
data: &'a ZipFileData, data: Cow<'a, ZipFileData>,
reader: ZipFileReader<'a>, reader: ZipFileReader<'a>,
} }
@ -285,7 +286,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
try!(self.reader.seek(io::SeekFrom::Start(pos))); try!(self.reader.seek(io::SeekFrom::Start(pos)));
let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size); let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size);
Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: data }) Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: Cow::Borrowed(data) })
} }
/// Unwrap and return the inner reader object /// Unwrap and return the inner reader object
@ -389,7 +390,7 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset:
Ok(result) Ok(result)
} }
fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()> fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
{ {
let mut reader = io::Cursor::new(data); let mut reader = io::Cursor::new(data);
@ -399,7 +400,14 @@ fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
let len = try!(reader.read_u16::<LittleEndian>()); let len = try!(reader.read_u16::<LittleEndian>());
match kind match kind
{ {
_ => try!(reader.seek(io::SeekFrom::Current(len as i64))), // Zip64 extended information extra field
0x0001 => {
file.uncompressed_size = try!(reader.read_u64::<LittleEndian>());
file.compressed_size = try!(reader.read_u64::<LittleEndian>());
try!(reader.read_u64::<LittleEndian>()); // relative header offset
try!(reader.read_u32::<LittleEndian>()); // disk start number
},
_ => { try!(reader.seek(io::SeekFrom::Current(len as i64))); },
}; };
} }
Ok(()) Ok(())
@ -459,6 +467,10 @@ impl<'a> ZipFile<'a> {
} }
/// Get unix mode for the file /// Get unix mode for the file
pub fn unix_mode(&self) -> Option<u32> { pub fn unix_mode(&self) -> Option<u32> {
if self.data.external_attributes == 0 {
return None;
}
match self.data.system { match self.data.system {
System::Unix => { System::Unix => {
Some(self.data.external_attributes >> 16) Some(self.data.external_attributes >> 16)
@ -496,33 +508,54 @@ impl<'a> Read for ZipFile<'a> {
} }
} }
pub struct ZipEntry<'a> { impl<'a> Drop for ZipFile<'a> {
pub name: String, fn drop(&mut self) {
pub modified: ::time::Tm, // self.data is Owned, this reader is constructed by a streaming reader.
reader: ZipFileReader<'a>, // In this case, we want to exhaust the reader so that the next file is accessible.
} if let Cow::Owned(_) = self.data {
let mut buffer = [0; 1<<16];
impl<'a> ZipEntry<'a> { let reader = get_reader(&mut self.reader);
pub fn get_reader(&mut self) -> &mut Read { loop {
get_reader(&mut self.reader) match reader.read(&mut buffer) {
Ok(0) => break,
Ok(_) => (),
Err(e) => panic!("Could not consume all of the output of the current ZipFile: {:?}", e),
}
}
}
} }
} }
pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipEntry>> { /// Read ZipFile structures from a non-seekable reader.
///
/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
/// instead, as some information will be missing when reading in this manner.
///
/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
/// is encountered. No more files should be read after this.
///
/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
/// the structure is done.
///
/// Missing fields are:
/// * `comment`: set to an empty string
/// * `data_start`: set to 0
/// * `external_attributes`: set to 0, so `unix_mode()` will return None
pub fn read_zipfile_from_stream<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipFile>> {
let signature = try!(reader.read_u32::<LittleEndian>()); let signature = try!(reader.read_u32::<LittleEndian>());
if signature != spec::LOCAL_FILE_HEADER_SIGNATURE
{ match signature {
return if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { spec::LOCAL_FILE_HEADER_SIGNATURE => (),
Err(ZipError::InvalidArchive("Invalid local file header")) spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
} else { _ => return Err(ZipError::InvalidArchive("Invalid local file header")),
Ok(None)
};
} }
let version_made_by = try!(reader.read_u16::<LittleEndian>()); let version_made_by = try!(reader.read_u16::<LittleEndian>());
let flags = try!(reader.read_u16::<LittleEndian>()); let flags = try!(reader.read_u16::<LittleEndian>());
let encrypted = flags & 1 == 1; let encrypted = flags & 1 == 1;
let is_utf8 = flags & (1 << 11) != 0; let is_utf8 = flags & (1 << 11) != 0;
let using_data_descriptor = flags & (1 << 3) != 0;
let compression_method = CompressionMethod::from_u16(try!(reader.read_u16::<LittleEndian>())); let compression_method = CompressionMethod::from_u16(try!(reader.read_u16::<LittleEndian>()));
let last_mod_time = try!(reader.read_u16::<LittleEndian>()); let last_mod_time = try!(reader.read_u16::<LittleEndian>());
let last_mod_date = try!(reader.read_u16::<LittleEndian>()); let last_mod_date = try!(reader.read_u16::<LittleEndian>());
@ -541,11 +574,48 @@ pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipEn
false => file_name_raw.clone().from_cp437(), false => file_name_raw.clone().from_cp437(),
}; };
let limit_reader = (reader as &'a mut io::Read).take(compressed_size as u64); let mut result = ZipFileData
{
system: System::from_u8((version_made_by >> 8) as u8),
version_made_by: version_made_by as u8,
encrypted: encrypted,
compression_method: compression_method,
last_modified_time: ::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date)).unwrap_or(TM_1980_01_01),
crc32: crc32,
compressed_size: compressed_size as u64,
uncompressed_size: uncompressed_size as u64,
file_name: file_name,
file_name_raw: file_name_raw,
file_comment: String::new(), // file comment is only available in the central directory
// header_start and data_start are not available, but also don't matter, since seeking is
// not available.
header_start: 0,
data_start: 0,
// The external_attributes field is only available in the central directory.
// We set this to zero, which should be valid as the docs state 'If input came
// from standard input, this field is set to zero.'
external_attributes: 0,
};
Ok(Some(ZipEntry { name: file_name, match parse_extra_field(&mut result, &extra_field) {
modified: try!(::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date))), Ok(..) | Err(ZipError::Io(..)) => {},
reader: try!(make_reader(compression_method, crc32, limit_reader)) Err(e) => try!(Err(e)),
}
if encrypted {
return unsupported_zip_error("Encrypted files are not supported")
}
if using_data_descriptor {
return unsupported_zip_error("The file length is not available in the local header");
}
let limit_reader = (reader as &'a mut io::Read).take(result.compressed_size as u64);
let result_crc32 = result.crc32;
let result_compression_method = result.compression_method;
Ok(Some(ZipFile {
data: Cow::Owned(result),
reader: try!(make_reader(result_compression_method, result_crc32, limit_reader))
})) }))
} }
@ -583,4 +653,20 @@ mod test {
let reader = ZipArchive::new(io::Cursor::new(v)).unwrap(); let reader = ZipArchive::new(io::Cursor::new(v)).unwrap();
assert!(reader.comment == b"zip-rs"); assert!(reader.comment == b"zip-rs");
} }
#[test]
fn zip_read_streaming() {
    use std::io;
    use super::read_zipfile_from_stream;

    // Wrap the bundled archive in a cursor and keep pulling entries until the
    // streaming reader reports that there are none left; any error panics via unwrap.
    let archive = include_bytes!("../tests/data/mimetype.zip").to_vec();
    let mut reader = io::Cursor::new(archive);
    while read_zipfile_from_stream(&mut reader).unwrap().is_some() {}
}
} }

View file

@ -28,7 +28,7 @@ impl System {
pub const DEFAULT_VERSION: u8 = 46; pub const DEFAULT_VERSION: u8 = 46;
/// Structure representing a ZIP file. /// Structure representing a ZIP file.
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct ZipFileData pub struct ZipFileData
{ {
/// Compatibility of the file attribute information /// Compatibility of the file attribute information