Improve reading from non-seekable streams
You can now repeatedly call a function to iterate over all files in a zip. This may give some suboptimal results, but is useful when dealing with an incoming data stream.
This commit is contained in:
parent
5b17e07086
commit
38d1699853
3 changed files with 138 additions and 26 deletions
26
examples/stdin_info.rs
Normal file
26
examples/stdin_info.rs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
extern crate zip;
|
||||||
|
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
std::process::exit(real_main());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn real_main() -> i32 {
|
||||||
|
let stdin = io::stdin();
|
||||||
|
let mut stdin_handle = stdin.lock();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match zip::read::read_zipfile_from_stream(&mut stdin_handle) {
|
||||||
|
Ok(None) => break,
|
||||||
|
Ok(Some(file)) => {
|
||||||
|
println!("{}: {} bytes ({} bytes packed)", file.name(), file.size(), file.compressed_size());
|
||||||
|
},
|
||||||
|
Err(e) => {
|
||||||
|
println!("Error encountered while reading zip: {:?}", e);
|
||||||
|
return 1;
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
136
src/read.rs
136
src/read.rs
|
@ -7,6 +7,7 @@ use result::{ZipResult, ZipError};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use podio::{ReadPodExt, LittleEndian};
|
use podio::{ReadPodExt, LittleEndian};
|
||||||
use types::{ZipFileData, System};
|
use types::{ZipFileData, System};
|
||||||
|
@ -86,7 +87,7 @@ enum ZipFileReader<'a> {
|
||||||
|
|
||||||
/// A struct for reading a zip file
|
/// A struct for reading a zip file
|
||||||
pub struct ZipFile<'a> {
|
pub struct ZipFile<'a> {
|
||||||
data: &'a ZipFileData,
|
data: Cow<'a, ZipFileData>,
|
||||||
reader: ZipFileReader<'a>,
|
reader: ZipFileReader<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -285,7 +286,7 @@ impl<R: Read+io::Seek> ZipArchive<R>
|
||||||
try!(self.reader.seek(io::SeekFrom::Start(pos)));
|
try!(self.reader.seek(io::SeekFrom::Start(pos)));
|
||||||
let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size);
|
let limit_reader = (self.reader.by_ref() as &mut Read).take(data.compressed_size);
|
||||||
|
|
||||||
Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: data })
|
Ok(ZipFile { reader: try!(make_reader(data.compression_method, data.crc32, limit_reader)), data: Cow::Borrowed(data) })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unwrap and return the inner reader object
|
/// Unwrap and return the inner reader object
|
||||||
|
@ -389,7 +390,7 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R, archive_offset:
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
|
fn parse_extra_field(file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
|
||||||
{
|
{
|
||||||
let mut reader = io::Cursor::new(data);
|
let mut reader = io::Cursor::new(data);
|
||||||
|
|
||||||
|
@ -399,7 +400,14 @@ fn parse_extra_field(_file: &mut ZipFileData, data: &[u8]) -> ZipResult<()>
|
||||||
let len = try!(reader.read_u16::<LittleEndian>());
|
let len = try!(reader.read_u16::<LittleEndian>());
|
||||||
match kind
|
match kind
|
||||||
{
|
{
|
||||||
_ => try!(reader.seek(io::SeekFrom::Current(len as i64))),
|
// Zip64 extended information extra field
|
||||||
|
0x0001 => {
|
||||||
|
file.uncompressed_size = try!(reader.read_u64::<LittleEndian>());
|
||||||
|
file.compressed_size = try!(reader.read_u64::<LittleEndian>());
|
||||||
|
try!(reader.read_u64::<LittleEndian>()); // relative header offset
|
||||||
|
try!(reader.read_u32::<LittleEndian>()); // disk start number
|
||||||
|
},
|
||||||
|
_ => { try!(reader.seek(io::SeekFrom::Current(len as i64))); },
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -459,6 +467,10 @@ impl<'a> ZipFile<'a> {
|
||||||
}
|
}
|
||||||
/// Get unix mode for the file
|
/// Get unix mode for the file
|
||||||
pub fn unix_mode(&self) -> Option<u32> {
|
pub fn unix_mode(&self) -> Option<u32> {
|
||||||
|
if self.data.external_attributes == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
match self.data.system {
|
match self.data.system {
|
||||||
System::Unix => {
|
System::Unix => {
|
||||||
Some(self.data.external_attributes >> 16)
|
Some(self.data.external_attributes >> 16)
|
||||||
|
@ -496,33 +508,54 @@ impl<'a> Read for ZipFile<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ZipEntry<'a> {
|
impl<'a> Drop for ZipFile<'a> {
|
||||||
pub name: String,
|
fn drop(&mut self) {
|
||||||
pub modified: ::time::Tm,
|
// self.data is Owned, this reader is constructed by a streaming reader.
|
||||||
reader: ZipFileReader<'a>,
|
// In this case, we want to exhaust the reader so that the next file is accessible.
|
||||||
}
|
if let Cow::Owned(_) = self.data {
|
||||||
|
let mut buffer = [0; 1<<16];
|
||||||
impl<'a> ZipEntry<'a> {
|
let reader = get_reader(&mut self.reader);
|
||||||
pub fn get_reader(&mut self) -> &mut Read {
|
loop {
|
||||||
get_reader(&mut self.reader)
|
match reader.read(&mut buffer) {
|
||||||
|
Ok(0) => break,
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => panic!("Could not consume all of the output of the current ZipFile: {:?}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipEntry>> {
|
/// Read ZipFile structures from a non-seekable reader.
|
||||||
|
///
|
||||||
|
/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions
|
||||||
|
/// as some information will be missing when reading this manner.
|
||||||
|
///
|
||||||
|
/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is
|
||||||
|
/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory
|
||||||
|
/// is encountered. No more files should be read after this.
|
||||||
|
///
|
||||||
|
/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after
|
||||||
|
/// the structure is done.
|
||||||
|
///
|
||||||
|
/// Missing fields are:
|
||||||
|
/// * `comment`: set to an empty string
|
||||||
|
/// * `data_start`: set to 0
|
||||||
|
/// * `external_attributes`: `unix_mode()`: will return None
|
||||||
|
pub fn read_zipfile_from_stream<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipFile>> {
|
||||||
let signature = try!(reader.read_u32::<LittleEndian>());
|
let signature = try!(reader.read_u32::<LittleEndian>());
|
||||||
if signature != spec::LOCAL_FILE_HEADER_SIGNATURE
|
|
||||||
{
|
match signature {
|
||||||
return if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE {
|
spec::LOCAL_FILE_HEADER_SIGNATURE => (),
|
||||||
Err(ZipError::InvalidArchive("Invalid local file header"))
|
spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None),
|
||||||
} else {
|
_ => return Err(ZipError::InvalidArchive("Invalid local file header")),
|
||||||
Ok(None)
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let version_made_by = try!(reader.read_u16::<LittleEndian>());
|
let version_made_by = try!(reader.read_u16::<LittleEndian>());
|
||||||
let flags = try!(reader.read_u16::<LittleEndian>());
|
let flags = try!(reader.read_u16::<LittleEndian>());
|
||||||
let encrypted = flags & 1 == 1;
|
let encrypted = flags & 1 == 1;
|
||||||
let is_utf8 = flags & (1 << 11) != 0;
|
let is_utf8 = flags & (1 << 11) != 0;
|
||||||
|
let using_data_descriptor = flags & (1 << 3) != 0;
|
||||||
let compression_method = CompressionMethod::from_u16(try!(reader.read_u16::<LittleEndian>()));
|
let compression_method = CompressionMethod::from_u16(try!(reader.read_u16::<LittleEndian>()));
|
||||||
let last_mod_time = try!(reader.read_u16::<LittleEndian>());
|
let last_mod_time = try!(reader.read_u16::<LittleEndian>());
|
||||||
let last_mod_date = try!(reader.read_u16::<LittleEndian>());
|
let last_mod_date = try!(reader.read_u16::<LittleEndian>());
|
||||||
|
@ -541,11 +574,48 @@ pub fn read_single<'a, R: io::Read>(reader: &'a mut R) -> ZipResult<Option<ZipEn
|
||||||
false => file_name_raw.clone().from_cp437(),
|
false => file_name_raw.clone().from_cp437(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let limit_reader = (reader as &'a mut io::Read).take(compressed_size as u64);
|
let mut result = ZipFileData
|
||||||
|
{
|
||||||
|
system: System::from_u8((version_made_by >> 8) as u8),
|
||||||
|
version_made_by: version_made_by as u8,
|
||||||
|
encrypted: encrypted,
|
||||||
|
compression_method: compression_method,
|
||||||
|
last_modified_time: ::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date)).unwrap_or(TM_1980_01_01),
|
||||||
|
crc32: crc32,
|
||||||
|
compressed_size: compressed_size as u64,
|
||||||
|
uncompressed_size: uncompressed_size as u64,
|
||||||
|
file_name: file_name,
|
||||||
|
file_name_raw: file_name_raw,
|
||||||
|
file_comment: String::new(), // file comment is only available in the central directory
|
||||||
|
// header_start and data start are not available, but also don't matter, since seeking is
|
||||||
|
// not available.
|
||||||
|
header_start: 0,
|
||||||
|
data_start: 0,
|
||||||
|
// The external_attributes field is only available in the central directory.
|
||||||
|
// We set this to zero, which should be valid as the docs state 'If input came
|
||||||
|
// from standard input, this field is set to zero.'
|
||||||
|
external_attributes: 0,
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Some(ZipEntry { name: file_name,
|
match parse_extra_field(&mut result, &extra_field) {
|
||||||
modified: try!(::time::Tm::from_msdos(MsDosDateTime::new(last_mod_time, last_mod_date))),
|
Ok(..) | Err(ZipError::Io(..)) => {},
|
||||||
reader: try!(make_reader(compression_method, crc32, limit_reader))
|
Err(e) => try!(Err(e)),
|
||||||
|
}
|
||||||
|
|
||||||
|
if encrypted {
|
||||||
|
return unsupported_zip_error("Encrypted files are not supported")
|
||||||
|
}
|
||||||
|
if using_data_descriptor {
|
||||||
|
return unsupported_zip_error("The file length is not available in the local header");
|
||||||
|
}
|
||||||
|
|
||||||
|
let limit_reader = (reader as &'a mut io::Read).take(result.compressed_size as u64);
|
||||||
|
|
||||||
|
let result_crc32 = result.crc32;
|
||||||
|
let result_compression_method = result.compression_method;
|
||||||
|
Ok(Some(ZipFile {
|
||||||
|
data: Cow::Owned(result),
|
||||||
|
reader: try!(make_reader(result_compression_method, result_crc32, limit_reader))
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,4 +653,20 @@ mod test {
|
||||||
let reader = ZipArchive::new(io::Cursor::new(v)).unwrap();
|
let reader = ZipArchive::new(io::Cursor::new(v)).unwrap();
|
||||||
assert!(reader.comment == b"zip-rs");
|
assert!(reader.comment == b"zip-rs");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn zip_read_streaming() {
|
||||||
|
use std::io;
|
||||||
|
use super::read_zipfile_from_stream;
|
||||||
|
|
||||||
|
let mut v = Vec::new();
|
||||||
|
v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip"));
|
||||||
|
let mut reader = io::Cursor::new(v);
|
||||||
|
loop {
|
||||||
|
match read_zipfile_from_stream(&mut reader).unwrap() {
|
||||||
|
None => break,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ impl System {
|
||||||
pub const DEFAULT_VERSION: u8 = 46;
|
pub const DEFAULT_VERSION: u8 = 46;
|
||||||
|
|
||||||
/// Structure representing a ZIP file.
|
/// Structure representing a ZIP file.
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct ZipFileData
|
pub struct ZipFileData
|
||||||
{
|
{
|
||||||
/// Compatibility of the file attribute information
|
/// Compatibility of the file attribute information
|
||||||
|
|
Loading…
Add table
Reference in a new issue