diff --git a/src/cp437.rs b/src/cp437.rs index 4f4fc663..e9ed6ebc 100644 --- a/src/cp437.rs +++ b/src/cp437.rs @@ -1,8 +1,40 @@ -/// Conversion module to convert a String to IBM codepage 437 +//! Convert a string in IBM codepage 437 to UTF-8 -pub fn to_string(input: &[u8]) -> String -{ - input.iter().map(|c| to_char(*c)).collect() +/// Trait to convert IBM codepage 437 to the target type +pub trait FromCp437 { + /// Target type + type Target; + + /// Function that does the conversion from cp437. + /// Generally allocations will be avoided if all data falls into the ASCII range. + fn from_cp437(self) -> Self::Target; +} + +impl<'a> FromCp437 for &'a [u8] { + type Target = ::std::borrow::Cow<'a, str>; + + fn from_cp437(self) -> Self::Target + { + if self.iter().all(|c| *c < 0x80) { + ::std::str::from_utf8(self).unwrap().into() + } + else { + self.iter().map(|c| to_char(*c)).collect::().into() + } + } +} + +impl FromCp437 for Vec { + type Target = String; + + fn from_cp437(self) -> Self::Target { + if self.iter().all(|c| *c < 0x80) { + String::from_utf8(self).unwrap() + } + else { + self.into_iter().map(|c| to_char(c)).collect() + } + } } fn to_char(input: u8) -> char @@ -138,7 +170,7 @@ fn to_char(input: u8) -> char 0xfd => 0x00b2, 0xfe => 0x25a0, 0xff => 0x00a0, - _ => 0xfffd, + _ => unreachable!(), }; ::std::char::from_u32(output).unwrap() } @@ -154,4 +186,27 @@ mod test super::to_char(i as u8); } } + + #[test] + fn ascii() { + for i in 0x00 .. 
0x80 { + assert_eq!(super::to_char(i), i as char); + } + } + + #[test] + fn example_slice() { + use super::FromCp437; + let data : &[u8] = &[0x43, 0x75, 0x72, 0x61, 0x87, 0x61, 0x6F]; + assert!(::std::str::from_utf8(data).is_err()); + assert_eq!(data.from_cp437(), "Curaçao"); + } + + #[test] + fn example_vec() { + use super::FromCp437; + let data = vec![0xCC, 0xCD, 0xCD, 0xB9]; + assert!(String::from_utf8(data.clone()).is_err()); + assert_eq!(&data.from_cp437(), "╠══╣"); + } } diff --git a/src/read.rs b/src/read.rs index 7288902d..5f9c5a96 100644 --- a/src/read.rs +++ b/src/read.rs @@ -13,6 +13,7 @@ use bzip2::reader::BzDecompressor; use util; use podio::{ReadPodExt, LittleEndian}; use types::ZipFileData; +use cp437::FromCp437; /// Wrapper for reading the contents of a ZIP file. /// @@ -201,12 +202,12 @@ fn central_header_to_zip_file(reader: &mut R) -> ZipResult String::from_utf8_lossy(&*file_name_raw).into_owned(), - false => ::cp437::to_string(&*file_name_raw), + false => file_name_raw.from_cp437(), }; let file_comment = match is_utf8 { true => String::from_utf8_lossy(&*file_comment_raw).into_owned(), - false => ::cp437::to_string(&*file_comment_raw), + false => file_comment_raw.from_cp437(), }; // Remember end of central header