Several updates to CP437 handling

This commit is contained in:
Mathijs van de Nes 2015-06-18 11:45:25 +02:00
parent 1ce0b92605
commit 6cd6b752e3
2 changed files with 63 additions and 7 deletions

View file

@ -1,8 +1,40 @@
/// Conversion module to convert a String to IBM codepage 437
//! Convert a string in IBM codepage 437 to UTF-8
pub fn to_string(input: &[u8]) -> String
{
input.iter().map(|c| to_char(*c)).collect()
/// Trait to convert IBM codepage 437 to the target type
///
/// The conversion takes `self` by value; each implementor picks the
/// resulting text type through the associated `Target` type.
pub trait FromCp437 {
/// Target type
///
/// The type produced by `from_cp437` for this implementor.
type Target;
/// Function that does the conversion from cp437.
/// Generally allocations will be avoided if all data falls into the ASCII range.
fn from_cp437(self) -> Self::Target;
}
impl<'a> FromCp437 for &'a [u8] {
    /// Borrowed when the input can be reused as-is, owned when it had to be decoded.
    type Target = ::std::borrow::Cow<'a, str>;

    /// Decodes a CP437 byte slice into text.
    ///
    /// If any byte is `0x80` or above, every byte is mapped through `to_char`
    /// and collected into a freshly allocated `String`. Otherwise the slice is
    /// reinterpreted as `str` and returned as a borrow of the input.
    fn from_cp437(self) -> Self::Target {
        let has_high_byte = self.iter().any(|&byte| byte >= 0x80);

        if has_high_byte {
            let decoded: String = self.iter().map(|&byte| to_char(byte)).collect();
            ::std::borrow::Cow::Owned(decoded)
        } else {
            // Only bytes below 0x80 are present, so UTF-8 validation cannot fail.
            let text = ::std::str::from_utf8(self).unwrap();
            ::std::borrow::Cow::Borrowed(text)
        }
    }
}
impl FromCp437 for Vec<u8> {
type Target = String;
fn from_cp437(self) -> Self::Target {
if self.iter().all(|c| *c < 0x80) {
String::from_utf8(self).unwrap()
}
else {
self.into_iter().map(|c| to_char(c)).collect()
}
}
}
fn to_char(input: u8) -> char
@ -138,7 +170,7 @@ fn to_char(input: u8) -> char
0xfd => 0x00b2,
0xfe => 0x25a0,
0xff => 0x00a0,
_ => 0xfffd,
_ => unreachable!(),
};
::std::char::from_u32(output).unwrap()
}
@ -154,4 +186,27 @@ mod test
super::to_char(i as u8);
}
}
#[test]
fn ascii() {
    // Every byte below 0x80 must decode to the character with the same code point.
    for byte in 0x00u8..0x80 {
        let decoded = super::to_char(byte);
        assert_eq!(decoded, byte as char);
    }
}
#[test]
fn example_slice() {
    use super::FromCp437;

    // "Cura?ao" with 0x87 in place of the fifth letter; that byte makes the
    // input invalid as UTF-8.
    let data: &[u8] = b"Cura\x87ao";
    assert!(::std::str::from_utf8(data).is_err());

    let decoded = data.from_cp437();
    assert_eq!(decoded, "Curaçao");
}
#[test]
fn example_vec() {
    use super::FromCp437;

    // Four bytes at or above 0x80; together they are not valid UTF-8.
    let data: Vec<u8> = vec![0xCC, 0xCD, 0xCD, 0xB9];
    assert!(String::from_utf8(data.clone()).is_err());

    let decoded = data.from_cp437();
    assert_eq!(decoded, "╠══╣");
}
}

View file

@ -13,6 +13,7 @@ use bzip2::reader::BzDecompressor;
use util;
use podio::{ReadPodExt, LittleEndian};
use types::ZipFileData;
use cp437::FromCp437;
/// Wrapper for reading the contents of a ZIP file.
///
@ -201,12 +202,12 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R) -> ZipResult<Zip
let file_name = match is_utf8
{
true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
false => ::cp437::to_string(&*file_name_raw),
false => file_name_raw.from_cp437(),
};
let file_comment = match is_utf8
{
true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
false => ::cp437::to_string(&*file_comment_raw),
false => file_comment_raw.from_cp437(),
};
// Remember end of central header