Several updates to CP437 handling
This commit is contained in:
parent
1ce0b92605
commit
6cd6b752e3
2 changed files with 63 additions and 7 deletions
65
src/cp437.rs
65
src/cp437.rs
|
@ -1,8 +1,40 @@
|
|||
/// Conversion module to convert a String to IBM codepage 437
|
||||
//! Convert a string in IBM codepage 437 to UTF-8
|
||||
|
||||
pub fn to_string(input: &[u8]) -> String
|
||||
{
|
||||
input.iter().map(|c| to_char(*c)).collect()
|
||||
/// Trait to convert data encoded in IBM codepage 437 to the target type
|
||||
pub trait FromCp437 {
|
||||
/// Target type: the type returned by `from_cp437`
|
||||
type Target;
|
||||
|
||||
/// Function that does the conversion from cp437; it takes `self` by value.
|
||||
/// Generally, allocations will be avoided if all data falls into the ASCII range.
|
||||
fn from_cp437(self) -> Self::Target;
|
||||
}
|
||||
|
||||
impl<'a> FromCp437 for &'a [u8] {
|
||||
type Target = ::std::borrow::Cow<'a, str>;
|
||||
|
||||
fn from_cp437(self) -> Self::Target
|
||||
{
|
||||
if self.iter().all(|c| *c < 0x80) {
|
||||
::std::str::from_utf8(self).unwrap().into()
|
||||
}
|
||||
else {
|
||||
self.iter().map(|c| to_char(*c)).collect::<String>().into()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromCp437 for Vec<u8> {
|
||||
type Target = String;
|
||||
|
||||
fn from_cp437(self) -> Self::Target {
|
||||
if self.iter().all(|c| *c < 0x80) {
|
||||
String::from_utf8(self).unwrap()
|
||||
}
|
||||
else {
|
||||
self.into_iter().map(|c| to_char(c)).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn to_char(input: u8) -> char
|
||||
|
@ -138,7 +170,7 @@ fn to_char(input: u8) -> char
|
|||
0xfd => 0x00b2,
|
||||
0xfe => 0x25a0,
|
||||
0xff => 0x00a0,
|
||||
_ => 0xfffd,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
::std::char::from_u32(output).unwrap()
|
||||
}
|
||||
|
@ -154,4 +186,27 @@ mod test
|
|||
super::to_char(i as u8);
|
||||
}
|
||||
}
|
||||
|
||||
/// Each byte below 0x80 must map to the `char` with the same code point.
#[test]
fn ascii() {
    for byte in 0x00u8 .. 0x80 {
        let decoded = super::to_char(byte);
        let expected = byte as char;
        assert_eq!(decoded, expected);
    }
}
|
||||
|
||||
/// A borrowed slice holding the non-ASCII byte 0x87 is not valid UTF-8,
/// but decodes from cp437 to "Curaçao".
#[test]
fn example_slice() {
    use super::FromCp437;

    // "Cura", the cp437 byte 0x87, then "ao".
    let data: &[u8] = b"Cura\x87ao";

    assert!(::std::str::from_utf8(data).is_err());
    assert_eq!(data.from_cp437(), "Curaçao");
}
|
||||
|
||||
/// An owned vector of bytes that are all 0x80 or above is not valid UTF-8,
/// but decodes from cp437 to box-drawing characters.
#[test]
fn example_vec() {
    use super::FromCp437;

    let data: Vec<u8> = b"\xCC\xCD\xCD\xB9".to_vec();

    assert!(String::from_utf8(data.clone()).is_err());
    assert_eq!(&data.from_cp437(), "╠══╣");
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@ use bzip2::reader::BzDecompressor;
|
|||
use util;
|
||||
use podio::{ReadPodExt, LittleEndian};
|
||||
use types::ZipFileData;
|
||||
use cp437::FromCp437;
|
||||
|
||||
/// Wrapper for reading the contents of a ZIP file.
|
||||
///
|
||||
|
@ -201,12 +202,12 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R) -> ZipResult<Zip
|
|||
let file_name = match is_utf8
|
||||
{
|
||||
true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
|
||||
false => ::cp437::to_string(&*file_name_raw),
|
||||
false => file_name_raw.from_cp437(),
|
||||
};
|
||||
let file_comment = match is_utf8
|
||||
{
|
||||
true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
|
||||
false => ::cp437::to_string(&*file_comment_raw),
|
||||
false => file_comment_raw.from_cp437(),
|
||||
};
|
||||
|
||||
// Remember end of central header
|
||||
|
|
Loading…
Add table
Reference in a new issue