Several updates to CP437 handling
This commit is contained in:
parent
1ce0b92605
commit
6cd6b752e3
2 changed files with 63 additions and 7 deletions
65
src/cp437.rs
65
src/cp437.rs
|
@ -1,8 +1,40 @@
|
||||||
/// Conversion module to convert a String to IBM codepage 437
|
//! Convert a string in IBM codepage 437 to UTF-8
|
||||||
|
|
||||||
pub fn to_string(input: &[u8]) -> String
|
/// Trait to convert IBM codepage 437 to the target type
|
||||||
{
|
pub trait FromCp437 {
|
||||||
input.iter().map(|c| to_char(*c)).collect()
|
/// Target type
|
||||||
|
type Target;
|
||||||
|
|
||||||
|
/// Function that does the conversion from cp437.
|
||||||
|
/// Generally, allocations will be avoided if all data falls into the ASCII range.
|
||||||
|
fn from_cp437(self) -> Self::Target;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FromCp437 for &'a [u8] {
|
||||||
|
type Target = ::std::borrow::Cow<'a, str>;
|
||||||
|
|
||||||
|
fn from_cp437(self) -> Self::Target
|
||||||
|
{
|
||||||
|
if self.iter().all(|c| *c < 0x80) {
|
||||||
|
::std::str::from_utf8(self).unwrap().into()
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self.iter().map(|c| to_char(*c)).collect::<String>().into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromCp437 for Vec<u8> {
|
||||||
|
type Target = String;
|
||||||
|
|
||||||
|
fn from_cp437(self) -> Self::Target {
|
||||||
|
if self.iter().all(|c| *c < 0x80) {
|
||||||
|
String::from_utf8(self).unwrap()
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
self.into_iter().map(|c| to_char(c)).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_char(input: u8) -> char
|
fn to_char(input: u8) -> char
|
||||||
|
@ -138,7 +170,7 @@ fn to_char(input: u8) -> char
|
||||||
0xfd => 0x00b2,
|
0xfd => 0x00b2,
|
||||||
0xfe => 0x25a0,
|
0xfe => 0x25a0,
|
||||||
0xff => 0x00a0,
|
0xff => 0x00a0,
|
||||||
_ => 0xfffd,
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
::std::char::from_u32(output).unwrap()
|
::std::char::from_u32(output).unwrap()
|
||||||
}
|
}
|
||||||
|
@ -154,4 +186,27 @@ mod test
|
||||||
super::to_char(i as u8);
|
super::to_char(i as u8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn ascii() {
|
||||||
|
for i in 0x00 .. 0x80 {
|
||||||
|
assert_eq!(super::to_char(i), i as char);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn example_slice() {
|
||||||
|
use super::FromCp437;
|
||||||
|
let data : &[u8] = &[0x43, 0x75, 0x72, 0x61, 0x87, 0x61, 0x6F];
|
||||||
|
assert!(::std::str::from_utf8(data).is_err());
|
||||||
|
assert_eq!(data.from_cp437(), "Curaçao");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn example_vec() {
|
||||||
|
use super::FromCp437;
|
||||||
|
let data = vec![0xCC, 0xCD, 0xCD, 0xB9];
|
||||||
|
assert!(String::from_utf8(data.clone()).is_err());
|
||||||
|
assert_eq!(&data.from_cp437(), "╠══╣");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,7 @@ use bzip2::reader::BzDecompressor;
|
||||||
use util;
|
use util;
|
||||||
use podio::{ReadPodExt, LittleEndian};
|
use podio::{ReadPodExt, LittleEndian};
|
||||||
use types::ZipFileData;
|
use types::ZipFileData;
|
||||||
|
use cp437::FromCp437;
|
||||||
|
|
||||||
/// Wrapper for reading the contents of a ZIP file.
|
/// Wrapper for reading the contents of a ZIP file.
|
||||||
///
|
///
|
||||||
|
@ -201,12 +202,12 @@ fn central_header_to_zip_file<R: Read+io::Seek>(reader: &mut R) -> ZipResult<Zip
|
||||||
let file_name = match is_utf8
|
let file_name = match is_utf8
|
||||||
{
|
{
|
||||||
true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
|
true => String::from_utf8_lossy(&*file_name_raw).into_owned(),
|
||||||
false => ::cp437::to_string(&*file_name_raw),
|
false => file_name_raw.from_cp437(),
|
||||||
};
|
};
|
||||||
let file_comment = match is_utf8
|
let file_comment = match is_utf8
|
||||||
{
|
{
|
||||||
true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
|
true => String::from_utf8_lossy(&*file_comment_raw).into_owned(),
|
||||||
false => ::cp437::to_string(&*file_comment_raw),
|
false => file_comment_raw.from_cp437(),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Remember end of central header
|
// Remember end of central header
|
||||||
|
|
Loading…
Add table
Reference in a new issue