zip-rs-wasm/src/cp437.rs
Jan Starke 0321c05557
fix some clippy warnings
fix another clippy complaint

add support for extended timestamp

support missing timestamps in the extended timestamps field

handle inconsistencies between flags and len

handle len_left

add getter

Update README.md to state that the crate has moved

add support for extended timestamp

handle inconsistencies between flags and len

handle len_left

add getter

Update README.md to state that the crate has moved
2024-05-02 09:18:18 +02:00

207 lines
5 KiB
Rust

//! Convert a string in IBM codepage 437 to UTF-8
/// Conversion of data encoded in IBM codepage 437 into a Unicode text type.
pub trait FromCp437 {
    /// The text type produced by the conversion.
    type Target;

    /// Consumes `self`, interprets it as cp437 and returns the converted text.
    ///
    /// Implementations generally avoid allocating when every byte lies in the ASCII range.
    // A `from_*` method normally takes no `self`; consuming the receiver is
    // deliberate here, hence the lint exemption.
    #[allow(clippy::wrong_self_convention)]
    fn from_cp437(self) -> Self::Target;
}
impl<'a> FromCp437 for &'a [u8] {
type Target = ::std::borrow::Cow<'a, str>;
fn from_cp437(self) -> Self::Target {
if self.iter().all(|c| *c < 0x80) {
::std::str::from_utf8(self).unwrap().into()
} else {
self.iter().map(|c| to_char(*c)).collect::<String>().into()
}
}
}
impl FromCp437 for Box<[u8]> {
type Target = Box<str>;
fn from_cp437(self) -> Self::Target {
if self.iter().all(|c| *c < 0x80) {
String::from_utf8(self.into()).unwrap()
} else {
self.iter().copied().map(to_char).collect()
}
.into_boxed_str()
}
}
/// Maps a single cp437 byte to the Unicode character it represents.
///
/// Bytes `0x00..=0x7f` map to the code point with the same value; bytes
/// `0x80..=0xff` are looked up in a fixed 128-entry table.
fn to_char(input: u8) -> char {
    /// Unicode equivalents of the cp437 bytes `0x80..=0xff`, indexed by `byte - 0x80`.
    #[rustfmt::skip]
    const UPPER_HALF: [char; 128] = [
        // 0x80..=0x8f
        '\u{00c7}', '\u{00fc}', '\u{00e9}', '\u{00e2}', '\u{00e4}', '\u{00e0}', '\u{00e5}', '\u{00e7}',
        '\u{00ea}', '\u{00eb}', '\u{00e8}', '\u{00ef}', '\u{00ee}', '\u{00ec}', '\u{00c4}', '\u{00c5}',
        // 0x90..=0x9f
        '\u{00c9}', '\u{00e6}', '\u{00c6}', '\u{00f4}', '\u{00f6}', '\u{00f2}', '\u{00fb}', '\u{00f9}',
        '\u{00ff}', '\u{00d6}', '\u{00dc}', '\u{00a2}', '\u{00a3}', '\u{00a5}', '\u{20a7}', '\u{0192}',
        // 0xa0..=0xaf
        '\u{00e1}', '\u{00ed}', '\u{00f3}', '\u{00fa}', '\u{00f1}', '\u{00d1}', '\u{00aa}', '\u{00ba}',
        '\u{00bf}', '\u{2310}', '\u{00ac}', '\u{00bd}', '\u{00bc}', '\u{00a1}', '\u{00ab}', '\u{00bb}',
        // 0xb0..=0xbf
        '\u{2591}', '\u{2592}', '\u{2593}', '\u{2502}', '\u{2524}', '\u{2561}', '\u{2562}', '\u{2556}',
        '\u{2555}', '\u{2563}', '\u{2551}', '\u{2557}', '\u{255d}', '\u{255c}', '\u{255b}', '\u{2510}',
        // 0xc0..=0xcf
        '\u{2514}', '\u{2534}', '\u{252c}', '\u{251c}', '\u{2500}', '\u{253c}', '\u{255e}', '\u{255f}',
        '\u{255a}', '\u{2554}', '\u{2569}', '\u{2566}', '\u{2560}', '\u{2550}', '\u{256c}', '\u{2567}',
        // 0xd0..=0xdf
        '\u{2568}', '\u{2564}', '\u{2565}', '\u{2559}', '\u{2558}', '\u{2552}', '\u{2553}', '\u{256b}',
        '\u{256a}', '\u{2518}', '\u{250c}', '\u{2588}', '\u{2584}', '\u{258c}', '\u{2590}', '\u{2580}',
        // 0xe0..=0xef
        '\u{03b1}', '\u{00df}', '\u{0393}', '\u{03c0}', '\u{03a3}', '\u{03c3}', '\u{00b5}', '\u{03c4}',
        '\u{03a6}', '\u{0398}', '\u{03a9}', '\u{03b4}', '\u{221e}', '\u{03c6}', '\u{03b5}', '\u{2229}',
        // 0xf0..=0xff
        '\u{2261}', '\u{00b1}', '\u{2265}', '\u{2264}', '\u{2320}', '\u{2321}', '\u{00f7}', '\u{2248}',
        '\u{00b0}', '\u{2219}', '\u{00b7}', '\u{221a}', '\u{207f}', '\u{00b2}', '\u{25a0}', '\u{00a0}',
    ];

    match input.checked_sub(0x80) {
        // Below 0x80 the subtraction underflows: the byte is ASCII and maps to itself.
        None => char::from(input),
        // Otherwise the difference (at most 0x7f) is the index into the table.
        Some(offset) => UPPER_HALF[usize::from(offset)],
    }
}
#[cfg(test)]
mod test {
    /// Every possible byte value must be convertible without panicking.
    #[test]
    fn to_char_valid() {
        for byte in u8::MIN..=u8::MAX {
            super::to_char(byte);
        }
    }

    /// The lower half of the codepage must map each byte to the character
    /// with the same value.
    #[test]
    fn ascii() {
        for i in 0x00..0x80 {
            assert_eq!(super::to_char(i), i as char);
        }
    }

    /// A byte string that is not valid UTF-8 is decoded through the `&[u8]` impl.
    #[test]
    #[allow(unknown_lints)] // invalid_from_utf8 was added in rust 1.72
    #[allow(invalid_from_utf8)]
    fn example_slice() {
        use super::FromCp437;
        let data = b"Cura\x87ao";
        assert!(::std::str::from_utf8(data).is_err());
        assert_eq!(data.from_cp437(), "Curaçao");
    }

    /// ASCII-only slices must be returned as a borrow rather than a new allocation.
    #[test]
    fn ascii_slice_is_borrowed() {
        use super::FromCp437;
        let converted = b"plain.txt"[..].from_cp437();
        assert!(matches!(
            converted,
            ::std::borrow::Cow::Borrowed("plain.txt")
        ));
    }

    /// Calling `from_cp437` on a `Vec<u8>` resolves, via auto-deref, to the
    /// `&[u8]` impl; the owned `Box<[u8]>` impl is covered separately below.
    #[test]
    fn example_vec() {
        use super::FromCp437;
        let data = vec![0xCC, 0xCD, 0xCD, 0xB9];
        assert!(String::from_utf8(data.clone()).is_err());
        assert_eq!(&*data.from_cp437(), "╠══╣");
    }

    /// Non-ASCII data in an owned boxed slice goes through the `Box<[u8]>` impl.
    #[test]
    fn example_boxed_slice() {
        use super::FromCp437;
        let data: Box<[u8]> = vec![0xCC, 0xCD, 0xCD, 0xB9].into_boxed_slice();
        assert!(::std::str::from_utf8(&data).is_err());
        let converted: Box<str> = data.from_cp437();
        assert_eq!(&*converted, "╠══╣");
    }

    /// ASCII-only data in an owned boxed slice is converted unchanged.
    #[test]
    fn ascii_boxed_slice() {
        use super::FromCp437;
        let data: Box<[u8]> = b"plain.txt".to_vec().into_boxed_slice();
        let converted: Box<str> = data.from_cp437();
        assert_eq!(&*converted, "plain.txt");
    }
}