mirror of
https://github.com/0x5eal/rbxts-pako.git
synced 2025-04-05 11:20:58 +01:00
More utf8 decoder opts
This commit is contained in:
parent
554ffaf908
commit
c58bfccd1b
1 changed files with 14 additions and 16 deletions
|
@ -112,18 +112,11 @@ exports.buf2string = function (buf, max) {
|
|||
buf.subarray ? buf.subarray(0, max) : buf.slice(0, max)
|
||||
)));*/
|
||||
|
||||
var str = '', i, out, part, char_len;
|
||||
var str, i, out, part, char_len;
|
||||
var len = max || buf.length;
|
||||
var out_len = 0;
|
||||
var utf16buf;
|
||||
|
||||
// Calculate output length
|
||||
for (i=0; i < len;) {
|
||||
i += utf8len[buf[i]];
|
||||
out_len++;
|
||||
}
|
||||
|
||||
utf16buf = new utils.Buf32(out_len);
|
||||
// Reserve max possible length
|
||||
var utf16buf = new utils.Buf16(len*2);
|
||||
|
||||
for (out=0, i=0; i<len; i++) {
|
||||
part = buf[i];
|
||||
|
@ -131,7 +124,7 @@ exports.buf2string = function (buf, max) {
|
|||
|
||||
// edge case - broken sequence
|
||||
if (i + char_len > len) {
|
||||
utf16buf[out++] = 0x7f;
|
||||
utf16buf[out++] = 0xfffd;
|
||||
break;
|
||||
}
|
||||
switch (char_len) {
|
||||
|
@ -145,17 +138,21 @@ exports.buf2string = function (buf, max) {
|
|||
utf16buf[out++] = ((part & 0x0f) << 12) | ((buf[++i] & 0x3f) << 6) | (buf[++i] & 0x3f);
|
||||
break;
|
||||
case 4:
|
||||
utf16buf[out++] = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f);
|
||||
// surrogate pair
|
||||
part = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f);
|
||||
part -= 0x10000;
|
||||
utf16buf[out++] = 0xd800 | ((part >> 10) & 0x3ff);
|
||||
utf16buf[out++] = 0xdc00 | (part & 0x3ff);
|
||||
break;
|
||||
// 5 & 6 byte unicodes not supported in UTF16 (JS), so fill with dummy symbol
|
||||
case 5:
|
||||
i += 4;
|
||||
utf16buf[out++] = 0x7f;
|
||||
utf16buf[out++] = 0xfffd;
|
||||
//utf16buf[out++] = (part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
|
||||
break;
|
||||
case 6:
|
||||
i += 5;
|
||||
utf16buf[out++] = 0x7f;
|
||||
utf16buf[out++] = 0xfffd;
|
||||
// (part - 252 << 32) is not possible in ECMAScript! So...:
|
||||
//utf16buf[out++] = (part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
|
||||
break;
|
||||
|
@ -163,11 +160,12 @@ exports.buf2string = function (buf, max) {
|
|||
}
|
||||
|
||||
if (STR_APPLY_OK) {
|
||||
return String.fromCharCode.apply(null, utf16buf);
|
||||
return String.fromCharCode.apply(null, utils.shrinkBuf(utf16buf, out));
|
||||
}
|
||||
|
||||
// Fallback, when String.fromCharCode.apply not available
|
||||
for (i=0, len=utf16buf.length; i<len; i++) {
|
||||
str = '';
|
||||
for (i=0, len=out; i<len; i++) {
|
||||
str += String.fromCharCode(utf16buf[i]);
|
||||
}
|
||||
return str;
|
||||
|
|
Loading…
Add table
Reference in a new issue