More utf8 decoder opts

This commit is contained in:
Vitaly Puzrin 2014-04-14 01:57:08 +04:00
parent 554ffaf908
commit c58bfccd1b

View file

@ -112,18 +112,11 @@ exports.buf2string = function (buf, max) {
buf.subarray ? buf.subarray(0, max) : buf.slice(0, max)
)));*/
var str = '', i, out, part, char_len;
var str, i, out, part, char_len;
var len = max || buf.length;
var out_len = 0;
var utf16buf;
// Calculate output length
for (i=0; i < len;) {
i += utf8len[buf[i]];
out_len++;
}
utf16buf = new utils.Buf32(out_len);
// Reserve max possible length
var utf16buf = new utils.Buf16(len*2);
for (out=0, i=0; i<len; i++) {
part = buf[i];
@ -131,7 +124,7 @@ exports.buf2string = function (buf, max) {
// edge case - broken sequence
if (i + char_len > len) {
utf16buf[out++] = 0x7f;
utf16buf[out++] = 0xfffd;
break;
}
switch (char_len) {
@ -145,17 +138,21 @@ exports.buf2string = function (buf, max) {
utf16buf[out++] = ((part & 0x0f) << 12) | ((buf[++i] & 0x3f) << 6) | (buf[++i] & 0x3f);
break;
case 4:
utf16buf[out++] = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f);
// surrogate pair
part = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f);
part -= 0x10000;
utf16buf[out++] = 0xd800 | ((part >> 10) & 0x3ff);
utf16buf[out++] = 0xdc00 | (part & 0x3ff);
break;
// 5- & 6-byte UTF-8 sequences are not representable in UTF-16 (JS), so fill with a dummy symbol
case 5:
i += 4;
utf16buf[out++] = 0x7f;
utf16buf[out++] = 0xfffd;
//utf16buf[out++] = (part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
break;
case 6:
i += 5;
utf16buf[out++] = 0x7f;
utf16buf[out++] = 0xfffd;
// (part - 252 << 32) is not possible in ECMAScript! So...:
//utf16buf[out++] = (part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
break;
@ -163,11 +160,12 @@ exports.buf2string = function (buf, max) {
}
if (STR_APPLY_OK) {
return String.fromCharCode.apply(null, utf16buf);
return String.fromCharCode.apply(null, utils.shrinkBuf(utf16buf, out));
}
// Fallback, when String.fromCharCode.apply not available
for (i=0, len=utf16buf.length; i<len; i++) {
str = '';
for (i=0, len=out; i<len; i++) {
str += String.fromCharCode(utf16buf[i]);
}
return str;