More utf8 decoder opts

This commit is contained in:
Vitaly Puzrin 2014-04-14 01:57:08 +04:00
parent 554ffaf908
commit c58bfccd1b

View file

@ -112,18 +112,11 @@ exports.buf2string = function (buf, max) {
buf.subarray ? buf.subarray(0, max) : buf.slice(0, max) buf.subarray ? buf.subarray(0, max) : buf.slice(0, max)
)));*/ )));*/
var str = '', i, out, part, char_len; var str, i, out, part, char_len;
var len = max || buf.length; var len = max || buf.length;
var out_len = 0;
var utf16buf;
// Calculate output length // Reserve max possible length
for (i=0; i < len;) { var utf16buf = new utils.Buf16(len*2);
i += utf8len[buf[i]];
out_len++;
}
utf16buf = new utils.Buf32(out_len);
for (out=0, i=0; i<len; i++) { for (out=0, i=0; i<len; i++) {
part = buf[i]; part = buf[i];
@ -131,7 +124,7 @@ exports.buf2string = function (buf, max) {
// edge case - broken sequence // edge case - broken sequence
if (i + char_len > len) { if (i + char_len > len) {
utf16buf[out++] = 0x7f; utf16buf[out++] = 0xfffd;
break; break;
} }
switch (char_len) { switch (char_len) {
@ -145,17 +138,21 @@ exports.buf2string = function (buf, max) {
utf16buf[out++] = ((part & 0x0f) << 12) | ((buf[++i] & 0x3f) << 6) | (buf[++i] & 0x3f); utf16buf[out++] = ((part & 0x0f) << 12) | ((buf[++i] & 0x3f) << 6) | (buf[++i] & 0x3f);
break; break;
case 4: case 4:
utf16buf[out++] = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f); // surrogate pair
part = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f);
part -= 0x10000;
utf16buf[out++] = 0xd800 | ((part >> 10) & 0x3ff);
utf16buf[out++] = 0xdc00 | (part & 0x3ff);
break; break;
// 5 & 6 byte unicode sequences are not supported in UTF16 (JS), so fill with dummy symbol // 5 & 6 byte unicode sequences are not supported in UTF16 (JS), so fill with dummy symbol
case 5: case 5:
i += 4; i += 4;
utf16buf[out++] = 0x7f; utf16buf[out++] = 0xfffd;
//utf16buf[out++] = (part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128; //utf16buf[out++] = (part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
break; break;
case 6: case 6:
i += 5; i += 5;
utf16buf[out++] = 0x7f; utf16buf[out++] = 0xfffd;
// (part - 252 << 32) is not possible in ECMAScript! So...: // (part - 252 << 32) is not possible in ECMAScript! So...:
//utf16buf[out++] = (part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128; //utf16buf[out++] = (part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128;
break; break;
@ -163,11 +160,12 @@ exports.buf2string = function (buf, max) {
} }
if (STR_APPLY_OK) { if (STR_APPLY_OK) {
return String.fromCharCode.apply(null, utf16buf); return String.fromCharCode.apply(null, utils.shrinkBuf(utf16buf, out));
} }
// Fallback, when String.fromCharCode.apply not available // Fallback, when String.fromCharCode.apply not available
for (i=0, len=utf16buf.length; i<len; i++) { str = '';
for (i=0, len=out; i<len; i++) {
str += String.fromCharCode(utf16buf[i]); str += String.fromCharCode(utf16buf[i]);
} }
return str; return str;