From c58bfccd1b87d00c7eaacc829bdeb6b46cd8acb0 Mon Sep 17 00:00:00 2001 From: Vitaly Puzrin Date: Mon, 14 Apr 2014 01:57:08 +0400 Subject: [PATCH] More utf8 decoder opts --- lib/utils/strings.js | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/lib/utils/strings.js b/lib/utils/strings.js index 317dc72..656fef7 100644 --- a/lib/utils/strings.js +++ b/lib/utils/strings.js @@ -112,18 +112,11 @@ exports.buf2string = function (buf, max) { buf.subarray ? buf.subarray(0, max) : buf.slice(0, max) )));*/ - var str = '', i, out, part, char_len; + var str, i, out, part, char_len; var len = max || buf.length; - var out_len = 0; - var utf16buf; - // Calculate output length - for (i=0; i < len;) { - i += utf8len[buf[i]]; - out_len++; - } - - utf16buf = new utils.Buf32(out_len); + // Reserve max possible length + var utf16buf = new utils.Buf16(len*2); for (out=0, i=0; i len) { - utf16buf[out++] = 0x7f; + utf16buf[out++] = 0xfffd; break; } switch (char_len) { @@ -145,17 +138,21 @@ exports.buf2string = function (buf, max) { utf16buf[out++] = ((part & 0x0f) << 12) | ((buf[++i] & 0x3f) << 6) | (buf[++i] & 0x3f); break; case 4: - utf16buf[out++] = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f); + // surrogate pair + part = ((part & 0x07) << 18) | ((buf[++i] & 0x3f) << 12) | ((buf[++i] & 0x3f) << 6) + (buf[++i] & 0x3f); + part -= 0x10000; + utf16buf[out++] = 0xd800 | ((part >> 10) & 0x3ff); + utf16buf[out++] = 0xdc00 | (part & 0x3ff); break; // 5 & 6 bytes uticodes not supported in UTF16 (JS), so fill with dummy symbol case 5: i += 4; - utf16buf[out++] = 0x7f; + utf16buf[out++] = 0xfffd; //utf16buf[out++] = (part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128; break; case 6: i += 5; - utf16buf[out++] = 0x7f; + utf16buf[out++] = 0xfffd; // (part - 252 << 32) is not possible in ECMAScript! 
So...: //utf16buf[out++] = (part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128; break; @@ -163,11 +160,12 @@ exports.buf2string = function (buf, max) { } if (STR_APPLY_OK) { - return String.fromCharCode.apply(null, utf16buf); + return String.fromCharCode.apply(null, utils.shrinkBuf(utf16buf, out)); } // Fallback, when String.fromCharCode.apply not available - for (i=0, len=utf16buf.length; i