mirror of
https://github.com/0x5eal/rbxts-pako.git
synced 2025-04-04 19:01:00 +01:00
Moved utf8 border detect to utils, fixed bugs & added tests
This commit is contained in:
parent
4d3acc2961
commit
2cd0309b62
3 changed files with 58 additions and 20 deletions
|
@ -211,20 +211,7 @@ Inflate.prototype.push = function(data, mode) {
|
|||
|
||||
if (this.options.to === 'string') {
|
||||
|
||||
// realign size to utf8 char border & move tail to start of buffer
|
||||
next_out_utf8_index = strm.next_out_index - 6;
|
||||
if (next_out_utf8_index < 0) { next_out_utf8_index = 0; }
|
||||
|
||||
tail = strings.utf8tail(strm.next_out[next_out_utf8_index]);
|
||||
while (next_out_utf8_index + tail <= strm.next_out_index) {
|
||||
next_out_utf8_index += tail;
|
||||
tail = strings.utf8tail(strm.next_out[next_out_utf8_index]);
|
||||
}
|
||||
|
||||
// shit happened - broken tail. then take it all.
|
||||
if (next_out_utf8_index === 0) {
|
||||
next_out_utf8_index = strm.next_out_index;
|
||||
}
|
||||
next_out_utf8_index = strings.utf8border(strm.next_out, strm.next_out_index);
|
||||
|
||||
tail = strm.next_out_index - next_out_utf8_index;
|
||||
utf8str = strings.buf2string(strm.next_out, next_out_utf8_index);
|
||||
|
|
|
@ -172,7 +172,29 @@ exports.buf2string = function (buf, max) {
|
|||
};
|
||||
|
||||
|
||||
// calculate tail size of utf8 char by current byte value
|
||||
exports.utf8tail = function(code) {
|
||||
return _utf8len[code];
|
||||
// Calculate max possible position in utf8 buffer,
|
||||
// that will not break sequence. If that's not possible
|
||||
// - (very small limits) return max size as is.
|
||||
//
|
||||
// buf[] - utf8 bytes array
|
||||
// max - length limit (falls back to buf.length when omitted or 0);
|
||||
exports.utf8border = function(buf, max) {
|
||||
var pos;
|
||||
|
||||
max = max || buf.length;
|
||||
if (max > buf.length) { max = buf.length; }
|
||||
|
||||
// go back from last position, until start of sequence found
|
||||
pos = max-1;
|
||||
while (pos >= 0 && (buf[pos] & 0xC0) === 0x80) { pos--; }
|
||||
|
||||
// Very small and broken sequence (only continuation bytes found),
|
||||
// return max, because we should return something anyway.
|
||||
if (pos < 0) { return max; }
|
||||
|
||||
// If we came to start of buffer - that means buffer is too small,
|
||||
// return max too.
|
||||
if (pos === 0) { return max; }
|
||||
|
||||
return (pos + _utf8len[buf[pos]] > max) ? pos : max;
|
||||
};
|
||||
|
|
|
@ -40,11 +40,40 @@ function a2utf16(arr) {
|
|||
|
||||
describe('Encode/Decode', function () {
|
||||
|
||||
var utf16sample = a2utf16([0x1f3b5, 'abcd', 0x266a, 0x35, 0xe800, 0x10ffff, 0x0fffff]);
|
||||
// Create sample, that contains all types of utf8 (1-4byte) after conversion
|
||||
var utf16sample = a2utf16([0x1f3b5, 'a', 0x266a, 0x35, 0xe800, 0x10ffff, 0x0fffff]);
|
||||
// use node Buffer internal conversion as "done right"
|
||||
var utf8sample = new Uint8Array(new Buffer(utf16sample));
|
||||
|
||||
console.log(utf16sample, utf16sample.length);
|
||||
console.log(new Buffer(utf16sample));
|
||||
it('utf-8 border detect', function () {
|
||||
var ub = strings.utf8border;
|
||||
assert.equal(ub(utf8sample, 1), 1);
|
||||
assert.equal(ub(utf8sample, 2), 2);
|
||||
assert.equal(ub(utf8sample, 3), 3);
|
||||
assert.equal(ub(utf8sample, 4), 4);
|
||||
|
||||
assert.equal(ub(utf8sample, 5), 5);
|
||||
|
||||
assert.equal(ub(utf8sample, 6), 5);
|
||||
assert.equal(ub(utf8sample, 7), 5);
|
||||
assert.equal(ub(utf8sample, 8), 8);
|
||||
|
||||
assert.equal(ub(utf8sample, 9), 9);
|
||||
|
||||
assert.equal(ub(utf8sample, 10), 9);
|
||||
assert.equal(ub(utf8sample, 11), 9);
|
||||
assert.equal(ub(utf8sample, 12), 12);
|
||||
|
||||
assert.equal(ub(utf8sample, 13), 12);
|
||||
assert.equal(ub(utf8sample, 14), 12);
|
||||
assert.equal(ub(utf8sample, 15), 12);
|
||||
assert.equal(ub(utf8sample, 16), 16);
|
||||
|
||||
assert.equal(ub(utf8sample, 17), 16);
|
||||
assert.equal(ub(utf8sample, 18), 16);
|
||||
assert.equal(ub(utf8sample, 19), 16);
|
||||
assert.equal(ub(utf8sample, 20), 20);
|
||||
});
|
||||
|
||||
it('Encode string to utf8 buf', function () {
|
||||
assert.ok(cmp(
|
||||
|
|
Loading…
Add table
Reference in a new issue