diff --git a/CHANGELOG.md b/CHANGELOG.md index 385db56..289a8f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [2.0.0] - WIP ### Changed - Removed binary strings and `Array` support. -- Removed fallbacks for unsupported TypedArray methods (`.set()`, `.subarray()`). +- Removed fallbacks for TypedArray methods (`.set()`, `.subarray()`). +- Rewritten top-level wrappers. - Removed support of `Inflate` & `Deflate` instance create without `new`. -- Removed `Z_SYNC_FLUSH` related code from wrappers (buggy and no tests). +- `Inflate.push()` no longer needs second param (end is auto-detected). +- Increased default inflate chunk size to 64K. - Switched to es6. Legacy es5 builds available in `/dist`. - Structure of `/dist` folder changed. - Upgraded build tools to modern ones. + ## [1.0.11] - 2020-01-29 ### Fixed - Fix tests in node.js v12+, #179. diff --git a/README.md b/README.md index 7c313f8..8f8d435 100644 --- a/README.md +++ b/README.md @@ -58,33 +58,32 @@ For deflate level 6 results can be considered as correct. __Install:__ -node.js: - ``` npm install pako ``` -Example & API -------------- +Examples / API +-------------- Full docs - http://nodeca.github.io/pako/ ```javascript -var pako = require('pako'); +const pako = require('pako'); // Deflate // -var input = new Uint8Array(); +const input = new Uint8Array(); //... fill input data here -var output = pako.deflate(input); +const output = pako.deflate(input); // Inflate (simple wrapper can throw exception on broken stream) // -var compressed = new Uint8Array(); +const compressed = new Uint8Array(); //... fill data to uncompress here try { - var result = pako.inflate(compressed); + const result = pako.inflate(compressed); + // ... 
continue processing } catch (err) { console.log(err); } @@ -93,37 +92,48 @@ try { // Alternate interface for chunking & without exceptions // -var inflator = new pako.Inflate(); +const deflator = new pako.Deflate(); -inflator.push(chunk1, false); -inflator.push(chunk2, false); +deflator.push(chunk1, false); +deflator.push(chunk2, false); ... -inflator.push(chunkN, true); // true -> last chunk +deflator.push(chunk_last, true); // `true` says this chunk is last + +if (deflator.err) { + console.log(deflator.msg); +} + +const output = deflator.result; + + +const inflator = new pako.Inflate(); + +inflator.push(chunk1); +inflator.push(chunk2); +... +inflator.push(chunk_last); // no second param because end is auto-detected if (inflator.err) { console.log(inflator.msg); } -var output = inflator.result; - +const output = inflator.result; ``` Sometime you can wish to work with strings. For example, to send -big objects as json to server. Pako detects input data type. You can -force output to be string with option `{ to: 'string' }`. +stringified objects to server. Pako's deflate detects input data type, and +automatically recodes strings to utf-8 prior to compression. Inflate has a special +option to say that compressed data has utf-8 encoding and should be recoded to +javascript's utf-16. ```javascript -var pako = require('pako'); +const pako = require('pako'); -var test = { my: 'super', puper: [456, 567], awesome: 'pako' }; +const test = { my: 'super', puper: [456, 567], awesome: 'pako' }; -var binaryString = pako.deflate(JSON.stringify(test), { to: 'string' }); +const compressed = pako.deflate(JSON.stringify(test)); -// -// Here you can do base64 encode, make xhr requests and so on. 
-// - -var restored = JSON.parse(pako.inflate(binaryString, { to: 'string' })); +const restored = JSON.parse(pako.inflate(compressed, { to: 'string' })); ``` @@ -137,7 +147,7 @@ Pako does not contain some specific zlib functions: - __inflate__ - methods `inflateCopy`, `inflateMark`, `inflatePrime`, `inflateGetDictionary`, `inflateSync`, `inflateSyncPoint`, `inflateUndermine`. - High level inflate/deflate wrappers (classes) may not support some flush - modes. Those should work: Z_NO_FLUSH, Z_FINISH, Z_SYNC_FLUSH. + modes. pako for enterprise diff --git a/lib/deflate.js b/lib/deflate.js index 2ba7869..2cea4bc 100644 --- a/lib/deflate.js +++ b/lib/deflate.js @@ -13,7 +13,7 @@ const toString = Object.prototype.toString; /* ===========================================================================*/ const { - Z_NO_FLUSH, Z_FINISH, + Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FULL_FLUSH, Z_FINISH, Z_OK, Z_STREAM_END, Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY, @@ -176,22 +176,19 @@ function Deflate(options) { } /** - * Deflate#push(data[, mode]) -> Boolean + * Deflate#push(data[, flush_mode]) -> Boolean * - data (Uint8Array|ArrayBuffer|String): input data. Strings will be * converted to utf8 byte sequence. - * - mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE modes. + * - flush_mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE modes. * See constants. Skipped or `false` means Z_NO_FLUSH, `true` means Z_FINISH. * * Sends input data to deflate pipe, generating [[Deflate#onData]] calls with - * new compressed chunks. Returns `true` on success. The last data block must have - * mode Z_FINISH (or `true`). That will flush internal pending buffers and call - * [[Deflate#onEnd]]. + * new compressed chunks. Returns `true` on success. The last data block must + * have `flush_mode` Z_FINISH (or `true`). That will flush internal pending + * buffers and call [[Deflate#onEnd]]. * * On fail call [[Deflate#onEnd]] with error code and return false. * - * Note. 
Don't skip last param and always use the same type in your code - * (boolean or number). That will improve JS speed. - * * ##### Example * * ```javascript @@ -200,14 +197,15 @@ function Deflate(options) { * push(chunk, true); // push last chunk * ``` **/ -Deflate.prototype.push = function (data, mode) { +Deflate.prototype.push = function (data, flush_mode) { const strm = this.strm; const chunkSize = this.options.chunkSize; - let status; + let status, _flush_mode; if (this.ended) { return false; } - const _mode = (mode === ~~mode) ? mode : ((mode === true) ? Z_FINISH : Z_NO_FLUSH); + if (flush_mode === ~~flush_mode) _flush_mode = flush_mode; + else _flush_mode = flush_mode === true ? Z_FINISH : Z_NO_FLUSH; // Convert data if needed if (typeof data === 'string') { @@ -222,30 +220,47 @@ Deflate.prototype.push = function (data, mode) { strm.next_in = 0; strm.avail_in = strm.input.length; - do { + for (;;) { if (strm.avail_out === 0) { strm.output = new Uint8Array(chunkSize); strm.next_out = 0; strm.avail_out = chunkSize; } - status = zlib_deflate.deflate(strm, _mode); /* no bad return value */ - if (status !== Z_STREAM_END && status !== Z_OK) { + // Make sure avail_out > 6 to avoid repeating markers + if ((_flush_mode === Z_SYNC_FLUSH || _flush_mode === Z_FULL_FLUSH) && strm.avail_out <= 6) { + this.onData(strm.output.subarray(0, strm.next_out)); + strm.avail_out = 0; + continue; + } + + status = zlib_deflate.deflate(strm, _flush_mode); + + // Ended => flush and finish + if (status === Z_STREAM_END) { + if (strm.next_out > 0) { + this.onData(strm.output.subarray(0, strm.next_out)); + } + status = zlib_deflate.deflateEnd(this.strm); this.onEnd(status); this.ended = true; - return false; + return status === Z_OK; } - if (strm.avail_out === 0 || (strm.avail_in === 0 && _mode === Z_FINISH)) { - this.onData(strm.output.length === strm.next_out ? 
strm.output : strm.output.subarray(0, strm.next_out)); - } - } while ((strm.avail_in > 0 || strm.avail_out === 0) && status !== Z_STREAM_END); - // Finalize on the last chunk. - if (_mode === Z_FINISH) { - status = zlib_deflate.deflateEnd(this.strm); - this.onEnd(status); - this.ended = true; - return status === Z_OK; + // Flush if out buffer full + if (strm.avail_out === 0) { + this.onData(strm.output); + continue; + } + + // Flush if requested and has data + if (_flush_mode > 0 && strm.next_out > 0) { + this.onData(strm.output.subarray(0, strm.next_out)); + strm.avail_out = 0; + continue; + } + + if (strm.avail_in === 0) break; } return true; diff --git a/lib/inflate.js b/lib/inflate.js index 9547fd1..cf693ce 100644 --- a/lib/inflate.js +++ b/lib/inflate.js @@ -15,7 +15,7 @@ const toString = Object.prototype.toString; const { Z_NO_FLUSH, Z_FINISH, - Z_OK, Z_STREAM_END, Z_NEED_DICT, Z_BUF_ERROR + Z_OK, Z_STREAM_END, Z_NEED_DICT, Z_STREAM_ERROR, Z_DATA_ERROR, Z_MEM_ERROR } = require('./zlib/constants'); /* ===========================================================================*/ @@ -100,7 +100,7 @@ const { **/ function Inflate(options) { this.options = utils.assign({ - chunkSize: 16384, + chunkSize: 1024 * 64, windowBits: 15, to: '' }, options || {}); @@ -169,21 +169,22 @@ function Inflate(options) { } /** - * Inflate#push(data[, mode]) -> Boolean + * Inflate#push(data[, flush_mode]) -> Boolean * - data (Uint8Array|ArrayBuffer): input data - * - mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE modes. - * See constants. Skipped or `false` means Z_NO_FLUSH, `true` means Z_FINISH. + * - flush_mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE + * flush modes. See constants. Skipped or `false` means Z_NO_FLUSH, + * `true` means Z_FINISH. * * Sends input data to inflate pipe, generating [[Inflate#onData]] calls with - * new output chunks. Returns `true` on success. The last data block must have - * mode Z_FINISH (or `true`). 
That will flush internal pending buffers and call - * [[Inflate#onEnd]]. + * new output chunks. Returns `true` on success. If end of stream detected, + * [[Inflate#onEnd]] will be called. + * + * `flush_mode` is not needed for normal operation, because end of stream + * detected automatically. You may try to use it for advanced things, but + * this functionality was not tested. * * On fail call [[Inflate#onEnd]] with error code and return false. * - * Note. Don't skip last param and always use the same type in your code - * (boolean or number). That will improve JS speed. - * * ##### Example * * ```javascript @@ -192,19 +193,16 @@ function Inflate(options) { * push(chunk, true); // push last chunk * ``` **/ -Inflate.prototype.push = function (data, mode) { +Inflate.prototype.push = function (data, flush_mode) { const strm = this.strm; const chunkSize = this.options.chunkSize; const dictionary = this.options.dictionary; - let status, _mode; - let next_out_utf8, tail, utf8str; + let status, _flush_mode, last_avail_out; - // Flag to properly process Z_BUF_ERROR on testing inflate call - // when we check that all output data was flushed. - let allowBufError = false; + if (this.ended) return false; - if (this.ended) { return false; } - _mode = (mode === ~~mode) ? mode : ((mode === true) ? Z_FINISH : Z_NO_FLUSH); + if (flush_mode === ~~flush_mode) _flush_mode = flush_mode; + else _flush_mode = flush_mode === true ? 
Z_FINISH : Z_NO_FLUSH; // Convert data if needed if (toString.call(data) === '[object ArrayBuffer]') { @@ -216,44 +214,64 @@ Inflate.prototype.push = function (data, mode) { strm.next_in = 0; strm.avail_in = strm.input.length; - do { + for (;;) { if (strm.avail_out === 0) { strm.output = new Uint8Array(chunkSize); strm.next_out = 0; strm.avail_out = chunkSize; } - status = zlib_inflate.inflate(strm, Z_NO_FLUSH); /* no bad return value */ + status = zlib_inflate.inflate(strm, _flush_mode); if (status === Z_NEED_DICT && dictionary) { - status = zlib_inflate.inflateSetDictionary(this.strm, dictionary); + status = zlib_inflate.inflateSetDictionary(strm, dictionary); + + if (status === Z_OK) { + status = zlib_inflate.inflate(strm, _flush_mode); + } else if (status === Z_DATA_ERROR) { + // Replace code with more verbose + status = Z_NEED_DICT; + } } - if (status === Z_BUF_ERROR && allowBufError === true) { - status = Z_OK; - allowBufError = false; + // Skip sync markers if more data follows and not raw mode + while (strm.avail_in > 0 && + status === Z_STREAM_END && + strm.state.wrap > 0 && + data[strm.next_in] !== 0) + { + zlib_inflate.inflateReset(strm); + status = zlib_inflate.inflate(strm, _flush_mode); } - if (status !== Z_STREAM_END && status !== Z_OK) { - this.onEnd(status); - this.ended = true; - return false; + switch (status) { + case Z_STREAM_ERROR: + case Z_DATA_ERROR: + case Z_NEED_DICT: + case Z_MEM_ERROR: + this.onEnd(status); + this.ended = true; + return false; } + // Remember real `avail_out` value, because we may patch out buffer content + // to align utf8 strings boundaries. 
+ last_avail_out = strm.avail_out; + if (strm.next_out) { - if (strm.avail_out === 0 || status === Z_STREAM_END || (strm.avail_in === 0 && _mode === Z_FINISH)) { + if (strm.avail_out === 0 || status === Z_STREAM_END) { if (this.options.to === 'string') { - next_out_utf8 = strings.utf8border(strm.output, strm.next_out); + let next_out_utf8 = strings.utf8border(strm.output, strm.next_out); - tail = strm.next_out - next_out_utf8; - utf8str = strings.buf2string(strm.output, next_out_utf8); + let tail = strm.next_out - next_out_utf8; + let utf8str = strings.buf2string(strm.output, next_out_utf8); - // move tail + // move tail & realign counters strm.next_out = tail; strm.avail_out = chunkSize - tail; - if (tail) { strm.output.set(strm.output.subarray(next_out_utf8, next_out_utf8 + tail), 0); } + if (tail) strm.output.set(strm.output.subarray(next_out_utf8, next_out_utf8 + tail), 0); this.onData(utf8str); @@ -263,29 +281,18 @@ Inflate.prototype.push = function (data, mode) { } } - // When no more input data, we should check that internal inflate buffers - // are flushed. The only way to do it when avail_out = 0 - run one more - // inflate pass. But if output data not exists, inflate return Z_BUF_ERROR. - // Here we set flag to process this error properly. - // - // NOTE. Deflate does not return error in this case and does not needs such - // logic. - if (strm.avail_in === 0 && strm.avail_out === 0) { - allowBufError = true; + // Must repeat iteration if out buffer is full + if (status === Z_OK && last_avail_out === 0) continue; + + // Finalize if end of stream reached. + if (status === Z_STREAM_END) { + status = zlib_inflate.inflateEnd(this.strm); + this.onEnd(status); + this.ended = true; + return true; } - } while ((strm.avail_in > 0 || strm.avail_out === 0) && status !== Z_STREAM_END); - - if (status === Z_STREAM_END) { - _mode = Z_FINISH; - } - - // Finalize on the last chunk. 
- if (_mode === Z_FINISH) { - status = zlib_inflate.inflateEnd(this.strm); - this.onEnd(status); - this.ended = true; - return status === Z_OK; + if (strm.avail_in === 0) break; } return true; @@ -371,10 +378,10 @@ Inflate.prototype.onEnd = function (status) { function inflate(input, options) { const inflator = new Inflate(options); - inflator.push(input, true); + inflator.push(input); // That will never happens, if you don't cheat with options :) - if (inflator.err) { throw inflator.msg || msg[inflator.err]; } + if (inflator.err) throw inflator.msg || msg[inflator.err]; return inflator.result; } diff --git a/lib/zlib/constants.js b/lib/zlib/constants.js index 569b872..b85cc01 100644 --- a/lib/zlib/constants.js +++ b/lib/zlib/constants.js @@ -39,7 +39,7 @@ module.exports = { Z_ERRNO: -1, Z_STREAM_ERROR: -2, Z_DATA_ERROR: -3, - //Z_MEM_ERROR: -4, + Z_MEM_ERROR: -4, Z_BUF_ERROR: -5, //Z_VERSION_ERROR: -6, diff --git a/test/chunks.js b/test/chunks.js index c8acaa4..9026eff 100644 --- a/test/chunks.js +++ b/test/chunks.js @@ -39,7 +39,7 @@ function testChunk(buf, expected, packer, chunkSize) { } //expected count of onData calls. 
16384 output chunk size - expFlushCount = Math.ceil(packer.result.length / 16384); + expFlushCount = Math.ceil(packer.result.length / packer.options.chunkSize); assert(!packer.err, 'Packer error: ' + packer.err); assert.deepStrictEqual(packer.result, expected); @@ -92,7 +92,6 @@ describe('Dummy push (force end)', () => { const inflator = new pako.Inflate(); inflator.push(data); - inflator.push([], true); assert.deepStrictEqual(inflator.result, pako.inflate(data)); }); @@ -119,7 +118,7 @@ describe('Edge condition', () => { assert.ok(!inflator.err, 'Inflate failed with status ' + inflator.err); } - inflator.push(new Uint8Array(0), true); + inflator.push(new Uint8Array(0)); assert.ok(!inflator.err, 'Inflate failed with status ' + inflator.err); assert.deepStrictEqual(data, inflator.result); diff --git a/test/fixtures/gzip-joined-bgzip.gz b/test/fixtures/gzip-joined-bgzip.gz new file mode 100644 index 0000000..fc9f652 Binary files /dev/null and b/test/fixtures/gzip-joined-bgzip.gz differ diff --git a/test/gzip_specials.js b/test/gzip_specials.js index 4dbf8e8..db2bd54 100644 --- a/test/gzip_specials.js +++ b/test/gzip_specials.js @@ -4,8 +4,10 @@ const fs = require('fs'); const path = require('path'); const assert = require('assert'); +const zlib = require('zlib'); const pako = require('../index'); +const { Z_SYNC_FLUSH } = require('../lib/zlib/constants'); function a2s(array) { @@ -18,7 +20,7 @@ describe('Gzip special cases', () => { it('Read custom headers', () => { const data = fs.readFileSync(path.join(__dirname, 'fixtures/gzip-headers.gz')); const inflator = new pako.Inflate(); - inflator.push(data, true); + inflator.push(data); assert.strictEqual(inflator.header.name, 'test name'); assert.strictEqual(inflator.header.comment, 'test comment'); @@ -42,7 +44,7 @@ describe('Gzip special cases', () => { deflator.push(data, true); const inflator = new pako.Inflate({ to: 'string' }); - inflator.push(deflator.result, true); + inflator.push(deflator.result); 
assert.strictEqual(inflator.err, 0); assert.strictEqual(inflator.result, data); @@ -55,26 +57,45 @@ describe('Gzip special cases', () => { assert.deepStrictEqual(header.extra, new Uint8Array([ 4, 5, 6 ])); }); - it('Read stream with SYNC marks', () => { - let inflator, strm, _in, len, pos = 0, i = 0; + it('Read stream with SYNC marks (multistream source, file 1)', () => { const data = fs.readFileSync(path.join(__dirname, 'fixtures/gzip-joined.gz')); - do { - len = data.length - pos; - _in = new Uint8Array(len); - _in.set(data.subarray(pos, pos + len), 0); + assert.deepStrictEqual( + pako.ungzip(data), + new Uint8Array(zlib.gunzipSync(data)) + ); + }); - inflator = new pako.Inflate(); - strm = inflator.strm; - inflator.push(_in, true); + it.skip('Read stream with SYNC marks (multistream source, file 2)', () => { + const data = fs.readFileSync(path.join(__dirname, 'fixtures/gzip-joined-bgzip.gz')); - assert(!inflator.err, inflator.msg); + assert.deepStrictEqual( + // Currently fails with this chunk size + pako.ungzip(data, { chunkSize: 16384 }), + new Uint8Array(zlib.gunzipSync(data)) + ); + }); - pos += strm.next_in; - i++; - } while (strm.avail_in); + it('Write with Z_SYNC_FLUSH', () => { + const deflator = new pako.Deflate({ gzip: true }); - assert(i === 2, 'invalid blobs count'); + let count = 0; + + deflator.onData = function (chunk) { + this.chunks.push(chunk); + count++; + }; + + deflator.push('12345', Z_SYNC_FLUSH); + deflator.push('67890', true); + + const flushed = deflator.result; + const normal = pako.gzip('1234567890'); + + assert.strictEqual(count, 2); + + assert.deepStrictEqual(pako.ungzip(flushed), pako.ungzip(normal)); + assert.ok(flushed.length > normal.length); }); }); diff --git a/test/inflate.js b/test/inflate.js index 6b8d44a..0ede223 100644 --- a/test/inflate.js +++ b/test/inflate.js @@ -171,7 +171,7 @@ describe('Inflate with dictionary', () => { assert.throws(function () { pako.inflate(zCompressed, { dictionary: 'world' }); - }, /data error/); 
+ }, /need dictionary/); }); it('trivial dictionary', () => { diff --git a/test/inflate_cover_ported.js b/test/inflate_cover_ported.js index ae82eee..ef844af 100644 --- a/test/inflate_cover_ported.js +++ b/test/inflate_cover_ported.js @@ -28,7 +28,7 @@ function testInflate(hex, wbits, status) { assert(e === msg[status]); return; } - inflator.push(new Uint8Array(h2b(hex)), true); + inflator.push(new Uint8Array(h2b(hex))); assert.strictEqual(inflator.err, status); }