Added strings support

This commit is contained in:
Vitaly Puzrin 2014-04-08 18:55:17 +04:00
parent b1dae2db36
commit e555c41214
9 changed files with 346 additions and 34 deletions

View file

@ -1,9 +1,10 @@
0.x.x / WIP
0.2.0 / WIP
-------------------
- Added custom gzip headers support
- Improved memory allocations for small chunks
- More coverate tests
- Added custom gzip headers support.
- Added strings support.
- Improved memory allocations for small chunks.
- More coverage tests.
0.1.1 / 2014-03-20

View file

@ -107,6 +107,24 @@ var output = inflator.result;
```
Sometimes you may wish to work with strings, for example to send
big objects as JSON to a server. Pako detects the input data type
automatically. You can force the output to be a string with the
option `{ to: 'string' }`.
```javascript
var pako = require('pako');
var test = { my: 'super', puper: [456, 567], awesome: 'pako' };
var binaryString = pako.deflate(JSON.stringify(test), { to: 'string' });
//
// Here you can do base64 encode, make xhr requests and so on.
//
var restored = JSON.parse(pako.inflate(binaryString, { to: 'string' }));
```
Notes
-----

View file

@ -43,9 +43,11 @@ fs.readdirSync(SAMPLES_DIRECTORY).sort().forEach(function (sample) {
content.buffer = fs.readFileSync(filepath);
content.typed = new Uint8Array(content.buffer);
content.string = fs.readFileSync(filepath, 'utf8');
content.deflateTyped = pako.deflate(content.typed, { level: LEVEL });
content.deflateBuffer = new Buffer(content.deflateTyped);
content.deflateString = pako.deflate(content.typed, { level: LEVEL, to: 'string' });
content.deflateRawTyped = pako.deflateRaw(content.typed, { level: LEVEL });
content.deflateRawBuffer = new Buffer(content.deflateRawTyped);

View file

@ -0,0 +1,11 @@
'use strict'
var pako = require('../../../');
var utils = require('../../../lib/zlib/utils');
exports.run = function(data, level) {
pako.deflate(data.string, {
level: level,
to: 'string'
});
}

View file

@ -0,0 +1,9 @@
'use strict'
var pako = require('../../../index.js');
exports.run = function(data) {
return pako.inflate(data.deflateString, {
to: 'string'
});
}

View file

@ -81,8 +81,10 @@ var Z_DEFLATED = 8;
* Additional options, for internal needs:
*
* - `chunkSize` - size of generated data chunks (16K by default)
* - `raw` (boolean) - do raw deflate
* - `gzip` (boolean) - create gzip wrapper
* - `raw` (Boolean) - do raw deflate
* - `gzip` (Boolean) - create gzip wrapper
* - `to` (String) - if equal to 'string', then result will be "binary string"
* (each char code [0..255])
*
* ##### Example:
*
@ -109,7 +111,8 @@ var Deflate = function(options) {
chunkSize: 16384,
windowBits: 15,
memLevel: 8,
strategy: Z_DEFAULT_STRATEGY
strategy: Z_DEFAULT_STRATEGY,
to: ''
}, options || {});
var opt = this.options;
@ -145,7 +148,8 @@ var Deflate = function(options) {
/**
* Deflate#push(data[, mode]) -> Boolean
* - data (Uint8Array|Array): input data
* - data (Uint8Array|Array|String): input data. Strings will be converted to
* utf8 byte sequence.
* - mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE modes.
* See constants. Skipped or `false` means Z_NO_FLUSH, `true` means Z_FINISH.
*
@ -157,7 +161,7 @@ var Deflate = function(options) {
* On fail call [[Deflate#onEnd]] with error code and return false.
*
* We strongly recommend to use `Uint8Array` on input for best speed (output
* format is detected automatically). Also, don't skip last param and always
* array format is detected automatically). Also, don't skip last param and always
* use the same type in your code (boolean or number). That will improve JS speed.
*
* For regular `Array`-s make sure all elements are [0..255].
@ -179,7 +183,14 @@ Deflate.prototype.push = function(data, mode) {
_mode = (mode === ~~mode) ? mode : ((mode === true) ? Z_FINISH : Z_NO_FLUSH);
strm.next_in = data;
// Convert data if needed
if (typeof data === 'string') {
// If we need to compress text, change encoding to utf8.
strm.next_in = utils.string2buf(data);
} else {
strm.next_in = data;
}
strm.next_in_index = 0;
strm.avail_in = strm.next_in.length;
strm.avail_out = 0;
@ -198,7 +209,11 @@ Deflate.prototype.push = function(data, mode) {
return false;
}
if (strm.avail_out === 0 || strm.avail_in === 0) {
this.onData(utils.shrinkBuf(strm.next_out, strm.next_out_index));
if (this.options.to === 'string') {
this.onData(utils.buf2binstring(utils.shrinkBuf(strm.next_out, strm.next_out_index)));
} else {
this.onData(utils.shrinkBuf(strm.next_out, strm.next_out_index));
}
}
} while ((strm.avail_in > 0 || strm.avail_out === 0) && status !== Z_STREAM_END);
@ -216,8 +231,9 @@ Deflate.prototype.push = function(data, mode) {
/**
* Deflate#onData(chunk) -> Void
* - chunk (Uint8Array|Array): ouput data. Type of array depends
* on js engine support.
* - chunk (Uint8Array|Array|String): output data. Type of array depends
* on js engine support. When string output requested, each chunk
* will be string.
*
* By default, stores data blocks in `chunks[]` property and glue
* those in `onEnd`. Override this handler, if you need another behaviour.
@ -239,7 +255,11 @@ Deflate.prototype.onData = function(chunk) {
Deflate.prototype.onEnd = function(status) {
// On success - join
if (status === Z_OK) {
this.result = utils.flattenChunks(this.chunks);
if (this.options.to === 'string') {
this.result = this.chunks.join('');
} else {
this.result = utils.flattenChunks(this.chunks);
}
}
this.chunks = [];
this.err = status;
@ -248,8 +268,8 @@ Deflate.prototype.onEnd = function(status) {
/**
* deflate(data[, options]) -> Uint8Array|Array
* - data (Uint8Array|Array): input data to compress.
* deflate(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to compress.
* - options (Object): zlib deflate options.
*
* Compress `data` with deflate algorithm and `options`.
@ -264,6 +284,13 @@ Deflate.prototype.onEnd = function(status) {
* [http://zlib.net/manual.html#Advanced](http://zlib.net/manual.html#Advanced)
* for more information on these.
*
* Sugar (options):
*
* - `raw` (Boolean) - say that we work with raw stream, if you don't wish to specify
* negative windowBits explicitly.
* - `to` (String) - if equal to 'string', then result will be "binary string"
* (each char code [0..255])
*
* ##### Example:
*
* ```javascript
@ -286,8 +313,8 @@ function deflate(input, options) {
/**
* deflateRaw(data[, options]) -> Uint8Array|Array
* - data (Uint8Array|Array): input data to compress.
* deflateRaw(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to compress.
* - options (Object): zlib deflate options.
*
* The same as [[deflate]], but creates raw data, without wrapper
@ -301,8 +328,8 @@ function deflateRaw(input, options) {
/**
* gzip(data[, options]) -> Uint8Array|Array
* - data (Uint8Array|Array): input data to compress.
* gzip(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to compress.
* - options (Object): zlib deflate options.
*
* The same as [[deflate]], but create gzip wrapper instead of

View file

@ -9,6 +9,11 @@ var zstream = require('./zlib/zstream');
var gzheader = require('./zlib/gzheader');
// Size (in bytes) of the utf8 sequence announced by its first byte value.
// Anything below 192 (ascii or a continuation byte) counts as a single byte.
function utf8tail(code) {
  if (code >= 252) { return 6; }
  if (code >= 248) { return 5; }
  if (code >= 240) { return 4; }
  if (code >= 224) { return 3; }
  if (code >= 192) { return 2; }
  return 1;
}
/**
* class Inflate
*
@ -24,7 +29,7 @@ var gzheader = require('./zlib/gzheader');
**/
/**
* Inflate.result -> Uint8Array|Array
* Inflate.result -> Uint8Array|Array|String
*
* Uncompressed result, generated by default [[Inflate#onData]]
* and [[Inflate#onEnd]] handlers. Filled after you push last chunk
@ -60,7 +65,10 @@ var gzheader = require('./zlib/gzheader');
* Additional options, for internal needs:
*
* - `chunkSize` - size of generated data chunks (16K by default)
* - `raw` (boolean) - do raw inflate
* - `raw` (Boolean) - do raw inflate
* - `to` (String) - if equal to 'string', then result will be converted
* from utf8 to utf16 (javascript) string. When string output requested,
* chunk length can differ from `chunkSize`, depending on content.
*
* By default, when no options set, autodetect deflate/gzip data format via
* wrapper header.
@ -86,7 +94,8 @@ var Inflate = function(options) {
this.options = utils.assign({
chunkSize: 16384,
windowBits: 0
windowBits: 0,
to: ''
}, options || {});
var opt = this.options;
@ -140,7 +149,7 @@ var Inflate = function(options) {
/**
* Inflate#push(data[, mode]) -> Boolean
* - data (Uint8Array|Array): input data
* - data (Uint8Array|Array|String): input data
* - mode (Number|Boolean): 0..6 for corresponding Z_NO_FLUSH..Z_TREE modes.
* See constants. Skipped or `false` means Z_NO_FLUSH, `true` means Z_FINISH.
*
@ -169,11 +178,19 @@ Inflate.prototype.push = function(data, mode) {
var strm = this.strm;
var chunkSize = this.options.chunkSize;
var status, _mode;
var next_out_utf8_index, tail, utf8str;
if (this.ended) { return false; }
_mode = c.Z_NO_FLUSH;
strm.next_in = data;
// Convert data if needed
if (typeof data === 'string') {
// Only binary strings can be decompressed in practice
strm.next_in = utils.binstring2buf(data);
} else {
strm.next_in = data;
}
strm.next_in_index = 0;
strm.avail_in = strm.next_in.length;
strm.avail_out = 0;
@ -192,9 +209,40 @@ Inflate.prototype.push = function(data, mode) {
this.ended = true;
return false;
}
if (strm.next_out_index) {
if (strm.avail_out === 0 || strm.avail_in === 0 || status === c.Z_STREAM_END) {
this.onData(utils.shrinkBuf(strm.next_out, strm.next_out_index));
if (this.to === 'string') {
// realign size to utf8 char border & move tail to start of buffer
next_out_utf8_index = strm.next_out_index - 5;
if (next_out_utf8_index < 0) { next_out_utf8_index = 0; }
tail = utf8tail(strm.next_out[next_out_utf8_index]);
while (next_out_utf8_index + tail < strm.next_out_index) {
next_out_utf8_index += tail;
tail = utf8tail(strm.next_out[next_out_utf8_index]);
}
// shit happened - broken tail. then take it all.
if (next_out_utf8_index === 0) {
next_out_utf8_index = strm.next_out_index;
tail = 0;
}
utf8str = utils.buf2string(strm.next_out, next_out_utf8_index);
// move tail
strm.next_out_index = tail;
strm.avail_out = chunkSize - tail;
if (tail) { utils.arraySet(strm.next_out, strm.next_out, next_out_utf8_index, tail, 0); }
this.onData(utf8str);
} else {
this.onData(utils.shrinkBuf(strm.next_out, strm.next_out_index));
}
}
}
} while ((strm.avail_in > 0 || strm.avail_out === 0) && status !== c.Z_STREAM_END);
@ -218,8 +266,9 @@ Inflate.prototype.push = function(data, mode) {
/**
* Inflate#onData(chunk) -> Void
* - chunk (Uint8Array|Array): ouput data. Type of array depends
* on js engine support.
* - chunk (Uint8Array|Array|String): output data. Type of array depends
* on js engine support. When string output requested, each chunk
* will be string.
*
* By default, stores data blocks in `chunks[]` property and glue
* those in `onEnd`. Override this handler, if you need another behaviour.
@ -241,7 +290,13 @@ Inflate.prototype.onData = function(chunk) {
Inflate.prototype.onEnd = function(status) {
// On success - join
if (status === c.Z_OK) {
this.result = utils.flattenChunks(this.chunks);
if (this.options.to === 'string') {
// Glue & convert here, until we teach pako to send
// utf8 aligned strings to onData
this.result = this.chunks.join('');
} else {
this.result = utils.flattenChunks(this.chunks);
}
}
this.chunks = [];
this.err = status;
@ -250,8 +305,8 @@ Inflate.prototype.onEnd = function(status) {
/**
* inflate(data[, options]) -> Uint8Array|Array
* - data (Uint8Array|Array): input data to compress.
* inflate(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to decompress.
* - options (Object): zlib inflate options.
*
* Decompress `data` with inflate/ungzip and `options`. Autodetect
@ -267,8 +322,11 @@ Inflate.prototype.onEnd = function(status) {
*
* Sugar (options):
*
* - raw (Boolean) - say that we work with raw stream, if you don't wish to specify
* - `raw` (Boolean) - say that we work with raw stream, if you don't wish to specify
* negative windowBits explicitly.
* - `to` (String) - if equal to 'string', then result will be converted
* from utf8 to utf16 (javascript) string. When string output requested,
* chunk length can differ from `chunkSize`, depending on content.
*
*
* ##### Example:
@ -298,8 +356,8 @@ function inflate(input, options) {
/**
* inflateRaw(data[, options]) -> Uint8Array|Array
* - data (Uint8Array|Array): input data to compress.
* inflateRaw(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to decompress.
* - options (Object): zlib inflate options.
*
* The same as [[inflate]], but creates raw data, without wrapper
@ -312,6 +370,17 @@ function inflateRaw(input, options) {
}
/**
* ungzip(data[, options]) -> Uint8Array|Array|String
* - data (Uint8Array|Array|String): input data to decompress.
* - options (Object): zlib inflate options.
*
* Just a shortcut to [[inflate]], because it autodetects the format
* by header content. Done for convenience.
**/
exports.Inflate = Inflate;
exports.inflate = inflate;
exports.inflateRaw = inflateRaw;
exports.ungzip = inflate;

View file

@ -5,6 +5,10 @@ var TYPED_OK = (typeof Uint8Array !== 'undefined') &&
(typeof Uint16Array !== 'undefined') &&
(typeof Int32Array !== 'undefined');
// Quick check if we can use fast array to bin string conversion.
//
// `buf2binstring` hands typed arrays to `String.fromCharCode.apply` whenever
// they are available, and some engines accept plain arrays there but throw
// on typed ones. So probe both kinds; any failure switches to the slow
// char-by-char fallback.
var STR_APPLY_OK = true;
try {
  String.fromCharCode.apply(null, [0]);
  if (typeof Uint8Array !== 'undefined') {
    String.fromCharCode.apply(null, new Uint8Array(1));
  }
} catch (__) { STR_APPLY_OK = false; }
exports.assign = function (obj /*from1, from2, from3, ...*/) {
var sources = Array.prototype.slice.call(arguments, 1);
@ -36,6 +40,117 @@ exports.shrinkBuf = function (buf, size) {
};
// convert string to array (typed, when possible)
// src: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Base64_encoding_and_decoding
exports.string2buf = function (str) {
var buf, c, str_len = str.length, buf_len = 0;
/* mapping... */
for (var m_pos = 0; m_pos < str_len; m_pos++) {
c = str.charCodeAt(m_pos);
buf_len += c < 0x80 ? 1 : c < 0x800 ? 2 : c < 0x10000 ? 3 : c < 0x200000 ? 4 : c < 0x4000000 ? 5 : 6;
}
buf = new exports.Buf8(buf_len);
/* transcription... */
for (var i = 0, c_pos = 0; i < buf_len; c_pos++) {
c = str.charCodeAt(c_pos);
if (c < 128) {
/* one byte */
buf[i++] = c;
} else if (c < 0x800) {
/* two bytes */
buf[i++] = 192 + (c >>> 6);
buf[i++] = 128 + (c & 63);
} else if (c < 0x10000) {
/* three bytes */
buf[i++] = 224 + (c >>> 12);
buf[i++] = 128 + (c >>> 6 & 63);
buf[i++] = 128 + (c & 63);
} else if (c < 0x200000) {
/* four bytes */
buf[i++] = 240 + (c >>> 18);
buf[i++] = 128 + (c >>> 12 & 63);
buf[i++] = 128 + (c >>> 6 & 63);
buf[i++] = 128 + (c & 63);
} else if (c < 0x4000000) {
/* five bytes */
buf[i++] = 248 + (c >>> 24);
buf[i++] = 128 + (c >>> 18 & 63);
buf[i++] = 128 + (c >>> 12 & 63);
buf[i++] = 128 + (c >>> 6 & 63);
buf[i++] = 128 + (c & 63);
} else /* if (c <= 0x7fffffff) */ {
/* six bytes */
buf[i++] = 252 + /* (c >>> 32) is not possible in ECMAScript! So...: */ (c / 1073741824);
buf[i++] = 128 + (c >>> 24 & 63);
buf[i++] = 128 + (c >>> 18 & 63);
buf[i++] = 128 + (c >>> 12 & 63);
buf[i++] = 128 + (c >>> 6 & 63);
buf[i++] = 128 + (c & 63);
}
}
return buf;
};
// convert array to string
// src: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Base64_encoding_and_decoding
exports.buf2string = function (buf, max) {
var str = '';
for (var part, len = max || buf.length, i = 0; i < len; i++) {
part = buf[i];
str += String.fromCharCode(
part > 251 && part < 254 && i + 5 < len ? /* six bytes */
/* (part - 252 << 32) is not possible in ECMAScript! So...: */
(part - 252) * 1073741824 + (buf[++i] - 128 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128
: part > 247 && part < 252 && i + 4 < len ? /* five bytes */
(part - 248 << 24) + (buf[++i] - 128 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128
: part > 239 && part < 248 && i + 3 < len ? /* four bytes */
(part - 240 << 18) + (buf[++i] - 128 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128
: part > 223 && part < 240 && i + 2 < len ? /* three bytes */
(part - 224 << 12) + (buf[++i] - 128 << 6) + buf[++i] - 128
: part > 191 && part < 224 && i + 1 < len ? /* two bytes */
(part - 192 << 6) + buf[++i] - 128
: /* part < 127 ? */ /* one byte */
part
);
}
return str;
};
// Convert byte array to binary string
exports.buf2binstring = function(buf) {
// use fallback for big arrays to avoid stack overflow
if (STR_APPLY_OK && buf.length < 65537) {
return String.fromCharCode.apply(null, buf);
}
var result = '';
for(var i=0, len=buf.length; i < len; i++) {
result += String.fromCharCode(buf[i]);
}
return result;
};
// Convert binary string (typed, when possible)
exports.binstring2buf = function(str) {
var buf = new exports.Buf8(str.length);
for(var i=0, len=buf.length; i < len; i++) {
buf[i] = str.charCodeAt(i);
}
return buf;
};
var fnTyped = {
arraySet: function (dest, src, src_offs, len, dest_offs) {
// Suppose, that with typed array support destination is

60
test/strings.js Normal file
View file

@ -0,0 +1,60 @@
/*global describe, it*/
'use strict';
var fs = require('fs');
var path = require('path');
var assert = require('assert');
var pako_utils = require('../lib/zlib/utils');
var pako = require('../index');
var helpers = require('./helpers');
var cmp = helpers.cmpBuf;
var file = path.join(__dirname, 'fixtures/samples/lorem_utf_100k.txt');
var sampleString = fs.readFileSync(file, 'utf8');
var sampleArray = new Uint8Array(fs.readFileSync(file));
// Deflate must produce the same bytes whether the input is given as a
// javascript string or as its utf8 bytes, and whether the output is
// requested as an array or as a binary string.
describe('Deflate strings', function () {

  it('Deflate javascript string (utf16) on input', function () {
    var fromString = pako.deflate(sampleString);
    var fromArray = pako.deflate(sampleArray);

    assert.ok(cmp(fromString, fromArray));
  });

  it('Deflate with binary string output', function () {
    // small chunkSize forces the result to be glued from many chunks
    var binaryString = pako.deflate(sampleArray, { to: 'string', chunkSize: 99 });
    var actual = pako_utils.binstring2buf(binaryString);
    var expected = pako.deflate(sampleArray);

    assert.ok(cmp(actual, expected));
  });

});
// Inflate must accept deflated data given as a binary string, and must be
// able to return the decompressed text as a javascript string.
describe('Inflate strings', function () {
  var deflatedString = pako.deflate(sampleArray, { to: 'string' });
  var deflatedArray = pako.deflate(sampleArray);

  it('Inflate binary string input', function () {
    assert.ok(cmp(
      pako.inflate(deflatedString),
      pako.inflate(deflatedArray)
    ));
  });

  it('Inflate with javascript string (utf16) output', function () {
    // `assert.ok(value, message)` only checks that `value` is truthy, so it
    // would pass for any non-empty result. Compare with the sample itself.
    // Small chunkSize makes chunk borders fall inside multibyte chars.
    assert.strictEqual(
      pako.inflate(deflatedArray, { to: 'string', chunkSize: 99 }),
      sampleString
    );
  });

});