started deflate_fast() implementation

This commit is contained in:
Vitaly Puzrin 2014-02-15 14:48:22 +04:00
parent 285e2d4cff
commit 42237287cb
2 changed files with 1377 additions and 62 deletions

View file

@ -6,12 +6,19 @@ var trees = require('./trees');
var adler32 = require('./adler32');
var crc32 = require('./crc32');
var Z_NULL = c.Z_NULL;
//var Z_NULL = c.Z_NULL;
/* Window size exponent: 2^15 bytes. */
var MAX_WBITS = 15;
/* 32K LZ77 window */
/* NOTE(review): presumably the default memLevel passed to deflateInit2 -- confirm against callers. */
var DEF_MEM_LEVEL = 8;
/* Sizes the distance tree: dyn_dtree holds 2*D_CODES+1 nodes. */
var D_CODES = 30;
/* Sizes the bit-length tree: bl_tree holds 2*BL_CODES+1 nodes. */
var BL_CODES = 19;
/* NOTE(review): by name, the number of match-length codes -- confirm. */
var LENGTH_CODES = 29;
/* NOTE(review): by name, the number of literal symbols (one per byte value) -- confirm. */
var LITERALS = 256;
/* Size of the literal/length alphabet; the extra 1 is presumably the end-of-block symbol -- confirm. */
var L_CODES = LITERALS + 1 + LENGTH_CODES;
/* Node count of the literal/length tree (dyn_ltree); the heap and depth arrays use the same 2*L_CODES+1 size. */
var HEAP_SIZE = 2*L_CODES + 1;
/* A match is only emitted when its length is at least MIN_MATCH (see deflate_fast). */
var MIN_MATCH = 3;
/* longest_match() never scans further than MAX_MATCH bytes past strstart. */
var MAX_MATCH = 258;
/* Lookahead wanted before matching: MAX_MATCH bytes for the next match plus
 * MIN_MATCH bytes to insert the string that follows it (see deflate_fast).
 */
var MIN_LOOKAHEAD = (MAX_MATCH + MIN_MATCH + 1);
@ -33,6 +40,8 @@ var BS_FINISH_DONE = 4; /* finish done, accept no more input or output */
var OS_CODE = 0x03; // Unix :) . Don't detect, use this default.
var NIL = 0;
/* Map `f` to 2*f, shifted down by 9 once `f` exceeds 4.
 * NOTE(review): presumably used to order flush values -- confirm against callers.
 */
function rank(f) {
  var doubled = f << 1;
  if (f > 4) {
    return doubled - 9;
  }
  return doubled;
}
@ -65,7 +74,7 @@ function flush_pending(strm) {
}
/**
 * Flush the current block through the tree encoder, then push whatever is
 * pending out to the stream.
 *
 * @param {Object}  s     deflate state; reads `block_start`, `strstart` and
 *                        `strm`, and moves `block_start` up to `strstart`.
 * @param {boolean} last  forwarded to `_tr_flush_block` as its "last" argument.
 */
function flush_block_only(s, last) {
  /* A negative block_start is reported to the encoder as -1. */
  var block_offset = (s.block_start >= 0 ? s.block_start : -1);

  /* Exactly one flush per call: the superseded Z_NULL variant of this call
   * must not run alongside it, or the same block is emitted twice.
   */
  trees._tr_flush_block(s, block_offset, s.strstart - s.block_start, last);
  s.block_start = s.strstart;
  flush_pending(s.strm);
}
@ -120,6 +129,7 @@ function read_buf(strm, buf, start, size) {
return len;
}
/* ===========================================================================
* Set match_start to the longest match starting at the given string and
* return its length. Matches shorter or equal to prev_length are discarded,
@ -129,9 +139,108 @@ function read_buf(strm, buf, start, size) {
* string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
* OUT assertion: the match length is not greater than s->lookahead.
*/
//function longest_match(s, cur_match) {
//
//}
/**
 * Walk the hash chain starting at `cur_match` and find the longest earlier
 * occurrence of the string at `s.strstart`.
 *
 * Reads `max_chain_length`, `strstart`, `prev_length`, `nice_match`,
 * `good_match`, `lookahead`, `w_size`, `w_mask`, `window` and `prev` from `s`.
 * Writes `s.match_start` whenever a candidate longer than the best so far
 * is found.
 *
 * @param {Object} s          deflate state
 * @param {number} cur_match  window index of the first candidate (chain head)
 * @returns {number} the best match length found, starting from
 *                   `s.prev_length` and capped at `s.lookahead`
 */
function longest_match(s, cur_match) {
  var chain_length = s.max_chain_length;      /* max hash chain length */
  var scan = s.strstart;                      /* current string */
  var match;                                  /* matched string */
  var len;                                    /* length of current match */
  var best_len = s.prev_length;               /* best match length so far */
  var nice_match = s.nice_match;              /* stop if match long enough */
  var limit = (s.strstart > (s.w_size - MIN_LOOKAHEAD)) ?
      s.strstart - (s.w_size - MIN_LOOKAHEAD) : NIL;

  var _win = s.window; // shortcut

  /* The state field is `w_mask` (the name deflate_fast uses). Reading a
   * non-existent `wmask` yields undefined, which turns `cur_match & wmask`
   * into 0 and stops the chain walk after the first candidate.
   */
  var wmask = s.w_mask;
  var prev  = s.prev;

  /* Stop when cur_match becomes <= limit. To simplify the code,
   * we prevent matches with the string of window index 0.
   */

  var strend = s.strstart + MAX_MATCH;
  var scan_end1 = _win[scan + best_len - 1];
  var scan_end  = _win[scan + best_len];

  /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
   * It is easy to get rid of this optimization if necessary.
   */
  // Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

  /* Do not waste too much time if we already have a good match: */
  if (s.prev_length >= s.good_match) {
    chain_length >>= 2;
  }
  /* Do not look for matches beyond the end of the input. This is necessary
   * to make deflate deterministic.
   */
  if (nice_match > s.lookahead) { nice_match = s.lookahead; }

  // Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");

  do {
    // Assert(cur_match < s->strstart, "no future");
    match = cur_match;

    /* Skip to next match if the match length cannot increase
     * or if the match length is less than 2.  Note that the checks below
     * for insufficient lookahead only occur occasionally for performance
     * reasons.  Bytes past the valid input may therefore be compared, but
     * the returned length is limited to the lookahead, so the output of
     * deflate is not affected by them.
     */
    if (_win[match + best_len]     !== scan_end  ||
        _win[match + best_len - 1] !== scan_end1 ||
        _win[match]                !== _win[scan] ||
        _win[++match]              !== _win[scan + 1]) {
      /* `continue` in a do/while jumps to the loop condition, i.e. it
       * advances to the next chain entry.
       */
      continue;
    }

    /* The check at best_len-1 can be removed because it will be made
     * again later. (This heuristic is not always a win.)
     * It is not necessary to compare scan[2] and match[2] since they
     * are always equal when the other bytes match, given that
     * the hash keys are equal and that HASH_BITS >= 8.
     */
    scan += 2;
    match++;
    // Assert(*scan == *match, "match[2]?");

    /* We check for insufficient lookahead only every 8th comparison;
     * the 256th check will be made at strstart+258.
     */
    do {
      /*jshint noempty:false*/
    } while (_win[++scan] === _win[++match] && _win[++scan] === _win[++match] &&
             _win[++scan] === _win[++match] && _win[++scan] === _win[++match] &&
             _win[++scan] === _win[++match] && _win[++scan] === _win[++match] &&
             _win[++scan] === _win[++match] && _win[++scan] === _win[++match] &&
             scan < strend);

    // Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");

    len = MAX_MATCH - (strend - scan);
    scan = strend - MAX_MATCH;        /* rewind scan to strstart */

    if (len > best_len) {
      s.match_start = cur_match;
      best_len = len;
      if (len >= nice_match) {
        break;
      }
      scan_end1 = _win[scan + best_len - 1];
      scan_end  = _win[scan + best_len];
    }
  } while ((cur_match = prev[cur_match & wmask]) > limit && --chain_length !== 0);

  if (best_len <= s.lookahead) {
    return best_len;
  }
  return s.lookahead;
}
/* ===========================================================================
* Fill the window when the lookahead becomes insufficient.
@ -187,14 +296,14 @@ function fill_window(s) {
p = n;
do {
m = s.head[--p];
s.head[p] = m >= _w_size ? m - _w_size : 0;
s.head[p] = (m >= _w_size ? m - _w_size : 0);
} while (--n);
n = _w_size;
p = n;
do {
m = s.prev[--p];
s.prev[p] = m >= _w_size ? m - _w_size : 0;
s.prev[p] = (m >= _w_size ? m - _w_size : 0);
/* If n is not on any hash chain, prev[n] is garbage but
* its value will never be used.
*/
@ -391,8 +500,130 @@ function deflate_stored(s, flush) {
* new strings in the dictionary only for unmatched strings or for short
* matches. It is used only for the fast compression options.
*/
/**
 * Greedy (non-lazy) compression loop: at each position either emit the match
 * returned by longest_match() or a single literal byte.
 *
 * Reads and updates `lookahead`, `strstart`, `ins_h`, `head`, `prev`,
 * `match_length` and `insert` on `s`; blocks are emitted through
 * flush_block_only().
 *
 * @param {Object} s      deflate state
 * @param {number} flush  flush mode, compared against c.Z_NO_FLUSH and c.Z_FINISH
 * @returns {number} BS_NEED_MORE when more input is needed (Z_NO_FLUSH) or the
 *                   output buffer filled up after a flush; BS_FINISH_STARTED or
 *                   BS_FINISH_DONE for Z_FINISH; BS_BLOCK_DONE otherwise.
 */
function deflate_fast(s, flush) {
  var hash_head;        /* head of the hash chain */
  var bflush;           /* set if current block must be flushed */

  for (;;) {
    /* Make sure that we always have enough lookahead, except
     * at the end of the input file. We need MAX_MATCH bytes
     * for the next match, plus MIN_MATCH bytes to insert the
     * string following the next match.
     */
    if (s.lookahead < MIN_LOOKAHEAD) {
      fill_window(s);
      if (s.lookahead < MIN_LOOKAHEAD && flush === c.Z_NO_FLUSH) {
        return BS_NEED_MORE;
      }
      if (s.lookahead === 0) {
        break; /* flush the current block */
      }
    }

    /* Insert the string window[strstart .. strstart+2] in the
     * dictionary, and set hash_head to the head of the hash chain:
     */
    hash_head = NIL;
    if (s.lookahead >= MIN_MATCH) {
      /*** INSERT_STRING(s, s.strstart, hash_head); ***/
      s.ins_h = ((s.ins_h << s.hash_shift) ^ s.window[s.strstart + MIN_MATCH - 1]) & s.hash_mask;
      hash_head = (s.head[s.ins_h] & 0xffff);
      s.prev[s.strstart & s.w_mask] = s.head[s.ins_h];
      s.head[s.ins_h] = s.strstart;
      /***/
    }

    /* Find the longest match, discarding those <= prev_length.
     * At this point we have always match_length < MIN_MATCH
     */
    if (hash_head !== NIL && ((s.strstart - hash_head) <= (s.w_size - MIN_LOOKAHEAD))) {
      /* To simplify the code, we prevent matches with the string
       * of window index 0 (in particular we have to avoid a match
       * of the string with itself at the start of the input file).
       */
      s.match_length = longest_match(s, hash_head);
      /* longest_match() sets match_start */
    }

    if (s.match_length >= MIN_MATCH) {
      // check_match(s, s.strstart, s.match_start, s.match_length); // for debug only

      /*** _tr_tally_dist(s, s.strstart - s.match_start,
                     s.match_length - MIN_MATCH, bflush); ***/
      bflush = trees._tr_tally(s, s.strstart - s.match_start, s.match_length - MIN_MATCH);

      s.lookahead -= s.match_length;

      /* Insert new strings in the hash table only if the match length
       * is not too large. This saves time but degrades compression.
       */
      if (s.match_length <= s.max_insert_length &&
          s.lookahead >= MIN_MATCH) {
        s.match_length--; /* string at strstart already in table */
        do {
          s.strstart++;
          /*** INSERT_STRING(s, s.strstart, hash_head); ***/
          s.ins_h = ((s.ins_h << s.hash_shift) ^ s.window[s.strstart + MIN_MATCH - 1]) & s.hash_mask;
          hash_head = (s.head[s.ins_h] & 0xffff);
          s.prev[s.strstart & s.w_mask] = s.head[s.ins_h];
          s.head[s.ins_h] = s.strstart;
          /***/
          /* strstart never exceeds WSIZE-MAX_MATCH, so there are
           * always MIN_MATCH bytes ahead.
           */
        } while (--s.match_length !== 0);
        s.strstart++;
      } else {
        /* Long match (or too little lookahead): skip it without hashing the
         * interior positions, then re-seed the rolling hash at the new start.
         */
        s.strstart += s.match_length;
        s.match_length = 0;
        s.ins_h = s.window[s.strstart];
        /* UPDATE_HASH(s, s.ins_h, s.window[s.strstart+1]); */
        s.ins_h = ((s.ins_h << s.hash_shift) ^ s.window[s.strstart + 1]) & s.hash_mask;

        //#if MIN_MATCH != 3
        //                Call UPDATE_HASH() MIN_MATCH-3 more times
        //#endif
        /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
         * matter since it will be recomputed at next deflate call.
         */
      }
    } else {
      /* No match, output a literal byte */
      //Tracevv((stderr,"%c", s.window[s.strstart]));
      /*** _tr_tally_lit(s, s.window[s.strstart], bflush); ***/
      bflush = trees._tr_tally(s, 0, s.window[s.strstart]);

      s.lookahead--;
      s.strstart++;
    }

    if (bflush) {
      /*** FLUSH_BLOCK(s, 0); ***/
      flush_block_only(s, false);
      if (s.strm.avail_out === 0) {
        return BS_NEED_MORE;
      }
      /***/
    }
  }

  /* Input exhausted: record min(strstart, MIN_MATCH-1) in s.insert. */
  s.insert = ((s.strstart < (MIN_MATCH-1)) ? s.strstart : MIN_MATCH-1);

  if (flush === c.Z_FINISH) {
    /*** FLUSH_BLOCK(s, 1); ***/
    flush_block_only(s, true);
    if (s.strm.avail_out === 0) {
      return BS_FINISH_STARTED;
    }
    /***/
    return BS_FINISH_DONE;
  }

  if (s.last_lit) {
    /*** FLUSH_BLOCK(s, 0); ***/
    flush_block_only(s, false);
    if (s.strm.avail_out === 0) {
      return BS_NEED_MORE;
    }
    /***/
  }
  return BS_BLOCK_DONE;
}
/* ===========================================================================
@ -421,6 +652,12 @@ function deflate_huff(/*s, flush*/) {
}
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
* exclude worst case performance for pathological files. Better values may be
* found for specific files.
*/
var Config = function (good_length, max_lazy, nice_length, max_chain, func) {
this.good_length = good_length;
this.max_lazy = max_lazy;
@ -446,12 +683,16 @@ configuration_table = [
new Config(32, 258, 258, 4096, deflate_slow) /* 9 max compression */
];
/* ===========================================================================
* Initialize the "longest match" routines for a new zlib stream
*/
function lm_init(s) {
s.window_size = 2 * s.w_size;
/*** CLEAR_HASH(s); ***/
utils.fill(s.head, NIL);
/* Set the default configuration parameters:
*/
s.max_lazy_match = configuration_table[s.level].max_lazy;
@ -468,10 +709,18 @@ function lm_init(s) {
s.ins_h = 0;
}
/* Tree descriptor: a dynamic tree, the largest code seen in it, and a
 * reference to the matching static tree description.
 */
function TreeDesc() {
  /* the dynamic tree */
  this.dyn_tree = [];
  /* largest code with non zero frequency */
  this.max_code = 0;
  /* the corresponding static tree */
  this.stat_desc = null;
}
function DeflateState() {
// z_streamp strm; /* pointer back to this zlib stream */
this.strm = null; /* pointer back to this zlib stream */
this.status = 0; /* as the name implies */
// pending_buf = Z_NULL; /* output still pending */
this.pending_buf = null; /* output still pending */
this.pending_buf_size = 0; /* size of pending_buf */
this.pending_out = 0; /* next pending byte to output to the stream */
this.pending = 0; /* nb of bytes in the pending buffer */
@ -479,7 +728,7 @@ function DeflateState() {
this.gzhead = null; /* gzip header information to write */
this.gzindex = 0; /* where in extra, name, or comment */
this.method = c.Z_DEFLATED; /* can only be DEFLATED */
this.last_flush = Z_NULL; /* value of flush param for previous deflate call */
this.last_flush = -1; /* value of flush param for previous deflate call */
this.w_size = 0; /* LZ77 window size (32K by default) */
this.w_bits = 0; /* log2(w_size) (8..16) */
@ -558,8 +807,114 @@ function DeflateState() {
this.good_match = 0;
/* Use a faster search when the previous match is longer than this */
this.nice_match = 0;
/* Stop searching when current match exceeds this */
this.nice_match = 0; /* Stop searching when current match exceeds this */
/* used by trees.c: */
/* Didn't use ct_data typedef below to suppress compiler warning */
// TODO: review effectivity of lazy tree structures init
// struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
// struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
// struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
// TODO: consider using 2 separate arrays instead of one interleaved array.
// Use a flat array of DOUBLE size with interleaved data,
// because JS has no efficient equivalent of a C array of structs.
this.dyn_ltree = new Array(HEAP_SIZE * 2);
this.dyn_dtree = new Array((2*D_CODES+1) * 2);
this.bl_tree = new Array((2*BL_CODES+1) * 2);
// TODO: check if can be skipped
utils.fill(this.dyn_ltree, 0);
utils.fill(this.dyn_dtree, 0);
utils.fill(this.bl_tree, 0);
// struct tree_desc_s l_desc; /* desc. for literal tree */
// struct tree_desc_s d_desc; /* desc. for distance tree */
// struct tree_desc_s bl_desc; /* desc. for bit length tree */
// Seems to init better from `tree` with direct structures,
// (?) with separate constructor for bl_desc or not?
// Make sure objects have the same hidden class if needed
this.l_desc = new TreeDesc(); /* desc. for literal tree */
this.d_desc = new TreeDesc(); /* desc. for distance tree */
this.bl_desc = new TreeDesc(); /* desc. for bit length tree */
//ush bl_count[MAX_BITS+1];
this.bl_count = [];
// TODO: fixed size, check if Uint16Array helps
/* number of codes at each bit length for an optimal tree */
//int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
// TODO: fixed size, check if Uint16Array helps
this.heap = new Array(2*L_CODES+1); /* heap used to build the Huffman trees */
utils.fill(this.heap, 0);
this.heap_len = 0; /* number of elements in the heap */
this.heap_max = 0; /* element of largest frequency */
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
* The same heap array is used to build all trees.
*/
this.depth = new Array(2*L_CODES+1); //uch depth[2*L_CODES+1];
utils.fill(this.depth, 0);
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
this.l_buf = 0; /* buffer index for literals or lengths */
this.lit_bufsize = 0;
/* Size of match buffer for literals/lengths. There are 4 reasons for
* limiting lit_bufsize to 64K:
* - frequencies can be kept in 16 bit counters
* - if compression is not successful for the first block, all input
* data is still in the window so we can still emit a stored block even
* when input comes from standard input. (This can also be done for
* all blocks if lit_bufsize is not greater than 32K.)
* - if compression is not successful for a file smaller than 64K, we can
* even emit a stored file instead of a stored block (saving 5 bytes).
* This is applicable only for zip (not gzip or zlib).
* - creating new Huffman trees less frequently may not provide fast
* adaptation to changes in the input data statistics. (Take for
* example a binary file with poorly compressible code followed by
* a highly compressible string table.) Smaller buffer sizes give
* fast adaptation but have of course the overhead of transmitting
* trees more frequently.
* - I can't count above 4
*/
this.last_lit = 0; /* running index in l_buf */
// TODO: fixed size, check if Uint8Array helps
this.d_buf = 0;
/* Buffer index for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
this.opt_len = 0; /* bit length of current block with optimal trees */
this.static_len = 0; /* bit length of current block with static trees */
this.matches = 0; /* number of string matches in current block */
this.insert = 0; /* bytes at end of window left to insert */
this.bi_buf = 0;
/* Output buffer. bits are inserted starting at the bottom (least
* significant bits).
*/
this.bi_valid = 0;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
this.high_water = 0;
/* High water mark offset in window for initialized bytes -- bytes above
* this are set to zero in order to avoid memory check warnings when
* longest match routines access bytes past the input. This is then
* updated to the new high water mark.
*/
}
function deflateResetKeep(strm) {
@ -574,7 +929,7 @@ function deflateResetKeep(strm) {
s.wrap = -s.wrap;
/* was made negative by deflate(..., Z_FINISH); */
}
s.status = s.wrap ? INIT_STATE : BUSY_STATE;
s.status = (s.wrap ? INIT_STATE : BUSY_STATE);
strm.adler = (s.wrap === 2) ?
0 // crc32(0, Z_NULL, 0)
:
@ -593,7 +948,7 @@ function deflateReset(strm) {
}
function deflateInit2(strm, level, method, windowBits, memLevel, strategy) {
if (strm === Z_NULL) {
if (!strm) { // === Z_NULL
return c.Z_STREAM_ERROR;
}
var wrap = 1;
@ -636,10 +991,6 @@ function deflateInit2(strm, level, method, windowBits, memLevel, strategy) {
s.window = utils.arrayCreate(s.w_size * 2);
s.head = utils.array16Create(s.hash_size);
s.prev = utils.array16Create(s.w_size);
// precreate & prefill head/prev arrays to optimize v8 types
// TODO: check if we can remove it, should be inited in lm_init()
utils.fill(s.head, 0);
utils.fill(s.prev, 0);
s.high_water = 0; /* nothing written to s->window yet */
@ -783,7 +1134,7 @@ function deflate(strm, flush) {
*/
if (strm.avail_in !== 0 || s.lookahead !== 0 ||
(flush !== c.Z_NO_FLUSH && s.status !== FINISH_STATE)) {
var bstate = s.strategy === c.Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
var bstate = (s.strategy === c.Z_HUFFMAN_ONLY) ? deflate_huff(s, flush) :
(s.strategy === c.Z_RLE ? deflate_rle(s, flush) :
configuration_table[s.level].func(s, flush));
@ -814,9 +1165,8 @@ function deflate(strm, flush) {
* as a special marker by inflate_sync().
*/
if (flush === c.Z_FULL_FLUSH) {
//CLEAR_HASH(s); /* forget history */
// !!!! seems to be bug -> 0 in original
s.head[s.hash_size - 1] = -1;
/*** CLEAR_HASH(s); ***/ /* forget history */
utils.fill(s.head, NIL);
if (s.lookahead === 0) {
s.strstart = 0;
@ -878,7 +1228,7 @@ function deflateEnd(strm) {
return c.Z_STREAM_ERROR;
}
strm.state = Z_NULL;
strm.state = null;
return status === BUSY_STATE ? c.Z_DATA_ERROR : c.Z_OK;
}

File diff suppressed because it is too large Load diff