From a194382b3c74361dae862f3098effa1671419de4 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Fri, 10 Jun 2022 17:41:46 +0300 Subject: [PATCH] Fix a bug that can crash deflate on some input when using Z_FIXED. https://github.com/madler/zlib/commit/5c44459c3b28a9bd3283aaceab7c615f8020c531 --- lib/zlib/deflate.js | 73 +++++++++++++++++++++++++++++++++------------ lib/zlib/trees.js | 62 +++++++++----------------------------- 2 files changed, 68 insertions(+), 67 deletions(-) diff --git a/lib/zlib/deflate.js b/lib/zlib/deflate.js index 7165d02..cefd2cf 100644 --- a/lib/zlib/deflate.js +++ b/lib/zlib/deflate.js @@ -805,7 +805,7 @@ const deflate_fast = (s, flush) => { /***/ return BS_FINISH_DONE; } - if (s.last_lit) { + if (s.sym_next) { /*** FLUSH_BLOCK(s, 0); ***/ flush_block_only(s, false); if (s.strm.avail_out === 0) { @@ -966,7 +966,7 @@ const deflate_slow = (s, flush) => { /***/ return BS_FINISH_DONE; } - if (s.last_lit) { + if (s.sym_next) { /*** FLUSH_BLOCK(s, 0); ***/ flush_block_only(s, false); if (s.strm.avail_out === 0) { @@ -1065,7 +1065,7 @@ const deflate_rle = (s, flush) => { /***/ return BS_FINISH_DONE; } - if (s.last_lit) { + if (s.sym_next) { /*** FLUSH_BLOCK(s, 0); ***/ flush_block_only(s, false); if (s.strm.avail_out === 0) { @@ -1122,7 +1122,7 @@ const deflate_huff = (s, flush) => { /***/ return BS_FINISH_DONE; } - if (s.last_lit) { + if (s.sym_next) { /*** FLUSH_BLOCK(s, 0); ***/ flush_block_only(s, false); if (s.strm.avail_out === 0) { @@ -1323,7 +1323,7 @@ function DeflateState() { /* Depth of each subtree used as tie breaker for trees of equal frequency */ - this.l_buf = 0; /* buffer index for literals or lengths */ + this.sym_buf = 0; /* buffer for distances and literals/lengths */ this.lit_bufsize = 0; /* Size of match buffer for literals/lengths. 
There are 4 reasons for @@ -1345,13 +1345,8 @@ function DeflateState() { * - I can't count above 4 */ - this.last_lit = 0; /* running index in l_buf */ - - this.d_buf = 0; - /* Buffer index for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. - */ + this.sym_next = 0; /* running index in sym_buf */ + this.sym_end = 0; /* symbol table full when sym_next reaches this */ this.opt_len = 0; /* bit length of current block with optimal trees */ this.static_len = 0; /* bit length of current block with static trees */ @@ -1516,18 +1511,58 @@ const deflateInit2 = (strm, level, method, windowBits, memLevel, strategy) => { s.lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - s.pending_buf_size = s.lit_bufsize * 4; + /* We overlay pending_buf and sym_buf. This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits get to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n-2) bits have been written, just + * after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. 
(Note that the symbol buffer fills when n-1 + * symbols are written.) The closest the writing gets to what is unread is + * then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and + * can range from 128 to 32768. + * + * Therefore, at a minimum, there are 142 bits of space between what is + * written and what is read in the overlain buffers, so the symbols cannot + * be overwritten by the compressed data. That space is actually 139 bits, + * due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. + */ - //overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - //s->pending_buf = (uchf *) overlay; + s.pending_buf_size = s.lit_bufsize * 4; s.pending_buf = new Uint8Array(s.pending_buf_size); // It is offset from `s.pending_buf` (size is `s.lit_bufsize * 2`) - //s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s.d_buf = 1 * s.lit_bufsize; + //s->sym_buf = s->pending_buf + s->lit_bufsize; + s.sym_buf = s.lit_bufsize; - //s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; - s.l_buf = (1 + 2) * s.lit_bufsize; + //s->sym_end = (s->lit_bufsize - 1) * 3; + s.sym_end = (s.lit_bufsize - 1) * 3; + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. 
+ */ s.level = level; s.strategy = strategy; diff --git a/lib/zlib/trees.js b/lib/zlib/trees.js index 06224ef..41c1ed8 100644 --- a/lib/zlib/trees.js +++ b/lib/zlib/trees.js @@ -528,7 +528,7 @@ const init_block = (s) => { s.dyn_ltree[END_BLOCK * 2]/*.Freq*/ = 1; s.opt_len = s.static_len = 0; - s.last_lit = s.matches = 0; + s.sym_next = s.matches = 0; }; @@ -605,16 +605,15 @@ const compress_block = (s, ltree, dtree) => { let dist; /* distance of matched string */ let lc; /* match length or unmatched char (if dist == 0) */ - let lx = 0; /* running index in l_buf */ + let sx = 0; /* running index in sym_buf */ let code; /* the code to send */ let extra; /* number of extra bits to send */ - if (s.last_lit !== 0) { + if (s.sym_next !== 0) { do { - dist = (s.pending_buf[s.d_buf + lx * 2] << 8) | (s.pending_buf[s.d_buf + lx * 2 + 1]); - lc = s.pending_buf[s.l_buf + lx]; - lx++; - + dist = s.pending_buf[s.sym_buf + sx++] & 0xff; + dist += (s.pending_buf[s.sym_buf + sx++] & 0xff) << 8; + lc = s.pending_buf[s.sym_buf + sx++]; if (dist === 0) { send_code(s, lc, ltree); /* send a literal byte */ //Tracecv(isgraph(lc), (stderr," '%c' ", lc)); @@ -639,11 +638,10 @@ const compress_block = (s, ltree, dtree) => } } /* literal or match pair ? 
*/ - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - //Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - // "pendingBuf overflow"); + /* Check that the overlay between pending_buf and sym_buf is ok: */ + //Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); - } while (lx < s.last_lit); + } while (sx < s.sym_next); } send_code(s, END_BLOCK, ltree); @@ -1101,7 +1099,7 @@ const _tr_flush_block = (s, buf, stored_len, last) => // Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", // opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - // s->last_lit)); + // s->sym_next / 3)); if (static_lenb <= opt_lenb) { opt_lenb = static_lenb; } @@ -1153,14 +1151,9 @@ const _tr_tally = (s, dist, lc) => // unsigned dist; /* distance of matched string */ // unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { - //let out_length, in_length, dcode; - - s.pending_buf[s.d_buf + s.last_lit * 2] = (dist >>> 8) & 0xff; - s.pending_buf[s.d_buf + s.last_lit * 2 + 1] = dist & 0xff; - - s.pending_buf[s.l_buf + s.last_lit] = lc & 0xff; - s.last_lit++; - + s.pending_buf[s.sym_buf + s.sym_next++] = dist; + s.pending_buf[s.sym_buf + s.sym_next++] = dist >> 8; + s.pending_buf[s.sym_buf + s.sym_next++] = lc; if (dist === 0) { /* lc is the unmatched char */ s.dyn_ltree[lc * 2]/*.Freq*/++; @@ -1176,34 +1169,7 @@ const _tr_tally = (s, dist, lc) => s.dyn_dtree[d_code(dist) * 2]/*.Freq*/++; } -// (!) 
This block is disabled in zlib defaults, -// don't enable it for binary compatibility - -//#ifdef TRUNCATE_BLOCK -// /* Try to guess if it is profitable to stop the current block here */ -// if ((s.last_lit & 0x1fff) === 0 && s.level > 2) { -// /* Compute an upper bound for the compressed length */ -// out_length = s.last_lit*8; -// in_length = s.strstart - s.block_start; -// -// for (dcode = 0; dcode < D_CODES; dcode++) { -// out_length += s.dyn_dtree[dcode*2]/*.Freq*/ * (5 + extra_dbits[dcode]); -// } -// out_length >>>= 3; -// //Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", -// // s->last_lit, in_length, out_length, -// // 100L - out_length*100L/in_length)); -// if (s.matches < (s.last_lit>>1)/*int /2*/ && out_length < (in_length>>1)/*int /2*/) { -// return true; -// } -// } -//#endif - - return (s.last_lit === s.lit_bufsize - 1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ + return (s.sym_next === s.sym_end); }; module.exports._tr_init = _tr_init;