You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1884 lines
74 KiB
1884 lines
74 KiB
// Deflate.cs |
|
// ------------------------------------------------------------------ |
|
// |
|
// Copyright (c) 2009 Dino Chiesa and Microsoft Corporation. |
|
// All rights reserved. |
|
// |
|
// This code module is part of DotNetZip, a zipfile class library. |
|
// |
|
// ------------------------------------------------------------------ |
|
// |
|
// This code is licensed under the Microsoft Public License. |
|
// See the file License.txt for the license details. |
|
// More info on: http://dotnetzip.codeplex.com |
|
// |
|
// ------------------------------------------------------------------ |
|
// |
|
// last saved (in emacs): |
|
// Time-stamp: <2011-August-03 19:52:15> |
|
// |
|
// ------------------------------------------------------------------ |
|
// |
|
// This module defines the logic for Deflate compression.
|
// |
|
// This code is based on multiple sources: |
|
// - the original zlib v1.2.3 source, which is Copyright (C) 1995-2005 Jean-loup Gailly. |
|
// - the original jzlib, which is Copyright (c) 2000-2003 ymnk, JCraft,Inc. |
|
// |
|
// However, this code is significantly different from both. |
|
// The object model is not the same, and many of the behaviors are different. |
|
// |
|
// In keeping with the license for these other works, the copyrights for |
|
// jzlib and zlib are here. |
|
// |
|
// ----------------------------------------------------------------------- |
|
// Copyright (c) 2000,2001,2002,2003 ymnk, JCraft,Inc. All rights reserved. |
|
// |
|
// Redistribution and use in source and binary forms, with or without |
|
// modification, are permitted provided that the following conditions are met: |
|
// |
|
// 1. Redistributions of source code must retain the above copyright notice, |
|
// this list of conditions and the following disclaimer. |
|
// |
|
// 2. Redistributions in binary form must reproduce the above copyright |
|
// notice, this list of conditions and the following disclaimer in |
|
// the documentation and/or other materials provided with the distribution. |
|
// |
|
// 3. The names of the authors may not be used to endorse or promote products |
|
// derived from this software without specific prior written permission. |
|
// |
|
// THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, |
|
// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
|
// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JCRAFT, |
|
// INC. OR ANY CONTRIBUTORS TO THIS SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, |
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
|
// OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
|
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
// |
|
// ----------------------------------------------------------------------- |
|
// |
|
// This program is based on zlib-1.1.3; credit to authors |
|
// Jean-loup Gailly(jloup@gzip.org) and Mark Adler(madler@alumni.caltech.edu) |
|
// and contributors of zlib. |
|
// |
|
// ----------------------------------------------------------------------- |
|
|
|
|
|
using BestHTTP.PlatformSupport.Memory; |
|
using System; |
|
|
|
#pragma warning disable 0675 |
|
|
|
namespace BestHTTP.Decompression.Zlib |
|
{ |
|
|
|
// Result reported by a compression routine (see CompressFunc) after it has run.
internal enum BlockState
{
    // Block not completed; more input or more output is needed.
    NeedMore = 0,

    // A block flush was performed.
    BlockDone = 1,

    // Finish has started; only more output is needed at the next deflate call.
    FinishStarted = 2,

    // Finish is done; no more input or output is accepted.
    FinishDone = 3
}
|
|
|
// Family of compression routine assigned to a compression level
// (see the Config table: level 0 is Store, levels 1-3 are Fast, levels 4-9 are Slow).
internal enum DeflateFlavor
{
    Store = 0,
    Fast = 1,
    Slow = 2
}
|
|
|
internal sealed class DeflateManager |
|
{ |
|
// Memory-level limits.
// NOTE(review): neither constant is referenced in the part of the file visible here;
// presumably they are consumed by the initialization code further down - confirm.
private static readonly int MEM_LEVEL_MAX = 9;
private static readonly int MEM_LEVEL_DEFAULT = 8;

// Signature of a block-compression routine such as DeflateNone: it receives the
// requested flush mode and reports the resulting block state.
internal delegate BlockState CompressFunc(FlushType flush);
|
|
|
// Match-finder tuning parameters for one compression level, together with the
// table that maps a CompressionLevel onto its parameter set.
internal class Config
{
    // One entry per compression level; Lookup indexes it with the level's integer value.
    private static readonly Config[] Table;

    // Reduce lazy search above this match length: a faster search is used when
    // the previous match is longer than this.
    internal int GoodLength;

    // Do not perform lazy search above this match length. A better match is
    // attempted only when the current match is strictly smaller than this value.
    // This mechanism is used only for compression levels >= 4. For levels 1, 2
    // and 3, MaxLazy is actually MaxInsertLength. (See DeflateFast)
    internal int MaxLazy;

    // Quit search above this match length.
    internal int NiceLength;

    // To speed up deflation, hash chains are never searched beyond this length.
    // A higher limit improves compression ratio but degrades the speed.
    internal int MaxChainLength;

    // Family of compression routine (Store, Fast or Slow) assigned to the level.
    internal DeflateFlavor Flavor;

    static Config()
    {
        Table = new Config[]
        {
            // levels 0..3
            new Config(0, 0, 0, 0, DeflateFlavor.Store),
            new Config(4, 4, 8, 4, DeflateFlavor.Fast),
            new Config(4, 5, 16, 8, DeflateFlavor.Fast),
            new Config(4, 6, 32, 32, DeflateFlavor.Fast),

            // levels 4..9
            new Config(4, 4, 16, 16, DeflateFlavor.Slow),
            new Config(8, 16, 32, 32, DeflateFlavor.Slow),
            new Config(8, 16, 128, 128, DeflateFlavor.Slow),
            new Config(8, 32, 128, 256, DeflateFlavor.Slow),
            new Config(32, 128, 258, 1024, DeflateFlavor.Slow),
            new Config(32, 258, 258, 4096, DeflateFlavor.Slow),
        };
    }

    private Config(int goodLength, int maxLazy, int niceLength, int maxChainLength, DeflateFlavor flavor)
    {
        GoodLength = goodLength;
        MaxLazy = maxLazy;
        NiceLength = niceLength;
        MaxChainLength = maxChainLength;
        Flavor = flavor;
    }

    // Returns the shared parameter set for the given level. The instance comes
    // straight from the static table, so every caller sees the same object.
    public static Config Lookup(CompressionLevel level)
    {
        int index = (int)level;
        return Table[index];
    }
}
|
|
|
|
|
// Compression routine currently in effect.
// NOTE(review): presumably assigned by SetDeflater() from config.Flavor - the
// assignment is not visible in this part of the file.
private CompressFunc DeflateFunction;

// Human-readable status messages.
// NOTE(review): the indexing scheme is not visible here; in zlib the equivalent
// table is indexed by (Z_NEED_DICT - code) - confirm before relying on it.
private static readonly System.String[] _ErrorMessage = new System.String[]
{
    "need dictionary",
    "stream end",
    "",
    "file error",
    "stream error",
    "data error",
    "insufficient memory",
    "buffer error",
    "incompatible version",
    ""
};

// preset dictionary flag in zlib header
private static readonly int PRESET_DICT = 0x20;

// Values for the 'status' field.
// NOTE(review): their use is not visible in this part of the file.
private static readonly int INIT_STATE = 42;
private static readonly int BUSY_STATE = 113;
private static readonly int FINISH_STATE = 666;

// The deflate compression method
private static readonly int Z_DEFLATED = 8;

// The three kinds of block type. The value is shifted left by one and sent in
// the 3-bit block header (see _tr_stored_block and _tr_align).
private static readonly int STORED_BLOCK = 0;
private static readonly int STATIC_TREES = 1;
private static readonly int DYN_TREES = 2;

// Values for the 'data_type' field (see set_data_type).
private static readonly int Z_BINARY = 0;
private static readonly int Z_ASCII = 1;
private static readonly int Z_UNKNOWN = 2;

// Capacity of the bit buffer bi_buf, in bits (see send_bits).
private static readonly int Buf_size = 8 * 2;

// Shortest and longest match lengths.
private static readonly int MIN_MATCH = 3;
private static readonly int MAX_MATCH = 258;

// DeflateNone flushes once the current block spans w_size - MIN_LOOKAHEAD bytes.
private static readonly int MIN_LOOKAHEAD = (MAX_MATCH + MIN_MATCH + 1);

// Number of node slots in the literal/length tree; each slot takes two shorts.
private static readonly int HEAP_SIZE = (2 * InternalConstants.L_CODES + 1);

// Literal/length code that terminates a block.
private static readonly int END_BLOCK = 256;
|
|
|
internal ZlibCodec _codec; // the zlib encoder/decoder
internal int status; // as the name implies

// Output bytes produced by the bit writer (send_bits, bi_flush, copy_block) that
// have not yet been handed to the codec. The same array also backs the
// literal/length and distance buffers addressed through _lengthOffset and
// _distanceOffset (see _tr_tally).
internal byte[] pending;
internal int nextPending; // index of next pending byte to output to the stream
internal int pendingCount; // number of bytes in the pending buffer

internal sbyte data_type; // UNKNOWN, BINARY or ASCII
internal int last_flush; // value of flush param for previous deflate call

internal int w_size; // LZ77 window size (32K by default)
internal int w_bits; // log2(w_size) (8..16)
internal int w_mask; // w_size - 1

//internal byte[] dictionary;

// Sliding window. Input bytes are read into the second half of the window,
// and move to the first half later to keep a dictionary of at least wSize
// bytes. With this organization, matches are limited to a distance of
// wSize-MAX_MATCH bytes, but this ensures that IO is always
// performed with a length multiple of the block size.
//
// To do: use the user input buffer as sliding window.
internal byte[] window;

// Actual size of window: 2*wSize, except when the user input buffer
// is directly used as sliding window.
internal int window_size;

// Link to older string with same hash index. To limit the size of this
// array to 64K, this link is maintained only for the last 32K strings.
// An index in this array is thus a window index modulo 32K.
internal short[] prev;

internal short[] head; // Heads of the hash chains or NIL.

internal int ins_h; // hash index of string to be inserted
internal int hash_size; // number of elements in hash table
internal int hash_bits; // log2(hash_size)
internal int hash_mask; // hash_size-1

// Number of bits by which ins_h must be shifted at each input
// step. It must be such that after MIN_MATCH steps, the oldest
// byte no longer takes part in the hash key, that is:
// hash_shift * MIN_MATCH >= hash_bits
internal int hash_shift;

// Window position at the beginning of the current output block. Gets
// negative when the window is moved backwards.
internal int block_start;

// Tuning parameters for the current compression level (set in _InitializeLazyMatch).
Config config;
internal int match_length; // length of best match
internal int prev_match; // previous match
internal int match_available; // set if previous match exists
// Current position in the window. flush_block_only emits the bytes between
// block_start and strstart.
internal int strstart;
internal int match_start; // start of matching string
internal int lookahead; // number of valid bytes ahead in window

// Length of the best match at previous step. Matches not greater than this
// are discarded. This is used in the lazy match evaluation.
internal int prev_length;

// NOTE(review): the original comment at this position described a
// max_insert_length field ("insert new strings in the hash table only if the
// match length is not greater than this length; used only for compression
// levels <= 3"). No such field is declared here; Config.MaxLazy is documented
// as playing that role for levels 1-3.

internal CompressionLevel compressionLevel; // compression level (0..9, indexes Config's table)
internal CompressionStrategy compressionStrategy; // favor or force Huffman coding
|
|
|
// Dynamic Huffman trees, allocated in the constructor. Each node occupies two
// consecutive shorts: slot [n*2] holds the frequency while the block is being
// tallied (and is sent as the code by send_code), slot [n*2+1] holds the bit length.
internal short[] dyn_ltree; // literal and length tree
internal short[] dyn_dtree; // distance tree
internal short[] bl_tree; // Huffman tree for bit lengths

internal ZTree treeLiterals = new ZTree(); // desc for literal tree
internal ZTree treeDistances = new ZTree(); // desc for distance tree
internal ZTree treeBitLengths = new ZTree(); // desc for bit length tree

// number of codes at each bit length for an optimal tree
internal short[] bl_count = new short[InternalConstants.MAX_BITS + 1];

// heap used to build the Huffman trees
// The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
// The same heap array is used to build all trees.
internal int[] heap = new int[2 * InternalConstants.L_CODES + 1];

internal int heap_len; // number of elements in the heap
internal int heap_max; // element of largest frequency

// Depth of each subtree used as tie breaker for trees of equal frequency
internal sbyte[] depth = new sbyte[2 * InternalConstants.L_CODES + 1];

// Offset within 'pending' of the literal/length buffer: entry i is the single
// byte pending[_lengthOffset + i] (see _tr_tally).
internal int _lengthOffset;

// Size of match buffer for literals/lengths. There are 4 reasons for
// limiting lit_bufsize to 64K:
// - frequencies can be kept in 16 bit counters
// - if compression is not successful for the first block, all input
// data is still in the window so we can still emit a stored block even
// when input comes from standard input. (This can also be done for
// all blocks if lit_bufsize is not greater than 32K.)
// - if compression is not successful for a file smaller than 64K, we can
// even emit a stored file instead of a stored block (saving 5 bytes).
// This is applicable only for zip (not gzip or zlib).
// - creating new Huffman trees less frequently may not provide fast
// adaptation to changes in the input data statistics. (Take for
// example a binary file with poorly compressible code followed by
// a highly compressible string table.) Smaller buffer sizes give
// fast adaptation but have of course the overhead of transmitting
// trees more frequently.
internal int lit_bufsize;

internal int last_lit; // running index in l_buf

// Buffer for distances. To simplify the code, d_buf and l_buf have
// the same number of elements. To use different lengths, an extra flag
// array would be necessary.
//
// Offset within 'pending' of the distance buffer: entry i is stored big-endian
// in pending[_distanceOffset + 2*i] and pending[_distanceOffset + 2*i + 1]
// (see _tr_tally).
internal int _distanceOffset;

internal int opt_len; // bit length of current block with optimal trees
internal int static_len; // bit length of current block with static trees
internal int matches; // number of string matches in current block
internal int last_eob_len; // bit length of EOB code for last block

// Output buffer. bits are inserted starting at the bottom (least
// significant bits).
internal short bi_buf;

// Number of valid bits in bi_buf. All bits above the last valid bit
// are always zero.
internal int bi_valid;
|
|
|
|
|
// Allocates the three dynamic Huffman tree arrays. Every tree node takes two
// consecutive shorts, so each array is twice its node count.
internal DeflateManager()
{
    int literalNodes = HEAP_SIZE;
    int distanceNodes = 2 * InternalConstants.D_CODES + 1;
    int bitLengthNodes = 2 * InternalConstants.BL_CODES + 1;

    dyn_ltree = new short[literalNodes * 2];   // literal and length tree
    dyn_dtree = new short[distanceNodes * 2];  // distance tree
    bl_tree = new short[bitLengthNodes * 2];   // Huffman tree for bit lengths
}
|
|
|
|
|
// lm_init: prepares the match finder for a new stream. Sizes the window,
// empties the hash table, selects the parameters and compression routine for
// the current level, and rewinds all scanning state to the start of the window.
private void _InitializeLazyMatch()
{
    window_size = 2 * w_size;

    // clear the hash - workitem 9063
    Array.Clear(head, 0, hash_size);

    config = Config.Lookup(compressionLevel);
    SetDeflater();

    // Nothing scanned, nothing buffered.
    strstart = 0;
    block_start = 0;
    lookahead = 0;

    // One less than the shortest usable match, i.e. "no match yet".
    prev_length = MIN_MATCH - 1;
    match_length = prev_length;
    match_available = 0;

    ins_h = 0;
}
|
|
|
// Initialize the tree data structures for a new zlib stream: empty the bit
// buffer, attach each tree descriptor to its dynamic array and static
// counterpart, then reset the per-block counters.
private void _InitializeTreeData()
{
    // Empty bit buffer.
    bi_valid = 0;
    bi_buf = 0;
    last_eob_len = 8; // enough lookahead for inflate

    // literal/length tree
    treeLiterals.staticTree = StaticTree.Literals;
    treeLiterals.dyn_tree = dyn_ltree;

    // distance tree
    treeDistances.staticTree = StaticTree.Distances;
    treeDistances.dyn_tree = dyn_dtree;

    // bit length tree
    treeBitLengths.staticTree = StaticTree.BitLengths;
    treeBitLengths.dyn_tree = bl_tree;

    // Initialize the first block of the first file:
    _InitializeBlocks();
}
|
|
|
// Resets the per-block state: zeroes the frequency slot (even index) of every
// code in the three dynamic trees, counts the single END_BLOCK symbol every
// block carries, and clears the length estimates and tally counters.
internal void _InitializeBlocks()
{
    int literalSlots = InternalConstants.L_CODES * 2;
    for (int slot = 0; slot < literalSlots; slot += 2)
    {
        dyn_ltree[slot] = 0;
    }

    int distanceSlots = InternalConstants.D_CODES * 2;
    for (int slot = 0; slot < distanceSlots; slot += 2)
    {
        dyn_dtree[slot] = 0;
    }

    int bitLengthSlots = InternalConstants.BL_CODES * 2;
    for (int slot = 0; slot < bitLengthSlots; slot += 2)
    {
        bl_tree[slot] = 0;
    }

    dyn_ltree[END_BLOCK * 2] = 1;

    static_len = 0;
    opt_len = 0;
    matches = 0;
    last_lit = 0;
}
|
|
|
// Restore the heap property by sifting the node at position k down the heap:
// it is exchanged with the smaller of its two sons for as long as that son is
// smaller, stopping when each father is again smaller than its two sons.
// 'tree' supplies the frequencies compared by _IsSmaller.
internal void pqdownheap(short[] tree, int k)
{
    int sinking = heap[k];
    int hole = k;

    // The sons of position p are 2*p and 2*p+1.
    for (int son = hole << 1; son <= heap_len; son = hole << 1)
    {
        // Pick the smaller of the two sons, if there is a right son.
        if (son < heap_len && _IsSmaller(tree, heap[son + 1], heap[son], depth))
        {
            son++;
        }

        // Done once the sinking node is smaller than both sons.
        if (_IsSmaller(tree, sinking, heap[son], depth))
        {
            break;
        }

        // Move the son up and continue from its old position.
        heap[hole] = heap[son];
        hole = son;
    }

    heap[hole] = sinking;
}
|
|
|
// Orders two tree nodes for the heap: node n comes before node m when its
// frequency (tree[n*2]) is lower, or, on equal frequency, when its subtree
// depth is not greater.
internal static bool _IsSmaller(short[] tree, int n, int m, sbyte[] depth)
{
    short frequencyN = tree[n * 2];
    short frequencyM = tree[m * 2];

    if (frequencyN != frequencyM)
    {
        return frequencyN < frequencyM;
    }

    // Same frequency: the depth breaks the tie.
    return depth[n] <= depth[m];
}
|
|
|
|
|
// Scan a literal or distance tree to determine the frequencies of the codes
// in the bit length tree.
//
// Walks the bit lengths tree[n*2+1] for n = 0..max_code, groups them into runs
// of equal length and, for every run, increments in bl_tree the frequency of
// the symbol(s) that send_tree will emit for it. The branch structure here must
// stay in step with send_tree.
//
// tree:     tree whose bit lengths are scanned; the length slot just past
//           max_code is overwritten with a guard value.
// max_code: largest code index to scan.
internal void scan_tree(short[] tree, int max_code)
{
    int n; // iterates over all tree elements
    int prevlen = -1; // last emitted length
    int curlen; // length of current code
    int nextlen = (int)tree[0 * 2 + 1]; // length of next code
    int count = 0; // repeat count of the current code
    int max_count = 7; // max repeat count
    int min_count = 4; // min repeat count

    // A run of zero lengths may be longer and becomes worth encoding sooner.
    if (nextlen == 0)
    {
        max_count = 138; min_count = 3;
    }
    // Guard: a value that differs from the length before it, so the final run
    // always terminates on the last iteration.
    tree[(max_code + 1) * 2 + 1] = (short)0x7fff;

    for (n = 0; n <= max_code; n++)
    {
        curlen = nextlen; nextlen = (int)tree[(n + 1) * 2 + 1];
        if (++count < max_count && curlen == nextlen)
        {
            // Still inside a run that can grow further.
            continue;
        }
        else if (count < min_count)
        {
            // Run too short for a repeat code: each length is sent literally.
            bl_tree[curlen * 2] = (short)(bl_tree[curlen * 2] + count);
        }
        else if (curlen != 0)
        {
            // Non-zero run: the length itself (unless it equals the previously
            // emitted one) followed by a REP_3_6 repeat code.
            if (curlen != prevlen)
                bl_tree[curlen * 2]++;
            bl_tree[InternalConstants.REP_3_6 * 2]++;
        }
        else if (count <= 10)
        {
            // Short run of zeros.
            bl_tree[InternalConstants.REPZ_3_10 * 2]++;
        }
        else
        {
            // Long run of zeros.
            bl_tree[InternalConstants.REPZ_11_138 * 2]++;
        }
        // Start the next run and pick its limits from the upcoming length.
        count = 0; prevlen = curlen;
        if (nextlen == 0)
        {
            max_count = 138; min_count = 3;
        }
        else if (curlen == nextlen)
        {
            max_count = 6; min_count = 3;
        }
        else
        {
            max_count = 7; min_count = 4;
        }
    }
}
|
|
|
// Construct the Huffman tree for the bit lengths and return the index in
// bl_order of the last bit length code to send. Also adds the cost of
// transmitting the bit length codes and the three counts to opt_len.
internal int build_bl_tree()
{
    // Determine the bit length frequencies for literal and distance trees
    scan_tree(dyn_ltree, treeLiterals.max_code);
    scan_tree(dyn_dtree, treeDistances.max_code);

    // Build the bit length tree. opt_len now includes the length of the tree
    // representations, except the lengths of the bit lengths codes and the
    // 5+5+4 bits for the counts.
    treeBitLengths.build_tree(this);

    // Determine the number of bit length codes to send. The pkzip format
    // requires that at least 4 bit length codes be sent. (appnote.txt says
    // 3 but the actual value used is 4.) So trailing codes of length zero are
    // skipped, but the index never drops below 3 unless all of those are zero.
    int lastIndex = InternalConstants.BL_CODES - 1;
    while (lastIndex >= 3 && bl_tree[ZTree.bl_order[lastIndex] * 2 + 1] == 0)
    {
        lastIndex--;
    }

    // Update opt_len to include the bit length tree and counts
    opt_len += 3 * (lastIndex + 1) + 5 + 5 + 4;

    return lastIndex;
}
|
|
|
|
|
// Send the header for a block using dynamic Huffman trees: the counts, the
// lengths of the bit length codes, the literal tree and the distance tree.
// IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
internal void send_all_trees(int lcodes, int dcodes, int blcodes)
{
    // The three counts, each biased by its minimum value.
    send_bits(lcodes - 257, 5); // not +255 as stated in appnote.txt
    send_bits(dcodes - 1, 5);
    send_bits(blcodes - 4, 4); // not -3 as stated in appnote.txt

    // Bit lengths of the bit length codes, 3 bits each, in bl_order.
    int rank = 0; // index in bl_order
    while (rank < blcodes)
    {
        int lengthSlot = ZTree.bl_order[rank] * 2 + 1;
        send_bits(bl_tree[lengthSlot], 3);
        rank++;
    }

    send_tree(dyn_ltree, lcodes - 1); // literal tree
    send_tree(dyn_dtree, dcodes - 1); // distance tree
}
|
|
|
// Send a literal or distance tree in compressed form, using the codes in
// bl_tree.
//
// Walks the bit lengths tree[n*2+1] for n = 0..max_code with the same run
// detection as scan_tree, and emits for every run the symbols whose
// frequencies scan_tree counted.
//
// tree:     tree whose bit lengths are sent.
// max_code: largest code index to send.
internal void send_tree(short[] tree, int max_code)
{
    int n; // iterates over all tree elements
    int prevlen = -1; // last emitted length
    int curlen; // length of current code
    int nextlen = tree[0 * 2 + 1]; // length of next code
    int count = 0; // repeat count of the current code
    int max_count = 7; // max repeat count
    int min_count = 4; // min repeat count

    // A run of zero lengths may be longer and becomes worth encoding sooner.
    if (nextlen == 0)
    {
        max_count = 138; min_count = 3;
    }

    // No guard is written here: the loop reads the slot just past max_code and
    // relies on whatever value it already holds (scan_tree writes a guard there).
    for (n = 0; n <= max_code; n++)
    {
        curlen = nextlen; nextlen = tree[(n + 1) * 2 + 1];
        if (++count < max_count && curlen == nextlen)
        {
            // Still inside a run that can grow further.
            continue;
        }
        else if (count < min_count)
        {
            // Run too short for a repeat code: send the length 'count' times.
            do
            {
                send_code(curlen, bl_tree);
            }
            while (--count != 0);
        }
        else if (curlen != 0)
        {
            // Non-zero run: send the length once if it differs from the previously
            // emitted one, then REP_3_6 with the repeat count in 2 extra bits.
            if (curlen != prevlen)
            {
                send_code(curlen, bl_tree); count--;
            }
            send_code(InternalConstants.REP_3_6, bl_tree);
            send_bits(count - 3, 2);
        }
        else if (count <= 10)
        {
            // 3..10 zeros: REPZ_3_10 with 3 extra bits.
            send_code(InternalConstants.REPZ_3_10, bl_tree);
            send_bits(count - 3, 3);
        }
        else
        {
            // 11..138 zeros: REPZ_11_138 with 7 extra bits.
            send_code(InternalConstants.REPZ_11_138, bl_tree);
            send_bits(count - 11, 7);
        }
        // Start the next run and pick its limits from the upcoming length.
        count = 0; prevlen = curlen;
        if (nextlen == 0)
        {
            max_count = 138; min_count = 3;
        }
        else if (curlen == nextlen)
        {
            max_count = 6; min_count = 3;
        }
        else
        {
            max_count = 7; min_count = 4;
        }
    }
}
|
|
|
// Output a block of bytes on the stream: appends 'len' bytes of 'p', starting
// at 'start', to the pending buffer.
// IN assertion: there is enough room in pending_buf.
private void put_bytes(byte[] p, int start, int len)
{
    int destination = pendingCount;
    Array.Copy(p, start, pending, destination, len);
    pendingCount = destination + len;
}
|
|
|
// The helpers below are compiled only when NOTNEEDED is defined. The active
// code writes to 'pending' inline instead (see the commented-out put_short /
// put_byte call sites in send_bits, bi_flush and copy_block).
#if NOTNEEDED
// Appends a single byte to the pending buffer.
private void put_byte(byte c)
{
    pending[pendingCount++] = c;
}

// Appends the low 16 bits of b to the pending buffer, least significant byte first.
internal void put_short(int b)
{
    unchecked
    {
        pending[pendingCount++] = (byte)b;
        pending[pendingCount++] = (byte)(b >> 8);
    }
}

// Appends the low 16 bits of b to the pending buffer, most significant byte first.
internal void putShortMSB(int b)
{
    unchecked
    {
        pending[pendingCount++] = (byte)(b >> 8);
        pending[pendingCount++] = (byte)b;
    }
}
#endif
|
|
|
// Sends the code for symbol c from the given tree: the code value is read from
// tree[c*2] and its bit length from tree[c*2+1], both taken as unsigned 16-bit.
internal void send_code(int c, short[] tree)
{
    int slot = c * 2;
    int codeValue = tree[slot] & 0xffff;
    int codeLength = tree[slot + 1] & 0xffff;
    send_bits(codeValue, codeLength);
}
|
|
|
// Appends the 'length' low-order bits of 'value' to the output bit stream.
// Bits accumulate in the 16-bit buffer bi_buf starting at the least significant
// end; whenever the buffer fills, its two bytes are written to 'pending', low
// byte first.
//
// value:  bits to send; callers pass values that fit in 'length' bits.
// length: number of bits to send.
internal void send_bits(int value, int length)
{
    int len = length;
    // The narrowing casts below intentionally drop high bits.
    unchecked
    {
        if (bi_valid > (int)Buf_size - len)
        {
            // Not enough room: fill the buffer with the low part of value...
            bi_buf |= (short)((value << bi_valid) & 0xffff);
            // ...write the full 16 bits out, low byte first...
            pending[pendingCount++] = (byte)bi_buf;
            pending[pendingCount++] = (byte)(bi_buf >> 8);

            // ...and restart the buffer with the bits of value that did not fit.
            bi_buf = (short)((uint)value >> (Buf_size - bi_valid));
            bi_valid += len - Buf_size;
        }
        else
        {
            // Enough room: place value above the bits already buffered.
            bi_buf |= (short)((value << bi_valid) & 0xffff);
            bi_valid += len;
        }
    }
}
|
|
|
// Send one empty static block to give enough lookahead for inflate.
// This takes 10 bits, of which 7 may remain in the bit buffer.
// The current inflate code requires 9 bits of lookahead. If the
// last two codes for the previous block (real code plus EOB) were coded
// on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
// the last real code. In this case we send two empty static blocks instead
// of one. (There are no problems if the previous block is stored or fixed.)
// To simplify the code, we assume the worst case of last real code encoded
// on one bit only.
internal void _tr_align()
{
    // First empty static block: 3-bit header followed by the end-of-block code.
    send_bits(STATIC_TREES << 1, 3);
    send_code(END_BLOCK, StaticTree.lengthAndLiteralsTreeCodes);
    bi_flush();

    // Of the 10 bits for the empty block, we have already sent
    // (10 - bi_valid) bits. The lookahead for the last real code (before
    // the EOB of the previous block) was thus at least one plus the length
    // of the EOB plus what we have just sent of the empty static block.
    int guaranteedLookahead = 1 + last_eob_len + 10 - bi_valid;
    bool secondBlockNeeded = guaranteedLookahead < 9;
    if (secondBlockNeeded)
    {
        send_bits(STATIC_TREES << 1, 3);
        send_code(END_BLOCK, StaticTree.lengthAndLiteralsTreeCodes);
        bi_flush();
    }

    last_eob_len = 7;
}
|
|
|
|
|
// Save the match info and tally the frequency counts. Return true if
// the current block must be flushed.
//
// dist: distance of the matched string, or 0 for a literal.
// lc:   the unmatched byte when dist == 0, otherwise match length - MIN_MATCH.
internal bool _tr_tally(int dist, int lc)
{
    // Record the entry: distance as two bytes (high byte first), lc as one byte.
    int distanceSlot = _distanceOffset + last_lit * 2;
    pending[distanceSlot] = unchecked((byte)((uint)dist >> 8));
    pending[distanceSlot + 1] = unchecked((byte)dist);
    pending[_lengthOffset + last_lit] = unchecked((byte)lc);
    last_lit++;

    if (dist != 0)
    {
        matches++;
        // Here, lc is the match length - MIN_MATCH
        dist--; // dist = match distance - 1
        dyn_ltree[(ZTree.LengthCode[lc] + InternalConstants.LITERALS + 1) * 2]++;
        dyn_dtree[ZTree.DistanceCode(dist) * 2]++;
    }
    else
    {
        // lc is the unmatched char
        dyn_ltree[lc * 2]++;
    }

    // Every 0x2000 entries, for levels above 2, check whether flushing early
    // looks worthwhile.
    bool atCheckpoint = (last_lit & 0x1fff) == 0;
    if (atCheckpoint && (int)compressionLevel > 2)
    {
        // Compute an upper bound for the compressed length
        int estimatedBits = last_lit << 3;
        int inputLength = strstart - block_start;
        for (int code = 0; code < InternalConstants.D_CODES; code++)
        {
            estimatedBits = (int)(estimatedBits + (int)dyn_dtree[code * 2] * (5L + ZTree.ExtraDistanceBits[code]));
        }
        int estimatedBytes = estimatedBits >> 3;

        if ((matches < (last_lit / 2)) && estimatedBytes < inputLength / 2)
        {
            return true;
        }
    }

    // dinoch - wraparound?
    // We avoid equality with lit_bufsize because of wraparound at 64K
    // on 16 bit machines and because stored blocks are restricted to
    // 64K-1 bytes.
    return (last_lit == lit_bufsize - 1) || (last_lit == lit_bufsize);
}
|
|
|
|
|
|
|
// Send the block data compressed using the given Huffman trees: replays the
// entries recorded by _tr_tally, then sends the end-of-block code and remembers
// its bit length in last_eob_len.
//
// ltree: literal/length tree to encode with.
// dtree: distance tree to encode with.
internal void send_compressed_block(short[] ltree, short[] dtree)
{
    if (last_lit != 0)
    {
        int entry = 0; // running index in l_buf
        do
        {
            // Distance is stored high byte first; lc is a single byte.
            int distanceSlot = _distanceOffset + entry * 2;
            int distance = (pending[distanceSlot] << 8) | pending[distanceSlot + 1];
            int lc = pending[_lengthOffset + entry];
            entry++;

            if (distance != 0)
            {
                // Match: lc is the match length - MIN_MATCH.
                int lengthCode = ZTree.LengthCode[lc];
                send_code(lengthCode + InternalConstants.LITERALS + 1, ltree);

                int lengthExtra = ZTree.ExtraLengthBits[lengthCode];
                if (lengthExtra != 0)
                {
                    // send the extra length bits
                    lc -= ZTree.LengthBase[lengthCode];
                    send_bits(lc, lengthExtra);
                }

                distance--; // now the match distance - 1
                int distanceCode = ZTree.DistanceCode(distance);
                send_code(distanceCode, dtree);

                int distanceExtra = ZTree.ExtraDistanceBits[distanceCode];
                if (distanceExtra != 0)
                {
                    // send the extra distance bits
                    distance -= ZTree.DistanceBase[distanceCode];
                    send_bits(distance, distanceExtra);
                }
            }
            else
            {
                // Literal byte.
                send_code(lc, ltree);
            }
        }
        while (entry < last_lit);
    }

    send_code(END_BLOCK, ltree);
    last_eob_len = ltree[END_BLOCK * 2 + 1];
}
|
|
|
|
|
|
|
// Set the data type to ASCII or BINARY, using a crude approximation:
// binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
// IN assertion: the fields freq of dyn_ltree are set and the total of all
// frequencies does not exceed 64K (to fit in an int on 16 bit machines).
internal void set_data_type()
{
    int binaryCount = 0;
    int textCount = 0;

    // Byte values 0..6 count as binary.
    for (int n = 0; n < 7; n++)
    {
        binaryCount += dyn_ltree[n * 2];
    }

    // Byte values 7..127 count as text.
    for (int n = 7; n < 128; n++)
    {
        textCount += dyn_ltree[n * 2];
    }

    // Byte values 128 and above count as binary.
    for (int n = 128; n < InternalConstants.LITERALS; n++)
    {
        binaryCount += dyn_ltree[n * 2];
    }

    if (binaryCount > (textCount >> 2))
    {
        data_type = (sbyte)Z_BINARY;
    }
    else
    {
        data_type = (sbyte)Z_ASCII;
    }
}
|
|
|
|
|
|
|
// Flush the bit buffer, keeping at most 7 bits in it.
//
// When the buffer holds exactly 16 bits, both bytes are written to 'pending'
// (low byte first) and the buffer is emptied. When it holds 8..15 bits, only
// the low byte is written and the remaining bits are shifted down. With fewer
// than 8 bits nothing happens.
//
// The narrowing casts deliberately keep only the low 8 bits, so they are
// marked unchecked (as in send_bits, copy_block and _tr_tally); otherwise a
// build with overflow checking enabled would throw OverflowException here.
internal void bi_flush()
{
    if (bi_valid == 16)
    {
        pending[pendingCount++] = unchecked((byte)bi_buf);
        pending[pendingCount++] = unchecked((byte)(bi_buf >> 8));
        bi_buf = 0;
        bi_valid = 0;
    }
    else if (bi_valid >= 8)
    {
        pending[pendingCount++] = unchecked((byte)bi_buf);
        // Bits above bi_valid are zero, so bi_buf is non-negative here and the
        // shift does not drag in sign bits.
        bi_buf >>= 8;
        bi_valid -= 8;
    }
}
|
|
|
// Flush the bit buffer and align the output on a byte boundary.
//
// Writes every buffered bit to 'pending': two bytes (low byte first) when more
// than 8 bits are buffered, one byte when 1..8 bits are buffered, nothing when
// the buffer is empty. The buffer is empty afterwards.
//
// The narrowing casts deliberately keep only the low 8 bits, so they are
// marked unchecked (as in send_bits, copy_block and _tr_tally); otherwise a
// build with overflow checking enabled would throw OverflowException here.
internal void bi_windup()
{
    if (bi_valid > 8)
    {
        pending[pendingCount++] = unchecked((byte)bi_buf);
        pending[pendingCount++] = unchecked((byte)(bi_buf >> 8));
    }
    else if (bi_valid > 0)
    {
        pending[pendingCount++] = unchecked((byte)bi_buf);
    }
    bi_buf = 0;
    bi_valid = 0;
}
|
|
|
// Copy a stored block, storing first the length and its
// one's complement if requested.
//
// buf:    index in 'window' of the first byte to copy.
// len:    number of bytes to copy.
// header: true to write the length and its complement before the data.
internal void copy_block(int buf, int len, bool header)
{
    bi_windup(); // align on byte boundary
    last_eob_len = 8; // enough lookahead for inflate

    if (header)
    {
        // Low 16 bits of len, then of ~len, each written low byte first.
        unchecked
        {
            int complement = ~len;
            pending[pendingCount++] = (byte)len;
            pending[pendingCount++] = (byte)(len >> 8);
            pending[pendingCount++] = (byte)complement;
            pending[pendingCount++] = (byte)(complement >> 8);
        }
    }

    put_bytes(window, buf, len);
}
|
|
|
// Emits the current block (the window bytes from block_start up to strstart),
// starts the next block at strstart, and asks the codec to flush its pending
// output.
//
// eof: passed through to _tr_flush_block.
internal void flush_block_only(bool eof)
{
    // A negative block_start is reported to _tr_flush_block as -1.
    int blockOrigin = block_start >= 0 ? block_start : -1;
    int blockLength = strstart - block_start;

    _tr_flush_block(blockOrigin, blockLength, eof);
    block_start = strstart;
    _codec.flush_pending();
}
|
|
|
// Copy without compression as much as possible from the input stream, return
// the current block state.
// This function does not insert new strings in the dictionary since
// uncompressible data is probably not useful. This function is used
// only for the level=0 compression option.
// NOTE: this function should be optimized to avoid extra copying from
// window to pending_buf.
//
// flush: requested flush mode. With FlushType.None the method returns NeedMore
//        as soon as the input is exhausted; with FlushType.Finish the final
//        block is flushed with eof set.
// Returns NeedMore whenever the codec has no output space left (FinishStarted
// in that case if flush is Finish), otherwise BlockDone, or FinishDone when
// flush is Finish.
internal BlockState DeflateNone(FlushType flush)
{
    // Stored blocks are limited to 0xffff bytes, pending is limited
    // to pending_buf_size, and each stored block has a 5 byte header:

    int max_block_size = 0xffff;
    int max_start;

    if (max_block_size > pending.Length - 5)
    {
        max_block_size = pending.Length - 5;
    }

    // Copy as much as possible from input to output:
    while (true)
    {
        // Fill the window as much as possible:
        if (lookahead <= 1)
        {
            _fillWindow();
            if (lookahead == 0 && flush == FlushType.None)
                return BlockState.NeedMore;
            if (lookahead == 0)
                break; // flush the current block
        }

        // Consume everything that is available; nothing is searched for matches.
        strstart += lookahead;
        lookahead = 0;

        // Emit a stored block if pending will be full:
        max_start = block_start + max_block_size;
        if (strstart == 0 || strstart >= max_start)
        {
            // strstart == 0 is possible when wraparound on 16-bit machine
            // Keep the bytes beyond the block limit as lookahead for the next round.
            lookahead = (int)(strstart - max_start);
            strstart = (int)max_start;

            flush_block_only(false);
            if (_codec.AvailableBytesOut == 0)
                return BlockState.NeedMore;
        }

        // Flush if we may have to slide, otherwise block_start may become
        // negative and the data will be gone:
        if (strstart - block_start >= w_size - MIN_LOOKAHEAD)
        {
            flush_block_only(false);
            if (_codec.AvailableBytesOut == 0)
                return BlockState.NeedMore;
        }
    }

    // Input exhausted and a flush was requested: emit what is left.
    flush_block_only(flush == FlushType.Finish);
    if (_codec.AvailableBytesOut == 0)
        return (flush == FlushType.Finish) ? BlockState.FinishStarted : BlockState.NeedMore;

    return flush == FlushType.Finish ? BlockState.FinishDone : BlockState.BlockDone;
}
|
|
|
|
|
// Send a stored block.
//
// buf:        offset of the input data, passed through to copy_block.
// stored_len: length of the input data.
// eof:        true if this is the last block; sets the low bit of the
//             3-bit block header.
internal void _tr_stored_block(int buf, int stored_len, bool eof)
{
    send_bits((STORED_BLOCK << 1) + (eof ? 1 : 0), 3); // send block type
    copy_block(buf, stored_len, true); // with header
}

// Determine the best encoding for the current block: dynamic trees, static
// trees or store, and output the encoded block to the zip file.
//
// buf:        offset of the input block, or -1 when it is no longer
//             available (in which case a stored block is never chosen).
// stored_len: length of the input block.
// eof:        true if this is the last block of the stream.
internal void _tr_flush_block(int buf, int stored_len, bool eof)
{
    int opt_lenb, static_lenb; // opt_len and static_len in bytes
    int max_blindex = 0; // index of last bit length code of non zero freq

    // Build the Huffman trees unless a stored block is forced
    if (compressionLevel > 0)
    {
        // Check if the file is ascii or binary
        if (data_type == Z_UNKNOWN)
            set_data_type();

        // Construct the literal and distance trees
        treeLiterals.build_tree(this);

        treeDistances.build_tree(this);

        // At this point, opt_len and static_len are the total bit lengths of
        // the compressed block data, excluding the tree representations.

        // Build the bit length tree for the above two trees, and get the index
        // in bl_order of the last bit length code to send.
        max_blindex = build_bl_tree();

        // Determine the best encoding. Compute first the block length in bytes
        // (+3 for the block header bits, +7 to round up to a whole byte).
        opt_lenb = (opt_len + 3 + 7) >> 3;
        static_lenb = (static_len + 3 + 7) >> 3;

        if (static_lenb <= opt_lenb)
            opt_lenb = static_lenb;
    }
    else
    {
        opt_lenb = static_lenb = stored_len + 5; // force a stored block
    }

    if (stored_len + 4 <= opt_lenb && buf != -1)
    {
        // 4: two words for the lengths
        // The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
        // Otherwise we can't have processed more than WSIZE input bytes since
        // the last block flush, because compression would have been
        // successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
        // transform a block into a stored block.
        _tr_stored_block(buf, stored_len, eof);
    }
    else if (static_lenb == opt_lenb)
    {
        // Static trees are at least as small as dynamic ones.
        send_bits((STATIC_TREES << 1) + (eof ? 1 : 0), 3);
        send_compressed_block(StaticTree.lengthAndLiteralsTreeCodes, StaticTree.distTreeCodes);
    }
    else
    {
        send_bits((DYN_TREES << 1) + (eof ? 1 : 0), 3);
        send_all_trees(treeLiterals.max_code + 1, treeDistances.max_code + 1, max_blindex + 1);
        send_compressed_block(dyn_ltree, dyn_dtree);
    }

    // The above check is made mod 2^32, for files larger than 512 MB
    // and uLong implemented on 32 bits.
    // NOTE(review): no such check is present in this method; the comment
    // above appears to be left over from the code this was ported from.

    _InitializeBlocks();

    if (eof)
    {
        bi_windup();
    }
}

// Fill the window when the lookahead becomes insufficient.
// Updates strstart and lookahead.
//
// IN assertion: lookahead < MIN_LOOKAHEAD
// OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
//    At least one byte has been read, or avail_in == 0; reads are
//    performed for at least two bytes (required for the zip translate_eol
//    option -- not supported here).
private void _fillWindow()
{
    int n, m;
    int p;
    int more; // Amount of free space at the end of the window.

    do
    {
        more = (window_size - lookahead - strstart);

        // Deal with !@#$% 64K limit:
        if (more == 0 && strstart == 0 && lookahead == 0)
        {
            more = w_size;
        }
        else if (more == -1)
        {
            // Very unlikely, but possible on 16 bit machine if strstart == 0
            // and lookahead == 1 (input done one byte at time)
            more--;
        }
        else if (strstart >= w_size + w_size - MIN_LOOKAHEAD)
        {
            // If the window is almost full and there is insufficient lookahead,
            // move the upper half to the lower one to make room in the upper half.
            Array.Copy(window, w_size, window, 0, w_size);
            match_start -= w_size;
            strstart -= w_size; // we now have strstart >= MAX_DIST
            block_start -= w_size;

            // Slide the hash table (could be avoided with 32 bit values
            // at the expense of memory usage). We slide even when level == 0
            // to keep the hash table consistent if we switch back to level > 0
            // later. (Using level 0 permanently is not an optimal usage of
            // zlib, so we don't care about this pathological case.)

            // Entries are stored as shorts; "& 0xffff" reads them back as
            // unsigned positions. Positions that fall out of the window become 0.
            n = hash_size;
            p = n;
            do
            {
                m = (head[--p] & 0xffff);
                head[p] = (short)((m >= w_size) ? (m - w_size) : 0);
            }
            while (--n != 0);

            n = w_size;
            p = n;
            do
            {
                m = (prev[--p] & 0xffff);
                prev[p] = (short)((m >= w_size) ? (m - w_size) : 0);
                // If n is not on any hash chain, prev[n] is garbage but
                // its value will never be used.
            }
            while (--n != 0);
            more += w_size;
        }

        if (_codec.AvailableBytesIn == 0)
            return;

        // If there was no sliding:
        //    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
        //    more == window_size - lookahead - strstart
        // => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
        // => more >= window_size - 2*WSIZE + 2
        // In the BIG_MEM or MMAP case (not yet supported),
        //   window_size == input_size + MIN_LOOKAHEAD  &&
        //   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
        // Otherwise, window_size == 2*WSIZE so more >= 2.
        // If there was sliding, more >= WSIZE. So in all cases, more >= 2.

        n = _codec.read_buf(window, strstart + lookahead, more);
        lookahead += n;

        // Initialize the hash value now that we have some input:
        if (lookahead >= MIN_MATCH)
        {
            ins_h = window[strstart] & 0xff;
            ins_h = (((ins_h) << hash_shift) ^ (window[strstart + 1] & 0xff)) & hash_mask;
        }
        // If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
        // but this is not important since only literal bytes will be emitted.
    }
    while (lookahead < MIN_LOOKAHEAD && _codec.AvailableBytesIn != 0);
}

// Compress as much as possible from the input stream, return the current
// block state.
// This function does not perform lazy evaluation of matches and inserts
// new strings in the dictionary only for unmatched strings or for short
// matches. It is used only for the fast compression options.
//
// flush:   the flush mode requested by the caller.
// returns: NeedMore when more input or more output space is required;
//          FinishStarted / FinishDone when flush == Finish (output space
//          exhausted / still available after the final block);
//          BlockDone otherwise.
internal BlockState DeflateFast(FlushType flush)
{
    // short hash_head = 0; // head of the hash chain
    int hash_head = 0; // head of the hash chain
    bool bflush; // set if current block must be flushed

    while (true)
    {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need MAX_MATCH bytes
        // for the next match, plus MIN_MATCH bytes to insert the
        // string following the next match.
        if (lookahead < MIN_LOOKAHEAD)
        {
            _fillWindow();
            if (lookahead < MIN_LOOKAHEAD && flush == FlushType.None)
            {
                return BlockState.NeedMore;
            }
            if (lookahead == 0)
                break; // flush the current block
        }

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:
        if (lookahead >= MIN_MATCH)
        {
            ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask;

            // prev[strstart&w_mask]=hash_head=head[ins_h];
            hash_head = (head[ins_h] & 0xffff);
            prev[strstart & w_mask] = head[ins_h];
            head[ins_h] = unchecked((short)strstart);
        }

        // Find the longest match, discarding those <= prev_length.
        // At this point we have always match_length < MIN_MATCH

        if (hash_head != 0 && ((strstart - hash_head) & 0xffff) <= w_size - MIN_LOOKAHEAD)
        {
            // To simplify the code, we prevent matches with the string
            // of window index 0 (in particular we have to avoid a match
            // of the string with itself at the start of the input file).
            if (compressionStrategy != CompressionStrategy.HuffmanOnly)
            {
                match_length = longest_match(hash_head);
            }
            // longest_match() sets match_start
        }
        if (match_length >= MIN_MATCH)
        {
            // check_match(strstart, match_start, match_length);

            bflush = _tr_tally(strstart - match_start, match_length - MIN_MATCH);

            lookahead -= match_length;

            // Insert new strings in the hash table only if the match length
            // is not too large. This saves time but degrades compression.
            if (match_length <= config.MaxLazy && lookahead >= MIN_MATCH)
            {
                match_length--; // string at strstart already in hash table
                do
                {
                    strstart++;

                    ins_h = ((ins_h << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask;
                    // prev[strstart&w_mask]=hash_head=head[ins_h];
                    hash_head = (head[ins_h] & 0xffff);
                    prev[strstart & w_mask] = head[ins_h];
                    head[ins_h] = unchecked((short)strstart);

                    // strstart never exceeds WSIZE-MAX_MATCH, so there are
                    // always MIN_MATCH bytes ahead.
                }
                while (--match_length != 0);
                strstart++;
            }
            else
            {
                // Skip the matched bytes without hashing them and restart
                // the rolling hash at the new position.
                strstart += match_length;
                match_length = 0;
                ins_h = window[strstart] & 0xff;

                ins_h = (((ins_h) << hash_shift) ^ (window[strstart + 1] & 0xff)) & hash_mask;
                // If lookahead < MIN_MATCH, ins_h is garbage, but it does not
                // matter since it will be recomputed at next deflate call.
            }
        }
        else
        {
            // No match, output a literal byte

            bflush = _tr_tally(0, window[strstart] & 0xff);
            lookahead--;
            strstart++;
        }
        if (bflush)
        {
            flush_block_only(false);
            if (_codec.AvailableBytesOut == 0)
                return BlockState.NeedMore;
        }
    }

    // Input exhausted with a flush requested: emit what is left
    // (marked as the last block only for FlushType.Finish).
    flush_block_only(flush == FlushType.Finish);
    if (_codec.AvailableBytesOut == 0)
    {
        if (flush == FlushType.Finish)
            return BlockState.FinishStarted;
        else
            return BlockState.NeedMore;
    }
    return flush == FlushType.Finish ? BlockState.FinishDone : BlockState.BlockDone;
}

// Compress as much as possible from the input stream and return the
// current block state, like DeflateFast, but with better compression.
// We use a lazy evaluation for matches: a match is finally adopted only
// if there is no better match at the next window position.
//
// flush:   the flush mode requested by the caller.
// returns: NeedMore when more input or more output space is required;
//          FinishStarted / FinishDone when flush == Finish (output space
//          exhausted / still available after the final block);
//          BlockDone otherwise.
internal BlockState DeflateSlow(FlushType flush)
{
    // short hash_head = 0;    // head of hash chain
    int hash_head = 0; // head of hash chain
    bool bflush; // set if current block must be flushed

    // Process the input block.
    while (true)
    {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need MAX_MATCH bytes
        // for the next match, plus MIN_MATCH bytes to insert the
        // string following the next match.

        if (lookahead < MIN_LOOKAHEAD)
        {
            _fillWindow();
            if (lookahead < MIN_LOOKAHEAD && flush == FlushType.None)
                return BlockState.NeedMore;

            if (lookahead == 0)
                break; // flush the current block
        }

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:

        if (lookahead >= MIN_MATCH)
        {
            ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask;
            // prev[strstart&w_mask]=hash_head=head[ins_h];
            hash_head = (head[ins_h] & 0xffff);
            prev[strstart & w_mask] = head[ins_h];
            head[ins_h] = unchecked((short)strstart);
        }

        // Find the longest match, discarding those <= prev_length.
        prev_length = match_length;
        prev_match = match_start;
        match_length = MIN_MATCH - 1;

        if (hash_head != 0 && prev_length < config.MaxLazy &&
            ((strstart - hash_head) & 0xffff) <= w_size - MIN_LOOKAHEAD)
        {
            // To simplify the code, we prevent matches with the string
            // of window index 0 (in particular we have to avoid a match
            // of the string with itself at the start of the input file).

            if (compressionStrategy != CompressionStrategy.HuffmanOnly)
            {
                match_length = longest_match(hash_head);
            }
            // longest_match() sets match_start

            if (match_length <= 5 && (compressionStrategy == CompressionStrategy.Filtered ||
                                      (match_length == MIN_MATCH && strstart - match_start > 4096)))
            {

                // If prev_match is also MIN_MATCH, match_start is garbage
                // but we will ignore the current match anyway.
                match_length = MIN_MATCH - 1;
            }
        }

        // If there was a match at the previous step and the current
        // match is not better, output the previous match:
        if (prev_length >= MIN_MATCH && match_length <= prev_length)
        {
            int max_insert = strstart + lookahead - MIN_MATCH;
            // Do not insert strings in hash table beyond this.

            // check_match(strstart-1, prev_match, prev_length);

            bflush = _tr_tally(strstart - 1 - prev_match, prev_length - MIN_MATCH);

            // Insert in hash table all strings up to the end of the match.
            // strstart-1 and strstart are already inserted. If there is not
            // enough lookahead, the last two strings are not inserted in
            // the hash table.
            lookahead -= (prev_length - 1);
            prev_length -= 2;
            do
            {
                if (++strstart <= max_insert)
                {
                    ins_h = (((ins_h) << hash_shift) ^ (window[(strstart) + (MIN_MATCH - 1)] & 0xff)) & hash_mask;
                    //prev[strstart&w_mask]=hash_head=head[ins_h];
                    hash_head = (head[ins_h] & 0xffff);
                    prev[strstart & w_mask] = head[ins_h];
                    head[ins_h] = unchecked((short)strstart);
                }
            }
            while (--prev_length != 0);
            match_available = 0;
            match_length = MIN_MATCH - 1;
            strstart++;

            if (bflush)
            {
                flush_block_only(false);
                if (_codec.AvailableBytesOut == 0)
                    return BlockState.NeedMore;
            }
        }
        else if (match_available != 0)
        {

            // If there was no match at the previous position, output a
            // single literal. If there was a match but the current match
            // is longer, truncate the previous match to a single literal.

            bflush = _tr_tally(0, window[strstart - 1] & 0xff);

            if (bflush)
            {
                flush_block_only(false);
            }
            strstart++;
            lookahead--;
            if (_codec.AvailableBytesOut == 0)
                return BlockState.NeedMore;
        }
        else
        {
            // There is no previous match to compare with, wait for
            // the next step to decide.

            match_available = 1;
            strstart++;
            lookahead--;
        }
    }

    // A byte is still waiting for the lazy decision: emit it as a literal.
    // The returned flag is not acted on because a flush follows unconditionally.
    if (match_available != 0)
    {
        bflush = _tr_tally(0, window[strstart - 1] & 0xff);
        match_available = 0;
    }
    flush_block_only(flush == FlushType.Finish);

    if (_codec.AvailableBytesOut == 0)
    {
        if (flush == FlushType.Finish)
            return BlockState.FinishStarted;
        else
            return BlockState.NeedMore;
    }

    return flush == FlushType.Finish ? BlockState.FinishDone : BlockState.BlockDone;
}

// Walk the hash chain starting at cur_match looking for the longest
// match with the string at strstart. Sets match_start to the start of
// the best match found that is longer than prev_length.
//
// cur_match: window position of the head of the hash chain to search.
// returns:   the best match length found (initially prev_length),
//            capped at lookahead.
internal int longest_match(int cur_match)
{
    int chain_length = config.MaxChainLength; // max hash chain length
    int scan = strstart; // current string
    int match; // matched string
    int len; // length of current match
    int best_len = prev_length; // best match length so far
    int limit = strstart > (w_size - MIN_LOOKAHEAD) ? strstart - (w_size - MIN_LOOKAHEAD) : 0;

    int niceLength = config.NiceLength;

    // Stop when cur_match becomes <= limit. To simplify the code,
    // we prevent matches with the string of window index 0.

    int wmask = w_mask;

    int strend = strstart + MAX_MATCH;
    byte scan_end1 = window[scan + best_len - 1];
    byte scan_end = window[scan + best_len];

    // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
    // It is easy to get rid of this optimization if necessary.

    // Do not waste too much time if we already have a good match:
    if (prev_length >= config.GoodLength)
    {
        chain_length >>= 2;
    }

    // Do not look for matches beyond the end of the input. This is necessary
    // to make deflate deterministic.
    if (niceLength > lookahead)
        niceLength = lookahead;

    do
    {
        match = cur_match;

        // Skip to next match if the match length cannot increase
        // or if the match length is less than 2:
        if (window[match + best_len] != scan_end ||
            window[match + best_len - 1] != scan_end1 ||
            window[match] != window[scan] ||
            window[++match] != window[scan + 1])
            continue;

        // The check at best_len-1 can be removed because it will be made
        // again later. (This heuristic is not always a win.)
        // It is not necessary to compare scan[2] and match[2] since they
        // are always equal when the other bytes match, given that
        // the hash keys are equal and that HASH_BITS >= 8.
        scan += 2; match++;

        // We check for insufficient lookahead only every 8th comparison;
        // the 256th check will be made at strstart+258.
        do
        {
        }
        while (window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] &&
               window[++scan] == window[++match] && scan < strend);

        // scan stopped at the first mismatch (or at strend); rewind it to
        // strstart for the next candidate.
        len = MAX_MATCH - (int)(strend - scan);
        scan = strend - MAX_MATCH;

        if (len > best_len)
        {
            match_start = cur_match;
            best_len = len;
            if (len >= niceLength)
                break;
            scan_end1 = window[scan + best_len - 1];
            scan_end = window[scan + best_len];
        }
    }
    while ((cur_match = (prev[cur_match & wmask] & 0xffff)) > limit && --chain_length != 0);

    if (best_len <= lookahead)
        return best_len;
    return lookahead;
}

// Set by Deflate() once the RFC 1950 trailer (Adler-32) has been written,
// so that the trailer is emitted only once; cleared by Reset().
private bool Rfc1950BytesEmitted = false;

// Backing field for WantRfc1950HeaderBytes.
private bool _WantRfc1950HeaderBytes = true;

// When true, Reset() puts the stream in INIT_STATE so that Deflate()
// writes the RFC 1950 header, and Deflate(Finish) appends the Adler-32
// trailer. When false, Reset() starts directly in BUSY_STATE and no
// trailer is written.
internal bool WantRfc1950HeaderBytes
{
    get { return _WantRfc1950HeaderBytes; }
    set { _WantRfc1950HeaderBytes = value; }
}

// Initialize with the maximum window size (ZlibConstants.WindowBitsMax);
// the overload this forwards to supplies the remaining defaults.
internal int Initialize(ZlibCodec codec, CompressionLevel level)
{
    return Initialize(codec, level, ZlibConstants.WindowBitsMax);
}

// Initialize with the given window bits, the default memory level
// (MEM_LEVEL_DEFAULT) and the default compression strategy.
internal int Initialize(ZlibCodec codec, CompressionLevel level, int bits)
{
    return Initialize(codec, level, bits, MEM_LEVEL_DEFAULT, CompressionStrategy.Default);
}

// Initialize with the given window bits and compression strategy, using
// the default memory level (MEM_LEVEL_DEFAULT).
internal int Initialize(ZlibCodec codec, CompressionLevel level, int bits, CompressionStrategy compressionStrategy)
{
    return Initialize(codec, level, bits, MEM_LEVEL_DEFAULT, compressionStrategy);
}

// Bind this deflater to a codec, size and allocate its buffers, and
// reset it so it is ready for the first call to Deflate().
//
// codec:      the codec this deflater reads from and writes to; its
//             dstate is set to this instance.
// level:      the compression level.
// windowBits: base-2 logarithm of the window size; must be in 9..15.
// memLevel:   controls the hash table and pending buffer sizes; must be
//             in 1..MEM_LEVEL_MAX.
// strategy:   the compression strategy.
// returns:    ZlibConstants.Z_OK.
// throws:     ZlibException when windowBits or memLevel is out of range.
internal int Initialize(ZlibCodec codec, CompressionLevel level, int windowBits, int memLevel, CompressionStrategy strategy)
{
    _codec = codec;
    _codec.Message = null;

    // validation
    if (windowBits < 9 || windowBits > 15)
        throw new ZlibException("windowBits must be in the range 9..15.");

    if (memLevel < 1 || memLevel > MEM_LEVEL_MAX)
        throw new ZlibException(String.Format("memLevel must be in the range 1.. {0}", MEM_LEVEL_MAX));

    _codec.dstate = this;

    w_bits = windowBits;
    w_size = 1 << w_bits;
    w_mask = w_size - 1;

    hash_bits = memLevel + 7;
    hash_size = 1 << hash_bits;
    hash_mask = hash_size - 1;
    hash_shift = ((hash_bits + MIN_MATCH - 1) / MIN_MATCH);

    // The window holds two w_size halves; _fillWindow slides the upper
    // half down when it fills up.
    window = BufferPool.Get(w_size * 2, false);
    prev = new short[w_size];
    head = new short[hash_size];

    // for memLevel==8, this will be 16384, 16k
    lit_bufsize = 1 << (memLevel + 6);

    // Use a single array as the buffer for data pending compression,
    // the output distance codes, and the output length codes (aka tree).
    // orig comment: This works just fine since the average
    // output size for (length,distance) codes is <= 24 bits.
    pending = BufferPool.Get(lit_bufsize * 4, false);
    _distanceOffset = lit_bufsize;
    _lengthOffset = (1 + 2) * lit_bufsize;

    // So, for memLevel 8, the length of the pending buffer is 65536. 64k.
    // The first 16k are pending bytes.
    // The middle slice, of 32k, is used for distance codes.
    // The final 16k are length codes.

    this.compressionLevel = level;
    this.compressionStrategy = strategy;

    Reset();
    return ZlibConstants.Z_OK;
}

// Return the deflater to its initial state without reallocating buffers:
// clears the codec's byte counters and message, empties the pending
// output, re-seeds the Adler-32 checksum, and re-initializes the tree
// data and lazy-match state.
internal void Reset()
{
    _codec.TotalBytesIn = _codec.TotalBytesOut = 0;
    _codec.Message = null;
    //strm.data_type = Z_UNKNOWN;

    pendingCount = 0;
    nextPending = 0;

    Rfc1950BytesEmitted = false;

    // INIT_STATE makes the next Deflate() call write the RFC 1950 header;
    // BUSY_STATE skips it.
    status = (WantRfc1950HeaderBytes) ? INIT_STATE : BUSY_STATE;
    _codec._Adler32 = Adler.Adler32(0, null, 0, 0);

    last_flush = (int)FlushType.None;

    _InitializeTreeData();
    _InitializeLazyMatch();
}

// Release the buffers held by this deflater.
//
// returns: Z_STREAM_ERROR (and nothing is released) when the status is
//          not one of INIT_STATE, BUSY_STATE or FINISH_STATE;
//          Z_DATA_ERROR when the stream was still in BUSY_STATE, i.e. it
//          was ended before being finished; Z_OK otherwise.
internal int End()
{
    if (status != INIT_STATE && status != BUSY_STATE && status != FINISH_STATE)
    {
        return ZlibConstants.Z_STREAM_ERROR;
    }
    // Deallocate in reverse order of allocations:
    BufferPool.Release(pending);
    pending = null;
    head = null;
    prev = null;
    BufferPool.Release(window);
    window = null;
    // free
    // dstate=null;
    return status == BUSY_STATE ? ZlibConstants.Z_DATA_ERROR : ZlibConstants.Z_OK;
}

// Select the compression routine that matches the current config's
// flavor. DeflateFunction is left unchanged for any other flavor value.
private void SetDeflater()
{
    switch (config.Flavor)
    {
        case DeflateFlavor.Store:
            DeflateFunction = DeflateNone;
            break;
        case DeflateFlavor.Fast:
            DeflateFunction = DeflateFast;
            break;
        case DeflateFlavor.Slow:
            DeflateFunction = DeflateSlow;
            break;
    }
}

// Change the compression level and strategy of a deflater that may
// already be in use.
//
// level:    the new compression level.
// strategy: the new compression strategy.
// returns:  Z_OK, or the result of the partial flush performed when the
//           deflate flavor changes after input has been consumed.
internal int SetParams(CompressionLevel level, CompressionStrategy strategy)
{
    int result = ZlibConstants.Z_OK;

    if (compressionLevel != level)
    {
        Config newConfig = Config.Lookup(level);

        // change in the deflate flavor (Fast vs slow vs none)?
        if (newConfig.Flavor != config.Flavor && _codec.TotalBytesIn != 0)
        {
            // Flush the last buffer:
            result = _codec.Deflate(FlushType.Partial);
        }

        compressionLevel = level;
        config = newConfig;
        SetDeflater();
    }

    // no need to flush with change in strategy?  Really?
    compressionStrategy = strategy;

    return result;
}

// Preload the window and hash table with a preset dictionary. Must be
// called before the first call to Deflate() (status == INIT_STATE).
//
// dictionary: the dictionary bytes. When longer than
//             w_size - MIN_LOOKAHEAD only its tail is used; when shorter
//             than MIN_MATCH only the Adler-32 checksum is updated.
// returns:    ZlibConstants.Z_OK.
// throws:     ZlibException when dictionary is null or the stream is not
//             in INIT_STATE.
internal int SetDictionary(byte[] dictionary)
{
    // Validate before dereferencing the argument; reading
    // dictionary.Length first would turn a null argument into a
    // NullReferenceException instead of the intended ZlibException.
    if (dictionary == null || status != INIT_STATE)
        throw new ZlibException("Stream error.");

    int length = dictionary.Length;
    int index = 0;

    // The checksum always covers the whole dictionary, even when only
    // its tail is copied into the window below.
    _codec._Adler32 = Adler.Adler32(_codec._Adler32, dictionary, 0, dictionary.Length);

    if (length < MIN_MATCH)
        return ZlibConstants.Z_OK;
    if (length > w_size - MIN_LOOKAHEAD)
    {
        length = w_size - MIN_LOOKAHEAD;
        index = dictionary.Length - length; // use the tail of the dictionary
    }
    Array.Copy(dictionary, index, window, 0, length);
    strstart = length;
    block_start = length;

    // Insert all strings in the hash table (except for the last two bytes).
    // s->lookahead stays null, so s->ins_h will be recomputed at the next
    // call of fill_window.

    ins_h = window[0] & 0xff;
    ins_h = (((ins_h) << hash_shift) ^ (window[1] & 0xff)) & hash_mask;

    for (int n = 0; n <= length - MIN_MATCH; n++)
    {
        ins_h = (((ins_h) << hash_shift) ^ (window[(n) + (MIN_MATCH - 1)] & 0xff)) & hash_mask;
        prev[n & w_mask] = head[ins_h];
        head[ins_h] = (short)n;
    }
    return ZlibConstants.Z_OK;
}

// Compress as much of the codec's input as possible into the codec's
// output buffer, honoring the requested flush mode. On the first call in
// INIT_STATE the RFC 1950 header is written; with FlushType.Finish and
// WantRfc1950HeaderBytes set, the Adler-32 trailer is appended once.
//
// flush:   the flush mode requested by the caller.
// returns: Z_STREAM_END when flush == Finish and everything, including
//          the trailer if wanted, has been written out; Z_OK otherwise.
// throws:  ZlibException when the output buffer is null, the input
//          buffer is null while input is announced, the stream is
//          already finished and flush is not Finish, the output buffer
//          has no free space, or input is supplied after finishing.
internal int Deflate(FlushType flush)
{
    int old_flush;

    if (_codec.OutputBuffer == null ||
        (_codec.InputBuffer == null && _codec.AvailableBytesIn != 0) ||
        (status == FINISH_STATE && flush != FlushType.Finish))
    {
        _codec.Message = _ErrorMessage[ZlibConstants.Z_NEED_DICT - (ZlibConstants.Z_STREAM_ERROR)];
        throw new ZlibException(String.Format("Something is fishy. [{0}]", _codec.Message));
    }
    if (_codec.AvailableBytesOut == 0)
    {
        _codec.Message = _ErrorMessage[ZlibConstants.Z_NEED_DICT - (ZlibConstants.Z_BUF_ERROR)];
        throw new ZlibException("OutputBuffer is full (AvailableBytesOut == 0)");
    }

    old_flush = last_flush;
    last_flush = (int)flush;

    // Write the zlib (rfc1950) header bytes
    if (status == INIT_STATE)
    {
        int header = (Z_DEFLATED + ((w_bits - 8) << 4)) << 8;
        int level_flags = (((int)compressionLevel - 1) & 0xff) >> 1;

        if (level_flags > 3)
            level_flags = 3;
        header |= (level_flags << 6);
        // strstart is non-zero here only if SetDictionary() preloaded the window.
        if (strstart != 0)
            header |= PRESET_DICT;
        // Make the 16-bit header a multiple of 31.
        header += 31 - (header % 31);

        status = BUSY_STATE;
        //putShortMSB(header);
        unchecked
        {
            pending[pendingCount++] = (byte)(header >> 8);
            pending[pendingCount++] = (byte)header;
        }
        // Save the adler32 of the preset dictionary:
        if (strstart != 0)
        {
            pending[pendingCount++] = (byte)((_codec._Adler32 & 0xFF000000) >> 24);
            pending[pendingCount++] = (byte)((_codec._Adler32 & 0x00FF0000) >> 16);
            pending[pendingCount++] = (byte)((_codec._Adler32 & 0x0000FF00) >> 8);
            pending[pendingCount++] = (byte)(_codec._Adler32 & 0x000000FF);
        }
        _codec._Adler32 = Adler.Adler32(0, null, 0, 0);
    }

    // Flush as much pending output as possible
    if (pendingCount != 0)
    {
        _codec.flush_pending();
        if (_codec.AvailableBytesOut == 0)
        {
            //System.out.println("  avail_out==0");
            // Since avail_out is 0, deflate will be called again with
            // more output space, but possibly with both pending and
            // avail_in equal to zero. There won't be anything to do,
            // but this is not an error situation so make sure we
            // return OK instead of BUF_ERROR at next call of deflate:
            last_flush = -1;
            return ZlibConstants.Z_OK;
        }
    }
    // Make sure there is something to do and avoid duplicate consecutive
    // flushes. For repeated and useless calls with Z_FINISH, we keep
    // returning Z_STREAM_END instead of Z_BUFF_ERROR.
    else if (_codec.AvailableBytesIn == 0 &&
             (int)flush <= old_flush &&
             flush != FlushType.Finish)
    {
        // workitem 8557
        //
        // Not sure why this needs to be an error.  pendingCount == 0, which
        // means there's nothing to deflate.  And the caller has not asked
        // for a FlushType.Finish, but...  that seems very non-fatal.  We
        // can just say "OK" and do nothing.

        // _codec.Message = z_errmsg[ZlibConstants.Z_NEED_DICT - (ZlibConstants.Z_BUF_ERROR)];
        // throw new ZlibException("AvailableBytesIn == 0 && flush<=old_flush && flush != FlushType.Finish");

        return ZlibConstants.Z_OK;
    }

    // User must not provide more input after the first FINISH:
    if (status == FINISH_STATE && _codec.AvailableBytesIn != 0)
    {
        _codec.Message = _ErrorMessage[ZlibConstants.Z_NEED_DICT - (ZlibConstants.Z_BUF_ERROR)];
        throw new ZlibException("status == FINISH_STATE && _codec.AvailableBytesIn != 0");
    }

    // Start a new block or continue the current one.
    if (_codec.AvailableBytesIn != 0 || lookahead != 0 || (flush != FlushType.None && status != FINISH_STATE))
    {
        BlockState bstate = DeflateFunction(flush);

        if (bstate == BlockState.FinishStarted || bstate == BlockState.FinishDone)
        {
            status = FINISH_STATE;
        }
        if (bstate == BlockState.NeedMore || bstate == BlockState.FinishStarted)
        {
            if (_codec.AvailableBytesOut == 0)
            {
                last_flush = -1; // avoid BUF_ERROR next call, see above
            }
            return ZlibConstants.Z_OK;
            // If flush != Z_NO_FLUSH && avail_out == 0, the next call
            // of deflate should use the same flush parameter to make sure
            // that the flush is complete. So we don't have to output an
            // empty block here, this will be done at next call. This also
            // ensures that for a very small output buffer, we emit at most
            // one empty block.
        }

        if (bstate == BlockState.BlockDone)
        {
            if (flush == FlushType.Partial)
            {
                _tr_align();
            }
            else
            {
                // FlushType.Full or FlushType.Sync
                _tr_stored_block(0, 0, false);
                // For a full flush, this empty block will be recognized
                // as a special marker by inflate_sync().
                if (flush == FlushType.Full)
                {
                    // clear hash (forget the history)
                    for (int i = 0; i < hash_size; i++)
                        head[i] = 0;
                }
            }
            _codec.flush_pending();
            if (_codec.AvailableBytesOut == 0)
            {
                last_flush = -1; // avoid BUF_ERROR at next call, see above
                return ZlibConstants.Z_OK;
            }
        }
    }

    if (flush != FlushType.Finish)
        return ZlibConstants.Z_OK;

    if (!WantRfc1950HeaderBytes || Rfc1950BytesEmitted)
        return ZlibConstants.Z_STREAM_END;

    // Write the zlib trailer (adler32)
    pending[pendingCount++] = (byte)((_codec._Adler32 & 0xFF000000) >> 24);
    pending[pendingCount++] = (byte)((_codec._Adler32 & 0x00FF0000) >> 16);
    pending[pendingCount++] = (byte)((_codec._Adler32 & 0x0000FF00) >> 8);
    pending[pendingCount++] = (byte)(_codec._Adler32 & 0x000000FF);
    //putShortMSB((int)(SharedUtils.URShift(_codec._Adler32, 16)));
    //putShortMSB((int)(_codec._Adler32 & 0xffff));

    _codec.flush_pending();

    // If avail_out is zero, the application will call deflate again
    // to flush the rest.

    Rfc1950BytesEmitted = true; // write the trailer only once!

    return pendingCount != 0 ? ZlibConstants.Z_OK : ZlibConstants.Z_STREAM_END;
}

} |
|
} |