1e89516f0SMike Frysinger /* inffast.c -- fast decoding
2e89516f0SMike Frysinger * Copyright (C) 1995-2004 Mark Adler
3e89516f0SMike Frysinger * For conditions of distribution and use, see copyright notice in zlib.h
4e89516f0SMike Frysinger */
5e89516f0SMike Frysinger
6*a187559eSBin Meng /* U-Boot: we already included these
7e89516f0SMike Frysinger #include "zutil.h"
8e89516f0SMike Frysinger #include "inftrees.h"
9e89516f0SMike Frysinger #include "inflate.h"
10e89516f0SMike Frysinger #include "inffast.h"
11e89516f0SMike Frysinger */
12e89516f0SMike Frysinger
13e89516f0SMike Frysinger #ifndef ASMINF
14e89516f0SMike Frysinger
15e89516f0SMike Frysinger /* Allow machine dependent optimization for post-increment or pre-increment.
16e89516f0SMike Frysinger Based on testing to date,
17e89516f0SMike Frysinger Pre-increment preferred for:
18e89516f0SMike Frysinger - PowerPC G3 (Adler)
19e89516f0SMike Frysinger - MIPS R5000 (Randers-Pehrson)
20e89516f0SMike Frysinger Post-increment preferred for:
21e89516f0SMike Frysinger - none
22e89516f0SMike Frysinger No measurable difference:
23e89516f0SMike Frysinger - Pentium III (Anderson)
24e89516f0SMike Frysinger - M68060 (Nikl)
25e89516f0SMike Frysinger */
26e89516f0SMike Frysinger #ifdef POSTINC
27e89516f0SMike Frysinger # define OFF 0
28e89516f0SMike Frysinger # define PUP(a) *(a)++
29e89516f0SMike Frysinger #else
30e89516f0SMike Frysinger # define OFF 1
31e89516f0SMike Frysinger # define PUP(a) *++(a)
32e89516f0SMike Frysinger #endif
33e89516f0SMike Frysinger
34e89516f0SMike Frysinger /*
35e89516f0SMike Frysinger Decode literal, length, and distance codes and write out the resulting
36e89516f0SMike Frysinger literal and match bytes until either not enough input or output is
37e89516f0SMike Frysinger available, an end-of-block is encountered, or a data error is encountered.
38e89516f0SMike Frysinger When large enough input and output buffers are supplied to inflate(), for
39e89516f0SMike Frysinger example, a 16K input buffer and a 64K output buffer, more than 95% of the
40e89516f0SMike Frysinger inflate execution time is spent in this routine.
41e89516f0SMike Frysinger
42e89516f0SMike Frysinger Entry assumptions:
43e89516f0SMike Frysinger
44e89516f0SMike Frysinger state->mode == LEN
45e89516f0SMike Frysinger strm->avail_in >= 6
46e89516f0SMike Frysinger strm->avail_out >= 258
47e89516f0SMike Frysinger start >= strm->avail_out
48e89516f0SMike Frysinger state->bits < 8
49e89516f0SMike Frysinger
50e89516f0SMike Frysinger On return, state->mode is one of:
51e89516f0SMike Frysinger
52e89516f0SMike Frysinger LEN -- ran out of enough output space or enough available input
53e89516f0SMike Frysinger TYPE -- reached end of block code, inflate() to interpret next block
54e89516f0SMike Frysinger BAD -- error in block data
55e89516f0SMike Frysinger
56e89516f0SMike Frysinger Notes:
57e89516f0SMike Frysinger
58e89516f0SMike Frysinger - The maximum input bits used by a length/distance pair is 15 bits for the
59e89516f0SMike Frysinger length code, 5 bits for the length extra, 15 bits for the distance code,
60e89516f0SMike Frysinger and 13 bits for the distance extra. This totals 48 bits, or six bytes.
61e89516f0SMike Frysinger Therefore if strm->avail_in >= 6, then there is enough input to avoid
62e89516f0SMike Frysinger checking for available input while decoding.
63e89516f0SMike Frysinger
64e89516f0SMike Frysinger - The maximum bytes that a single length/distance pair can output is 258
65e89516f0SMike Frysinger bytes, which is the maximum length that can be coded. inflate_fast()
66e89516f0SMike Frysinger requires strm->avail_out >= 258 for each loop to avoid checking for
67e89516f0SMike Frysinger output space.
68e89516f0SMike Frysinger */
inflate_fast(z_streamp strm,unsigned start)69ee820b5eSKim Phillips void inflate_fast(z_streamp strm, unsigned start)
70ee820b5eSKim Phillips /* start: inflate()'s starting value for strm->avail_out */
71e89516f0SMike Frysinger {
72e89516f0SMike Frysinger struct inflate_state FAR *state;
73e89516f0SMike Frysinger unsigned char FAR *in; /* local strm->next_in */
74e89516f0SMike Frysinger unsigned char FAR *last; /* while in < last, enough input available */
75e89516f0SMike Frysinger unsigned char FAR *out; /* local strm->next_out */
76e89516f0SMike Frysinger unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
77e89516f0SMike Frysinger unsigned char FAR *end; /* while out < end, enough space available */
78e89516f0SMike Frysinger #ifdef INFLATE_STRICT
79e89516f0SMike Frysinger unsigned dmax; /* maximum distance from zlib header */
80e89516f0SMike Frysinger #endif
81e89516f0SMike Frysinger unsigned wsize; /* window size or zero if not using window */
82e89516f0SMike Frysinger unsigned whave; /* valid bytes in the window */
83e89516f0SMike Frysinger unsigned write; /* window write index */
84e89516f0SMike Frysinger unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
85e89516f0SMike Frysinger unsigned long hold; /* local strm->hold */
86e89516f0SMike Frysinger unsigned bits; /* local strm->bits */
87e89516f0SMike Frysinger code const FAR *lcode; /* local strm->lencode */
88e89516f0SMike Frysinger code const FAR *dcode; /* local strm->distcode */
89e89516f0SMike Frysinger unsigned lmask; /* mask for first level of length codes */
90e89516f0SMike Frysinger unsigned dmask; /* mask for first level of distance codes */
91e89516f0SMike Frysinger code this; /* retrieved table entry */
92e89516f0SMike Frysinger unsigned op; /* code bits, operation, extra bits, or */
93e89516f0SMike Frysinger /* window position, window bytes to copy */
94e89516f0SMike Frysinger unsigned len; /* match length, unused bytes */
95e89516f0SMike Frysinger unsigned dist; /* match distance */
96e89516f0SMike Frysinger unsigned char FAR *from; /* where to copy match from */
97e89516f0SMike Frysinger
98e89516f0SMike Frysinger /* copy state to local variables */
99e89516f0SMike Frysinger state = (struct inflate_state FAR *)strm->state;
100e89516f0SMike Frysinger in = strm->next_in - OFF;
101e89516f0SMike Frysinger last = in + (strm->avail_in - 5);
10292faa8b1SAnatolij Gustschin if (in > last && strm->avail_in > 5) {
10392faa8b1SAnatolij Gustschin /*
10492faa8b1SAnatolij Gustschin * overflow detected, limit strm->avail_in to the
10592faa8b1SAnatolij Gustschin * max. possible size and recalculate last
10692faa8b1SAnatolij Gustschin */
10741d68b32SSimon Glass strm->avail_in = 0xffffffff - (uintptr_t)in;
10892faa8b1SAnatolij Gustschin last = in + (strm->avail_in - 5);
10992faa8b1SAnatolij Gustschin }
110e89516f0SMike Frysinger out = strm->next_out - OFF;
111e89516f0SMike Frysinger beg = out - (start - strm->avail_out);
112e89516f0SMike Frysinger end = out + (strm->avail_out - 257);
113e89516f0SMike Frysinger #ifdef INFLATE_STRICT
114e89516f0SMike Frysinger dmax = state->dmax;
115e89516f0SMike Frysinger #endif
116e89516f0SMike Frysinger wsize = state->wsize;
117e89516f0SMike Frysinger whave = state->whave;
118e89516f0SMike Frysinger write = state->write;
119e89516f0SMike Frysinger window = state->window;
120e89516f0SMike Frysinger hold = state->hold;
121e89516f0SMike Frysinger bits = state->bits;
122e89516f0SMike Frysinger lcode = state->lencode;
123e89516f0SMike Frysinger dcode = state->distcode;
124e89516f0SMike Frysinger lmask = (1U << state->lenbits) - 1;
125e89516f0SMike Frysinger dmask = (1U << state->distbits) - 1;
126e89516f0SMike Frysinger
127e89516f0SMike Frysinger /* decode literals and length/distances until end-of-block or not enough
128e89516f0SMike Frysinger input data or output space */
129e89516f0SMike Frysinger do {
130e89516f0SMike Frysinger if (bits < 15) {
131e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
132e89516f0SMike Frysinger bits += 8;
133e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
134e89516f0SMike Frysinger bits += 8;
135e89516f0SMike Frysinger }
136e89516f0SMike Frysinger this = lcode[hold & lmask];
137e89516f0SMike Frysinger dolen:
138e89516f0SMike Frysinger op = (unsigned)(this.bits);
139e89516f0SMike Frysinger hold >>= op;
140e89516f0SMike Frysinger bits -= op;
141e89516f0SMike Frysinger op = (unsigned)(this.op);
142e89516f0SMike Frysinger if (op == 0) { /* literal */
143e89516f0SMike Frysinger Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
144e89516f0SMike Frysinger "inflate: literal '%c'\n" :
145e89516f0SMike Frysinger "inflate: literal 0x%02x\n", this.val));
146e89516f0SMike Frysinger PUP(out) = (unsigned char)(this.val);
147e89516f0SMike Frysinger }
148e89516f0SMike Frysinger else if (op & 16) { /* length base */
149e89516f0SMike Frysinger len = (unsigned)(this.val);
150e89516f0SMike Frysinger op &= 15; /* number of extra bits */
151e89516f0SMike Frysinger if (op) {
152e89516f0SMike Frysinger if (bits < op) {
153e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
154e89516f0SMike Frysinger bits += 8;
155e89516f0SMike Frysinger }
156e89516f0SMike Frysinger len += (unsigned)hold & ((1U << op) - 1);
157e89516f0SMike Frysinger hold >>= op;
158e89516f0SMike Frysinger bits -= op;
159e89516f0SMike Frysinger }
160e89516f0SMike Frysinger Tracevv((stderr, "inflate: length %u\n", len));
161e89516f0SMike Frysinger if (bits < 15) {
162e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
163e89516f0SMike Frysinger bits += 8;
164e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
165e89516f0SMike Frysinger bits += 8;
166e89516f0SMike Frysinger }
167e89516f0SMike Frysinger this = dcode[hold & dmask];
168e89516f0SMike Frysinger dodist:
169e89516f0SMike Frysinger op = (unsigned)(this.bits);
170e89516f0SMike Frysinger hold >>= op;
171e89516f0SMike Frysinger bits -= op;
172e89516f0SMike Frysinger op = (unsigned)(this.op);
173e89516f0SMike Frysinger if (op & 16) { /* distance base */
174e89516f0SMike Frysinger dist = (unsigned)(this.val);
175e89516f0SMike Frysinger op &= 15; /* number of extra bits */
176e89516f0SMike Frysinger if (bits < op) {
177e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
178e89516f0SMike Frysinger bits += 8;
179e89516f0SMike Frysinger if (bits < op) {
180e89516f0SMike Frysinger hold += (unsigned long)(PUP(in)) << bits;
181e89516f0SMike Frysinger bits += 8;
182e89516f0SMike Frysinger }
183e89516f0SMike Frysinger }
184e89516f0SMike Frysinger dist += (unsigned)hold & ((1U << op) - 1);
185e89516f0SMike Frysinger #ifdef INFLATE_STRICT
186e89516f0SMike Frysinger if (dist > dmax) {
187e89516f0SMike Frysinger strm->msg = (char *)"invalid distance too far back";
188e89516f0SMike Frysinger state->mode = BAD;
189e89516f0SMike Frysinger break;
190e89516f0SMike Frysinger }
191e89516f0SMike Frysinger #endif
192e89516f0SMike Frysinger hold >>= op;
193e89516f0SMike Frysinger bits -= op;
194e89516f0SMike Frysinger Tracevv((stderr, "inflate: distance %u\n", dist));
195e89516f0SMike Frysinger op = (unsigned)(out - beg); /* max distance in output */
196e89516f0SMike Frysinger if (dist > op) { /* see if copy from window */
197e89516f0SMike Frysinger op = dist - op; /* distance back in window */
198e89516f0SMike Frysinger if (op > whave) {
199e89516f0SMike Frysinger strm->msg = (char *)"invalid distance too far back";
200e89516f0SMike Frysinger state->mode = BAD;
201e89516f0SMike Frysinger break;
202e89516f0SMike Frysinger }
203e89516f0SMike Frysinger from = window - OFF;
204e89516f0SMike Frysinger if (write == 0) { /* very common case */
205e89516f0SMike Frysinger from += wsize - op;
206e89516f0SMike Frysinger if (op < len) { /* some from window */
207e89516f0SMike Frysinger len -= op;
208e89516f0SMike Frysinger do {
209e89516f0SMike Frysinger PUP(out) = PUP(from);
210e89516f0SMike Frysinger } while (--op);
211e89516f0SMike Frysinger from = out - dist; /* rest from output */
212e89516f0SMike Frysinger }
213e89516f0SMike Frysinger }
214e89516f0SMike Frysinger else if (write < op) { /* wrap around window */
215e89516f0SMike Frysinger from += wsize + write - op;
216e89516f0SMike Frysinger op -= write;
217e89516f0SMike Frysinger if (op < len) { /* some from end of window */
218e89516f0SMike Frysinger len -= op;
219e89516f0SMike Frysinger do {
220e89516f0SMike Frysinger PUP(out) = PUP(from);
221e89516f0SMike Frysinger } while (--op);
222e89516f0SMike Frysinger from = window - OFF;
223e89516f0SMike Frysinger if (write < len) { /* some from start of window */
224e89516f0SMike Frysinger op = write;
225e89516f0SMike Frysinger len -= op;
226e89516f0SMike Frysinger do {
227e89516f0SMike Frysinger PUP(out) = PUP(from);
228e89516f0SMike Frysinger } while (--op);
229e89516f0SMike Frysinger from = out - dist; /* rest from output */
230e89516f0SMike Frysinger }
231e89516f0SMike Frysinger }
232e89516f0SMike Frysinger }
233e89516f0SMike Frysinger else { /* contiguous in window */
234e89516f0SMike Frysinger from += write - op;
235e89516f0SMike Frysinger if (op < len) { /* some from window */
236e89516f0SMike Frysinger len -= op;
237e89516f0SMike Frysinger do {
238e89516f0SMike Frysinger PUP(out) = PUP(from);
239e89516f0SMike Frysinger } while (--op);
240e89516f0SMike Frysinger from = out - dist; /* rest from output */
241e89516f0SMike Frysinger }
242e89516f0SMike Frysinger }
243e89516f0SMike Frysinger while (len > 2) {
244e89516f0SMike Frysinger PUP(out) = PUP(from);
245e89516f0SMike Frysinger PUP(out) = PUP(from);
246e89516f0SMike Frysinger PUP(out) = PUP(from);
247e89516f0SMike Frysinger len -= 3;
248e89516f0SMike Frysinger }
249e89516f0SMike Frysinger if (len) {
250e89516f0SMike Frysinger PUP(out) = PUP(from);
251e89516f0SMike Frysinger if (len > 1)
252e89516f0SMike Frysinger PUP(out) = PUP(from);
253e89516f0SMike Frysinger }
254e89516f0SMike Frysinger }
255e89516f0SMike Frysinger else {
256e89516f0SMike Frysinger unsigned short *sout;
257e89516f0SMike Frysinger unsigned long loops;
258e89516f0SMike Frysinger
259e89516f0SMike Frysinger from = out - dist; /* copy direct from output */
260e89516f0SMike Frysinger /* minimum length is three */
261e89516f0SMike Frysinger /* Align out addr */
262e89516f0SMike Frysinger if (!((long)(out - 1 + OFF) & 1)) {
263e89516f0SMike Frysinger PUP(out) = PUP(from);
264e89516f0SMike Frysinger len--;
265e89516f0SMike Frysinger }
266e89516f0SMike Frysinger sout = (unsigned short *)(out - OFF);
267e89516f0SMike Frysinger if (dist > 2 ) {
268e89516f0SMike Frysinger unsigned short *sfrom;
269e89516f0SMike Frysinger
270e89516f0SMike Frysinger sfrom = (unsigned short *)(from - OFF);
271e89516f0SMike Frysinger loops = len >> 1;
272e89516f0SMike Frysinger do
273e89516f0SMike Frysinger PUP(sout) = get_unaligned(++sfrom);
274e89516f0SMike Frysinger while (--loops);
275e89516f0SMike Frysinger out = (unsigned char *)sout + OFF;
276e89516f0SMike Frysinger from = (unsigned char *)sfrom + OFF;
277e89516f0SMike Frysinger } else { /* dist == 1 or dist == 2 */
278e89516f0SMike Frysinger unsigned short pat16;
279e89516f0SMike Frysinger
280e89516f0SMike Frysinger pat16 = *(sout-2+2*OFF);
281e89516f0SMike Frysinger if (dist == 1)
282e89516f0SMike Frysinger #if defined(__BIG_ENDIAN)
283e89516f0SMike Frysinger pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
284e89516f0SMike Frysinger #elif defined(__LITTLE_ENDIAN)
285e89516f0SMike Frysinger pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
286e89516f0SMike Frysinger #else
287e89516f0SMike Frysinger #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
288e89516f0SMike Frysinger #endif
289e89516f0SMike Frysinger loops = len >> 1;
290e89516f0SMike Frysinger do
291e89516f0SMike Frysinger PUP(sout) = pat16;
292e89516f0SMike Frysinger while (--loops);
293e89516f0SMike Frysinger out = (unsigned char *)sout + OFF;
294e89516f0SMike Frysinger }
295e89516f0SMike Frysinger if (len & 1)
296e89516f0SMike Frysinger PUP(out) = PUP(from);
297e89516f0SMike Frysinger }
298e89516f0SMike Frysinger }
299e89516f0SMike Frysinger else if ((op & 64) == 0) { /* 2nd level distance code */
300e89516f0SMike Frysinger this = dcode[this.val + (hold & ((1U << op) - 1))];
301e89516f0SMike Frysinger goto dodist;
302e89516f0SMike Frysinger }
303e89516f0SMike Frysinger else {
304e89516f0SMike Frysinger strm->msg = (char *)"invalid distance code";
305e89516f0SMike Frysinger state->mode = BAD;
306e89516f0SMike Frysinger break;
307e89516f0SMike Frysinger }
308e89516f0SMike Frysinger }
309e89516f0SMike Frysinger else if ((op & 64) == 0) { /* 2nd level length code */
310e89516f0SMike Frysinger this = lcode[this.val + (hold & ((1U << op) - 1))];
311e89516f0SMike Frysinger goto dolen;
312e89516f0SMike Frysinger }
313e89516f0SMike Frysinger else if (op & 32) { /* end-of-block */
314e89516f0SMike Frysinger Tracevv((stderr, "inflate: end of block\n"));
315e89516f0SMike Frysinger state->mode = TYPE;
316e89516f0SMike Frysinger break;
317e89516f0SMike Frysinger }
318e89516f0SMike Frysinger else {
319e89516f0SMike Frysinger strm->msg = (char *)"invalid literal/length code";
320e89516f0SMike Frysinger state->mode = BAD;
321e89516f0SMike Frysinger break;
322e89516f0SMike Frysinger }
323e89516f0SMike Frysinger } while (in < last && out < end);
324e89516f0SMike Frysinger
325e89516f0SMike Frysinger /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
326e89516f0SMike Frysinger len = bits >> 3;
327e89516f0SMike Frysinger in -= len;
328e89516f0SMike Frysinger bits -= len << 3;
329e89516f0SMike Frysinger hold &= (1U << bits) - 1;
330e89516f0SMike Frysinger
331e89516f0SMike Frysinger /* update state and return */
332e89516f0SMike Frysinger strm->next_in = in + OFF;
333e89516f0SMike Frysinger strm->next_out = out + OFF;
334e89516f0SMike Frysinger strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
335e89516f0SMike Frysinger strm->avail_out = (unsigned)(out < end ?
336e89516f0SMike Frysinger 257 + (end - out) : 257 - (out - end));
337e89516f0SMike Frysinger state->hold = hold;
338e89516f0SMike Frysinger state->bits = bits;
339e89516f0SMike Frysinger return;
340e89516f0SMike Frysinger }
341e89516f0SMike Frysinger
342e89516f0SMike Frysinger /*
343e89516f0SMike Frysinger inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
344e89516f0SMike Frysinger - Using bit fields for code structure
345e89516f0SMike Frysinger - Different op definition to avoid & for extra bits (do & for table bits)
346e89516f0SMike Frysinger - Three separate decoding do-loops for direct, window, and write == 0
347e89516f0SMike Frysinger - Special case for distance > 1 copies to do overlapped load and store copy
348e89516f0SMike Frysinger - Explicit branch predictions (based on measured branch probabilities)
349e89516f0SMike Frysinger - Deferring match copy and interspersed it with decoding subsequent codes
350e89516f0SMike Frysinger - Swapping literal/length else
351e89516f0SMike Frysinger - Swapping window/direct else
352e89516f0SMike Frysinger - Larger unrolled copy loops (three is about right)
353e89516f0SMike Frysinger - Moving len -= 3 statement into middle of loop
354e89516f0SMike Frysinger */
355e89516f0SMike Frysinger
356e89516f0SMike Frysinger #endif /* !ASMINF */
357