xref: /openbmc/u-boot/lib/zlib/inffast.c (revision 57dc53a72460e8e301fa1cc7951b41db8e731485)
1e89516f0SMike Frysinger /* inffast.c -- fast decoding
2e89516f0SMike Frysinger  * Copyright (C) 1995-2004 Mark Adler
3e89516f0SMike Frysinger  * For conditions of distribution and use, see copyright notice in zlib.h
4e89516f0SMike Frysinger  */
5e89516f0SMike Frysinger 
6*a187559eSBin Meng /* U-Boot: we already included these
7e89516f0SMike Frysinger #include "zutil.h"
8e89516f0SMike Frysinger #include "inftrees.h"
9e89516f0SMike Frysinger #include "inflate.h"
10e89516f0SMike Frysinger #include "inffast.h"
11e89516f0SMike Frysinger */
12e89516f0SMike Frysinger 
13e89516f0SMike Frysinger #ifndef ASMINF
14e89516f0SMike Frysinger 
15e89516f0SMike Frysinger /* Allow machine dependent optimization for post-increment or pre-increment.
16e89516f0SMike Frysinger    Based on testing to date,
17e89516f0SMike Frysinger    Pre-increment preferred for:
18e89516f0SMike Frysinger    - PowerPC G3 (Adler)
19e89516f0SMike Frysinger    - MIPS R5000 (Randers-Pehrson)
20e89516f0SMike Frysinger    Post-increment preferred for:
21e89516f0SMike Frysinger    - none
22e89516f0SMike Frysinger    No measurable difference:
23e89516f0SMike Frysinger    - Pentium III (Anderson)
24e89516f0SMike Frysinger    - M68060 (Nikl)
25e89516f0SMike Frysinger  */
26e89516f0SMike Frysinger #ifdef POSTINC
27e89516f0SMike Frysinger #  define OFF 0
28e89516f0SMike Frysinger #  define PUP(a) *(a)++
29e89516f0SMike Frysinger #else
30e89516f0SMike Frysinger #  define OFF 1
31e89516f0SMike Frysinger #  define PUP(a) *++(a)
32e89516f0SMike Frysinger #endif
33e89516f0SMike Frysinger 
34e89516f0SMike Frysinger /*
35e89516f0SMike Frysinger    Decode literal, length, and distance codes and write out the resulting
36e89516f0SMike Frysinger    literal and match bytes until either not enough input or output is
37e89516f0SMike Frysinger    available, an end-of-block is encountered, or a data error is encountered.
38e89516f0SMike Frysinger    When large enough input and output buffers are supplied to inflate(), for
39e89516f0SMike Frysinger    example, a 16K input buffer and a 64K output buffer, more than 95% of the
40e89516f0SMike Frysinger    inflate execution time is spent in this routine.
41e89516f0SMike Frysinger 
42e89516f0SMike Frysinger    Entry assumptions:
43e89516f0SMike Frysinger 
44e89516f0SMike Frysinger         state->mode == LEN
45e89516f0SMike Frysinger         strm->avail_in >= 6
46e89516f0SMike Frysinger         strm->avail_out >= 258
47e89516f0SMike Frysinger         start >= strm->avail_out
48e89516f0SMike Frysinger         state->bits < 8
49e89516f0SMike Frysinger 
50e89516f0SMike Frysinger    On return, state->mode is one of:
51e89516f0SMike Frysinger 
52e89516f0SMike Frysinger         LEN -- ran out of enough output space or enough available input
53e89516f0SMike Frysinger         TYPE -- reached end of block code, inflate() to interpret next block
54e89516f0SMike Frysinger         BAD -- error in block data
55e89516f0SMike Frysinger 
56e89516f0SMike Frysinger    Notes:
57e89516f0SMike Frysinger 
58e89516f0SMike Frysinger     - The maximum input bits used by a length/distance pair is 15 bits for the
59e89516f0SMike Frysinger       length code, 5 bits for the length extra, 15 bits for the distance code,
60e89516f0SMike Frysinger       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
61e89516f0SMike Frysinger       Therefore if strm->avail_in >= 6, then there is enough input to avoid
62e89516f0SMike Frysinger       checking for available input while decoding.
63e89516f0SMike Frysinger 
64e89516f0SMike Frysinger     - The maximum bytes that a single length/distance pair can output is 258
65e89516f0SMike Frysinger       bytes, which is the maximum length that can be coded.  inflate_fast()
66e89516f0SMike Frysinger       requires strm->avail_out >= 258 for each loop to avoid checking for
67e89516f0SMike Frysinger       output space.
68e89516f0SMike Frysinger  */
inflate_fast(z_streamp strm,unsigned start)69ee820b5eSKim Phillips void inflate_fast(z_streamp strm, unsigned start)
70ee820b5eSKim Phillips /* start: inflate()'s starting value for strm->avail_out */
71e89516f0SMike Frysinger {
72e89516f0SMike Frysinger     struct inflate_state FAR *state;
73e89516f0SMike Frysinger     unsigned char FAR *in;      /* local strm->next_in */
74e89516f0SMike Frysinger     unsigned char FAR *last;    /* while in < last, enough input available */
75e89516f0SMike Frysinger     unsigned char FAR *out;     /* local strm->next_out */
76e89516f0SMike Frysinger     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
77e89516f0SMike Frysinger     unsigned char FAR *end;     /* while out < end, enough space available */
78e89516f0SMike Frysinger #ifdef INFLATE_STRICT
79e89516f0SMike Frysinger     unsigned dmax;              /* maximum distance from zlib header */
80e89516f0SMike Frysinger #endif
81e89516f0SMike Frysinger     unsigned wsize;             /* window size or zero if not using window */
82e89516f0SMike Frysinger     unsigned whave;             /* valid bytes in the window */
83e89516f0SMike Frysinger     unsigned write;             /* window write index */
84e89516f0SMike Frysinger     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
85e89516f0SMike Frysinger     unsigned long hold;         /* local strm->hold */
86e89516f0SMike Frysinger     unsigned bits;              /* local strm->bits */
87e89516f0SMike Frysinger     code const FAR *lcode;      /* local strm->lencode */
88e89516f0SMike Frysinger     code const FAR *dcode;      /* local strm->distcode */
89e89516f0SMike Frysinger     unsigned lmask;             /* mask for first level of length codes */
90e89516f0SMike Frysinger     unsigned dmask;             /* mask for first level of distance codes */
91e89516f0SMike Frysinger     code this;                  /* retrieved table entry */
92e89516f0SMike Frysinger     unsigned op;                /* code bits, operation, extra bits, or */
93e89516f0SMike Frysinger                                 /*  window position, window bytes to copy */
94e89516f0SMike Frysinger     unsigned len;               /* match length, unused bytes */
95e89516f0SMike Frysinger     unsigned dist;              /* match distance */
96e89516f0SMike Frysinger     unsigned char FAR *from;    /* where to copy match from */
97e89516f0SMike Frysinger 
98e89516f0SMike Frysinger     /* copy state to local variables */
99e89516f0SMike Frysinger     state = (struct inflate_state FAR *)strm->state;
100e89516f0SMike Frysinger     in = strm->next_in - OFF;
101e89516f0SMike Frysinger     last = in + (strm->avail_in - 5);
10292faa8b1SAnatolij Gustschin     if (in > last && strm->avail_in > 5) {
10392faa8b1SAnatolij Gustschin         /*
10492faa8b1SAnatolij Gustschin          * overflow detected, limit strm->avail_in to the
10592faa8b1SAnatolij Gustschin          * max. possible size and recalculate last
10692faa8b1SAnatolij Gustschin          */
10741d68b32SSimon Glass 	strm->avail_in = 0xffffffff - (uintptr_t)in;
10892faa8b1SAnatolij Gustschin         last = in + (strm->avail_in - 5);
10992faa8b1SAnatolij Gustschin     }
110e89516f0SMike Frysinger     out = strm->next_out - OFF;
111e89516f0SMike Frysinger     beg = out - (start - strm->avail_out);
112e89516f0SMike Frysinger     end = out + (strm->avail_out - 257);
113e89516f0SMike Frysinger #ifdef INFLATE_STRICT
114e89516f0SMike Frysinger     dmax = state->dmax;
115e89516f0SMike Frysinger #endif
116e89516f0SMike Frysinger     wsize = state->wsize;
117e89516f0SMike Frysinger     whave = state->whave;
118e89516f0SMike Frysinger     write = state->write;
119e89516f0SMike Frysinger     window = state->window;
120e89516f0SMike Frysinger     hold = state->hold;
121e89516f0SMike Frysinger     bits = state->bits;
122e89516f0SMike Frysinger     lcode = state->lencode;
123e89516f0SMike Frysinger     dcode = state->distcode;
124e89516f0SMike Frysinger     lmask = (1U << state->lenbits) - 1;
125e89516f0SMike Frysinger     dmask = (1U << state->distbits) - 1;
126e89516f0SMike Frysinger 
127e89516f0SMike Frysinger     /* decode literals and length/distances until end-of-block or not enough
128e89516f0SMike Frysinger        input data or output space */
129e89516f0SMike Frysinger     do {
130e89516f0SMike Frysinger         if (bits < 15) {
131e89516f0SMike Frysinger             hold += (unsigned long)(PUP(in)) << bits;
132e89516f0SMike Frysinger             bits += 8;
133e89516f0SMike Frysinger             hold += (unsigned long)(PUP(in)) << bits;
134e89516f0SMike Frysinger             bits += 8;
135e89516f0SMike Frysinger         }
136e89516f0SMike Frysinger         this = lcode[hold & lmask];
137e89516f0SMike Frysinger       dolen:
138e89516f0SMike Frysinger         op = (unsigned)(this.bits);
139e89516f0SMike Frysinger         hold >>= op;
140e89516f0SMike Frysinger         bits -= op;
141e89516f0SMike Frysinger         op = (unsigned)(this.op);
142e89516f0SMike Frysinger         if (op == 0) {                          /* literal */
143e89516f0SMike Frysinger             Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
144e89516f0SMike Frysinger                     "inflate:         literal '%c'\n" :
145e89516f0SMike Frysinger                     "inflate:         literal 0x%02x\n", this.val));
146e89516f0SMike Frysinger             PUP(out) = (unsigned char)(this.val);
147e89516f0SMike Frysinger         }
148e89516f0SMike Frysinger         else if (op & 16) {                     /* length base */
149e89516f0SMike Frysinger             len = (unsigned)(this.val);
150e89516f0SMike Frysinger             op &= 15;                           /* number of extra bits */
151e89516f0SMike Frysinger             if (op) {
152e89516f0SMike Frysinger                 if (bits < op) {
153e89516f0SMike Frysinger                     hold += (unsigned long)(PUP(in)) << bits;
154e89516f0SMike Frysinger                     bits += 8;
155e89516f0SMike Frysinger                 }
156e89516f0SMike Frysinger                 len += (unsigned)hold & ((1U << op) - 1);
157e89516f0SMike Frysinger                 hold >>= op;
158e89516f0SMike Frysinger                 bits -= op;
159e89516f0SMike Frysinger             }
160e89516f0SMike Frysinger             Tracevv((stderr, "inflate:         length %u\n", len));
161e89516f0SMike Frysinger             if (bits < 15) {
162e89516f0SMike Frysinger                 hold += (unsigned long)(PUP(in)) << bits;
163e89516f0SMike Frysinger                 bits += 8;
164e89516f0SMike Frysinger                 hold += (unsigned long)(PUP(in)) << bits;
165e89516f0SMike Frysinger                 bits += 8;
166e89516f0SMike Frysinger             }
167e89516f0SMike Frysinger             this = dcode[hold & dmask];
168e89516f0SMike Frysinger           dodist:
169e89516f0SMike Frysinger             op = (unsigned)(this.bits);
170e89516f0SMike Frysinger             hold >>= op;
171e89516f0SMike Frysinger             bits -= op;
172e89516f0SMike Frysinger             op = (unsigned)(this.op);
173e89516f0SMike Frysinger             if (op & 16) {                      /* distance base */
174e89516f0SMike Frysinger                 dist = (unsigned)(this.val);
175e89516f0SMike Frysinger                 op &= 15;                       /* number of extra bits */
176e89516f0SMike Frysinger                 if (bits < op) {
177e89516f0SMike Frysinger                     hold += (unsigned long)(PUP(in)) << bits;
178e89516f0SMike Frysinger                     bits += 8;
179e89516f0SMike Frysinger                     if (bits < op) {
180e89516f0SMike Frysinger                         hold += (unsigned long)(PUP(in)) << bits;
181e89516f0SMike Frysinger                         bits += 8;
182e89516f0SMike Frysinger                     }
183e89516f0SMike Frysinger                 }
184e89516f0SMike Frysinger                 dist += (unsigned)hold & ((1U << op) - 1);
185e89516f0SMike Frysinger #ifdef INFLATE_STRICT
186e89516f0SMike Frysinger                 if (dist > dmax) {
187e89516f0SMike Frysinger                     strm->msg = (char *)"invalid distance too far back";
188e89516f0SMike Frysinger                     state->mode = BAD;
189e89516f0SMike Frysinger                     break;
190e89516f0SMike Frysinger                 }
191e89516f0SMike Frysinger #endif
192e89516f0SMike Frysinger                 hold >>= op;
193e89516f0SMike Frysinger                 bits -= op;
194e89516f0SMike Frysinger                 Tracevv((stderr, "inflate:         distance %u\n", dist));
195e89516f0SMike Frysinger                 op = (unsigned)(out - beg);     /* max distance in output */
196e89516f0SMike Frysinger                 if (dist > op) {                /* see if copy from window */
197e89516f0SMike Frysinger                     op = dist - op;             /* distance back in window */
198e89516f0SMike Frysinger                     if (op > whave) {
199e89516f0SMike Frysinger                         strm->msg = (char *)"invalid distance too far back";
200e89516f0SMike Frysinger                         state->mode = BAD;
201e89516f0SMike Frysinger                         break;
202e89516f0SMike Frysinger                     }
203e89516f0SMike Frysinger                     from = window - OFF;
204e89516f0SMike Frysinger                     if (write == 0) {           /* very common case */
205e89516f0SMike Frysinger                         from += wsize - op;
206e89516f0SMike Frysinger                         if (op < len) {         /* some from window */
207e89516f0SMike Frysinger                             len -= op;
208e89516f0SMike Frysinger                             do {
209e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
210e89516f0SMike Frysinger                             } while (--op);
211e89516f0SMike Frysinger                             from = out - dist;  /* rest from output */
212e89516f0SMike Frysinger                         }
213e89516f0SMike Frysinger                     }
214e89516f0SMike Frysinger                     else if (write < op) {      /* wrap around window */
215e89516f0SMike Frysinger                         from += wsize + write - op;
216e89516f0SMike Frysinger                         op -= write;
217e89516f0SMike Frysinger                         if (op < len) {         /* some from end of window */
218e89516f0SMike Frysinger                             len -= op;
219e89516f0SMike Frysinger                             do {
220e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
221e89516f0SMike Frysinger                             } while (--op);
222e89516f0SMike Frysinger                             from = window - OFF;
223e89516f0SMike Frysinger                             if (write < len) {  /* some from start of window */
224e89516f0SMike Frysinger                                 op = write;
225e89516f0SMike Frysinger                                 len -= op;
226e89516f0SMike Frysinger                                 do {
227e89516f0SMike Frysinger                                     PUP(out) = PUP(from);
228e89516f0SMike Frysinger                                 } while (--op);
229e89516f0SMike Frysinger                                 from = out - dist;      /* rest from output */
230e89516f0SMike Frysinger                             }
231e89516f0SMike Frysinger                         }
232e89516f0SMike Frysinger                     }
233e89516f0SMike Frysinger                     else {                      /* contiguous in window */
234e89516f0SMike Frysinger                         from += write - op;
235e89516f0SMike Frysinger                         if (op < len) {         /* some from window */
236e89516f0SMike Frysinger                             len -= op;
237e89516f0SMike Frysinger                             do {
238e89516f0SMike Frysinger                                 PUP(out) = PUP(from);
239e89516f0SMike Frysinger                             } while (--op);
240e89516f0SMike Frysinger                             from = out - dist;  /* rest from output */
241e89516f0SMike Frysinger                         }
242e89516f0SMike Frysinger                     }
243e89516f0SMike Frysinger                     while (len > 2) {
244e89516f0SMike Frysinger                         PUP(out) = PUP(from);
245e89516f0SMike Frysinger                         PUP(out) = PUP(from);
246e89516f0SMike Frysinger                         PUP(out) = PUP(from);
247e89516f0SMike Frysinger                         len -= 3;
248e89516f0SMike Frysinger                     }
249e89516f0SMike Frysinger                     if (len) {
250e89516f0SMike Frysinger                         PUP(out) = PUP(from);
251e89516f0SMike Frysinger                         if (len > 1)
252e89516f0SMike Frysinger                             PUP(out) = PUP(from);
253e89516f0SMike Frysinger                     }
254e89516f0SMike Frysinger                 }
255e89516f0SMike Frysinger                 else {
256e89516f0SMike Frysinger 		    unsigned short *sout;
257e89516f0SMike Frysinger 		    unsigned long loops;
258e89516f0SMike Frysinger 
259e89516f0SMike Frysinger                     from = out - dist;          /* copy direct from output */
260e89516f0SMike Frysinger                     /* minimum length is three */
261e89516f0SMike Frysinger 		    /* Align out addr */
262e89516f0SMike Frysinger 		    if (!((long)(out - 1 + OFF) & 1)) {
263e89516f0SMike Frysinger 			PUP(out) = PUP(from);
264e89516f0SMike Frysinger 			len--;
265e89516f0SMike Frysinger 		    }
266e89516f0SMike Frysinger 		    sout = (unsigned short *)(out - OFF);
267e89516f0SMike Frysinger 		    if (dist > 2 ) {
268e89516f0SMike Frysinger 			unsigned short *sfrom;
269e89516f0SMike Frysinger 
270e89516f0SMike Frysinger 			sfrom = (unsigned short *)(from - OFF);
271e89516f0SMike Frysinger 			loops = len >> 1;
272e89516f0SMike Frysinger 			do
273e89516f0SMike Frysinger 			    PUP(sout) = get_unaligned(++sfrom);
274e89516f0SMike Frysinger 			while (--loops);
275e89516f0SMike Frysinger 			out = (unsigned char *)sout + OFF;
276e89516f0SMike Frysinger 			from = (unsigned char *)sfrom + OFF;
277e89516f0SMike Frysinger 		    } else { /* dist == 1 or dist == 2 */
278e89516f0SMike Frysinger 			unsigned short pat16;
279e89516f0SMike Frysinger 
280e89516f0SMike Frysinger 			pat16 = *(sout-2+2*OFF);
281e89516f0SMike Frysinger 			if (dist == 1)
282e89516f0SMike Frysinger #if defined(__BIG_ENDIAN)
283e89516f0SMike Frysinger 			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
284e89516f0SMike Frysinger #elif defined(__LITTLE_ENDIAN)
285e89516f0SMike Frysinger 			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
286e89516f0SMike Frysinger #else
287e89516f0SMike Frysinger #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
288e89516f0SMike Frysinger #endif
289e89516f0SMike Frysinger 			loops = len >> 1;
290e89516f0SMike Frysinger 			do
291e89516f0SMike Frysinger 			    PUP(sout) = pat16;
292e89516f0SMike Frysinger 			while (--loops);
293e89516f0SMike Frysinger 			out = (unsigned char *)sout + OFF;
294e89516f0SMike Frysinger 		    }
295e89516f0SMike Frysinger 		    if (len & 1)
296e89516f0SMike Frysinger 			PUP(out) = PUP(from);
297e89516f0SMike Frysinger                 }
298e89516f0SMike Frysinger             }
299e89516f0SMike Frysinger             else if ((op & 64) == 0) {          /* 2nd level distance code */
300e89516f0SMike Frysinger                 this = dcode[this.val + (hold & ((1U << op) - 1))];
301e89516f0SMike Frysinger                 goto dodist;
302e89516f0SMike Frysinger             }
303e89516f0SMike Frysinger             else {
304e89516f0SMike Frysinger                 strm->msg = (char *)"invalid distance code";
305e89516f0SMike Frysinger                 state->mode = BAD;
306e89516f0SMike Frysinger                 break;
307e89516f0SMike Frysinger             }
308e89516f0SMike Frysinger         }
309e89516f0SMike Frysinger         else if ((op & 64) == 0) {              /* 2nd level length code */
310e89516f0SMike Frysinger             this = lcode[this.val + (hold & ((1U << op) - 1))];
311e89516f0SMike Frysinger             goto dolen;
312e89516f0SMike Frysinger         }
313e89516f0SMike Frysinger         else if (op & 32) {                     /* end-of-block */
314e89516f0SMike Frysinger             Tracevv((stderr, "inflate:         end of block\n"));
315e89516f0SMike Frysinger             state->mode = TYPE;
316e89516f0SMike Frysinger             break;
317e89516f0SMike Frysinger         }
318e89516f0SMike Frysinger         else {
319e89516f0SMike Frysinger             strm->msg = (char *)"invalid literal/length code";
320e89516f0SMike Frysinger             state->mode = BAD;
321e89516f0SMike Frysinger             break;
322e89516f0SMike Frysinger         }
323e89516f0SMike Frysinger     } while (in < last && out < end);
324e89516f0SMike Frysinger 
325e89516f0SMike Frysinger     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
326e89516f0SMike Frysinger     len = bits >> 3;
327e89516f0SMike Frysinger     in -= len;
328e89516f0SMike Frysinger     bits -= len << 3;
329e89516f0SMike Frysinger     hold &= (1U << bits) - 1;
330e89516f0SMike Frysinger 
331e89516f0SMike Frysinger     /* update state and return */
332e89516f0SMike Frysinger     strm->next_in = in + OFF;
333e89516f0SMike Frysinger     strm->next_out = out + OFF;
334e89516f0SMike Frysinger     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
335e89516f0SMike Frysinger     strm->avail_out = (unsigned)(out < end ?
336e89516f0SMike Frysinger                                  257 + (end - out) : 257 - (out - end));
337e89516f0SMike Frysinger     state->hold = hold;
338e89516f0SMike Frysinger     state->bits = bits;
339e89516f0SMike Frysinger     return;
340e89516f0SMike Frysinger }
341e89516f0SMike Frysinger 
342e89516f0SMike Frysinger /*
343e89516f0SMike Frysinger    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
344e89516f0SMike Frysinger    - Using bit fields for code structure
345e89516f0SMike Frysinger    - Different op definition to avoid & for extra bits (do & for table bits)
346e89516f0SMike Frysinger    - Three separate decoding do-loops for direct, window, and write == 0
347e89516f0SMike Frysinger    - Special case for distance > 1 copies to do overlapped load and store copy
348e89516f0SMike Frysinger    - Explicit branch predictions (based on measured branch probabilities)
349e89516f0SMike Frysinger    - Deferring match copy and interspersed it with decoding subsequent codes
350e89516f0SMike Frysinger    - Swapping literal/length else
351e89516f0SMike Frysinger    - Swapping window/direct else
352e89516f0SMike Frysinger    - Larger unrolled copy loops (three is about right)
353e89516f0SMike Frysinger    - Moving len -= 3 statement into middle of loop
354e89516f0SMike Frysinger  */
355e89516f0SMike Frysinger 
356e89516f0SMike Frysinger #endif /* !ASMINF */
357