xref: /openbmc/u-boot/lib/zlib/inffast.c (revision 8c6407fc)
1 /* inffast.c -- fast decoding
2  * Copyright (C) 1995-2004 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 /* U-boot: we already included these
7 #include "zutil.h"
8 #include "inftrees.h"
9 #include "inflate.h"
10 #include "inffast.h"
11 */
12 
13 #ifndef ASMINF
14 
15 /* Allow machine dependent optimization for post-increment or pre-increment.
16    Based on testing to date,
17    Pre-increment preferred for:
18    - PowerPC G3 (Adler)
19    - MIPS R5000 (Randers-Pehrson)
20    Post-increment preferred for:
21    - none
22    No measurable difference:
23    - Pentium III (Anderson)
24    - M68060 (Nikl)
25  */
26 #ifdef POSTINC
27 #  define OFF 0
28 #  define PUP(a) *(a)++
29 #else
30 #  define OFF 1
31 #  define PUP(a) *++(a)
32 #endif
33 
34 /*
35    Decode literal, length, and distance codes and write out the resulting
36    literal and match bytes until either not enough input or output is
37    available, an end-of-block is encountered, or a data error is encountered.
38    When large enough input and output buffers are supplied to inflate(), for
39    example, a 16K input buffer and a 64K output buffer, more than 95% of the
40    inflate execution time is spent in this routine.
41 
42    Entry assumptions:
43 
44         state->mode == LEN
45         strm->avail_in >= 6
46         strm->avail_out >= 258
47         start >= strm->avail_out
48         state->bits < 8
49 
50    On return, state->mode is one of:
51 
52         LEN -- ran out of enough output space or enough available input
53         TYPE -- reached end of block code, inflate() to interpret next block
54         BAD -- error in block data
55 
56    Notes:
57 
58     - The maximum input bits used by a length/distance pair is 15 bits for the
59       length code, 5 bits for the length extra, 15 bits for the distance code,
60       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
61       Therefore if strm->avail_in >= 6, then there is enough input to avoid
62       checking for available input while decoding.
63 
64     - The maximum bytes that a single length/distance pair can output is 258
65       bytes, which is the maximum length that can be coded.  inflate_fast()
66       requires strm->avail_out >= 258 for each loop to avoid checking for
67       output space.
68  */
69 void inflate_fast(strm, start)
70 z_streamp strm;
71 unsigned start;         /* inflate()'s starting value for strm->avail_out */
72 {
73     struct inflate_state FAR *state;
74     unsigned char FAR *in;      /* local strm->next_in */
75     unsigned char FAR *last;    /* while in < last, enough input available */
76     unsigned char FAR *out;     /* local strm->next_out */
77     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
78     unsigned char FAR *end;     /* while out < end, enough space available */
79 #ifdef INFLATE_STRICT
80     unsigned dmax;              /* maximum distance from zlib header */
81 #endif
82     unsigned wsize;             /* window size or zero if not using window */
83     unsigned whave;             /* valid bytes in the window */
84     unsigned write;             /* window write index */
85     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
86     unsigned long hold;         /* local strm->hold */
87     unsigned bits;              /* local strm->bits */
88     code const FAR *lcode;      /* local strm->lencode */
89     code const FAR *dcode;      /* local strm->distcode */
90     unsigned lmask;             /* mask for first level of length codes */
91     unsigned dmask;             /* mask for first level of distance codes */
92     code this;                  /* retrieved table entry */
93     unsigned op;                /* code bits, operation, extra bits, or */
94                                 /*  window position, window bytes to copy */
95     unsigned len;               /* match length, unused bytes */
96     unsigned dist;              /* match distance */
97     unsigned char FAR *from;    /* where to copy match from */
98 
99     /* copy state to local variables */
100     state = (struct inflate_state FAR *)strm->state;
101     in = strm->next_in - OFF;
102     last = in + (strm->avail_in - 5);
103     out = strm->next_out - OFF;
104     beg = out - (start - strm->avail_out);
105     end = out + (strm->avail_out - 257);
106 #ifdef INFLATE_STRICT
107     dmax = state->dmax;
108 #endif
109     wsize = state->wsize;
110     whave = state->whave;
111     write = state->write;
112     window = state->window;
113     hold = state->hold;
114     bits = state->bits;
115     lcode = state->lencode;
116     dcode = state->distcode;
117     lmask = (1U << state->lenbits) - 1;
118     dmask = (1U << state->distbits) - 1;
119 
120     /* decode literals and length/distances until end-of-block or not enough
121        input data or output space */
122     do {
123         if (bits < 15) {
124             hold += (unsigned long)(PUP(in)) << bits;
125             bits += 8;
126             hold += (unsigned long)(PUP(in)) << bits;
127             bits += 8;
128         }
129         this = lcode[hold & lmask];
130       dolen:
131         op = (unsigned)(this.bits);
132         hold >>= op;
133         bits -= op;
134         op = (unsigned)(this.op);
135         if (op == 0) {                          /* literal */
136             Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
137                     "inflate:         literal '%c'\n" :
138                     "inflate:         literal 0x%02x\n", this.val));
139             PUP(out) = (unsigned char)(this.val);
140         }
141         else if (op & 16) {                     /* length base */
142             len = (unsigned)(this.val);
143             op &= 15;                           /* number of extra bits */
144             if (op) {
145                 if (bits < op) {
146                     hold += (unsigned long)(PUP(in)) << bits;
147                     bits += 8;
148                 }
149                 len += (unsigned)hold & ((1U << op) - 1);
150                 hold >>= op;
151                 bits -= op;
152             }
153             Tracevv((stderr, "inflate:         length %u\n", len));
154             if (bits < 15) {
155                 hold += (unsigned long)(PUP(in)) << bits;
156                 bits += 8;
157                 hold += (unsigned long)(PUP(in)) << bits;
158                 bits += 8;
159             }
160             this = dcode[hold & dmask];
161           dodist:
162             op = (unsigned)(this.bits);
163             hold >>= op;
164             bits -= op;
165             op = (unsigned)(this.op);
166             if (op & 16) {                      /* distance base */
167                 dist = (unsigned)(this.val);
168                 op &= 15;                       /* number of extra bits */
169                 if (bits < op) {
170                     hold += (unsigned long)(PUP(in)) << bits;
171                     bits += 8;
172                     if (bits < op) {
173                         hold += (unsigned long)(PUP(in)) << bits;
174                         bits += 8;
175                     }
176                 }
177                 dist += (unsigned)hold & ((1U << op) - 1);
178 #ifdef INFLATE_STRICT
179                 if (dist > dmax) {
180                     strm->msg = (char *)"invalid distance too far back";
181                     state->mode = BAD;
182                     break;
183                 }
184 #endif
185                 hold >>= op;
186                 bits -= op;
187                 Tracevv((stderr, "inflate:         distance %u\n", dist));
188                 op = (unsigned)(out - beg);     /* max distance in output */
189                 if (dist > op) {                /* see if copy from window */
190                     op = dist - op;             /* distance back in window */
191                     if (op > whave) {
192                         strm->msg = (char *)"invalid distance too far back";
193                         state->mode = BAD;
194                         break;
195                     }
196                     from = window - OFF;
197                     if (write == 0) {           /* very common case */
198                         from += wsize - op;
199                         if (op < len) {         /* some from window */
200                             len -= op;
201                             do {
202                                 PUP(out) = PUP(from);
203                             } while (--op);
204                             from = out - dist;  /* rest from output */
205                         }
206                     }
207                     else if (write < op) {      /* wrap around window */
208                         from += wsize + write - op;
209                         op -= write;
210                         if (op < len) {         /* some from end of window */
211                             len -= op;
212                             do {
213                                 PUP(out) = PUP(from);
214                             } while (--op);
215                             from = window - OFF;
216                             if (write < len) {  /* some from start of window */
217                                 op = write;
218                                 len -= op;
219                                 do {
220                                     PUP(out) = PUP(from);
221                                 } while (--op);
222                                 from = out - dist;      /* rest from output */
223                             }
224                         }
225                     }
226                     else {                      /* contiguous in window */
227                         from += write - op;
228                         if (op < len) {         /* some from window */
229                             len -= op;
230                             do {
231                                 PUP(out) = PUP(from);
232                             } while (--op);
233                             from = out - dist;  /* rest from output */
234                         }
235                     }
236                     while (len > 2) {
237                         PUP(out) = PUP(from);
238                         PUP(out) = PUP(from);
239                         PUP(out) = PUP(from);
240                         len -= 3;
241                     }
242                     if (len) {
243                         PUP(out) = PUP(from);
244                         if (len > 1)
245                             PUP(out) = PUP(from);
246                     }
247                 }
248                 else {
249 		    unsigned short *sout;
250 		    unsigned long loops;
251 
252                     from = out - dist;          /* copy direct from output */
253                     /* minimum length is three */
254 		    /* Align out addr */
255 		    if (!((long)(out - 1 + OFF) & 1)) {
256 			PUP(out) = PUP(from);
257 			len--;
258 		    }
259 		    sout = (unsigned short *)(out - OFF);
260 		    if (dist > 2 ) {
261 			unsigned short *sfrom;
262 
263 			sfrom = (unsigned short *)(from - OFF);
264 			loops = len >> 1;
265 			do
266 			    PUP(sout) = get_unaligned(++sfrom);
267 			while (--loops);
268 			out = (unsigned char *)sout + OFF;
269 			from = (unsigned char *)sfrom + OFF;
270 		    } else { /* dist == 1 or dist == 2 */
271 			unsigned short pat16;
272 
273 			pat16 = *(sout-2+2*OFF);
274 			if (dist == 1)
275 #if defined(__BIG_ENDIAN)
276 			    pat16 = (pat16 & 0xff) | ((pat16 & 0xff ) << 8);
277 #elif defined(__LITTLE_ENDIAN)
278 			    pat16 = (pat16 & 0xff00) | ((pat16 & 0xff00 ) >> 8);
279 #else
280 #error __BIG_ENDIAN nor __LITTLE_ENDIAN is defined
281 #endif
282 			loops = len >> 1;
283 			do
284 			    PUP(sout) = pat16;
285 			while (--loops);
286 			out = (unsigned char *)sout + OFF;
287 		    }
288 		    if (len & 1)
289 			PUP(out) = PUP(from);
290                 }
291             }
292             else if ((op & 64) == 0) {          /* 2nd level distance code */
293                 this = dcode[this.val + (hold & ((1U << op) - 1))];
294                 goto dodist;
295             }
296             else {
297                 strm->msg = (char *)"invalid distance code";
298                 state->mode = BAD;
299                 break;
300             }
301         }
302         else if ((op & 64) == 0) {              /* 2nd level length code */
303             this = lcode[this.val + (hold & ((1U << op) - 1))];
304             goto dolen;
305         }
306         else if (op & 32) {                     /* end-of-block */
307             Tracevv((stderr, "inflate:         end of block\n"));
308             state->mode = TYPE;
309             break;
310         }
311         else {
312             strm->msg = (char *)"invalid literal/length code";
313             state->mode = BAD;
314             break;
315         }
316     } while (in < last && out < end);
317 
318     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
319     len = bits >> 3;
320     in -= len;
321     bits -= len << 3;
322     hold &= (1U << bits) - 1;
323 
324     /* update state and return */
325     strm->next_in = in + OFF;
326     strm->next_out = out + OFF;
327     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
328     strm->avail_out = (unsigned)(out < end ?
329                                  257 + (end - out) : 257 - (out - end));
330     state->hold = hold;
331     state->bits = bits;
332     return;
333 }
334 
335 /*
336    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
337    - Using bit fields for code structure
338    - Different op definition to avoid & for extra bits (do & for table bits)
339    - Three separate decoding do-loops for direct, window, and write == 0
340    - Special case for distance > 1 copies to do overlapped load and store copy
341    - Explicit branch predictions (based on measured branch probabilities)
342    - Deferring match copy and interspersed it with decoding subsequent codes
343    - Swapping literal/length else
344    - Swapping window/direct else
345    - Larger unrolled copy loops (three is about right)
346    - Moving len -= 3 statement into middle of loop
347  */
348 
349 #endif /* !ASMINF */
350