xref: /openbmc/linux/lib/zlib_inflate/inffast.c (revision 762f99f4f3cb41a775b5157dd761217beba65873)
14f3865fbSRichard Purdie /* inffast.c -- fast decoding
24f3865fbSRichard Purdie  * Copyright (C) 1995-2004 Mark Adler
31da177e4SLinus Torvalds  * For conditions of distribution and use, see copyright notice in zlib.h
41da177e4SLinus Torvalds  */
51da177e4SLinus Torvalds 
61da177e4SLinus Torvalds #include <linux/zutil.h>
71da177e4SLinus Torvalds #include "inftrees.h"
84f3865fbSRichard Purdie #include "inflate.h"
91da177e4SLinus Torvalds #include "inffast.h"
101da177e4SLinus Torvalds 
114f3865fbSRichard Purdie #ifndef ASMINF
121da177e4SLinus Torvalds 
13e69eae65SJoakim Tjernlund union uu {
14e69eae65SJoakim Tjernlund 	unsigned short us;
15e69eae65SJoakim Tjernlund 	unsigned char b[2];
16e69eae65SJoakim Tjernlund };
17e69eae65SJoakim Tjernlund 
1805911c5dSZhen Lei /* Endian independent version */
19e69eae65SJoakim Tjernlund static inline unsigned short
get_unaligned16(const unsigned short * p)20e69eae65SJoakim Tjernlund get_unaligned16(const unsigned short *p)
21e69eae65SJoakim Tjernlund {
22e69eae65SJoakim Tjernlund 	union uu  mm;
23e69eae65SJoakim Tjernlund 	unsigned char *b = (unsigned char *)p;
24e69eae65SJoakim Tjernlund 
25e69eae65SJoakim Tjernlund 	mm.b[0] = b[0];
26e69eae65SJoakim Tjernlund 	mm.b[1] = b[1];
27e69eae65SJoakim Tjernlund 	return mm.us;
28e69eae65SJoakim Tjernlund }
29e69eae65SJoakim Tjernlund 
304f3865fbSRichard Purdie /*
314f3865fbSRichard Purdie    Decode literal, length, and distance codes and write out the resulting
324f3865fbSRichard Purdie    literal and match bytes until either not enough input or output is
334f3865fbSRichard Purdie    available, an end-of-block is encountered, or a data error is encountered.
344f3865fbSRichard Purdie    When large enough input and output buffers are supplied to inflate(), for
354f3865fbSRichard Purdie    example, a 16K input buffer and a 64K output buffer, more than 95% of the
364f3865fbSRichard Purdie    inflate execution time is spent in this routine.
371da177e4SLinus Torvalds 
384f3865fbSRichard Purdie    Entry assumptions:
391da177e4SLinus Torvalds 
404f3865fbSRichard Purdie         state->mode == LEN
414f3865fbSRichard Purdie         strm->avail_in >= 6
424f3865fbSRichard Purdie         strm->avail_out >= 258
434f3865fbSRichard Purdie         start >= strm->avail_out
444f3865fbSRichard Purdie         state->bits < 8
454f3865fbSRichard Purdie 
464f3865fbSRichard Purdie    On return, state->mode is one of:
474f3865fbSRichard Purdie 
484f3865fbSRichard Purdie         LEN -- ran out of enough output space or enough available input
494f3865fbSRichard Purdie         TYPE -- reached end of block code, inflate() to interpret next block
504f3865fbSRichard Purdie         BAD -- error in block data
514f3865fbSRichard Purdie 
524f3865fbSRichard Purdie    Notes:
534f3865fbSRichard Purdie 
544f3865fbSRichard Purdie     - The maximum input bits used by a length/distance pair is 15 bits for the
554f3865fbSRichard Purdie       length code, 5 bits for the length extra, 15 bits for the distance code,
564f3865fbSRichard Purdie       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
574f3865fbSRichard Purdie       Therefore if strm->avail_in >= 6, then there is enough input to avoid
584f3865fbSRichard Purdie       checking for available input while decoding.
594f3865fbSRichard Purdie 
604f3865fbSRichard Purdie     - The maximum bytes that a single length/distance pair can output is 258
614f3865fbSRichard Purdie       bytes, which is the maximum length that can be coded.  inflate_fast()
624f3865fbSRichard Purdie       requires strm->avail_out >= 258 for each loop to avoid checking for
634f3865fbSRichard Purdie       output space.
64b762450eSRandy Dunlap 
65b762450eSRandy Dunlap     - @start:	inflate()'s starting value for strm->avail_out
664f3865fbSRichard Purdie  */
inflate_fast(z_streamp strm,unsigned start)67b762450eSRandy Dunlap void inflate_fast(z_streamp strm, unsigned start)
681da177e4SLinus Torvalds {
694f3865fbSRichard Purdie     struct inflate_state *state;
708336793bSDenys Vlasenko     const unsigned char *in;    /* local strm->next_in */
718336793bSDenys Vlasenko     const unsigned char *last;  /* while in < last, enough input available */
724f3865fbSRichard Purdie     unsigned char *out;         /* local strm->next_out */
734f3865fbSRichard Purdie     unsigned char *beg;         /* inflate()'s initial strm->next_out */
744f3865fbSRichard Purdie     unsigned char *end;         /* while out < end, enough space available */
754f3865fbSRichard Purdie #ifdef INFLATE_STRICT
764f3865fbSRichard Purdie     unsigned dmax;              /* maximum distance from zlib header */
774f3865fbSRichard Purdie #endif
784f3865fbSRichard Purdie     unsigned wsize;             /* window size or zero if not using window */
794f3865fbSRichard Purdie     unsigned whave;             /* valid bytes in the window */
804f3865fbSRichard Purdie     unsigned write;             /* window write index */
814f3865fbSRichard Purdie     unsigned char *window;      /* allocated sliding window, if wsize != 0 */
824f3865fbSRichard Purdie     unsigned long hold;         /* local strm->hold */
834f3865fbSRichard Purdie     unsigned bits;              /* local strm->bits */
844f3865fbSRichard Purdie     code const *lcode;          /* local strm->lencode */
854f3865fbSRichard Purdie     code const *dcode;          /* local strm->distcode */
864f3865fbSRichard Purdie     unsigned lmask;             /* mask for first level of length codes */
874f3865fbSRichard Purdie     unsigned dmask;             /* mask for first level of distance codes */
884f3865fbSRichard Purdie     code this;                  /* retrieved table entry */
894f3865fbSRichard Purdie     unsigned op;                /* code bits, operation, extra bits, or */
904f3865fbSRichard Purdie                                 /*  window position, window bytes to copy */
914f3865fbSRichard Purdie     unsigned len;               /* match length, unused bytes */
924f3865fbSRichard Purdie     unsigned dist;              /* match distance */
934f3865fbSRichard Purdie     unsigned char *from;        /* where to copy match from */
941da177e4SLinus Torvalds 
954f3865fbSRichard Purdie     /* copy state to local variables */
964f3865fbSRichard Purdie     state = (struct inflate_state *)strm->state;
97acaab733SJann Horn     in = strm->next_in;
984f3865fbSRichard Purdie     last = in + (strm->avail_in - 5);
99acaab733SJann Horn     out = strm->next_out;
1004f3865fbSRichard Purdie     beg = out - (start - strm->avail_out);
1014f3865fbSRichard Purdie     end = out + (strm->avail_out - 257);
1024f3865fbSRichard Purdie #ifdef INFLATE_STRICT
1034f3865fbSRichard Purdie     dmax = state->dmax;
1044f3865fbSRichard Purdie #endif
1054f3865fbSRichard Purdie     wsize = state->wsize;
1064f3865fbSRichard Purdie     whave = state->whave;
1074f3865fbSRichard Purdie     write = state->write;
1084f3865fbSRichard Purdie     window = state->window;
1094f3865fbSRichard Purdie     hold = state->hold;
1104f3865fbSRichard Purdie     bits = state->bits;
1114f3865fbSRichard Purdie     lcode = state->lencode;
1124f3865fbSRichard Purdie     dcode = state->distcode;
1134f3865fbSRichard Purdie     lmask = (1U << state->lenbits) - 1;
1144f3865fbSRichard Purdie     dmask = (1U << state->distbits) - 1;
1151da177e4SLinus Torvalds 
1164f3865fbSRichard Purdie     /* decode literals and length/distances until end-of-block or not enough
1174f3865fbSRichard Purdie        input data or output space */
1184f3865fbSRichard Purdie     do {
1194f3865fbSRichard Purdie         if (bits < 15) {
120acaab733SJann Horn             hold += (unsigned long)(*in++) << bits;
1214f3865fbSRichard Purdie             bits += 8;
122acaab733SJann Horn             hold += (unsigned long)(*in++) << bits;
1234f3865fbSRichard Purdie             bits += 8;
1241da177e4SLinus Torvalds         }
1254f3865fbSRichard Purdie         this = lcode[hold & lmask];
1264f3865fbSRichard Purdie       dolen:
1274f3865fbSRichard Purdie         op = (unsigned)(this.bits);
1284f3865fbSRichard Purdie         hold >>= op;
1294f3865fbSRichard Purdie         bits -= op;
1304f3865fbSRichard Purdie         op = (unsigned)(this.op);
1314f3865fbSRichard Purdie         if (op == 0) {                          /* literal */
132acaab733SJann Horn             *out++ = (unsigned char)(this.val);
1331da177e4SLinus Torvalds         }
1344f3865fbSRichard Purdie         else if (op & 16) {                     /* length base */
1354f3865fbSRichard Purdie             len = (unsigned)(this.val);
1364f3865fbSRichard Purdie             op &= 15;                           /* number of extra bits */
1374f3865fbSRichard Purdie             if (op) {
1384f3865fbSRichard Purdie                 if (bits < op) {
139acaab733SJann Horn                     hold += (unsigned long)(*in++) << bits;
1404f3865fbSRichard Purdie                     bits += 8;
1414f3865fbSRichard Purdie                 }
1424f3865fbSRichard Purdie                 len += (unsigned)hold & ((1U << op) - 1);
1434f3865fbSRichard Purdie                 hold >>= op;
1444f3865fbSRichard Purdie                 bits -= op;
1454f3865fbSRichard Purdie             }
1464f3865fbSRichard Purdie             if (bits < 15) {
147acaab733SJann Horn                 hold += (unsigned long)(*in++) << bits;
1484f3865fbSRichard Purdie                 bits += 8;
149acaab733SJann Horn                 hold += (unsigned long)(*in++) << bits;
1504f3865fbSRichard Purdie                 bits += 8;
1514f3865fbSRichard Purdie             }
1524f3865fbSRichard Purdie             this = dcode[hold & dmask];
1534f3865fbSRichard Purdie           dodist:
1544f3865fbSRichard Purdie             op = (unsigned)(this.bits);
1554f3865fbSRichard Purdie             hold >>= op;
1564f3865fbSRichard Purdie             bits -= op;
1574f3865fbSRichard Purdie             op = (unsigned)(this.op);
1584f3865fbSRichard Purdie             if (op & 16) {                      /* distance base */
1594f3865fbSRichard Purdie                 dist = (unsigned)(this.val);
1604f3865fbSRichard Purdie                 op &= 15;                       /* number of extra bits */
1614f3865fbSRichard Purdie                 if (bits < op) {
162acaab733SJann Horn                     hold += (unsigned long)(*in++) << bits;
1634f3865fbSRichard Purdie                     bits += 8;
1644f3865fbSRichard Purdie                     if (bits < op) {
165acaab733SJann Horn                         hold += (unsigned long)(*in++) << bits;
1664f3865fbSRichard Purdie                         bits += 8;
1671da177e4SLinus Torvalds                     }
1681da177e4SLinus Torvalds                 }
1694f3865fbSRichard Purdie                 dist += (unsigned)hold & ((1U << op) - 1);
1704f3865fbSRichard Purdie #ifdef INFLATE_STRICT
1714f3865fbSRichard Purdie                 if (dist > dmax) {
1724f3865fbSRichard Purdie                     strm->msg = (char *)"invalid distance too far back";
1734f3865fbSRichard Purdie                     state->mode = BAD;
1741da177e4SLinus Torvalds                     break;
1751da177e4SLinus Torvalds                 }
1764f3865fbSRichard Purdie #endif
1774f3865fbSRichard Purdie                 hold >>= op;
1784f3865fbSRichard Purdie                 bits -= op;
1794f3865fbSRichard Purdie                 op = (unsigned)(out - beg);     /* max distance in output */
1804f3865fbSRichard Purdie                 if (dist > op) {                /* see if copy from window */
1814f3865fbSRichard Purdie                     op = dist - op;             /* distance back in window */
1824f3865fbSRichard Purdie                     if (op > whave) {
1834f3865fbSRichard Purdie                         strm->msg = (char *)"invalid distance too far back";
1844f3865fbSRichard Purdie                         state->mode = BAD;
1851da177e4SLinus Torvalds                         break;
1861da177e4SLinus Torvalds                     }
187acaab733SJann Horn                     from = window;
1884f3865fbSRichard Purdie                     if (write == 0) {           /* very common case */
1894f3865fbSRichard Purdie                         from += wsize - op;
1904f3865fbSRichard Purdie                         if (op < len) {         /* some from window */
1914f3865fbSRichard Purdie                             len -= op;
1924f3865fbSRichard Purdie                             do {
193acaab733SJann Horn                                 *out++ = *from++;
1944f3865fbSRichard Purdie                             } while (--op);
1954f3865fbSRichard Purdie                             from = out - dist;  /* rest from output */
1964f3865fbSRichard Purdie                         }
1974f3865fbSRichard Purdie                     }
1984f3865fbSRichard Purdie                     else if (write < op) {      /* wrap around window */
1994f3865fbSRichard Purdie                         from += wsize + write - op;
2004f3865fbSRichard Purdie                         op -= write;
2014f3865fbSRichard Purdie                         if (op < len) {         /* some from end of window */
2024f3865fbSRichard Purdie                             len -= op;
2034f3865fbSRichard Purdie                             do {
204acaab733SJann Horn                                 *out++ = *from++;
2054f3865fbSRichard Purdie                             } while (--op);
206acaab733SJann Horn                             from = window;
2074f3865fbSRichard Purdie                             if (write < len) {  /* some from start of window */
2084f3865fbSRichard Purdie                                 op = write;
2094f3865fbSRichard Purdie                                 len -= op;
2104f3865fbSRichard Purdie                                 do {
211acaab733SJann Horn                                     *out++ = *from++;
2124f3865fbSRichard Purdie                                 } while (--op);
2134f3865fbSRichard Purdie                                 from = out - dist;      /* rest from output */
2144f3865fbSRichard Purdie                             }
2154f3865fbSRichard Purdie                         }
2164f3865fbSRichard Purdie                     }
2174f3865fbSRichard Purdie                     else {                      /* contiguous in window */
2184f3865fbSRichard Purdie                         from += write - op;
2194f3865fbSRichard Purdie                         if (op < len) {         /* some from window */
2204f3865fbSRichard Purdie                             len -= op;
2214f3865fbSRichard Purdie                             do {
222acaab733SJann Horn                                 *out++ = *from++;
2234f3865fbSRichard Purdie                             } while (--op);
2244f3865fbSRichard Purdie                             from = out - dist;  /* rest from output */
2254f3865fbSRichard Purdie                         }
2264f3865fbSRichard Purdie                     }
2274f3865fbSRichard Purdie                     while (len > 2) {
228acaab733SJann Horn                         *out++ = *from++;
229acaab733SJann Horn                         *out++ = *from++;
230acaab733SJann Horn                         *out++ = *from++;
2314f3865fbSRichard Purdie                         len -= 3;
2324f3865fbSRichard Purdie                     }
2334f3865fbSRichard Purdie                     if (len) {
234acaab733SJann Horn                         *out++ = *from++;
2354f3865fbSRichard Purdie                         if (len > 1)
236acaab733SJann Horn                             *out++ = *from++;
2374f3865fbSRichard Purdie                     }
2384f3865fbSRichard Purdie                 }
2394f3865fbSRichard Purdie                 else {
240ac4c2a3bSJoakim Tjernlund 		    unsigned short *sout;
241ac4c2a3bSJoakim Tjernlund 		    unsigned long loops;
242ac4c2a3bSJoakim Tjernlund 
2434f3865fbSRichard Purdie                     from = out - dist;          /* copy direct from output */
244ac4c2a3bSJoakim Tjernlund 		    /* minimum length is three */
245ac4c2a3bSJoakim Tjernlund 		    /* Align out addr */
246acaab733SJann Horn 		    if (!((long)(out - 1) & 1)) {
247acaab733SJann Horn 			*out++ = *from++;
248ac4c2a3bSJoakim Tjernlund 			len--;
2494f3865fbSRichard Purdie 		    }
250acaab733SJann Horn 		    sout = (unsigned short *)(out);
251ac4c2a3bSJoakim Tjernlund 		    if (dist > 2) {
252ac4c2a3bSJoakim Tjernlund 			unsigned short *sfrom;
253ac4c2a3bSJoakim Tjernlund 
254acaab733SJann Horn 			sfrom = (unsigned short *)(from);
255ac4c2a3bSJoakim Tjernlund 			loops = len >> 1;
256*b7cd9fa5SPaul Menzel 			do {
257*b7cd9fa5SPaul Menzel 			    if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
258acaab733SJann Horn 				*sout++ = *sfrom++;
259*b7cd9fa5SPaul Menzel 			    else
260acaab733SJann Horn 				*sout++ = get_unaligned16(sfrom++);
261*b7cd9fa5SPaul Menzel 			} while (--loops);
262acaab733SJann Horn 			out = (unsigned char *)sout;
263acaab733SJann Horn 			from = (unsigned char *)sfrom;
264ac4c2a3bSJoakim Tjernlund 		    } else { /* dist == 1 or dist == 2 */
265ac4c2a3bSJoakim Tjernlund 			unsigned short pat16;
266ac4c2a3bSJoakim Tjernlund 
267acaab733SJann Horn 			pat16 = *(sout-1);
268e69eae65SJoakim Tjernlund 			if (dist == 1) {
269e69eae65SJoakim Tjernlund 				union uu mm;
270e69eae65SJoakim Tjernlund 				/* copy one char pattern to both bytes */
271e69eae65SJoakim Tjernlund 				mm.us = pat16;
272e69eae65SJoakim Tjernlund 				mm.b[0] = mm.b[1];
273e69eae65SJoakim Tjernlund 				pat16 = mm.us;
274e69eae65SJoakim Tjernlund 			}
275ac4c2a3bSJoakim Tjernlund 			loops = len >> 1;
276ac4c2a3bSJoakim Tjernlund 			do
277acaab733SJann Horn 			    *sout++ = pat16;
278ac4c2a3bSJoakim Tjernlund 			while (--loops);
279acaab733SJann Horn 			out = (unsigned char *)sout;
280ac4c2a3bSJoakim Tjernlund 		    }
281ac4c2a3bSJoakim Tjernlund 		    if (len & 1)
282acaab733SJann Horn 			*out++ = *from++;
2834f3865fbSRichard Purdie                 }
2844f3865fbSRichard Purdie             }
2854f3865fbSRichard Purdie             else if ((op & 64) == 0) {          /* 2nd level distance code */
2864f3865fbSRichard Purdie                 this = dcode[this.val + (hold & ((1U << op) - 1))];
2874f3865fbSRichard Purdie                 goto dodist;
2884f3865fbSRichard Purdie             }
2894f3865fbSRichard Purdie             else {
2904f3865fbSRichard Purdie                 strm->msg = (char *)"invalid distance code";
2914f3865fbSRichard Purdie                 state->mode = BAD;
2921da177e4SLinus Torvalds                 break;
2931da177e4SLinus Torvalds             }
2941da177e4SLinus Torvalds         }
2954f3865fbSRichard Purdie         else if ((op & 64) == 0) {              /* 2nd level length code */
2964f3865fbSRichard Purdie             this = lcode[this.val + (hold & ((1U << op) - 1))];
2974f3865fbSRichard Purdie             goto dolen;
2981da177e4SLinus Torvalds         }
2994f3865fbSRichard Purdie         else if (op & 32) {                     /* end-of-block */
3004f3865fbSRichard Purdie             state->mode = TYPE;
3014f3865fbSRichard Purdie             break;
3021da177e4SLinus Torvalds         }
3034f3865fbSRichard Purdie         else {
3044f3865fbSRichard Purdie             strm->msg = (char *)"invalid literal/length code";
3054f3865fbSRichard Purdie             state->mode = BAD;
3064f3865fbSRichard Purdie             break;
3074f3865fbSRichard Purdie         }
3084f3865fbSRichard Purdie     } while (in < last && out < end);
3091da177e4SLinus Torvalds 
3104f3865fbSRichard Purdie     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
3114f3865fbSRichard Purdie     len = bits >> 3;
3124f3865fbSRichard Purdie     in -= len;
3134f3865fbSRichard Purdie     bits -= len << 3;
3144f3865fbSRichard Purdie     hold &= (1U << bits) - 1;
3154f3865fbSRichard Purdie 
3164f3865fbSRichard Purdie     /* update state and return */
317acaab733SJann Horn     strm->next_in = in;
318acaab733SJann Horn     strm->next_out = out;
3194f3865fbSRichard Purdie     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
3204f3865fbSRichard Purdie     strm->avail_out = (unsigned)(out < end ?
3214f3865fbSRichard Purdie                                  257 + (end - out) : 257 - (out - end));
3224f3865fbSRichard Purdie     state->hold = hold;
3234f3865fbSRichard Purdie     state->bits = bits;
3244f3865fbSRichard Purdie     return;
3251da177e4SLinus Torvalds }
3264f3865fbSRichard Purdie 
3274f3865fbSRichard Purdie /*
3284f3865fbSRichard Purdie    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
3294f3865fbSRichard Purdie    - Using bit fields for code structure
3304f3865fbSRichard Purdie    - Different op definition to avoid & for extra bits (do & for table bits)
3314f3865fbSRichard Purdie    - Three separate decoding do-loops for direct, window, and write == 0
3324f3865fbSRichard Purdie    - Special case for distance > 1 copies to do overlapped load and store copy
3334f3865fbSRichard Purdie    - Explicit branch predictions (based on measured branch probabilities)
3344f3865fbSRichard Purdie    - Deferring match copy and interspersed it with decoding subsequent codes
3354f3865fbSRichard Purdie    - Swapping literal/length else
3364f3865fbSRichard Purdie    - Swapping window/direct else
3374f3865fbSRichard Purdie    - Larger unrolled copy loops (three is about right)
3384f3865fbSRichard Purdie    - Moving len -= 3 statement into middle of loop
3394f3865fbSRichard Purdie  */
3404f3865fbSRichard Purdie 
3414f3865fbSRichard Purdie #endif /* !ASMINF */
342