14f3865fbSRichard Purdie /* inffast.c -- fast decoding
24f3865fbSRichard Purdie * Copyright (C) 1995-2004 Mark Adler
31da177e4SLinus Torvalds * For conditions of distribution and use, see copyright notice in zlib.h
41da177e4SLinus Torvalds */
51da177e4SLinus Torvalds
61da177e4SLinus Torvalds #include <linux/zutil.h>
71da177e4SLinus Torvalds #include "inftrees.h"
84f3865fbSRichard Purdie #include "inflate.h"
91da177e4SLinus Torvalds #include "inffast.h"
101da177e4SLinus Torvalds
114f3865fbSRichard Purdie #ifndef ASMINF
121da177e4SLinus Torvalds
13e69eae65SJoakim Tjernlund union uu {
14e69eae65SJoakim Tjernlund unsigned short us;
15e69eae65SJoakim Tjernlund unsigned char b[2];
16e69eae65SJoakim Tjernlund };
17e69eae65SJoakim Tjernlund
1805911c5dSZhen Lei /* Endian independent version */
19e69eae65SJoakim Tjernlund static inline unsigned short
get_unaligned16(const unsigned short * p)20e69eae65SJoakim Tjernlund get_unaligned16(const unsigned short *p)
21e69eae65SJoakim Tjernlund {
22e69eae65SJoakim Tjernlund union uu mm;
23e69eae65SJoakim Tjernlund unsigned char *b = (unsigned char *)p;
24e69eae65SJoakim Tjernlund
25e69eae65SJoakim Tjernlund mm.b[0] = b[0];
26e69eae65SJoakim Tjernlund mm.b[1] = b[1];
27e69eae65SJoakim Tjernlund return mm.us;
28e69eae65SJoakim Tjernlund }
29e69eae65SJoakim Tjernlund
304f3865fbSRichard Purdie /*
314f3865fbSRichard Purdie Decode literal, length, and distance codes and write out the resulting
324f3865fbSRichard Purdie literal and match bytes until either not enough input or output is
334f3865fbSRichard Purdie available, an end-of-block is encountered, or a data error is encountered.
344f3865fbSRichard Purdie When large enough input and output buffers are supplied to inflate(), for
354f3865fbSRichard Purdie example, a 16K input buffer and a 64K output buffer, more than 95% of the
364f3865fbSRichard Purdie inflate execution time is spent in this routine.
371da177e4SLinus Torvalds
384f3865fbSRichard Purdie Entry assumptions:
391da177e4SLinus Torvalds
404f3865fbSRichard Purdie state->mode == LEN
414f3865fbSRichard Purdie strm->avail_in >= 6
424f3865fbSRichard Purdie strm->avail_out >= 258
434f3865fbSRichard Purdie start >= strm->avail_out
444f3865fbSRichard Purdie state->bits < 8
454f3865fbSRichard Purdie
464f3865fbSRichard Purdie On return, state->mode is one of:
474f3865fbSRichard Purdie
484f3865fbSRichard Purdie LEN -- ran out of enough output space or enough available input
494f3865fbSRichard Purdie TYPE -- reached end of block code, inflate() to interpret next block
504f3865fbSRichard Purdie BAD -- error in block data
514f3865fbSRichard Purdie
524f3865fbSRichard Purdie Notes:
534f3865fbSRichard Purdie
544f3865fbSRichard Purdie - The maximum input bits used by a length/distance pair is 15 bits for the
554f3865fbSRichard Purdie length code, 5 bits for the length extra, 15 bits for the distance code,
564f3865fbSRichard Purdie and 13 bits for the distance extra. This totals 48 bits, or six bytes.
574f3865fbSRichard Purdie Therefore if strm->avail_in >= 6, then there is enough input to avoid
584f3865fbSRichard Purdie checking for available input while decoding.
594f3865fbSRichard Purdie
604f3865fbSRichard Purdie - The maximum bytes that a single length/distance pair can output is 258
614f3865fbSRichard Purdie bytes, which is the maximum length that can be coded. inflate_fast()
624f3865fbSRichard Purdie requires strm->avail_out >= 258 for each loop to avoid checking for
634f3865fbSRichard Purdie output space.
64b762450eSRandy Dunlap
65b762450eSRandy Dunlap - @start: inflate()'s starting value for strm->avail_out
664f3865fbSRichard Purdie */
inflate_fast(z_streamp strm,unsigned start)67b762450eSRandy Dunlap void inflate_fast(z_streamp strm, unsigned start)
681da177e4SLinus Torvalds {
694f3865fbSRichard Purdie struct inflate_state *state;
708336793bSDenys Vlasenko const unsigned char *in; /* local strm->next_in */
718336793bSDenys Vlasenko const unsigned char *last; /* while in < last, enough input available */
724f3865fbSRichard Purdie unsigned char *out; /* local strm->next_out */
734f3865fbSRichard Purdie unsigned char *beg; /* inflate()'s initial strm->next_out */
744f3865fbSRichard Purdie unsigned char *end; /* while out < end, enough space available */
754f3865fbSRichard Purdie #ifdef INFLATE_STRICT
764f3865fbSRichard Purdie unsigned dmax; /* maximum distance from zlib header */
774f3865fbSRichard Purdie #endif
784f3865fbSRichard Purdie unsigned wsize; /* window size or zero if not using window */
794f3865fbSRichard Purdie unsigned whave; /* valid bytes in the window */
804f3865fbSRichard Purdie unsigned write; /* window write index */
814f3865fbSRichard Purdie unsigned char *window; /* allocated sliding window, if wsize != 0 */
824f3865fbSRichard Purdie unsigned long hold; /* local strm->hold */
834f3865fbSRichard Purdie unsigned bits; /* local strm->bits */
844f3865fbSRichard Purdie code const *lcode; /* local strm->lencode */
854f3865fbSRichard Purdie code const *dcode; /* local strm->distcode */
864f3865fbSRichard Purdie unsigned lmask; /* mask for first level of length codes */
874f3865fbSRichard Purdie unsigned dmask; /* mask for first level of distance codes */
884f3865fbSRichard Purdie code this; /* retrieved table entry */
894f3865fbSRichard Purdie unsigned op; /* code bits, operation, extra bits, or */
904f3865fbSRichard Purdie /* window position, window bytes to copy */
914f3865fbSRichard Purdie unsigned len; /* match length, unused bytes */
924f3865fbSRichard Purdie unsigned dist; /* match distance */
934f3865fbSRichard Purdie unsigned char *from; /* where to copy match from */
941da177e4SLinus Torvalds
954f3865fbSRichard Purdie /* copy state to local variables */
964f3865fbSRichard Purdie state = (struct inflate_state *)strm->state;
97acaab733SJann Horn in = strm->next_in;
984f3865fbSRichard Purdie last = in + (strm->avail_in - 5);
99acaab733SJann Horn out = strm->next_out;
1004f3865fbSRichard Purdie beg = out - (start - strm->avail_out);
1014f3865fbSRichard Purdie end = out + (strm->avail_out - 257);
1024f3865fbSRichard Purdie #ifdef INFLATE_STRICT
1034f3865fbSRichard Purdie dmax = state->dmax;
1044f3865fbSRichard Purdie #endif
1054f3865fbSRichard Purdie wsize = state->wsize;
1064f3865fbSRichard Purdie whave = state->whave;
1074f3865fbSRichard Purdie write = state->write;
1084f3865fbSRichard Purdie window = state->window;
1094f3865fbSRichard Purdie hold = state->hold;
1104f3865fbSRichard Purdie bits = state->bits;
1114f3865fbSRichard Purdie lcode = state->lencode;
1124f3865fbSRichard Purdie dcode = state->distcode;
1134f3865fbSRichard Purdie lmask = (1U << state->lenbits) - 1;
1144f3865fbSRichard Purdie dmask = (1U << state->distbits) - 1;
1151da177e4SLinus Torvalds
1164f3865fbSRichard Purdie /* decode literals and length/distances until end-of-block or not enough
1174f3865fbSRichard Purdie input data or output space */
1184f3865fbSRichard Purdie do {
1194f3865fbSRichard Purdie if (bits < 15) {
120acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1214f3865fbSRichard Purdie bits += 8;
122acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1234f3865fbSRichard Purdie bits += 8;
1241da177e4SLinus Torvalds }
1254f3865fbSRichard Purdie this = lcode[hold & lmask];
1264f3865fbSRichard Purdie dolen:
1274f3865fbSRichard Purdie op = (unsigned)(this.bits);
1284f3865fbSRichard Purdie hold >>= op;
1294f3865fbSRichard Purdie bits -= op;
1304f3865fbSRichard Purdie op = (unsigned)(this.op);
1314f3865fbSRichard Purdie if (op == 0) { /* literal */
132acaab733SJann Horn *out++ = (unsigned char)(this.val);
1331da177e4SLinus Torvalds }
1344f3865fbSRichard Purdie else if (op & 16) { /* length base */
1354f3865fbSRichard Purdie len = (unsigned)(this.val);
1364f3865fbSRichard Purdie op &= 15; /* number of extra bits */
1374f3865fbSRichard Purdie if (op) {
1384f3865fbSRichard Purdie if (bits < op) {
139acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1404f3865fbSRichard Purdie bits += 8;
1414f3865fbSRichard Purdie }
1424f3865fbSRichard Purdie len += (unsigned)hold & ((1U << op) - 1);
1434f3865fbSRichard Purdie hold >>= op;
1444f3865fbSRichard Purdie bits -= op;
1454f3865fbSRichard Purdie }
1464f3865fbSRichard Purdie if (bits < 15) {
147acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1484f3865fbSRichard Purdie bits += 8;
149acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1504f3865fbSRichard Purdie bits += 8;
1514f3865fbSRichard Purdie }
1524f3865fbSRichard Purdie this = dcode[hold & dmask];
1534f3865fbSRichard Purdie dodist:
1544f3865fbSRichard Purdie op = (unsigned)(this.bits);
1554f3865fbSRichard Purdie hold >>= op;
1564f3865fbSRichard Purdie bits -= op;
1574f3865fbSRichard Purdie op = (unsigned)(this.op);
1584f3865fbSRichard Purdie if (op & 16) { /* distance base */
1594f3865fbSRichard Purdie dist = (unsigned)(this.val);
1604f3865fbSRichard Purdie op &= 15; /* number of extra bits */
1614f3865fbSRichard Purdie if (bits < op) {
162acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1634f3865fbSRichard Purdie bits += 8;
1644f3865fbSRichard Purdie if (bits < op) {
165acaab733SJann Horn hold += (unsigned long)(*in++) << bits;
1664f3865fbSRichard Purdie bits += 8;
1671da177e4SLinus Torvalds }
1681da177e4SLinus Torvalds }
1694f3865fbSRichard Purdie dist += (unsigned)hold & ((1U << op) - 1);
1704f3865fbSRichard Purdie #ifdef INFLATE_STRICT
1714f3865fbSRichard Purdie if (dist > dmax) {
1724f3865fbSRichard Purdie strm->msg = (char *)"invalid distance too far back";
1734f3865fbSRichard Purdie state->mode = BAD;
1741da177e4SLinus Torvalds break;
1751da177e4SLinus Torvalds }
1764f3865fbSRichard Purdie #endif
1774f3865fbSRichard Purdie hold >>= op;
1784f3865fbSRichard Purdie bits -= op;
1794f3865fbSRichard Purdie op = (unsigned)(out - beg); /* max distance in output */
1804f3865fbSRichard Purdie if (dist > op) { /* see if copy from window */
1814f3865fbSRichard Purdie op = dist - op; /* distance back in window */
1824f3865fbSRichard Purdie if (op > whave) {
1834f3865fbSRichard Purdie strm->msg = (char *)"invalid distance too far back";
1844f3865fbSRichard Purdie state->mode = BAD;
1851da177e4SLinus Torvalds break;
1861da177e4SLinus Torvalds }
187acaab733SJann Horn from = window;
1884f3865fbSRichard Purdie if (write == 0) { /* very common case */
1894f3865fbSRichard Purdie from += wsize - op;
1904f3865fbSRichard Purdie if (op < len) { /* some from window */
1914f3865fbSRichard Purdie len -= op;
1924f3865fbSRichard Purdie do {
193acaab733SJann Horn *out++ = *from++;
1944f3865fbSRichard Purdie } while (--op);
1954f3865fbSRichard Purdie from = out - dist; /* rest from output */
1964f3865fbSRichard Purdie }
1974f3865fbSRichard Purdie }
1984f3865fbSRichard Purdie else if (write < op) { /* wrap around window */
1994f3865fbSRichard Purdie from += wsize + write - op;
2004f3865fbSRichard Purdie op -= write;
2014f3865fbSRichard Purdie if (op < len) { /* some from end of window */
2024f3865fbSRichard Purdie len -= op;
2034f3865fbSRichard Purdie do {
204acaab733SJann Horn *out++ = *from++;
2054f3865fbSRichard Purdie } while (--op);
206acaab733SJann Horn from = window;
2074f3865fbSRichard Purdie if (write < len) { /* some from start of window */
2084f3865fbSRichard Purdie op = write;
2094f3865fbSRichard Purdie len -= op;
2104f3865fbSRichard Purdie do {
211acaab733SJann Horn *out++ = *from++;
2124f3865fbSRichard Purdie } while (--op);
2134f3865fbSRichard Purdie from = out - dist; /* rest from output */
2144f3865fbSRichard Purdie }
2154f3865fbSRichard Purdie }
2164f3865fbSRichard Purdie }
2174f3865fbSRichard Purdie else { /* contiguous in window */
2184f3865fbSRichard Purdie from += write - op;
2194f3865fbSRichard Purdie if (op < len) { /* some from window */
2204f3865fbSRichard Purdie len -= op;
2214f3865fbSRichard Purdie do {
222acaab733SJann Horn *out++ = *from++;
2234f3865fbSRichard Purdie } while (--op);
2244f3865fbSRichard Purdie from = out - dist; /* rest from output */
2254f3865fbSRichard Purdie }
2264f3865fbSRichard Purdie }
2274f3865fbSRichard Purdie while (len > 2) {
228acaab733SJann Horn *out++ = *from++;
229acaab733SJann Horn *out++ = *from++;
230acaab733SJann Horn *out++ = *from++;
2314f3865fbSRichard Purdie len -= 3;
2324f3865fbSRichard Purdie }
2334f3865fbSRichard Purdie if (len) {
234acaab733SJann Horn *out++ = *from++;
2354f3865fbSRichard Purdie if (len > 1)
236acaab733SJann Horn *out++ = *from++;
2374f3865fbSRichard Purdie }
2384f3865fbSRichard Purdie }
2394f3865fbSRichard Purdie else {
240ac4c2a3bSJoakim Tjernlund unsigned short *sout;
241ac4c2a3bSJoakim Tjernlund unsigned long loops;
242ac4c2a3bSJoakim Tjernlund
2434f3865fbSRichard Purdie from = out - dist; /* copy direct from output */
244ac4c2a3bSJoakim Tjernlund /* minimum length is three */
245ac4c2a3bSJoakim Tjernlund /* Align out addr */
246acaab733SJann Horn if (!((long)(out - 1) & 1)) {
247acaab733SJann Horn *out++ = *from++;
248ac4c2a3bSJoakim Tjernlund len--;
2494f3865fbSRichard Purdie }
250acaab733SJann Horn sout = (unsigned short *)(out);
251ac4c2a3bSJoakim Tjernlund if (dist > 2) {
252ac4c2a3bSJoakim Tjernlund unsigned short *sfrom;
253ac4c2a3bSJoakim Tjernlund
254acaab733SJann Horn sfrom = (unsigned short *)(from);
255ac4c2a3bSJoakim Tjernlund loops = len >> 1;
256*b7cd9fa5SPaul Menzel do {
257*b7cd9fa5SPaul Menzel if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
258acaab733SJann Horn *sout++ = *sfrom++;
259*b7cd9fa5SPaul Menzel else
260acaab733SJann Horn *sout++ = get_unaligned16(sfrom++);
261*b7cd9fa5SPaul Menzel } while (--loops);
262acaab733SJann Horn out = (unsigned char *)sout;
263acaab733SJann Horn from = (unsigned char *)sfrom;
264ac4c2a3bSJoakim Tjernlund } else { /* dist == 1 or dist == 2 */
265ac4c2a3bSJoakim Tjernlund unsigned short pat16;
266ac4c2a3bSJoakim Tjernlund
267acaab733SJann Horn pat16 = *(sout-1);
268e69eae65SJoakim Tjernlund if (dist == 1) {
269e69eae65SJoakim Tjernlund union uu mm;
270e69eae65SJoakim Tjernlund /* copy one char pattern to both bytes */
271e69eae65SJoakim Tjernlund mm.us = pat16;
272e69eae65SJoakim Tjernlund mm.b[0] = mm.b[1];
273e69eae65SJoakim Tjernlund pat16 = mm.us;
274e69eae65SJoakim Tjernlund }
275ac4c2a3bSJoakim Tjernlund loops = len >> 1;
276ac4c2a3bSJoakim Tjernlund do
277acaab733SJann Horn *sout++ = pat16;
278ac4c2a3bSJoakim Tjernlund while (--loops);
279acaab733SJann Horn out = (unsigned char *)sout;
280ac4c2a3bSJoakim Tjernlund }
281ac4c2a3bSJoakim Tjernlund if (len & 1)
282acaab733SJann Horn *out++ = *from++;
2834f3865fbSRichard Purdie }
2844f3865fbSRichard Purdie }
2854f3865fbSRichard Purdie else if ((op & 64) == 0) { /* 2nd level distance code */
2864f3865fbSRichard Purdie this = dcode[this.val + (hold & ((1U << op) - 1))];
2874f3865fbSRichard Purdie goto dodist;
2884f3865fbSRichard Purdie }
2894f3865fbSRichard Purdie else {
2904f3865fbSRichard Purdie strm->msg = (char *)"invalid distance code";
2914f3865fbSRichard Purdie state->mode = BAD;
2921da177e4SLinus Torvalds break;
2931da177e4SLinus Torvalds }
2941da177e4SLinus Torvalds }
2954f3865fbSRichard Purdie else if ((op & 64) == 0) { /* 2nd level length code */
2964f3865fbSRichard Purdie this = lcode[this.val + (hold & ((1U << op) - 1))];
2974f3865fbSRichard Purdie goto dolen;
2981da177e4SLinus Torvalds }
2994f3865fbSRichard Purdie else if (op & 32) { /* end-of-block */
3004f3865fbSRichard Purdie state->mode = TYPE;
3014f3865fbSRichard Purdie break;
3021da177e4SLinus Torvalds }
3034f3865fbSRichard Purdie else {
3044f3865fbSRichard Purdie strm->msg = (char *)"invalid literal/length code";
3054f3865fbSRichard Purdie state->mode = BAD;
3064f3865fbSRichard Purdie break;
3074f3865fbSRichard Purdie }
3084f3865fbSRichard Purdie } while (in < last && out < end);
3091da177e4SLinus Torvalds
3104f3865fbSRichard Purdie /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
3114f3865fbSRichard Purdie len = bits >> 3;
3124f3865fbSRichard Purdie in -= len;
3134f3865fbSRichard Purdie bits -= len << 3;
3144f3865fbSRichard Purdie hold &= (1U << bits) - 1;
3154f3865fbSRichard Purdie
3164f3865fbSRichard Purdie /* update state and return */
317acaab733SJann Horn strm->next_in = in;
318acaab733SJann Horn strm->next_out = out;
3194f3865fbSRichard Purdie strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
3204f3865fbSRichard Purdie strm->avail_out = (unsigned)(out < end ?
3214f3865fbSRichard Purdie 257 + (end - out) : 257 - (out - end));
3224f3865fbSRichard Purdie state->hold = hold;
3234f3865fbSRichard Purdie state->bits = bits;
3244f3865fbSRichard Purdie return;
3251da177e4SLinus Torvalds }
3264f3865fbSRichard Purdie
3274f3865fbSRichard Purdie /*
3284f3865fbSRichard Purdie inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
3294f3865fbSRichard Purdie - Using bit fields for code structure
3304f3865fbSRichard Purdie - Different op definition to avoid & for extra bits (do & for table bits)
3314f3865fbSRichard Purdie - Three separate decoding do-loops for direct, window, and write == 0
3324f3865fbSRichard Purdie - Special case for distance > 1 copies to do overlapped load and store copy
3334f3865fbSRichard Purdie - Explicit branch predictions (based on measured branch probabilities)
3344f3865fbSRichard Purdie - Deferring match copy and interspersed it with decoding subsequent codes
3354f3865fbSRichard Purdie - Swapping literal/length else
3364f3865fbSRichard Purdie - Swapping window/direct else
3374f3865fbSRichard Purdie - Larger unrolled copy loops (three is about right)
3384f3865fbSRichard Purdie - Moving len -= 3 statement into middle of loop
3394f3865fbSRichard Purdie */
3404f3865fbSRichard Purdie
3414f3865fbSRichard Purdie #endif /* !ASMINF */
342