1 /* inffast.c -- fast decoding 2 * Copyright (C) 1995-2004 Mark Adler 3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6 #include <linux/zutil.h> 7 #include "inftrees.h" 8 #include "inflate.h" 9 #include "inffast.h" 10 11 #ifndef ASMINF 12 13 /* Allow machine dependent optimization for post-increment or pre-increment. 14 Based on testing to date, 15 Pre-increment preferred for: 16 - PowerPC G3 (Adler) 17 - MIPS R5000 (Randers-Pehrson) 18 Post-increment preferred for: 19 - none 20 No measurable difference: 21 - Pentium III (Anderson) 22 - M68060 (Nikl) 23 */ 24 union uu { 25 unsigned short us; 26 unsigned char b[2]; 27 }; 28 29 /* Endian independed version */ 30 static inline unsigned short 31 get_unaligned16(const unsigned short *p) 32 { 33 union uu mm; 34 unsigned char *b = (unsigned char *)p; 35 36 mm.b[0] = b[0]; 37 mm.b[1] = b[1]; 38 return mm.us; 39 } 40 41 #ifdef POSTINC 42 # define OFF 0 43 # define PUP(a) *(a)++ 44 # define UP_UNALIGNED(a) get_unaligned16((a)++) 45 #else 46 # define OFF 1 47 # define PUP(a) *++(a) 48 # define UP_UNALIGNED(a) get_unaligned16(++(a)) 49 #endif 50 51 /* 52 Decode literal, length, and distance codes and write out the resulting 53 literal and match bytes until either not enough input or output is 54 available, an end-of-block is encountered, or a data error is encountered. 55 When large enough input and output buffers are supplied to inflate(), for 56 example, a 16K input buffer and a 64K output buffer, more than 95% of the 57 inflate execution time is spent in this routine. 58 59 Entry assumptions: 60 61 state->mode == LEN 62 strm->avail_in >= 6 63 strm->avail_out >= 258 64 start >= strm->avail_out 65 state->bits < 8 66 67 On return, state->mode is one of: 68 69 LEN -- ran out of enough output space or enough available input 70 TYPE -- reached end of block code, inflate() to interpret next block 71 BAD -- error in block data 72 73 Notes: 74 75 - The maximum input bits used by a length/distance pair is 15 bits for the 76 length code, 5 bits for the length extra, 15 bits for the distance code, 77 and 13 bits for the distance extra. This totals 48 bits, or six bytes. 78 Therefore if strm->avail_in >= 6, then there is enough input to avoid 79 checking for available input while decoding. 80 81 - The maximum bytes that a single length/distance pair can output is 258 82 bytes, which is the maximum length that can be coded. inflate_fast() 83 requires strm->avail_out >= 258 for each loop to avoid checking for 84 output space. 85 86 - @start: inflate()'s starting value for strm->avail_out 87 */ 88 void inflate_fast(z_streamp strm, unsigned start) 89 { 90 struct inflate_state *state; 91 const unsigned char *in; /* local strm->next_in */ 92 const unsigned char *last; /* while in < last, enough input available */ 93 unsigned char *out; /* local strm->next_out */ 94 unsigned char *beg; /* inflate()'s initial strm->next_out */ 95 unsigned char *end; /* while out < end, enough space available */ 96 #ifdef INFLATE_STRICT 97 unsigned dmax; /* maximum distance from zlib header */ 98 #endif 99 unsigned wsize; /* window size or zero if not using window */ 100 unsigned whave; /* valid bytes in the window */ 101 unsigned write; /* window write index */ 102 unsigned char *window; /* allocated sliding window, if wsize != 0 */ 103 unsigned long hold; /* local strm->hold */ 104 unsigned bits; /* local strm->bits */ 105 code const *lcode; /* local strm->lencode */ 106 code const *dcode; /* local strm->distcode */ 107 unsigned lmask; /* mask for first level of length codes */ 108 unsigned dmask; /* mask for first level of distance codes */ 109 code this; /* retrieved table entry */ 110 unsigned op; /* code bits, operation, extra bits, or */ 111 /* window position, window bytes to copy */ 112 unsigned len; /* match length, unused bytes */ 113 unsigned dist; /* match distance */ 114 unsigned char *from; /* where to copy match from */ 115 116 /* copy state to local variables */ 117 state = (struct inflate_state *)strm->state; 118 in = strm->next_in - OFF; 119 last = in + (strm->avail_in - 5); 120 out = strm->next_out - OFF; 121 beg = out - (start - strm->avail_out); 122 end = out + (strm->avail_out - 257); 123 #ifdef INFLATE_STRICT 124 dmax = state->dmax; 125 #endif 126 wsize = state->wsize; 127 whave = state->whave; 128 write = state->write; 129 window = state->window; 130 hold = state->hold; 131 bits = state->bits; 132 lcode = state->lencode; 133 dcode = state->distcode; 134 lmask = (1U << state->lenbits) - 1; 135 dmask = (1U << state->distbits) - 1; 136 137 /* decode literals and length/distances until end-of-block or not enough 138 input data or output space */ 139 do { 140 if (bits < 15) { 141 hold += (unsigned long)(PUP(in)) << bits; 142 bits += 8; 143 hold += (unsigned long)(PUP(in)) << bits; 144 bits += 8; 145 } 146 this = lcode[hold & lmask]; 147 dolen: 148 op = (unsigned)(this.bits); 149 hold >>= op; 150 bits -= op; 151 op = (unsigned)(this.op); 152 if (op == 0) { /* literal */ 153 PUP(out) = (unsigned char)(this.val); 154 } 155 else if (op & 16) { /* length base */ 156 len = (unsigned)(this.val); 157 op &= 15; /* number of extra bits */ 158 if (op) { 159 if (bits < op) { 160 hold += (unsigned long)(PUP(in)) << bits; 161 bits += 8; 162 } 163 len += (unsigned)hold & ((1U << op) - 1); 164 hold >>= op; 165 bits -= op; 166 } 167 if (bits < 15) { 168 hold += (unsigned long)(PUP(in)) << bits; 169 bits += 8; 170 hold += (unsigned long)(PUP(in)) << bits; 171 bits += 8; 172 } 173 this = dcode[hold & dmask]; 174 dodist: 175 op = (unsigned)(this.bits); 176 hold >>= op; 177 bits -= op; 178 op = (unsigned)(this.op); 179 if (op & 16) { /* distance base */ 180 dist = (unsigned)(this.val); 181 op &= 15; /* number of extra bits */ 182 if (bits < op) { 183 hold += (unsigned long)(PUP(in)) << bits; 184 bits += 8; 185 if (bits < op) { 186 hold += (unsigned long)(PUP(in)) << bits; 187 bits += 8; 188 } 189 } 190 dist += (unsigned)hold & ((1U << op) - 1); 191 #ifdef INFLATE_STRICT 192 if (dist > dmax) { 193 strm->msg = (char *)"invalid distance too far back"; 194 state->mode = BAD; 195 break; 196 } 197 #endif 198 hold >>= op; 199 bits -= op; 200 op = (unsigned)(out - beg); /* max distance in output */ 201 if (dist > op) { /* see if copy from window */ 202 op = dist - op; /* distance back in window */ 203 if (op > whave) { 204 strm->msg = (char *)"invalid distance too far back"; 205 state->mode = BAD; 206 break; 207 } 208 from = window - OFF; 209 if (write == 0) { /* very common case */ 210 from += wsize - op; 211 if (op < len) { /* some from window */ 212 len -= op; 213 do { 214 PUP(out) = PUP(from); 215 } while (--op); 216 from = out - dist; /* rest from output */ 217 } 218 } 219 else if (write < op) { /* wrap around window */ 220 from += wsize + write - op; 221 op -= write; 222 if (op < len) { /* some from end of window */ 223 len -= op; 224 do { 225 PUP(out) = PUP(from); 226 } while (--op); 227 from = window - OFF; 228 if (write < len) { /* some from start of window */ 229 op = write; 230 len -= op; 231 do { 232 PUP(out) = PUP(from); 233 } while (--op); 234 from = out - dist; /* rest from output */ 235 } 236 } 237 } 238 else { /* contiguous in window */ 239 from += write - op; 240 if (op < len) { /* some from window */ 241 len -= op; 242 do { 243 PUP(out) = PUP(from); 244 } while (--op); 245 from = out - dist; /* rest from output */ 246 } 247 } 248 while (len > 2) { 249 PUP(out) = PUP(from); 250 PUP(out) = PUP(from); 251 PUP(out) = PUP(from); 252 len -= 3; 253 } 254 if (len) { 255 PUP(out) = PUP(from); 256 if (len > 1) 257 PUP(out) = PUP(from); 258 } 259 } 260 else { 261 unsigned short *sout; 262 unsigned long loops; 263 264 from = out - dist; /* copy direct from output */ 265 /* minimum length is three */ 266 /* Align out addr */ 267 if (!((long)(out - 1 + OFF) & 1)) { 268 PUP(out) = PUP(from); 269 len--; 270 } 271 sout = (unsigned short *)(out - OFF); 272 if (dist > 2) { 273 unsigned short *sfrom; 274 275 sfrom = (unsigned short *)(from - OFF); 276 loops = len >> 1; 277 do 278 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 279 PUP(sout) = PUP(sfrom); 280 #else 281 PUP(sout) = UP_UNALIGNED(sfrom); 282 #endif 283 while (--loops); 284 out = (unsigned char *)sout + OFF; 285 from = (unsigned char *)sfrom + OFF; 286 } else { /* dist == 1 or dist == 2 */ 287 unsigned short pat16; 288 289 pat16 = *(sout-1+OFF); 290 if (dist == 1) { 291 union uu mm; 292 /* copy one char pattern to both bytes */ 293 mm.us = pat16; 294 mm.b[0] = mm.b[1]; 295 pat16 = mm.us; 296 } 297 loops = len >> 1; 298 do 299 PUP(sout) = pat16; 300 while (--loops); 301 out = (unsigned char *)sout + OFF; 302 } 303 if (len & 1) 304 PUP(out) = PUP(from); 305 } 306 } 307 else if ((op & 64) == 0) { /* 2nd level distance code */ 308 this = dcode[this.val + (hold & ((1U << op) - 1))]; 309 goto dodist; 310 } 311 else { 312 strm->msg = (char *)"invalid distance code"; 313 state->mode = BAD; 314 break; 315 } 316 } 317 else if ((op & 64) == 0) { /* 2nd level length code */ 318 this = lcode[this.val + (hold & ((1U << op) - 1))]; 319 goto dolen; 320 } 321 else if (op & 32) { /* end-of-block */ 322 state->mode = TYPE; 323 break; 324 } 325 else { 326 strm->msg = (char *)"invalid literal/length code"; 327 state->mode = BAD; 328 break; 329 } 330 } while (in < last && out < end); 331 332 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 333 len = bits >> 3; 334 in -= len; 335 bits -= len << 3; 336 hold &= (1U << bits) - 1; 337 338 /* update state and return */ 339 strm->next_in = in + OFF; 340 strm->next_out = out + OFF; 341 strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 342 strm->avail_out = (unsigned)(out < end ? 343 257 + (end - out) : 257 - (out - end)); 344 state->hold = hold; 345 state->bits = bits; 346 return; 347 } 348 349 /* 350 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 351 - Using bit fields for code structure 352 - Different op definition to avoid & for extra bits (do & for table bits) 353 - Three separate decoding do-loops for direct, window, and write == 0 354 - Special case for distance > 1 copies to do overlapped load and store copy 355 - Explicit branch predictions (based on measured branch probabilities) 356 - Deferring match copy and interspersed it with decoding subsequent codes 357 - Swapping literal/length else 358 - Swapping window/direct else 359 - Larger unrolled copy loops (three is about right) 360 - Moving len -= 3 statement into middle of loop 361 */ 362 363 #endif /* !ASMINF */ 364