1 /* 2 * LZO1X Compressor from LZO 3 * 4 * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> 5 * 6 * The full LZO package can be found at: 7 * http://www.oberhumer.com/opensource/lzo/ 8 * 9 * Changed for Linux kernel use by: 10 * Nitin Gupta <nitingupta910@gmail.com> 11 * Richard Purdie <rpurdie@openedhand.com> 12 */ 13 14 #include <linux/module.h> 15 #include <linux/kernel.h> 16 #include <asm/unaligned.h> 17 #include <linux/lzo.h> 18 #include "lzodefs.h" 19 20 static noinline size_t 21 lzo1x_1_do_compress(const unsigned char *in, size_t in_len, 22 unsigned char *out, size_t *out_len, 23 size_t ti, void *wrkmem, signed char *state_offset) 24 { 25 const unsigned char *ip; 26 unsigned char *op; 27 const unsigned char * const in_end = in + in_len; 28 const unsigned char * const ip_end = in + in_len - 20; 29 const unsigned char *ii; 30 lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; 31 32 op = out; 33 ip = in; 34 ii = ip; 35 ip += ti < 4 ? 4 - ti : 0; 36 37 for (;;) { 38 const unsigned char *m_pos = NULL; 39 size_t t, m_len, m_off; 40 u32 dv; 41 u32 run_length = 0; 42 literal: 43 ip += 1 + ((ip - ii) >> 5); 44 next: 45 if (unlikely(ip >= ip_end)) 46 break; 47 dv = get_unaligned_le32(ip); 48 49 if (dv == 0) { 50 const unsigned char *ir = ip + 4; 51 const unsigned char *limit = ip_end 52 < (ip + MAX_ZERO_RUN_LENGTH + 1) 53 ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; 54 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ 55 defined(LZO_FAST_64BIT_MEMORY_ACCESS) 56 u64 dv64; 57 58 for (; (ir + 32) <= limit; ir += 32) { 59 dv64 = get_unaligned((u64 *)ir); 60 dv64 |= get_unaligned((u64 *)ir + 1); 61 dv64 |= get_unaligned((u64 *)ir + 2); 62 dv64 |= get_unaligned((u64 *)ir + 3); 63 if (dv64) 64 break; 65 } 66 for (; (ir + 8) <= limit; ir += 8) { 67 dv64 = get_unaligned((u64 *)ir); 68 if (dv64) { 69 # if defined(__LITTLE_ENDIAN) 70 ir += __builtin_ctzll(dv64) >> 3; 71 # elif defined(__BIG_ENDIAN) 72 ir += __builtin_clzll(dv64) >> 3; 73 # else 74 # error "missing endian definition" 75 # endif 76 break; 77 } 78 } 79 #else 80 while ((ir < (const unsigned char *) 81 ALIGN((uintptr_t)ir, 4)) && 82 (ir < limit) && (*ir == 0)) 83 ir++; 84 for (; (ir + 4) <= limit; ir += 4) { 85 dv = *((u32 *)ir); 86 if (dv) { 87 # if defined(__LITTLE_ENDIAN) 88 ir += __builtin_ctz(dv) >> 3; 89 # elif defined(__BIG_ENDIAN) 90 ir += __builtin_clz(dv) >> 3; 91 # else 92 # error "missing endian definition" 93 # endif 94 break; 95 } 96 } 97 #endif 98 while (likely(ir < limit) && unlikely(*ir == 0)) 99 ir++; 100 run_length = ir - ip; 101 if (run_length > MAX_ZERO_RUN_LENGTH) 102 run_length = MAX_ZERO_RUN_LENGTH; 103 } else { 104 t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; 105 m_pos = in + dict[t]; 106 dict[t] = (lzo_dict_t) (ip - in); 107 if (unlikely(dv != get_unaligned_le32(m_pos))) 108 goto literal; 109 } 110 111 ii -= ti; 112 ti = 0; 113 t = ip - ii; 114 if (t != 0) { 115 if (t <= 3) { 116 op[*state_offset] |= t; 117 COPY4(op, ii); 118 op += t; 119 } else if (t <= 16) { 120 *op++ = (t - 3); 121 COPY8(op, ii); 122 COPY8(op + 8, ii + 8); 123 op += t; 124 } else { 125 if (t <= 18) { 126 *op++ = (t - 3); 127 } else { 128 size_t tt = t - 18; 129 *op++ = 0; 130 while (unlikely(tt > 255)) { 131 tt -= 255; 132 *op++ = 0; 133 } 134 *op++ = tt; 135 } 136 do { 137 COPY8(op, ii); 138 COPY8(op + 8, ii + 8); 139 op += 16; 140 ii += 16; 141 t -= 16; 142 } while (t >= 16); 143 if (t > 0) do { 144 *op++ = *ii++; 145 } while (--t > 0); 146 } 147 } 148 149 if (unlikely(run_length)) { 150 ip += run_length; 151 run_length -= MIN_ZERO_RUN_LENGTH; 152 put_unaligned_le32((run_length << 21) | 0xfffc18 153 | (run_length & 0x7), op); 154 op += 4; 155 run_length = 0; 156 *state_offset = -3; 157 goto finished_writing_instruction; 158 } 159 160 m_len = 4; 161 { 162 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) 163 u64 v; 164 v = get_unaligned((const u64 *) (ip + m_len)) ^ 165 get_unaligned((const u64 *) (m_pos + m_len)); 166 if (unlikely(v == 0)) { 167 do { 168 m_len += 8; 169 v = get_unaligned((const u64 *) (ip + m_len)) ^ 170 get_unaligned((const u64 *) (m_pos + m_len)); 171 if (unlikely(ip + m_len >= ip_end)) 172 goto m_len_done; 173 } while (v == 0); 174 } 175 # if defined(__LITTLE_ENDIAN) 176 m_len += (unsigned) __builtin_ctzll(v) / 8; 177 # elif defined(__BIG_ENDIAN) 178 m_len += (unsigned) __builtin_clzll(v) / 8; 179 # else 180 # error "missing endian definition" 181 # endif 182 #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) 183 u32 v; 184 v = get_unaligned((const u32 *) (ip + m_len)) ^ 185 get_unaligned((const u32 *) (m_pos + m_len)); 186 if (unlikely(v == 0)) { 187 do { 188 m_len += 4; 189 v = get_unaligned((const u32 *) (ip + m_len)) ^ 190 get_unaligned((const u32 *) (m_pos + m_len)); 191 if (v != 0) 192 break; 193 m_len += 4; 194 v = get_unaligned((const u32 *) (ip + m_len)) ^ 195 get_unaligned((const u32 *) (m_pos + m_len)); 196 if (unlikely(ip + m_len >= ip_end)) 197 goto m_len_done; 198 } while (v == 0); 199 } 200 # if defined(__LITTLE_ENDIAN) 201 m_len += (unsigned) __builtin_ctz(v) / 8; 202 # elif defined(__BIG_ENDIAN) 203 m_len += (unsigned) __builtin_clz(v) / 8; 204 # else 205 # error "missing endian definition" 206 # endif 207 #else 208 if (unlikely(ip[m_len] == m_pos[m_len])) { 209 do { 210 m_len += 1; 211 if (ip[m_len] != m_pos[m_len]) 212 break; 213 m_len += 1; 214 if (ip[m_len] != m_pos[m_len]) 215 break; 216 m_len += 1; 217 if (ip[m_len] != m_pos[m_len]) 218 break; 219 m_len += 1; 220 if (ip[m_len] != m_pos[m_len]) 221 break; 222 m_len += 1; 223 if (ip[m_len] != m_pos[m_len]) 224 break; 225 m_len += 1; 226 if (ip[m_len] != m_pos[m_len]) 227 break; 228 m_len += 1; 229 if (ip[m_len] != m_pos[m_len]) 230 break; 231 m_len += 1; 232 if (unlikely(ip + m_len >= ip_end)) 233 goto m_len_done; 234 } while (ip[m_len] == m_pos[m_len]); 235 } 236 #endif 237 } 238 m_len_done: 239 240 m_off = ip - m_pos; 241 ip += m_len; 242 if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { 243 m_off -= 1; 244 *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); 245 *op++ = (m_off >> 3); 246 } else if (m_off <= M3_MAX_OFFSET) { 247 m_off -= 1; 248 if (m_len <= M3_MAX_LEN) 249 *op++ = (M3_MARKER | (m_len - 2)); 250 else { 251 m_len -= M3_MAX_LEN; 252 *op++ = M3_MARKER | 0; 253 while (unlikely(m_len > 255)) { 254 m_len -= 255; 255 *op++ = 0; 256 } 257 *op++ = (m_len); 258 } 259 *op++ = (m_off << 2); 260 *op++ = (m_off >> 6); 261 } else { 262 m_off -= 0x4000; 263 if (m_len <= M4_MAX_LEN) 264 *op++ = (M4_MARKER | ((m_off >> 11) & 8) 265 | (m_len - 2)); 266 else { 267 m_len -= M4_MAX_LEN; 268 *op++ = (M4_MARKER | ((m_off >> 11) & 8)); 269 while (unlikely(m_len > 255)) { 270 m_len -= 255; 271 *op++ = 0; 272 } 273 *op++ = (m_len); 274 } 275 *op++ = (m_off << 2); 276 *op++ = (m_off >> 6); 277 } 278 *state_offset = -2; 279 finished_writing_instruction: 280 ii = ip; 281 goto next; 282 } 283 *out_len = op - out; 284 return in_end - (ii - ti); 285 } 286 287 int lzo1x_1_compress(const unsigned char *in, size_t in_len, 288 unsigned char *out, size_t *out_len, 289 void *wrkmem) 290 { 291 const unsigned char *ip = in; 292 unsigned char *op = out; 293 size_t l = in_len; 294 size_t t = 0; 295 signed char state_offset = -2; 296 297 // LZO v0 will never write 17 as first byte, 298 // so this is used to version the bitstream 299 *op++ = 17; 300 *op++ = LZO_VERSION; 301 302 while (l > 20) { 303 size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1); 304 uintptr_t ll_end = (uintptr_t) ip + ll; 305 if ((ll_end + ((t + ll) >> 5)) <= ll_end) 306 break; 307 BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); 308 memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); 309 t = lzo1x_1_do_compress(ip, ll, op, out_len, 310 t, wrkmem, &state_offset); 311 ip += ll; 312 op += *out_len; 313 l -= ll; 314 } 315 t += l; 316 317 if (t > 0) { 318 const unsigned char *ii = in + in_len - t; 319 320 if (op == out && t <= 238) { 321 *op++ = (17 + t); 322 } else if (t <= 3) { 323 op[state_offset] |= t; 324 } else if (t <= 18) { 325 *op++ = (t - 3); 326 } else { 327 size_t tt = t - 18; 328 *op++ = 0; 329 while (tt > 255) { 330 tt -= 255; 331 *op++ = 0; 332 } 333 *op++ = tt; 334 } 335 if (t >= 16) do { 336 COPY8(op, ii); 337 COPY8(op + 8, ii + 8); 338 op += 16; 339 ii += 16; 340 t -= 16; 341 } while (t >= 16); 342 if (t > 0) do { 343 *op++ = *ii++; 344 } while (--t > 0); 345 } 346 347 *op++ = M4_MARKER | 1; 348 *op++ = 0; 349 *op++ = 0; 350 351 *out_len = op - out; 352 return LZO_E_OK; 353 } 354 EXPORT_SYMBOL_GPL(lzo1x_1_compress); 355 356 MODULE_LICENSE("GPL"); 357 MODULE_DESCRIPTION("LZO1X-1 Compressor"); 358