xref: /openbmc/linux/lib/lz4/lz4hc_compress.c (revision bc5aa3a0)
1 /*
2  * LZ4 HC - High Compression Mode of LZ4
3  * Copyright (C) 2011-2012, Yann Collet.
4  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  *     * Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *     * Redistributions in binary form must reproduce the above
13  * copyright notice, this list of conditions and the following disclaimer
14  * in the documentation and/or other materials provided with the
15  * distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * You can contact the author at :
30  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31  * - LZ4 source repository : http://code.google.com/p/lz4/
32  *
33  *  Changed for kernel use by:
34  *  Chanho Min <chanho.min@lge.com>
35  */
36 
37 #include <linux/module.h>
38 #include <linux/kernel.h>
39 #include <linux/lz4.h>
40 #include <asm/unaligned.h>
41 #include "lz4defs.h"
42 
43 struct lz4hc_data {
44 	const u8 *base;
45 	HTYPE hashtable[HASHTABLESIZE];
46 	u16 chaintable[MAXD];
47 	const u8 *nexttoupdate;
48 } __attribute__((__packed__));
49 
50 static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
51 {
52 	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
53 	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
54 
55 #if LZ4_ARCH64
56 	hc4->nexttoupdate = base + 1;
57 #else
58 	hc4->nexttoupdate = base;
59 #endif
60 	hc4->base = base;
61 	return 1;
62 }
63 
64 /* Update chains up to ip (excluded) */
65 static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
66 {
67 	u16 *chaintable = hc4->chaintable;
68 	HTYPE *hashtable  = hc4->hashtable;
69 #if LZ4_ARCH64
70 	const BYTE * const base = hc4->base;
71 #else
72 	const int base = 0;
73 #endif
74 
75 	while (hc4->nexttoupdate < ip) {
76 		const u8 *p = hc4->nexttoupdate;
77 		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
78 		if (delta > MAX_DISTANCE)
79 			delta = MAX_DISTANCE;
80 		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
81 		hashtable[HASH_VALUE(p)] = (p) - base;
82 		hc4->nexttoupdate++;
83 	}
84 }
85 
86 static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
87 		const u8 *const matchlimit)
88 {
89 	const u8 *p1t = p1;
90 
91 	while (p1t < matchlimit - (STEPSIZE - 1)) {
92 #if LZ4_ARCH64
93 		u64 diff = A64(p2) ^ A64(p1t);
94 #else
95 		u32 diff = A32(p2) ^ A32(p1t);
96 #endif
97 		if (!diff) {
98 			p1t += STEPSIZE;
99 			p2 += STEPSIZE;
100 			continue;
101 		}
102 		p1t += LZ4_NBCOMMONBYTES(diff);
103 		return p1t - p1;
104 	}
105 #if LZ4_ARCH64
106 	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
107 		p1t += 4;
108 		p2 += 4;
109 	}
110 #endif
111 
112 	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
113 		p1t += 2;
114 		p2 += 2;
115 	}
116 	if ((p1t < matchlimit) && (*p2 == *p1t))
117 		p1t++;
118 	return p1t - p1;
119 }
120 
121 static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
122 		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
123 {
124 	u16 *const chaintable = hc4->chaintable;
125 	HTYPE *const hashtable = hc4->hashtable;
126 	const u8 *ref;
127 #if LZ4_ARCH64
128 	const BYTE * const base = hc4->base;
129 #else
130 	const int base = 0;
131 #endif
132 	int nbattempts = MAX_NB_ATTEMPTS;
133 	size_t repl = 0, ml = 0;
134 	u16 delta;
135 
136 	/* HC4 match finder */
137 	lz4hc_insert(hc4, ip);
138 	ref = hashtable[HASH_VALUE(ip)] + base;
139 
140 	/* potential repetition */
141 	if (ref >= ip-4) {
142 		/* confirmed */
143 		if (A32(ref) == A32(ip)) {
144 			delta = (u16)(ip-ref);
145 			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
146 					ref + MINMATCH, matchlimit) + MINMATCH;
147 			*matchpos = ref;
148 		}
149 		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
150 	}
151 
152 	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
153 		nbattempts--;
154 		if (*(ref + ml) == *(ip + ml)) {
155 			if (A32(ref) == A32(ip)) {
156 				size_t mlt =
157 					lz4hc_commonlength(ip + MINMATCH,
158 					ref + MINMATCH, matchlimit) + MINMATCH;
159 				if (mlt > ml) {
160 					ml = mlt;
161 					*matchpos = ref;
162 				}
163 			}
164 		}
165 		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
166 	}
167 
168 	/* Complete table */
169 	if (repl) {
170 		const BYTE *ptr = ip;
171 		const BYTE *end;
172 		end = ip + repl - (MINMATCH-1);
173 		/* Pre-Load */
174 		while (ptr < end - delta) {
175 			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
176 			ptr++;
177 		}
178 		do {
179 			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
180 			/* Head of chain */
181 			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
182 			ptr++;
183 		} while (ptr < end);
184 		hc4->nexttoupdate = end;
185 	}
186 
187 	return (int)ml;
188 }
189 
190 static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
191 	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
192 	const u8 **matchpos, const u8 **startpos)
193 {
194 	u16 *const chaintable = hc4->chaintable;
195 	HTYPE *const hashtable = hc4->hashtable;
196 #if LZ4_ARCH64
197 	const BYTE * const base = hc4->base;
198 #else
199 	const int base = 0;
200 #endif
201 	const u8 *ref;
202 	int nbattempts = MAX_NB_ATTEMPTS;
203 	int delta = (int)(ip - startlimit);
204 
205 	/* First Match */
206 	lz4hc_insert(hc4, ip);
207 	ref = hashtable[HASH_VALUE(ip)] + base;
208 
209 	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
210 		&& (nbattempts)) {
211 		nbattempts--;
212 		if (*(startlimit + longest) == *(ref - delta + longest)) {
213 			if (A32(ref) == A32(ip)) {
214 				const u8 *reft = ref + MINMATCH;
215 				const u8 *ipt = ip + MINMATCH;
216 				const u8 *startt = ip;
217 
218 				while (ipt < matchlimit-(STEPSIZE - 1)) {
219 					#if LZ4_ARCH64
220 					u64 diff = A64(reft) ^ A64(ipt);
221 					#else
222 					u32 diff = A32(reft) ^ A32(ipt);
223 					#endif
224 
225 					if (!diff) {
226 						ipt += STEPSIZE;
227 						reft += STEPSIZE;
228 						continue;
229 					}
230 					ipt += LZ4_NBCOMMONBYTES(diff);
231 					goto _endcount;
232 				}
233 				#if LZ4_ARCH64
234 				if ((ipt < (matchlimit - 3))
235 					&& (A32(reft) == A32(ipt))) {
236 					ipt += 4;
237 					reft += 4;
238 				}
239 				ipt += 2;
240 				#endif
241 				if ((ipt < (matchlimit - 1))
242 					&& (A16(reft) == A16(ipt))) {
243 					reft += 2;
244 				}
245 				if ((ipt < matchlimit) && (*reft == *ipt))
246 					ipt++;
247 _endcount:
248 				reft = ref;
249 
250 				while ((startt > startlimit)
251 					&& (reft > hc4->base)
252 					&& (startt[-1] == reft[-1])) {
253 					startt--;
254 					reft--;
255 				}
256 
257 				if ((ipt - startt) > longest) {
258 					longest = (int)(ipt - startt);
259 					*matchpos = reft;
260 					*startpos = startt;
261 				}
262 			}
263 		}
264 		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
265 	}
266 	return longest;
267 }
268 
269 static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
270 		int ml, const u8 *ref)
271 {
272 	int length, len;
273 	u8 *token;
274 
275 	/* Encode Literal length */
276 	length = (int)(*ip - *anchor);
277 	token = (*op)++;
278 	if (length >= (int)RUN_MASK) {
279 		*token = (RUN_MASK << ML_BITS);
280 		len = length - RUN_MASK;
281 		for (; len > 254 ; len -= 255)
282 			*(*op)++ = 255;
283 		*(*op)++ = (u8)len;
284 	} else
285 		*token = (length << ML_BITS);
286 
287 	/* Copy Literals */
288 	LZ4_BLINDCOPY(*anchor, *op, length);
289 
290 	/* Encode Offset */
291 	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
292 
293 	/* Encode MatchLength */
294 	len = (int)(ml - MINMATCH);
295 	if (len >= (int)ML_MASK) {
296 		*token += ML_MASK;
297 		len -= ML_MASK;
298 		for (; len > 509 ; len -= 510) {
299 			*(*op)++ = 255;
300 			*(*op)++ = 255;
301 		}
302 		if (len > 254) {
303 			len -= 255;
304 			*(*op)++ = 255;
305 		}
306 		*(*op)++ = (u8)len;
307 	} else
308 		*token += len;
309 
310 	/* Prepare next loop */
311 	*ip += ml;
312 	*anchor = *ip;
313 
314 	return 0;
315 }
316 
317 static int lz4_compresshcctx(struct lz4hc_data *ctx,
318 		const char *source,
319 		char *dest,
320 		int isize)
321 {
322 	const u8 *ip = (const u8 *)source;
323 	const u8 *anchor = ip;
324 	const u8 *const iend = ip + isize;
325 	const u8 *const mflimit = iend - MFLIMIT;
326 	const u8 *const matchlimit = (iend - LASTLITERALS);
327 
328 	u8 *op = (u8 *)dest;
329 
330 	int ml, ml2, ml3, ml0;
331 	const u8 *ref = NULL;
332 	const u8 *start2 = NULL;
333 	const u8 *ref2 = NULL;
334 	const u8 *start3 = NULL;
335 	const u8 *ref3 = NULL;
336 	const u8 *start0;
337 	const u8 *ref0;
338 	int lastrun;
339 
340 	ip++;
341 
342 	/* Main Loop */
343 	while (ip < mflimit) {
344 		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
345 		if (!ml) {
346 			ip++;
347 			continue;
348 		}
349 
350 		/* saved, in case we would skip too much */
351 		start0 = ip;
352 		ref0 = ref;
353 		ml0 = ml;
354 _search2:
355 		if (ip+ml < mflimit)
356 			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
357 				ip + 1, matchlimit, ml, &ref2, &start2);
358 		else
359 			ml2 = ml;
360 		/* No better match */
361 		if (ml2 == ml) {
362 			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
363 			continue;
364 		}
365 
366 		if (start0 < ip) {
367 			/* empirical */
368 			if (start2 < ip + ml0) {
369 				ip = start0;
370 				ref = ref0;
371 				ml = ml0;
372 			}
373 		}
374 		/*
375 		 * Here, start0==ip
376 		 * First Match too small : removed
377 		 */
378 		if ((start2 - ip) < 3) {
379 			ml = ml2;
380 			ip = start2;
381 			ref = ref2;
382 			goto _search2;
383 		}
384 
385 _search3:
386 		/*
387 		 * Currently we have :
388 		 * ml2 > ml1, and
389 		 * ip1+3 <= ip2 (usually < ip1+ml1)
390 		 */
391 		if ((start2 - ip) < OPTIMAL_ML) {
392 			int correction;
393 			int new_ml = ml;
394 			if (new_ml > OPTIMAL_ML)
395 				new_ml = OPTIMAL_ML;
396 			if (ip + new_ml > start2 + ml2 - MINMATCH)
397 				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
398 			correction = new_ml - (int)(start2 - ip);
399 			if (correction > 0) {
400 				start2 += correction;
401 				ref2 += correction;
402 				ml2 -= correction;
403 			}
404 		}
405 		/*
406 		 * Now, we have start2 = ip+new_ml,
407 		 * with new_ml=min(ml, OPTIMAL_ML=18)
408 		 */
409 		if (start2 + ml2 < mflimit)
410 			ml3 = lz4hc_insertandgetwidermatch(ctx,
411 				start2 + ml2 - 3, start2, matchlimit,
412 				ml2, &ref3, &start3);
413 		else
414 			ml3 = ml2;
415 
416 		/* No better match : 2 sequences to encode */
417 		if (ml3 == ml2) {
418 			/* ip & ref are known; Now for ml */
419 			if (start2 < ip+ml)
420 				ml = (int)(start2 - ip);
421 
422 			/* Now, encode 2 sequences */
423 			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
424 			ip = start2;
425 			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
426 			continue;
427 		}
428 
429 		/* Not enough space for match 2 : remove it */
430 		if (start3 < ip + ml + 3) {
431 			/*
432 			 * can write Seq1 immediately ==> Seq2 is removed,
433 			 * so Seq3 becomes Seq1
434 			 */
435 			if (start3 >= (ip + ml)) {
436 				if (start2 < ip + ml) {
437 					int correction =
438 						(int)(ip + ml - start2);
439 					start2 += correction;
440 					ref2 += correction;
441 					ml2 -= correction;
442 					if (ml2 < MINMATCH) {
443 						start2 = start3;
444 						ref2 = ref3;
445 						ml2 = ml3;
446 					}
447 				}
448 
449 				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
450 				ip  = start3;
451 				ref = ref3;
452 				ml  = ml3;
453 
454 				start0 = start2;
455 				ref0 = ref2;
456 				ml0 = ml2;
457 				goto _search2;
458 			}
459 
460 			start2 = start3;
461 			ref2 = ref3;
462 			ml2 = ml3;
463 			goto _search3;
464 		}
465 
466 		/*
467 		 * OK, now we have 3 ascending matches; let's write at least
468 		 * the first one ip & ref are known; Now for ml
469 		 */
470 		if (start2 < ip + ml) {
471 			if ((start2 - ip) < (int)ML_MASK) {
472 				int correction;
473 				if (ml > OPTIMAL_ML)
474 					ml = OPTIMAL_ML;
475 				if (ip + ml > start2 + ml2 - MINMATCH)
476 					ml = (int)(start2 - ip) + ml2
477 						- MINMATCH;
478 				correction = ml - (int)(start2 - ip);
479 				if (correction > 0) {
480 					start2 += correction;
481 					ref2 += correction;
482 					ml2 -= correction;
483 				}
484 			} else
485 				ml = (int)(start2 - ip);
486 		}
487 		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
488 
489 		ip = start2;
490 		ref = ref2;
491 		ml = ml2;
492 
493 		start2 = start3;
494 		ref2 = ref3;
495 		ml2 = ml3;
496 
497 		goto _search3;
498 	}
499 
500 	/* Encode Last Literals */
501 	lastrun = (int)(iend - anchor);
502 	if (lastrun >= (int)RUN_MASK) {
503 		*op++ = (RUN_MASK << ML_BITS);
504 		lastrun -= RUN_MASK;
505 		for (; lastrun > 254 ; lastrun -= 255)
506 			*op++ = 255;
507 		*op++ = (u8) lastrun;
508 	} else
509 		*op++ = (lastrun << ML_BITS);
510 	memcpy(op, anchor, iend - anchor);
511 	op += iend - anchor;
512 	/* End */
513 	return (int) (((char *)op) - dest);
514 }
515 
516 int lz4hc_compress(const unsigned char *src, size_t src_len,
517 			unsigned char *dst, size_t *dst_len, void *wrkmem)
518 {
519 	int ret = -1;
520 	int out_len = 0;
521 
522 	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
523 	lz4hc_init(hc4, (const u8 *)src);
524 	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
525 		(char *)dst, (int)src_len);
526 
527 	if (out_len < 0)
528 		goto exit;
529 
530 	*dst_len = out_len;
531 	return 0;
532 
533 exit:
534 	return ret;
535 }
536 EXPORT_SYMBOL(lz4hc_compress);
537 
538 MODULE_LICENSE("Dual BSD/GPL");
539 MODULE_DESCRIPTION("LZ4HC compressor");
540