xref: /openbmc/linux/lib/842/842_compress.c (revision 75bf465f0bc33e9b776a46d6a1b9b990f5fb7c37)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * 842 Software Compression
4   *
5   * Copyright (C) 2015 Dan Streetman, IBM Corp
6   *
7   * See 842.h for details of the 842 compressed format.
8   */
9  
10  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11  #define MODULE_NAME "842_compress"
12  
13  #include <linux/hashtable.h>
14  
15  #include "842.h"
16  #include "842_debugfs.h"
17  
/*
 * Size, in bits, handed to DECLARE_HASHTABLE() for the lookup tables that
 * track 8, 4 and 2 byte values respectively.
 */
#define SW842_HASHTABLE8_BITS	10
#define SW842_HASHTABLE4_BITS	11
#define SW842_HASHTABLE2_BITS	10
21  
22  /* By default, we allow compressing input buffers of any length, but we must
23   * use the non-standard "short data" template so the decompressor can correctly
24   * reproduce the uncompressed data buffer at the right length.  However the
25   * hardware 842 compressor will not recognize the "short data" template, and
26   * will fail to decompress any compressed buffer containing it (I have no idea
27   * why anyone would want to use software to compress and hardware to decompress
28   * but that's beside the point).  This parameter forces the compression
29   * function to simply reject any input buffer that isn't a multiple of 8 bytes
30   * long, instead of using the "short data" template, so that all compressed
31   * buffers produced by this function will be decompressable by the 842 hardware
32   * decompressor.  Unless you have a specific need for that, leave this disabled
33   * so that any length buffer can be compressed.
34   */
35  static bool sw842_strict;
36  module_param_named(strict, sw842_strict, bool, 0644);
37  
38  static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
39  	{ I8, N0, N0, N0, 0x19 }, /* 8 */
40  	{ I4, I4, N0, N0, 0x18 }, /* 18 */
41  	{ I4, I2, I2, N0, 0x17 }, /* 25 */
42  	{ I2, I2, I4, N0, 0x13 }, /* 25 */
43  	{ I2, I2, I2, I2, 0x12 }, /* 32 */
44  	{ I4, I2, D2, N0, 0x16 }, /* 33 */
45  	{ I4, D2, I2, N0, 0x15 }, /* 33 */
46  	{ I2, D2, I4, N0, 0x0e }, /* 33 */
47  	{ D2, I2, I4, N0, 0x09 }, /* 33 */
48  	{ I2, I2, I2, D2, 0x11 }, /* 40 */
49  	{ I2, I2, D2, I2, 0x10 }, /* 40 */
50  	{ I2, D2, I2, I2, 0x0d }, /* 40 */
51  	{ D2, I2, I2, I2, 0x08 }, /* 40 */
52  	{ I4, D4, N0, N0, 0x14 }, /* 41 */
53  	{ D4, I4, N0, N0, 0x04 }, /* 41 */
54  	{ I2, I2, D4, N0, 0x0f }, /* 48 */
55  	{ I2, D2, I2, D2, 0x0c }, /* 48 */
56  	{ I2, D4, I2, N0, 0x0b }, /* 48 */
57  	{ D2, I2, I2, D2, 0x07 }, /* 48 */
58  	{ D2, I2, D2, I2, 0x06 }, /* 48 */
59  	{ D4, I2, I2, N0, 0x03 }, /* 48 */
60  	{ I2, D2, D4, N0, 0x0a }, /* 56 */
61  	{ D2, I2, D4, N0, 0x05 }, /* 56 */
62  	{ D4, I2, D2, N0, 0x02 }, /* 56 */
63  	{ D4, D2, I2, N0, 0x01 }, /* 56 */
64  	{ D8, N0, N0, N0, 0x00 }, /* 64 */
65  };
66  
67  struct sw842_hlist_node8 {
68  	struct hlist_node node;
69  	u64 data;
70  	u8 index;
71  };
72  
73  struct sw842_hlist_node4 {
74  	struct hlist_node node;
75  	u32 data;
76  	u16 index;
77  };
78  
79  struct sw842_hlist_node2 {
80  	struct hlist_node node;
81  	u16 data;
82  	u8 index;
83  };
84  
/* Sentinels stored in the indexN[] fields in place of a real table index */
#define INDEX_NOT_FOUND		(-1)	/* looked up, value is not in the table */
#define INDEX_NOT_CHECKED	(-2)	/* not looked up yet for this block */
87  
88  struct sw842_param {
89  	u8 *in;
90  	u8 *instart;
91  	u64 ilen;
92  	u8 *out;
93  	u64 olen;
94  	u8 bit;
95  	u64 data8[1];
96  	u32 data4[2];
97  	u16 data2[4];
98  	int index8[1];
99  	int index4[2];
100  	int index2[4];
101  	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
102  	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
103  	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
104  	struct sw842_hlist_node8 node8[1 << I8_BITS];
105  	struct sw842_hlist_node4 node4[1 << I4_BITS];
106  	struct sw842_hlist_node2 node2[1 << I2_BITS];
107  };
108  
/*
 * Read the big-endian @b-bit value located @o bytes past the current input
 * position of @p and return it in CPU byte order.  The read may be unaligned.
 */
#define get_input_data(p, o, b)	\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
111  
/*
 * Reset the @b byte lookup table of @p: empty the hashtable and give every
 * slot its own position as index, zero data and an unhashed list node.
 */
#define init_hashtable_nodes(p, b)	do {				\
	int _slot;							\
									\
	hash_init((p)->htable##b);					\
	for (_slot = 0; _slot < ARRAY_SIZE((p)->node##b); _slot++) {	\
		INIT_HLIST_NODE(&(p)->node##b[_slot].node);		\
		(p)->node##b[_slot].data = 0;				\
		(p)->node##b[_slot].index = _slot;			\
	}								\
} while (0)
121  
/*
 * Look up the current @b byte value number @n (p->dataN[n]) in the matching
 * hashtable.  Stores the index of the first slot holding the same value in
 * p->indexN[n], or INDEX_NOT_FOUND if there is none, and evaluates to true
 * when a match was found.
 *
 * @p is parenthesized on every use, like in the other macros of this file,
 * so that any pointer expression can safely be passed.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
133  
/*
 * Evaluate to true if the @b byte value number @n has a table index.  The
 * hashtable lookup is only performed the first time (while the stored index
 * is still INDEX_NOT_CHECKED); afterwards the cached result is reused.
 */
#define check_index(p, b, n)	({				\
	int _found;						\
								\
	if ((p)->index##b[n] != INDEX_NOT_CHECKED)		\
		_found = (p)->index##b[n] >= 0;			\
	else							\
		_found = find_index(p, b, n);			\
	_found;							\
})
138  
/*
 * Record the current @b byte value number @d in slot (@i + @d) of the
 * matching table: the slot is unhashed, loaded with the new value and hashed
 * again under that value.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_slot;				\
									\
	_slot = &(p)->node##b[(i)+(d)];					\
	hash_del(&_slot->node);						\
	_slot->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_slot->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_slot->data);				\
	hash_add((p)->htable##b, &_slot->node, _slot->data);		\
} while (0)
149  
150  static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
151  
152  static int add_bits(struct sw842_param *p, u64 d, u8 n);
153  
__split_add_bits(struct sw842_param * p,u64 d,u8 n,u8 s)154  static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
155  {
156  	int ret;
157  
158  	if (n <= s)
159  		return -EINVAL;
160  
161  	ret = add_bits(p, d >> s, n - s);
162  	if (ret)
163  		return ret;
164  	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
165  }
166  
/*
 * Append the low @n bits of @d to the output, most significant bit first,
 * starting at bit offset p->bit of the byte at p->out.  Bits already present
 * in that first byte are preserved.  On success p->out, p->olen and p->bit
 * are advanced past the written bits.
 *
 * Returns 0 on success, -EINVAL if @n is larger than 64 and -ENOSPC if the
 * remaining output space is too small.
 */
static int add_bits(struct sw842_param *p, u64 d, u8 n)
{
	int first = p->bit;			/* bit offset of the first new bit */
	int end = first + n;			/* bit offset just past the last one */
	int pad = round_up(end, 8) - end;	/* unused bits in the last byte */
	int nbytes = DIV_ROUND_UP(end, 8);	/* output bytes touched */
	u8 *dst = p->out;
	u64 head;

	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);

	if (n > 64)
		return -EINVAL;

	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
	 * or if we're at the end of the output buffer and would write past end
	 * (the stores below always write 1, 2, 4 or 8 bytes)
	 */
	if (end > 64)
		return __split_add_bits(p, d, n, 32);
	if (p->olen < 8 && end > 32 && end <= 56)
		return __split_add_bits(p, d, n, 16);
	if (p->olen < 4 && end > 16 && end <= 24)
		return __split_add_bits(p, d, n, 8);

	if (nbytes > p->olen)
		return -ENOSPC;

	/* keep what is already in the first byte, left-align the new bits */
	head = *dst & bmask[first];
	d <<= pad;

	switch (nbytes) {
	case 0:
	case 1:
		*dst = head | d;
		break;
	case 2:
		put_unaligned(cpu_to_be16(head << 8 | d), (__be16 *)dst);
		break;
	case 3:
		put_unaligned(cpu_to_be32(head << 24 | d << 8), (__be32 *)dst);
		break;
	case 4:
		put_unaligned(cpu_to_be32(head << 24 | d), (__be32 *)dst);
		break;
	case 5:
		put_unaligned(cpu_to_be64(head << 56 | d << 24), (__be64 *)dst);
		break;
	case 6:
		put_unaligned(cpu_to_be64(head << 56 | d << 16), (__be64 *)dst);
		break;
	case 7:
		put_unaligned(cpu_to_be64(head << 56 | d << 8), (__be64 *)dst);
		break;
	default:
		put_unaligned(cpu_to_be64(head << 56 | d), (__be64 *)dst);
		break;
	}

	/* step over the bytes that are now full, keep the leftover bit count */
	p->out += end / 8;
	p->olen -= end / 8;
	p->bit = end % 8;

	return 0;
}
221  
/*
 * Write template number @c (a row of comp_ops) for the current 8 byte block:
 * first the template value, then for each operation either the table index
 * or the literal data taken from the p->indexN[] / p->dataN[] fields.
 *
 * The template is validated while it is written; @b tracks how many of the
 * 8 input bytes the operations handled so far account for.
 *
 * Returns 0 on success, -EINVAL if @c is out of range or the template is
 * malformed, or the error returned by add_bits().
 */
static int add_template(struct sw842_param *p, u8 c)
{
	int ret, i, b = 0;
	u8 *t;
	bool inv = false;

	/* validate @c before using it to index the template table */
	if (c >= OPS_MAX)
		return -EINVAL;

	t = comp_ops[c];

	pr_debug("template %x\n", t[4]);

	ret = add_bits(p, t[4], OP_BITS);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		pr_debug("op %x\n", t[i]);

		switch (t[i] & OP_AMOUNT) {
		case OP_AMOUNT_8:
			/* an 8 byte op must be the first op of the template */
			if (b)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index8[0], I8_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data8[0], 64);
			else
				inv = true;
			break;
		case OP_AMOUNT_4:
			/* 4 data bytes at offset 2 are read straight from the
			 * input as they do not match either data4[] entry
			 */
			if (b == 2 && t[i] & OP_ACTION_DATA)
				ret = add_bits(p, get_input_data(p, 2, 32), 32);
			else if (b != 0 && b != 4)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data4[b >> 2], 32);
			else
				inv = true;
			break;
		case OP_AMOUNT_2:
			/* reject a bad offset before emitting anything for
			 * it, as the other cases do
			 */
			if (b != 0 && b != 2 && b != 4 && b != 6)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data2[b >> 1], 16);
			else
				inv = true;
			break;
		case OP_AMOUNT_0:
			/* a no-op is only valid once all 8 bytes are covered */
			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
			break;
		default:
			inv = true;
			break;
		}

		if (ret)
			return ret;

		if (inv) {
			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
			       c, i, t[0], t[1], t[2], t[3]);
			return -EINVAL;
		}

		b += t[i] & OP_AMOUNT;
	}

	if (b != 8) {
		pr_err("Invalid template %x len %x : %x %x %x %x\n",
		       c, b, t[0], t[1], t[2], t[3]);
		return -EINVAL;
	}

	if (sw842_template_counts)
		atomic_inc(&template_count[t[4]]);

	return 0;
}
304  
/*
 * Write a repeat template for @r repetitions.  The count is stored 0-based,
 * so @r must be between 1 and REPEAT_BITS_MAX + 1.
 *
 * Returns 0 on success, -EINVAL if @r is out of range, or the error returned
 * by add_bits().
 */
static int add_repeat_template(struct sw842_param *p, u8 r)
{
	int err;

	if (r == 0)
		return -EINVAL;

	/* repeat param is 0-based */
	r--;
	if (r > REPEAT_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_REPEAT, OP_BITS);
	if (!err)
		err = add_bits(p, r, REPEAT_BITS);
	if (err)
		return err;

	if (sw842_template_counts)
		atomic_inc(&template_repeat_count);

	return 0;
}
326  
/*
 * Write a short data template carrying the next @b input bytes verbatim:
 * the template value, the byte count, then the bytes themselves.  The input
 * position is not advanced here.
 *
 * Returns 0 on success, -EINVAL if @b is 0 or larger than
 * SHORT_DATA_BITS_MAX, or the error returned by add_bits().
 */
static int add_short_data_template(struct sw842_param *p, u8 b)
{
	int err, pos;

	if (b == 0 || b > SHORT_DATA_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_SHORT_DATA, OP_BITS);
	if (!err)
		err = add_bits(p, b, SHORT_DATA_BITS);

	for (pos = 0; !err && pos < b; pos++)
		err = add_bits(p, p->in[pos], 8);

	if (err)
		return err;

	if (sw842_template_counts)
		atomic_inc(&template_short_data_count);

	return 0;
}
353  
add_zeros_template(struct sw842_param * p)354  static int add_zeros_template(struct sw842_param *p)
355  {
356  	int ret = add_bits(p, OP_ZEROS, OP_BITS);
357  
358  	if (ret)
359  		return ret;
360  
361  	if (sw842_template_counts)
362  		atomic_inc(&template_zeros_count);
363  
364  	return 0;
365  }
366  
add_end_template(struct sw842_param * p)367  static int add_end_template(struct sw842_param *p)
368  {
369  	int ret = add_bits(p, OP_END, OP_BITS);
370  
371  	if (ret)
372  		return ret;
373  
374  	if (sw842_template_counts)
375  		atomic_inc(&template_end_count);
376  
377  	return 0;
378  }
379  
check_template(struct sw842_param * p,u8 c)380  static bool check_template(struct sw842_param *p, u8 c)
381  {
382  	u8 *t = comp_ops[c];
383  	int i, match, b = 0;
384  
385  	if (c >= OPS_MAX)
386  		return false;
387  
388  	for (i = 0; i < 4; i++) {
389  		if (t[i] & OP_ACTION_INDEX) {
390  			if (t[i] & OP_AMOUNT_2)
391  				match = check_index(p, 2, b >> 1);
392  			else if (t[i] & OP_AMOUNT_4)
393  				match = check_index(p, 4, b >> 2);
394  			else if (t[i] & OP_AMOUNT_8)
395  				match = check_index(p, 8, 0);
396  			else
397  				return false;
398  			if (!match)
399  				return false;
400  		}
401  
402  		b += t[i] & OP_AMOUNT;
403  	}
404  
405  	return true;
406  }
407  
get_next_data(struct sw842_param * p)408  static void get_next_data(struct sw842_param *p)
409  {
410  	p->data8[0] = get_input_data(p, 0, 64);
411  	p->data4[0] = get_input_data(p, 0, 32);
412  	p->data4[1] = get_input_data(p, 4, 32);
413  	p->data2[0] = get_input_data(p, 0, 16);
414  	p->data2[1] = get_input_data(p, 2, 16);
415  	p->data2[2] = get_input_data(p, 4, 16);
416  	p->data2[3] = get_input_data(p, 6, 16);
417  }
418  
419  /* update the hashtable entries.
420   * only call this after finding/adding the current template
421   * the dataN fields for the current 8 byte block must be already updated
422   */
update_hashtables(struct sw842_param * p)423  static void update_hashtables(struct sw842_param *p)
424  {
425  	u64 pos = p->in - p->instart;
426  	u64 n8 = (pos >> 3) % (1 << I8_BITS);
427  	u64 n4 = (pos >> 2) % (1 << I4_BITS);
428  	u64 n2 = (pos >> 1) % (1 << I2_BITS);
429  
430  	replace_hash(p, 8, n8, 0);
431  	replace_hash(p, 4, n4, 0);
432  	replace_hash(p, 4, n4, 1);
433  	replace_hash(p, 2, n2, 0);
434  	replace_hash(p, 2, n2, 1);
435  	replace_hash(p, 2, n2, 2);
436  	replace_hash(p, 2, n2, 3);
437  }
438  
439  /* find the next template to use, and add it
440   * the p->dataN fields must already be set for the current 8 byte block
441   */
process_next(struct sw842_param * p)442  static int process_next(struct sw842_param *p)
443  {
444  	int ret, i;
445  
446  	p->index8[0] = INDEX_NOT_CHECKED;
447  	p->index4[0] = INDEX_NOT_CHECKED;
448  	p->index4[1] = INDEX_NOT_CHECKED;
449  	p->index2[0] = INDEX_NOT_CHECKED;
450  	p->index2[1] = INDEX_NOT_CHECKED;
451  	p->index2[2] = INDEX_NOT_CHECKED;
452  	p->index2[3] = INDEX_NOT_CHECKED;
453  
454  	/* check up to OPS_MAX - 1; last op is our fallback */
455  	for (i = 0; i < OPS_MAX - 1; i++) {
456  		if (check_template(p, i))
457  			break;
458  	}
459  
460  	ret = add_template(p, i);
461  	if (ret)
462  		return ret;
463  
464  	return 0;
465  }
466  
467  /**
468   * sw842_compress
469   *
470   * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471   * @out, using no more than @olen bytes, using the 842 compression format.
472   *
473   * Returns: 0 on success, error on failure.  The @olen parameter
474   * will contain the number of output bytes written on success, or
475   * 0 on error.
476   */
sw842_compress(const u8 * in,unsigned int ilen,u8 * out,unsigned int * olen,void * wmem)477  int sw842_compress(const u8 *in, unsigned int ilen,
478  		   u8 *out, unsigned int *olen, void *wmem)
479  {
480  	struct sw842_param *p = (struct sw842_param *)wmem;
481  	int ret;
482  	u64 last, next, pad, total;
483  	u8 repeat_count = 0;
484  	u32 crc;
485  
486  	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
487  
488  	init_hashtable_nodes(p, 8);
489  	init_hashtable_nodes(p, 4);
490  	init_hashtable_nodes(p, 2);
491  
492  	p->in = (u8 *)in;
493  	p->instart = p->in;
494  	p->ilen = ilen;
495  	p->out = out;
496  	p->olen = *olen;
497  	p->bit = 0;
498  
499  	total = p->olen;
500  
501  	*olen = 0;
502  
503  	/* if using strict mode, we can only compress a multiple of 8 */
504  	if (sw842_strict && (ilen % 8)) {
505  		pr_err("Using strict mode, can't compress len %d\n", ilen);
506  		return -EINVAL;
507  	}
508  
509  	/* let's compress at least 8 bytes, mkay? */
510  	if (unlikely(ilen < 8))
511  		goto skip_comp;
512  
513  	/* make initial 'last' different so we don't match the first time */
514  	last = ~get_unaligned((u64 *)p->in);
515  
516  	while (p->ilen > 7) {
517  		next = get_unaligned((u64 *)p->in);
518  
519  		/* must get the next data, as we need to update the hashtable
520  		 * entries with the new data every time
521  		 */
522  		get_next_data(p);
523  
524  		/* we don't care about endianness in last or next;
525  		 * we're just comparing 8 bytes to another 8 bytes,
526  		 * they're both the same endianness
527  		 */
528  		if (next == last) {
529  			/* repeat count bits are 0-based, so we stop at +1 */
530  			if (++repeat_count <= REPEAT_BITS_MAX)
531  				goto repeat;
532  		}
533  		if (repeat_count) {
534  			ret = add_repeat_template(p, repeat_count);
535  			repeat_count = 0;
536  			if (next == last) /* reached max repeat bits */
537  				goto repeat;
538  		}
539  
540  		if (next == 0)
541  			ret = add_zeros_template(p);
542  		else
543  			ret = process_next(p);
544  
545  		if (ret)
546  			return ret;
547  
548  repeat:
549  		last = next;
550  		update_hashtables(p);
551  		p->in += 8;
552  		p->ilen -= 8;
553  	}
554  
555  	if (repeat_count) {
556  		ret = add_repeat_template(p, repeat_count);
557  		if (ret)
558  			return ret;
559  	}
560  
561  skip_comp:
562  	if (p->ilen > 0) {
563  		ret = add_short_data_template(p, p->ilen);
564  		if (ret)
565  			return ret;
566  
567  		p->in += p->ilen;
568  		p->ilen = 0;
569  	}
570  
571  	ret = add_end_template(p);
572  	if (ret)
573  		return ret;
574  
575  	/*
576  	 * crc(0:31) is appended to target data starting with the next
577  	 * bit after End of stream template.
578  	 * nx842 calculates CRC for data in big-endian format. So doing
579  	 * same here so that sw842 decompression can be used for both
580  	 * compressed data.
581  	 */
582  	crc = crc32_be(0, in, ilen);
583  	ret = add_bits(p, crc, CRC_BITS);
584  	if (ret)
585  		return ret;
586  
587  	if (p->bit) {
588  		p->out++;
589  		p->olen--;
590  		p->bit = 0;
591  	}
592  
593  	/* pad compressed length to multiple of 8 */
594  	pad = (8 - ((total - p->olen) % 8)) % 8;
595  	if (pad) {
596  		if (pad > p->olen) /* we were so close! */
597  			return -ENOSPC;
598  		memset(p->out, 0, pad);
599  		p->out += pad;
600  		p->olen -= pad;
601  	}
602  
603  	if (unlikely((total - p->olen) > UINT_MAX))
604  		return -ENOSPC;
605  
606  	*olen = total - p->olen;
607  
608  	return 0;
609  }
610  EXPORT_SYMBOL_GPL(sw842_compress);
611  
sw842_init(void)612  static int __init sw842_init(void)
613  {
614  	if (sw842_template_counts)
615  		sw842_debugfs_create();
616  
617  	return 0;
618  }
619  module_init(sw842_init);
620  
sw842_exit(void)621  static void __exit sw842_exit(void)
622  {
623  	if (sw842_template_counts)
624  		sw842_debugfs_remove();
625  }
626  module_exit(sw842_exit);
627  
628  MODULE_LICENSE("GPL");
629  MODULE_DESCRIPTION("Software 842 Compressor");
630  MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
631