xref: /openbmc/linux/lib/bch.c (revision 3c881e05c814c970e4f9577446a9d3461d134607)
1  /*
2   * Generic binary BCH encoding/decoding library
3   *
4   * This program is free software; you can redistribute it and/or modify it
5   * under the terms of the GNU General Public License version 2 as published by
6   * the Free Software Foundation.
7   *
8   * This program is distributed in the hope that it will be useful, but WITHOUT
9   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11   * more details.
12   *
13   * You should have received a copy of the GNU General Public License along with
14   * this program; if not, write to the Free Software Foundation, Inc., 51
15   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16   *
17   * Copyright © 2011 Parrot S.A.
18   *
19   * Author: Ivan Djelic <ivan.djelic@parrot.com>
20   *
21   * Description:
22   *
23   * This library provides runtime configurable encoding/decoding of binary
24   * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25   *
26   * Call bch_init to get a pointer to a newly allocated bch_control structure for
27   * the given m (Galois field order), t (error correction capability) and
28   * (optional) primitive polynomial parameters.
29   *
30   * Call bch_encode to compute and store ecc parity bytes to a given buffer.
31   * Call bch_decode to detect and locate errors in received data.
32   *
33   * On systems supporting hw BCH features, intermediate results may be provided
34   * to bch_decode in order to skip certain steps. See bch_decode() documentation
35   * for details.
36   *
37   * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38   * parameters m and t; thus allowing extra compiler optimizations and providing
39   * better (up to 2x) encoding performance. Using this option makes sense when
40   * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41   * on a particular NAND flash device.
42   *
43   * Algorithmic details:
44   *
45   * Encoding is performed by processing 32 input bits in parallel, using 4
46   * remainder lookup tables.
47   *
48   * The final stage of decoding involves the following internal steps:
49   * a. Syndrome computation
50   * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51   * c. Error locator root finding (by far the most expensive step)
52   *
53   * In this implementation, step c is not performed using the usual Chien search.
54   * Instead, an alternative approach described in [1] is used. It consists in
55   * factoring the error locator polynomial using the Berlekamp Trace algorithm
56   * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57   * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58   * much better performance than Chien search for usual (m,t) values (typically
59   * m >= 13, t < 32, see [1]).
60   *
61   * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62   * of characteristic 2, in: Western European Workshop on Research in Cryptology
63   * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64   * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65   * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66   */
67  
68  #include <linux/kernel.h>
69  #include <linux/errno.h>
70  #include <linux/init.h>
71  #include <linux/module.h>
72  #include <linux/slab.h>
73  #include <linux/bitops.h>
74  #include <asm/byteorder.h>
75  #include <linux/bch.h>
76  
77  #if defined(CONFIG_BCH_CONST_PARAMS)
78  #define GF_M(_p)               (CONFIG_BCH_CONST_M)
79  #define GF_T(_p)               (CONFIG_BCH_CONST_T)
80  #define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
81  #define BCH_MAX_M              (CONFIG_BCH_CONST_M)
82  #define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
83  #else
84  #define GF_M(_p)               ((_p)->m)
85  #define GF_T(_p)               ((_p)->t)
86  #define GF_N(_p)               ((_p)->n)
87  #define BCH_MAX_M              15 /* 2KB */
88  #define BCH_MAX_T              64 /* 64 bit correction */
89  #endif
90  
91  #define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
92  #define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
93  
94  #define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
95  
96  #ifndef dbg
97  #define dbg(_fmt, args...)     do {} while (0)
98  #endif
99  
100  /*
101   * represent a polynomial over GF(2^m)
102   */
103  struct gf_poly {
104  	unsigned int deg;    /* polynomial degree */
105  	unsigned int c[];   /* polynomial terms */
106  };
107  
108  /* given its degree, compute a polynomial size in bytes */
109  #define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
110  
111  /* polynomial of degree 1 */
112  struct gf_poly_deg1 {
113  	struct gf_poly poly;
114  	unsigned int   c[2];
115  };
116  
117  static u8 swap_bits_table[] = {
118  	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
119  	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
120  	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
121  	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
122  	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
123  	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
124  	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
125  	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
126  	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
127  	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
128  	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
129  	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
130  	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
131  	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
132  	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
133  	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
134  	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
135  	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
136  	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
137  	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
138  	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
139  	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
140  	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
141  	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
142  	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
143  	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
144  	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
145  	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
146  	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
147  	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
148  	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
149  	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
150  };
151  
152  static u8 swap_bits(struct bch_control *bch, u8 in)
153  {
154  	if (!bch->swap_bits)
155  		return in;
156  
157  	return swap_bits_table[in];
158  }
159  
160  /*
161   * same as bch_encode(), but process input data one byte at a time
162   */
163  static void bch_encode_unaligned(struct bch_control *bch,
164  				 const unsigned char *data, unsigned int len,
165  				 uint32_t *ecc)
166  {
167  	int i;
168  	const uint32_t *p;
169  	const int l = BCH_ECC_WORDS(bch)-1;
170  
171  	while (len--) {
172  		u8 tmp = swap_bits(bch, *data++);
173  
174  		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
175  
176  		for (i = 0; i < l; i++)
177  			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
178  
179  		ecc[l] = (ecc[l] << 8)^(*p);
180  	}
181  }
182  
183  /*
184   * convert ecc bytes to aligned, zero-padded 32-bit ecc words
185   */
186  static void load_ecc8(struct bch_control *bch, uint32_t *dst,
187  		      const uint8_t *src)
188  {
189  	uint8_t pad[4] = {0, 0, 0, 0};
190  	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
191  
192  	for (i = 0; i < nwords; i++, src += 4)
193  		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
194  			((u32)swap_bits(bch, src[1]) << 16) |
195  			((u32)swap_bits(bch, src[2]) << 8) |
196  			swap_bits(bch, src[3]);
197  
198  	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
199  	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
200  		((u32)swap_bits(bch, pad[1]) << 16) |
201  		((u32)swap_bits(bch, pad[2]) << 8) |
202  		swap_bits(bch, pad[3]);
203  }
204  
205  /*
206   * convert 32-bit ecc words to ecc bytes
207   */
208  static void store_ecc8(struct bch_control *bch, uint8_t *dst,
209  		       const uint32_t *src)
210  {
211  	uint8_t pad[4];
212  	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
213  
214  	for (i = 0; i < nwords; i++) {
215  		*dst++ = swap_bits(bch, src[i] >> 24);
216  		*dst++ = swap_bits(bch, src[i] >> 16);
217  		*dst++ = swap_bits(bch, src[i] >> 8);
218  		*dst++ = swap_bits(bch, src[i]);
219  	}
220  	pad[0] = swap_bits(bch, src[nwords] >> 24);
221  	pad[1] = swap_bits(bch, src[nwords] >> 16);
222  	pad[2] = swap_bits(bch, src[nwords] >> 8);
223  	pad[3] = swap_bits(bch, src[nwords]);
224  	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
225  }
226  
227  /**
228   * bch_encode - calculate BCH ecc parity of data
229   * @bch:   BCH control structure
230   * @data:  data to encode
231   * @len:   data length in bytes
232   * @ecc:   ecc parity data, must be initialized by caller
233   *
234   * The @ecc parity array is used both as input and output parameter, in order to
235   * allow incremental computations. It should be of the size indicated by member
236   * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
237   *
238   * The exact number of computed ecc parity bits is given by member @ecc_bits of
239   * @bch; it may be less than m*t for large values of t.
240   */
241  void bch_encode(struct bch_control *bch, const uint8_t *data,
242  		unsigned int len, uint8_t *ecc)
243  {
244  	const unsigned int l = BCH_ECC_WORDS(bch)-1;
245  	unsigned int i, mlen;
246  	unsigned long m;
247  	uint32_t w, r[BCH_ECC_MAX_WORDS];
248  	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
249  	const uint32_t * const tab0 = bch->mod8_tab;
250  	const uint32_t * const tab1 = tab0 + 256*(l+1);
251  	const uint32_t * const tab2 = tab1 + 256*(l+1);
252  	const uint32_t * const tab3 = tab2 + 256*(l+1);
253  	const uint32_t *pdata, *p0, *p1, *p2, *p3;
254  
255  	if (WARN_ON(r_bytes > sizeof(r)))
256  		return;
257  
258  	if (ecc) {
259  		/* load ecc parity bytes into internal 32-bit buffer */
260  		load_ecc8(bch, bch->ecc_buf, ecc);
261  	} else {
262  		memset(bch->ecc_buf, 0, r_bytes);
263  	}
264  
265  	/* process first unaligned data bytes */
266  	m = ((unsigned long)data) & 3;
267  	if (m) {
268  		mlen = (len < (4-m)) ? len : 4-m;
269  		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
270  		data += mlen;
271  		len  -= mlen;
272  	}
273  
274  	/* process 32-bit aligned data words */
275  	pdata = (uint32_t *)data;
276  	mlen  = len/4;
277  	data += 4*mlen;
278  	len  -= 4*mlen;
279  	memcpy(r, bch->ecc_buf, r_bytes);
280  
281  	/*
282  	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
283  	 *
284  	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
285  	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
286  	 *                               tttttttt  mod g = r0 (precomputed)
287  	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
288  	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
289  	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
290  	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
291  	 */
292  	while (mlen--) {
293  		/* input data is read in big-endian format */
294  		w = cpu_to_be32(*pdata++);
295  		if (bch->swap_bits)
296  			w = (u32)swap_bits(bch, w) |
297  			    ((u32)swap_bits(bch, w >> 8) << 8) |
298  			    ((u32)swap_bits(bch, w >> 16) << 16) |
299  			    ((u32)swap_bits(bch, w >> 24) << 24);
300  		w ^= r[0];
301  		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
302  		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
303  		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
304  		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
305  
306  		for (i = 0; i < l; i++)
307  			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
308  
309  		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
310  	}
311  	memcpy(bch->ecc_buf, r, r_bytes);
312  
313  	/* process last unaligned bytes */
314  	if (len)
315  		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
316  
317  	/* store ecc parity bytes into original parity buffer */
318  	if (ecc)
319  		store_ecc8(bch, ecc, bch->ecc_buf);
320  }
321  EXPORT_SYMBOL_GPL(bch_encode);
322  
323  static inline int modulo(struct bch_control *bch, unsigned int v)
324  {
325  	const unsigned int n = GF_N(bch);
326  	while (v >= n) {
327  		v -= n;
328  		v = (v & n) + (v >> GF_M(bch));
329  	}
330  	return v;
331  }
332  
333  /*
334   * shorter and faster modulo function, only works when v < 2N.
335   */
336  static inline int mod_s(struct bch_control *bch, unsigned int v)
337  {
338  	const unsigned int n = GF_N(bch);
339  	return (v < n) ? v : v-n;
340  }
341  
342  static inline int deg(unsigned int poly)
343  {
344  	/* polynomial degree is the most-significant bit index */
345  	return fls(poly)-1;
346  }
347  
348  static inline int parity(unsigned int x)
349  {
350  	/*
351  	 * public domain code snippet, lifted from
352  	 * http://www-graphics.stanford.edu/~seander/bithacks.html
353  	 */
354  	x ^= x >> 1;
355  	x ^= x >> 2;
356  	x = (x & 0x11111111U) * 0x11111111U;
357  	return (x >> 28) & 1;
358  }
359  
360  /* Galois field basic operations: multiply, divide, inverse, etc. */
361  
362  static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
363  				  unsigned int b)
364  {
365  	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
366  					       bch->a_log_tab[b])] : 0;
367  }
368  
369  static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
370  {
371  	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
372  }
373  
374  static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
375  				  unsigned int b)
376  {
377  	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
378  					GF_N(bch)-bch->a_log_tab[b])] : 0;
379  }
380  
381  static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
382  {
383  	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
384  }
385  
386  static inline unsigned int a_pow(struct bch_control *bch, int i)
387  {
388  	return bch->a_pow_tab[modulo(bch, i)];
389  }
390  
391  static inline int a_log(struct bch_control *bch, unsigned int x)
392  {
393  	return bch->a_log_tab[x];
394  }
395  
396  static inline int a_ilog(struct bch_control *bch, unsigned int x)
397  {
398  	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
399  }
400  
401  /*
402   * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
403   */
404  static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
405  			      unsigned int *syn)
406  {
407  	int i, j, s;
408  	unsigned int m;
409  	uint32_t poly;
410  	const int t = GF_T(bch);
411  
412  	s = bch->ecc_bits;
413  
414  	/* make sure extra bits in last ecc word are cleared */
415  	m = ((unsigned int)s) & 31;
416  	if (m)
417  		ecc[s/32] &= ~((1u << (32-m))-1);
418  	memset(syn, 0, 2*t*sizeof(*syn));
419  
420  	/* compute v(a^j) for j=1 .. 2t-1 */
421  	do {
422  		poly = *ecc++;
423  		s -= 32;
424  		while (poly) {
425  			i = deg(poly);
426  			for (j = 0; j < 2*t; j += 2)
427  				syn[j] ^= a_pow(bch, (j+1)*(i+s));
428  
429  			poly ^= (1 << i);
430  		}
431  	} while (s > 0);
432  
433  	/* v(a^(2j)) = v(a^j)^2 */
434  	for (j = 0; j < t; j++)
435  		syn[2*j+1] = gf_sqr(bch, syn[j]);
436  }
437  
438  static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
439  {
440  	memcpy(dst, src, GF_POLY_SZ(src->deg));
441  }
442  
443  static int compute_error_locator_polynomial(struct bch_control *bch,
444  					    const unsigned int *syn)
445  {
446  	const unsigned int t = GF_T(bch);
447  	const unsigned int n = GF_N(bch);
448  	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
449  	struct gf_poly *elp = bch->elp;
450  	struct gf_poly *pelp = bch->poly_2t[0];
451  	struct gf_poly *elp_copy = bch->poly_2t[1];
452  	int k, pp = -1;
453  
454  	memset(pelp, 0, GF_POLY_SZ(2*t));
455  	memset(elp, 0, GF_POLY_SZ(2*t));
456  
457  	pelp->deg = 0;
458  	pelp->c[0] = 1;
459  	elp->deg = 0;
460  	elp->c[0] = 1;
461  
462  	/* use simplified binary Berlekamp-Massey algorithm */
463  	for (i = 0; (i < t) && (elp->deg <= t); i++) {
464  		if (d) {
465  			k = 2*i-pp;
466  			gf_poly_copy(elp_copy, elp);
467  			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
468  			tmp = a_log(bch, d)+n-a_log(bch, pd);
469  			for (j = 0; j <= pelp->deg; j++) {
470  				if (pelp->c[j]) {
471  					l = a_log(bch, pelp->c[j]);
472  					elp->c[j+k] ^= a_pow(bch, tmp+l);
473  				}
474  			}
475  			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
476  			tmp = pelp->deg+k;
477  			if (tmp > elp->deg) {
478  				elp->deg = tmp;
479  				gf_poly_copy(pelp, elp_copy);
480  				pd = d;
481  				pp = 2*i;
482  			}
483  		}
484  		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
485  		if (i < t-1) {
486  			d = syn[2*i+2];
487  			for (j = 1; j <= elp->deg; j++)
488  				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
489  		}
490  	}
491  	dbg("elp=%s\n", gf_poly_str(elp));
492  	return (elp->deg > t) ? -1 : (int)elp->deg;
493  }
494  
495  /*
496   * solve a m x m linear system in GF(2) with an expected number of solutions,
497   * and return the number of found solutions
498   */
499  static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
500  			       unsigned int *sol, int nsol)
501  {
502  	const int m = GF_M(bch);
503  	unsigned int tmp, mask;
504  	int rem, c, r, p, k, param[BCH_MAX_M];
505  
506  	k = 0;
507  	mask = 1 << m;
508  
509  	/* Gaussian elimination */
510  	for (c = 0; c < m; c++) {
511  		rem = 0;
512  		p = c-k;
513  		/* find suitable row for elimination */
514  		for (r = p; r < m; r++) {
515  			if (rows[r] & mask) {
516  				if (r != p) {
517  					tmp = rows[r];
518  					rows[r] = rows[p];
519  					rows[p] = tmp;
520  				}
521  				rem = r+1;
522  				break;
523  			}
524  		}
525  		if (rem) {
526  			/* perform elimination on remaining rows */
527  			tmp = rows[p];
528  			for (r = rem; r < m; r++) {
529  				if (rows[r] & mask)
530  					rows[r] ^= tmp;
531  			}
532  		} else {
533  			/* elimination not needed, store defective row index */
534  			param[k++] = c;
535  		}
536  		mask >>= 1;
537  	}
538  	/* rewrite system, inserting fake parameter rows */
539  	if (k > 0) {
540  		p = k;
541  		for (r = m-1; r >= 0; r--) {
542  			if ((r > m-1-k) && rows[r])
543  				/* system has no solution */
544  				return 0;
545  
546  			rows[r] = (p && (r == param[p-1])) ?
547  				p--, 1u << (m-r) : rows[r-p];
548  		}
549  	}
550  
551  	if (nsol != (1 << k))
552  		/* unexpected number of solutions */
553  		return 0;
554  
555  	for (p = 0; p < nsol; p++) {
556  		/* set parameters for p-th solution */
557  		for (c = 0; c < k; c++)
558  			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
559  
560  		/* compute unique solution */
561  		tmp = 0;
562  		for (r = m-1; r >= 0; r--) {
563  			mask = rows[r] & (tmp|1);
564  			tmp |= parity(mask) << (m-r);
565  		}
566  		sol[p] = tmp >> 1;
567  	}
568  	return nsol;
569  }
570  
571  /*
572   * this function builds and solves a linear system for finding roots of a degree
573   * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
574   */
575  static int find_affine4_roots(struct bch_control *bch, unsigned int a,
576  			      unsigned int b, unsigned int c,
577  			      unsigned int *roots)
578  {
579  	int i, j, k;
580  	const int m = GF_M(bch);
581  	unsigned int mask = 0xff, t, rows[16] = {0,};
582  
583  	j = a_log(bch, b);
584  	k = a_log(bch, a);
585  	rows[0] = c;
586  
587  	/* build linear system to solve X^4+aX^2+bX+c = 0 */
588  	for (i = 0; i < m; i++) {
589  		rows[i+1] = bch->a_pow_tab[4*i]^
590  			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
591  			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
592  		j++;
593  		k += 2;
594  	}
595  	/*
596  	 * transpose 16x16 matrix before passing it to linear solver
597  	 * warning: this code assumes m < 16
598  	 */
599  	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
600  		for (k = 0; k < 16; k = (k+j+1) & ~j) {
601  			t = ((rows[k] >> j)^rows[k+j]) & mask;
602  			rows[k] ^= (t << j);
603  			rows[k+j] ^= t;
604  		}
605  	}
606  	return solve_linear_system(bch, rows, roots, 4);
607  }
608  
609  /*
610   * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
611   */
612  static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
613  				unsigned int *roots)
614  {
615  	int n = 0;
616  
617  	if (poly->c[0])
618  		/* poly[X] = bX+c with c!=0, root=c/b */
619  		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
620  				   bch->a_log_tab[poly->c[1]]);
621  	return n;
622  }
623  
624  /*
625   * compute roots of a degree 2 polynomial over GF(2^m)
626   */
627  static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
628  				unsigned int *roots)
629  {
630  	int n = 0, i, l0, l1, l2;
631  	unsigned int u, v, r;
632  
633  	if (poly->c[0] && poly->c[1]) {
634  
635  		l0 = bch->a_log_tab[poly->c[0]];
636  		l1 = bch->a_log_tab[poly->c[1]];
637  		l2 = bch->a_log_tab[poly->c[2]];
638  
639  		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
640  		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
641  		/*
642  		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
643  		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
644  		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
645  		 * i.e. r and r+1 are roots iff Tr(u)=0
646  		 */
647  		r = 0;
648  		v = u;
649  		while (v) {
650  			i = deg(v);
651  			r ^= bch->xi_tab[i];
652  			v ^= (1 << i);
653  		}
654  		/* verify root */
655  		if ((gf_sqr(bch, r)^r) == u) {
656  			/* reverse z=a/bX transformation and compute log(1/r) */
657  			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
658  					    bch->a_log_tab[r]+l2);
659  			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
660  					    bch->a_log_tab[r^1]+l2);
661  		}
662  	}
663  	return n;
664  }
665  
666  /*
667   * compute roots of a degree 3 polynomial over GF(2^m)
668   */
669  static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
670  				unsigned int *roots)
671  {
672  	int i, n = 0;
673  	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
674  
675  	if (poly->c[0]) {
676  		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
677  		e3 = poly->c[3];
678  		c2 = gf_div(bch, poly->c[0], e3);
679  		b2 = gf_div(bch, poly->c[1], e3);
680  		a2 = gf_div(bch, poly->c[2], e3);
681  
682  		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
683  		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
684  		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
685  		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
686  
687  		/* find the 4 roots of this affine polynomial */
688  		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
689  			/* remove a2 from final list of roots */
690  			for (i = 0; i < 4; i++) {
691  				if (tmp[i] != a2)
692  					roots[n++] = a_ilog(bch, tmp[i]);
693  			}
694  		}
695  	}
696  	return n;
697  }
698  
699  /*
700   * compute roots of a degree 4 polynomial over GF(2^m)
701   */
702  static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
703  				unsigned int *roots)
704  {
705  	int i, l, n = 0;
706  	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
707  
708  	if (poly->c[0] == 0)
709  		return 0;
710  
711  	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
712  	e4 = poly->c[4];
713  	d = gf_div(bch, poly->c[0], e4);
714  	c = gf_div(bch, poly->c[1], e4);
715  	b = gf_div(bch, poly->c[2], e4);
716  	a = gf_div(bch, poly->c[3], e4);
717  
718  	/* use Y=1/X transformation to get an affine polynomial */
719  	if (a) {
720  		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
721  		if (c) {
722  			/* compute e such that e^2 = c/a */
723  			f = gf_div(bch, c, a);
724  			l = a_log(bch, f);
725  			l += (l & 1) ? GF_N(bch) : 0;
726  			e = a_pow(bch, l/2);
727  			/*
728  			 * use transformation z=X+e:
729  			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
730  			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
731  			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
732  			 * z^4 + az^3 +     b'z^2 + d'
733  			 */
734  			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
735  			b = gf_mul(bch, a, e)^b;
736  		}
737  		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
738  		if (d == 0)
739  			/* assume all roots have multiplicity 1 */
740  			return 0;
741  
742  		c2 = gf_inv(bch, d);
743  		b2 = gf_div(bch, a, d);
744  		a2 = gf_div(bch, b, d);
745  	} else {
746  		/* polynomial is already affine */
747  		c2 = d;
748  		b2 = c;
749  		a2 = b;
750  	}
751  	/* find the 4 roots of this affine polynomial */
752  	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
753  		for (i = 0; i < 4; i++) {
754  			/* post-process roots (reverse transformations) */
755  			f = a ? gf_inv(bch, roots[i]) : roots[i];
756  			roots[i] = a_ilog(bch, f^e);
757  		}
758  		n = 4;
759  	}
760  	return n;
761  }
762  
763  /*
764   * build monic, log-based representation of a polynomial
765   */
766  static void gf_poly_logrep(struct bch_control *bch,
767  			   const struct gf_poly *a, int *rep)
768  {
769  	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
770  
771  	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
772  	for (i = 0; i < d; i++)
773  		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
774  }
775  
776  /*
777   * compute polynomial Euclidean division remainder in GF(2^m)[X]
778   */
779  static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
780  			const struct gf_poly *b, int *rep)
781  {
782  	int la, p, m;
783  	unsigned int i, j, *c = a->c;
784  	const unsigned int d = b->deg;
785  
786  	if (a->deg < d)
787  		return;
788  
789  	/* reuse or compute log representation of denominator */
790  	if (!rep) {
791  		rep = bch->cache;
792  		gf_poly_logrep(bch, b, rep);
793  	}
794  
795  	for (j = a->deg; j >= d; j--) {
796  		if (c[j]) {
797  			la = a_log(bch, c[j]);
798  			p = j-d;
799  			for (i = 0; i < d; i++, p++) {
800  				m = rep[i];
801  				if (m >= 0)
802  					c[p] ^= bch->a_pow_tab[mod_s(bch,
803  								     m+la)];
804  			}
805  		}
806  	}
807  	a->deg = d-1;
808  	while (!c[a->deg] && a->deg)
809  		a->deg--;
810  }
811  
812  /*
813   * compute polynomial Euclidean division quotient in GF(2^m)[X]
814   */
815  static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
816  			const struct gf_poly *b, struct gf_poly *q)
817  {
818  	if (a->deg >= b->deg) {
819  		q->deg = a->deg-b->deg;
820  		/* compute a mod b (modifies a) */
821  		gf_poly_mod(bch, a, b, NULL);
822  		/* quotient is stored in upper part of polynomial a */
823  		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
824  	} else {
825  		q->deg = 0;
826  		q->c[0] = 0;
827  	}
828  }
829  
830  /*
831   * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
832   */
833  static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
834  				   struct gf_poly *b)
835  {
836  	struct gf_poly *tmp;
837  
838  	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
839  
840  	if (a->deg < b->deg) {
841  		tmp = b;
842  		b = a;
843  		a = tmp;
844  	}
845  
846  	while (b->deg > 0) {
847  		gf_poly_mod(bch, a, b, NULL);
848  		tmp = b;
849  		b = a;
850  		a = tmp;
851  	}
852  
853  	dbg("%s\n", gf_poly_str(a));
854  
855  	return a;
856  }
857  
858  /*
859   * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
860   * This is used in Berlekamp Trace algorithm for splitting polynomials
861   */
862  static void compute_trace_bk_mod(struct bch_control *bch, int k,
863  				 const struct gf_poly *f, struct gf_poly *z,
864  				 struct gf_poly *out)
865  {
866  	const int m = GF_M(bch);
867  	int i, j;
868  
869  	/* z contains z^2j mod f */
870  	z->deg = 1;
871  	z->c[0] = 0;
872  	z->c[1] = bch->a_pow_tab[k];
873  
874  	out->deg = 0;
875  	memset(out, 0, GF_POLY_SZ(f->deg));
876  
877  	/* compute f log representation only once */
878  	gf_poly_logrep(bch, f, bch->cache);
879  
880  	for (i = 0; i < m; i++) {
881  		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
882  		for (j = z->deg; j >= 0; j--) {
883  			out->c[j] ^= z->c[j];
884  			z->c[2*j] = gf_sqr(bch, z->c[j]);
885  			z->c[2*j+1] = 0;
886  		}
887  		if (z->deg > out->deg)
888  			out->deg = z->deg;
889  
890  		if (i < m-1) {
891  			z->deg *= 2;
892  			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
893  			gf_poly_mod(bch, z, f, bch->cache);
894  		}
895  	}
896  	while (!out->c[out->deg] && out->deg)
897  		out->deg--;
898  
899  	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
900  }
901  
902  /*
903   * factor a polynomial using Berlekamp Trace algorithm (BTA)
904   */
905  static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
906  			      struct gf_poly **g, struct gf_poly **h)
907  {
908  	struct gf_poly *f2 = bch->poly_2t[0];
909  	struct gf_poly *q  = bch->poly_2t[1];
910  	struct gf_poly *tk = bch->poly_2t[2];
911  	struct gf_poly *z  = bch->poly_2t[3];
912  	struct gf_poly *gcd;
913  
914  	dbg("factoring %s...\n", gf_poly_str(f));
915  
916  	*g = f;
917  	*h = NULL;
918  
919  	/* tk = Tr(a^k.X) mod f */
920  	compute_trace_bk_mod(bch, k, f, z, tk);
921  
922  	if (tk->deg > 0) {
923  		/* compute g = gcd(f, tk) (destructive operation) */
924  		gf_poly_copy(f2, f);
925  		gcd = gf_poly_gcd(bch, f2, tk);
926  		if (gcd->deg < f->deg) {
927  			/* compute h=f/gcd(f,tk); this will modify f and q */
928  			gf_poly_div(bch, f, gcd, q);
929  			/* store g and h in-place (clobbering f) */
930  			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
931  			gf_poly_copy(*g, gcd);
932  			gf_poly_copy(*h, q);
933  		}
934  	}
935  }
936  
937  /*
938   * find roots of a polynomial, using BTZ algorithm; see the beginning of this
939   * file for details
940   */
941  static int find_poly_roots(struct bch_control *bch, unsigned int k,
942  			   struct gf_poly *poly, unsigned int *roots)
943  {
944  	int cnt;
945  	struct gf_poly *f1, *f2;
946  
947  	switch (poly->deg) {
948  		/* handle low degree polynomials with ad hoc techniques */
949  	case 1:
950  		cnt = find_poly_deg1_roots(bch, poly, roots);
951  		break;
952  	case 2:
953  		cnt = find_poly_deg2_roots(bch, poly, roots);
954  		break;
955  	case 3:
956  		cnt = find_poly_deg3_roots(bch, poly, roots);
957  		break;
958  	case 4:
959  		cnt = find_poly_deg4_roots(bch, poly, roots);
960  		break;
961  	default:
962  		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
963  		cnt = 0;
964  		if (poly->deg && (k <= GF_M(bch))) {
965  			factor_polynomial(bch, k, poly, &f1, &f2);
966  			if (f1)
967  				cnt += find_poly_roots(bch, k+1, f1, roots);
968  			if (f2)
969  				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
970  		}
971  		break;
972  	}
973  	return cnt;
974  }
975  
976  #if defined(USE_CHIEN_SEARCH)
977  /*
978   * exhaustive root search (Chien) implementation - not used, included only for
979   * reference/comparison tests
980   */
981  static int chien_search(struct bch_control *bch, unsigned int len,
982  			struct gf_poly *p, unsigned int *roots)
983  {
984  	int m;
985  	unsigned int i, j, syn, syn0, count = 0;
986  	const unsigned int k = 8*len+bch->ecc_bits;
987  
988  	/* use a log-based representation of polynomial */
989  	gf_poly_logrep(bch, p, bch->cache);
990  	bch->cache[p->deg] = 0;
991  	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
992  
993  	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
994  		/* compute elp(a^i) */
995  		for (j = 1, syn = syn0; j <= p->deg; j++) {
996  			m = bch->cache[j];
997  			if (m >= 0)
998  				syn ^= a_pow(bch, m+j*i);
999  		}
1000  		if (syn == 0) {
1001  			roots[count++] = GF_N(bch)-i;
1002  			if (count == p->deg)
1003  				break;
1004  		}
1005  	}
1006  	return (count == p->deg) ? count : 0;
1007  }
1008  #define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009  #endif /* USE_CHIEN_SEARCH */
1010  
1011  /**
1012   * bch_decode - decode received codeword and find bit error locations
1013   * @bch:      BCH control structure
1014   * @data:     received data, ignored if @calc_ecc is provided
1015   * @len:      data length in bytes, must always be provided
1016   * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017   * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018   * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019   * @errloc:   output array of error locations
1020   *
1021   * Returns:
1022   *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023   *  invalid parameters were provided
1024   *
1025   * Depending on the available hw BCH support and the need to compute @calc_ecc
1026   * separately (using bch_encode()), this function should be called with one of
1027   * the following parameter configurations -
1028   *
1029   * by providing @data and @recv_ecc only:
1030   *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031   *
1032   * by providing @recv_ecc and @calc_ecc:
1033   *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034   *
1035   * by providing ecc = recv_ecc XOR calc_ecc:
1036   *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037   *
1038   * by providing syndrome results @syn:
1039   *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040   *
1041   * Once bch_decode() has successfully returned with a positive value, error
1042   * locations returned in array @errloc should be interpreted as follows -
1043   *
1044   * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045   * data correction)
1046   *
1047   * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048   * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049   *
1050   * Note that this function does not perform any data correction by itself, it
1051   * merely indicates error locations.
1052   */
1053  int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054  	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055  	       const unsigned int *syn, unsigned int *errloc)
1056  {
1057  	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058  	unsigned int nbits;
1059  	int i, err, nroots;
1060  	uint32_t sum;
1061  
1062  	/* sanity check: make sure data length can be handled */
1063  	if (8*len > (bch->n-bch->ecc_bits))
1064  		return -EINVAL;
1065  
1066  	/* if caller does not provide syndromes, compute them */
1067  	if (!syn) {
1068  		if (!calc_ecc) {
1069  			/* compute received data ecc into an internal buffer */
1070  			if (!data || !recv_ecc)
1071  				return -EINVAL;
1072  			bch_encode(bch, data, len, NULL);
1073  		} else {
1074  			/* load provided calculated ecc */
1075  			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076  		}
1077  		/* load received ecc or assume it was XORed in calc_ecc */
1078  		if (recv_ecc) {
1079  			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080  			/* XOR received and calculated ecc */
1081  			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082  				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083  				sum |= bch->ecc_buf[i];
1084  			}
1085  			if (!sum)
1086  				/* no error found */
1087  				return 0;
1088  		}
1089  		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090  		syn = bch->syn;
1091  	}
1092  
1093  	err = compute_error_locator_polynomial(bch, syn);
1094  	if (err > 0) {
1095  		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096  		if (err != nroots)
1097  			err = -1;
1098  	}
1099  	if (err > 0) {
1100  		/* post-process raw error locations for easier correction */
1101  		nbits = (len*8)+bch->ecc_bits;
1102  		for (i = 0; i < err; i++) {
1103  			if (errloc[i] >= nbits) {
1104  				err = -1;
1105  				break;
1106  			}
1107  			errloc[i] = nbits-1-errloc[i];
1108  			if (!bch->swap_bits)
1109  				errloc[i] = (errloc[i] & ~7) |
1110  					    (7-(errloc[i] & 7));
1111  		}
1112  	}
1113  	return (err >= 0) ? err : -EBADMSG;
1114  }
1115  EXPORT_SYMBOL_GPL(bch_decode);
1116  
1117  /*
1118   * generate Galois field lookup tables
1119   */
1120  static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121  {
1122  	unsigned int i, x = 1;
1123  	const unsigned int k = 1 << deg(poly);
1124  
1125  	/* primitive polynomial must be of degree m */
1126  	if (k != (1u << GF_M(bch)))
1127  		return -1;
1128  
1129  	for (i = 0; i < GF_N(bch); i++) {
1130  		bch->a_pow_tab[i] = x;
1131  		bch->a_log_tab[x] = i;
1132  		if (i && (x == 1))
1133  			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134  			return -1;
1135  		x <<= 1;
1136  		if (x & k)
1137  			x ^= poly;
1138  	}
1139  	bch->a_pow_tab[GF_N(bch)] = 1;
1140  	bch->a_log_tab[0] = 0;
1141  
1142  	return 0;
1143  }
1144  
1145  /*
1146   * compute generator polynomial remainder tables for fast encoding
1147   */
1148  static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149  {
1150  	int i, j, b, d;
1151  	uint32_t data, hi, lo, *tab;
1152  	const int l = BCH_ECC_WORDS(bch);
1153  	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154  	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155  
1156  	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157  
1158  	for (i = 0; i < 256; i++) {
1159  		/* p(X)=i is a small polynomial of weight <= 8 */
1160  		for (b = 0; b < 4; b++) {
1161  			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162  			tab = bch->mod8_tab + (b*256+i)*l;
1163  			data = i << (8*b);
1164  			while (data) {
1165  				d = deg(data);
1166  				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167  				data ^= g[0] >> (31-d);
1168  				for (j = 0; j < ecclen; j++) {
1169  					hi = (d < 31) ? g[j] << (d+1) : 0;
1170  					lo = (j+1 < plen) ?
1171  						g[j+1] >> (31-d) : 0;
1172  					tab[j] ^= hi|lo;
1173  				}
1174  			}
1175  		}
1176  	}
1177  }
1178  
1179  /*
1180   * build a base for factoring degree 2 polynomials
1181   */
1182  static int build_deg2_base(struct bch_control *bch)
1183  {
1184  	const int m = GF_M(bch);
1185  	int i, j, r;
1186  	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187  
1188  	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189  	for (i = 0; i < m; i++) {
1190  		for (j = 0, sum = 0; j < m; j++)
1191  			sum ^= a_pow(bch, i*(1 << j));
1192  
1193  		if (sum) {
1194  			ak = bch->a_pow_tab[i];
1195  			break;
1196  		}
1197  	}
1198  	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199  	remaining = m;
1200  	memset(xi, 0, sizeof(xi));
1201  
1202  	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203  		y = gf_sqr(bch, x)^x;
1204  		for (i = 0; i < 2; i++) {
1205  			r = a_log(bch, y);
1206  			if (y && (r < m) && !xi[r]) {
1207  				bch->xi_tab[r] = x;
1208  				xi[r] = 1;
1209  				remaining--;
1210  				dbg("x%d = %x\n", r, x);
1211  				break;
1212  			}
1213  			y ^= ak;
1214  		}
1215  	}
1216  	/* should not happen but check anyway */
1217  	return remaining ? -1 : 0;
1218  }
1219  
1220  static void *bch_alloc(size_t size, int *err)
1221  {
1222  	void *ptr;
1223  
1224  	ptr = kmalloc(size, GFP_KERNEL);
1225  	if (ptr == NULL)
1226  		*err = 1;
1227  	return ptr;
1228  }
1229  
1230  /*
1231   * compute generator polynomial for given (m,t) parameters.
1232   */
1233  static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234  {
1235  	const unsigned int m = GF_M(bch);
1236  	const unsigned int t = GF_T(bch);
1237  	int n, err = 0;
1238  	unsigned int i, j, nbits, r, word, *roots;
1239  	struct gf_poly *g;
1240  	uint32_t *genpoly;
1241  
1242  	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243  	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244  	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245  
1246  	if (err) {
1247  		kfree(genpoly);
1248  		genpoly = NULL;
1249  		goto finish;
1250  	}
1251  
1252  	/* enumerate all roots of g(X) */
1253  	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254  	for (i = 0; i < t; i++) {
1255  		for (j = 0, r = 2*i+1; j < m; j++) {
1256  			roots[r] = 1;
1257  			r = mod_s(bch, 2*r);
1258  		}
1259  	}
1260  	/* build generator polynomial g(X) */
1261  	g->deg = 0;
1262  	g->c[0] = 1;
1263  	for (i = 0; i < GF_N(bch); i++) {
1264  		if (roots[i]) {
1265  			/* multiply g(X) by (X+root) */
1266  			r = bch->a_pow_tab[i];
1267  			g->c[g->deg+1] = 1;
1268  			for (j = g->deg; j > 0; j--)
1269  				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270  
1271  			g->c[0] = gf_mul(bch, g->c[0], r);
1272  			g->deg++;
1273  		}
1274  	}
1275  	/* store left-justified binary representation of g(X) */
1276  	n = g->deg+1;
1277  	i = 0;
1278  
1279  	while (n > 0) {
1280  		nbits = (n > 32) ? 32 : n;
1281  		for (j = 0, word = 0; j < nbits; j++) {
1282  			if (g->c[n-1-j])
1283  				word |= 1u << (31-j);
1284  		}
1285  		genpoly[i++] = word;
1286  		n -= nbits;
1287  	}
1288  	bch->ecc_bits = g->deg;
1289  
1290  finish:
1291  	kfree(g);
1292  	kfree(roots);
1293  
1294  	return genpoly;
1295  }
1296  
1297  /**
1298   * bch_init - initialize a BCH encoder/decoder
1299   * @m:          Galois field order, should be in the range 5-15
1300   * @t:          maximum error correction capability, in bits
1301   * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302   * @swap_bits:  swap bits within data and syndrome bytes
1303   *
1304   * Returns:
1305   *  a newly allocated BCH control structure if successful, NULL otherwise
1306   *
1307   * This initialization can take some time, as lookup tables are built for fast
1308   * encoding/decoding; make sure not to call this function from a time critical
1309   * path. Usually, bch_init() should be called on module/driver init and
1310   * bch_free() should be called to release memory on exit.
1311   *
1312   * You may provide your own primitive polynomial of degree @m in argument
1313   * @prim_poly, or let bch_init() use its default polynomial.
1314   *
1315   * Once bch_init() has successfully returned a pointer to a newly allocated
1316   * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317   * the structure.
1318   */
1319  struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320  			     bool swap_bits)
1321  {
1322  	int err = 0;
1323  	unsigned int i, words;
1324  	uint32_t *genpoly;
1325  	struct bch_control *bch = NULL;
1326  
1327  	const int min_m = 5;
1328  
1329  	/* default primitive polynomials */
1330  	static const unsigned int prim_poly_tab[] = {
1331  		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332  		0x402b, 0x8003,
1333  	};
1334  
1335  #if defined(CONFIG_BCH_CONST_PARAMS)
1336  	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337  		printk(KERN_ERR "bch encoder/decoder was configured to support "
1338  		       "parameters m=%d, t=%d only!\n",
1339  		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340  		goto fail;
1341  	}
1342  #endif
1343  	if ((m < min_m) || (m > BCH_MAX_M))
1344  		/*
1345  		 * values of m greater than 15 are not currently supported;
1346  		 * supporting m > 15 would require changing table base type
1347  		 * (uint16_t) and a small patch in matrix transposition
1348  		 */
1349  		goto fail;
1350  
1351  	if (t > BCH_MAX_T)
1352  		/*
1353  		 * we can support larger than 64 bits if necessary, at the
1354  		 * cost of higher stack usage.
1355  		 */
1356  		goto fail;
1357  
1358  	/* sanity checks */
1359  	if ((t < 1) || (m*t >= ((1 << m)-1)))
1360  		/* invalid t value */
1361  		goto fail;
1362  
1363  	/* select a primitive polynomial for generating GF(2^m) */
1364  	if (prim_poly == 0)
1365  		prim_poly = prim_poly_tab[m-min_m];
1366  
1367  	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368  	if (bch == NULL)
1369  		goto fail;
1370  
1371  	bch->m = m;
1372  	bch->t = t;
1373  	bch->n = (1 << m)-1;
1374  	words  = DIV_ROUND_UP(m*t, 32);
1375  	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376  	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377  	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378  	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379  	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380  	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381  	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382  	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383  	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384  	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385  	bch->swap_bits = swap_bits;
1386  
1387  	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388  		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389  
1390  	if (err)
1391  		goto fail;
1392  
1393  	err = build_gf_tables(bch, prim_poly);
1394  	if (err)
1395  		goto fail;
1396  
1397  	/* use generator polynomial for computing encoding tables */
1398  	genpoly = compute_generator_polynomial(bch);
1399  	if (genpoly == NULL)
1400  		goto fail;
1401  
1402  	build_mod8_tables(bch, genpoly);
1403  	kfree(genpoly);
1404  
1405  	err = build_deg2_base(bch);
1406  	if (err)
1407  		goto fail;
1408  
1409  	return bch;
1410  
1411  fail:
1412  	bch_free(bch);
1413  	return NULL;
1414  }
1415  EXPORT_SYMBOL_GPL(bch_init);
1416  
1417  /**
1418   *  bch_free - free the BCH control structure
1419   *  @bch:    BCH control structure to release
1420   */
1421  void bch_free(struct bch_control *bch)
1422  {
1423  	unsigned int i;
1424  
1425  	if (bch) {
1426  		kfree(bch->a_pow_tab);
1427  		kfree(bch->a_log_tab);
1428  		kfree(bch->mod8_tab);
1429  		kfree(bch->ecc_buf);
1430  		kfree(bch->ecc_buf2);
1431  		kfree(bch->xi_tab);
1432  		kfree(bch->syn);
1433  		kfree(bch->cache);
1434  		kfree(bch->elp);
1435  
1436  		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437  			kfree(bch->poly_2t[i]);
1438  
1439  		kfree(bch);
1440  	}
1441  }
1442  EXPORT_SYMBOL_GPL(bch_free);
1443  
1444  MODULE_LICENSE("GPL");
1445  MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446  MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1447