xref: /openbmc/linux/lib/bch.c (revision f7af616c632ee2ac3af0876fe33bf9e0232e665a)
1 /*
2  * Generic binary BCH encoding/decoding library
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License version 2 as published by
6  * the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 51
15  * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
16  *
17  * Copyright © 2011 Parrot S.A.
18  *
19  * Author: Ivan Djelic <ivan.djelic@parrot.com>
20  *
21  * Description:
22  *
23  * This library provides runtime configurable encoding/decoding of binary
24  * Bose-Chaudhuri-Hocquenghem (BCH) codes.
25  *
26  * Call bch_init to get a pointer to a newly allocated bch_control structure for
27  * the given m (Galois field order), t (error correction capability) and
28  * (optional) primitive polynomial parameters.
29  *
30  * Call bch_encode to compute and store ecc parity bytes to a given buffer.
31  * Call bch_decode to detect and locate errors in received data.
32  *
33  * On systems supporting hw BCH features, intermediate results may be provided
34  * to bch_decode in order to skip certain steps. See bch_decode() documentation
35  * for details.
36  *
37  * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of
38  * parameters m and t; thus allowing extra compiler optimizations and providing
39  * better (up to 2x) encoding performance. Using this option makes sense when
40  * (m,t) are fixed and known in advance, e.g. when using BCH error correction
41  * on a particular NAND flash device.
42  *
43  * Algorithmic details:
44  *
45  * Encoding is performed by processing 32 input bits in parallel, using 4
46  * remainder lookup tables.
47  *
48  * The final stage of decoding involves the following internal steps:
49  * a. Syndrome computation
50  * b. Error locator polynomial computation using Berlekamp-Massey algorithm
51  * c. Error locator root finding (by far the most expensive step)
52  *
53  * In this implementation, step c is not performed using the usual Chien search.
54  * Instead, an alternative approach described in [1] is used. It consists in
55  * factoring the error locator polynomial using the Berlekamp Trace algorithm
56  * (BTA) down to a certain degree (4), after which ad hoc low-degree polynomial
57  * solving techniques [2] are used. The resulting algorithm, called BTZ, yields
58  * much better performance than Chien search for usual (m,t) values (typically
59  * m >= 13, t < 32, see [1]).
60  *
61  * [1] B. Biswas, V. Herbert. Efficient root finding of polynomials over fields
62  * of characteristic 2, in: Western European Workshop on Research in Cryptology
63  * - WEWoRC 2009, Graz, Austria, LNCS, Springer, July 2009, to appear.
64  * [2] [Zin96] V.A. Zinoviev. On the solution of equations of degree 10 over
65  * finite fields GF(2^q). In Rapport de recherche INRIA no 2829, 1996.
66  */
67 
68 #include <linux/kernel.h>
69 #include <linux/errno.h>
70 #include <linux/init.h>
71 #include <linux/module.h>
72 #include <linux/slab.h>
73 #include <linux/bitops.h>
74 #include <asm/byteorder.h>
75 #include <linux/bch.h>
76 
77 #if defined(CONFIG_BCH_CONST_PARAMS)
78 #define GF_M(_p)               (CONFIG_BCH_CONST_M)
79 #define GF_T(_p)               (CONFIG_BCH_CONST_T)
80 #define GF_N(_p)               ((1 << (CONFIG_BCH_CONST_M))-1)
81 #define BCH_MAX_M              (CONFIG_BCH_CONST_M)
82 #define BCH_MAX_T	       (CONFIG_BCH_CONST_T)
83 #else
84 #define GF_M(_p)               ((_p)->m)
85 #define GF_T(_p)               ((_p)->t)
86 #define GF_N(_p)               ((_p)->n)
87 #define BCH_MAX_M              15 /* 2KB */
88 #define BCH_MAX_T              64 /* 64 bit correction */
89 #endif
90 
91 #define BCH_ECC_WORDS(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
92 #define BCH_ECC_BYTES(_p)      DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
93 
94 #define BCH_ECC_MAX_WORDS      DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
95 
96 #ifndef dbg
97 #define dbg(_fmt, args...)     do {} while (0)
98 #endif
99 
100 /*
101  * represent a polynomial over GF(2^m)
102  */
103 struct gf_poly {
104 	unsigned int deg;    /* polynomial degree */
105 	unsigned int c[];   /* polynomial terms */
106 };
107 
108 /* given its degree, compute a polynomial size in bytes */
109 #define GF_POLY_SZ(_d) (sizeof(struct gf_poly)+((_d)+1)*sizeof(unsigned int))
110 
111 /* polynomial of degree 1 */
112 struct gf_poly_deg1 {
113 	struct gf_poly poly;
114 	unsigned int   c[2];
115 };
116 
117 static u8 swap_bits_table[] = {
118 	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
119 	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
120 	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
121 	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
122 	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
123 	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
124 	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
125 	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
126 	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
127 	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
128 	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
129 	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
130 	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
131 	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
132 	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
133 	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
134 	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
135 	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
136 	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
137 	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
138 	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
139 	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
140 	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
141 	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
142 	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
143 	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
144 	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
145 	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
146 	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
147 	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
148 	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
149 	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
150 };
151 
152 static u8 swap_bits(struct bch_control *bch, u8 in)
153 {
154 	if (!bch->swap_bits)
155 		return in;
156 
157 	return swap_bits_table[in];
158 }
159 
160 /*
161  * same as bch_encode(), but process input data one byte at a time
162  */
163 static void bch_encode_unaligned(struct bch_control *bch,
164 				 const unsigned char *data, unsigned int len,
165 				 uint32_t *ecc)
166 {
167 	int i;
168 	const uint32_t *p;
169 	const int l = BCH_ECC_WORDS(bch)-1;
170 
171 	while (len--) {
172 		u8 tmp = swap_bits(bch, *data++);
173 
174 		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
175 
176 		for (i = 0; i < l; i++)
177 			ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++);
178 
179 		ecc[l] = (ecc[l] << 8)^(*p);
180 	}
181 }
182 
183 /*
184  * convert ecc bytes to aligned, zero-padded 32-bit ecc words
185  */
186 static void load_ecc8(struct bch_control *bch, uint32_t *dst,
187 		      const uint8_t *src)
188 {
189 	uint8_t pad[4] = {0, 0, 0, 0};
190 	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
191 
192 	for (i = 0; i < nwords; i++, src += 4)
193 		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
194 			((u32)swap_bits(bch, src[1]) << 16) |
195 			((u32)swap_bits(bch, src[2]) << 8) |
196 			swap_bits(bch, src[3]);
197 
198 	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
199 	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
200 		((u32)swap_bits(bch, pad[1]) << 16) |
201 		((u32)swap_bits(bch, pad[2]) << 8) |
202 		swap_bits(bch, pad[3]);
203 }
204 
205 /*
206  * convert 32-bit ecc words to ecc bytes
207  */
208 static void store_ecc8(struct bch_control *bch, uint8_t *dst,
209 		       const uint32_t *src)
210 {
211 	uint8_t pad[4];
212 	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
213 
214 	for (i = 0; i < nwords; i++) {
215 		*dst++ = swap_bits(bch, src[i] >> 24);
216 		*dst++ = swap_bits(bch, src[i] >> 16);
217 		*dst++ = swap_bits(bch, src[i] >> 8);
218 		*dst++ = swap_bits(bch, src[i]);
219 	}
220 	pad[0] = swap_bits(bch, src[nwords] >> 24);
221 	pad[1] = swap_bits(bch, src[nwords] >> 16);
222 	pad[2] = swap_bits(bch, src[nwords] >> 8);
223 	pad[3] = swap_bits(bch, src[nwords]);
224 	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
225 }
226 
227 /**
228  * bch_encode - calculate BCH ecc parity of data
229  * @bch:   BCH control structure
230  * @data:  data to encode
231  * @len:   data length in bytes
232  * @ecc:   ecc parity data, must be initialized by caller
233  *
234  * The @ecc parity array is used both as input and output parameter, in order to
235  * allow incremental computations. It should be of the size indicated by member
236  * @ecc_bytes of @bch, and should be initialized to 0 before the first call.
237  *
238  * The exact number of computed ecc parity bits is given by member @ecc_bits of
239  * @bch; it may be less than m*t for large values of t.
240  */
241 void bch_encode(struct bch_control *bch, const uint8_t *data,
242 		unsigned int len, uint8_t *ecc)
243 {
244 	const unsigned int l = BCH_ECC_WORDS(bch)-1;
245 	unsigned int i, mlen;
246 	unsigned long m;
247 	uint32_t w, r[BCH_ECC_MAX_WORDS];
248 	const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r);
249 	const uint32_t * const tab0 = bch->mod8_tab;
250 	const uint32_t * const tab1 = tab0 + 256*(l+1);
251 	const uint32_t * const tab2 = tab1 + 256*(l+1);
252 	const uint32_t * const tab3 = tab2 + 256*(l+1);
253 	const uint32_t *pdata, *p0, *p1, *p2, *p3;
254 
255 	if (WARN_ON(r_bytes > sizeof(r)))
256 		return;
257 
258 	if (ecc) {
259 		/* load ecc parity bytes into internal 32-bit buffer */
260 		load_ecc8(bch, bch->ecc_buf, ecc);
261 	} else {
262 		memset(bch->ecc_buf, 0, r_bytes);
263 	}
264 
265 	/* process first unaligned data bytes */
266 	m = ((unsigned long)data) & 3;
267 	if (m) {
268 		mlen = (len < (4-m)) ? len : 4-m;
269 		bch_encode_unaligned(bch, data, mlen, bch->ecc_buf);
270 		data += mlen;
271 		len  -= mlen;
272 	}
273 
274 	/* process 32-bit aligned data words */
275 	pdata = (uint32_t *)data;
276 	mlen  = len/4;
277 	data += 4*mlen;
278 	len  -= 4*mlen;
279 	memcpy(r, bch->ecc_buf, r_bytes);
280 
281 	/*
282 	 * split each 32-bit word into 4 polynomials of weight 8 as follows:
283 	 *
284 	 * 31 ...24  23 ...16  15 ... 8  7 ... 0
285 	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt
286 	 *                               tttttttt  mod g = r0 (precomputed)
287 	 *                     zzzzzzzz  00000000  mod g = r1 (precomputed)
288 	 *           yyyyyyyy  00000000  00000000  mod g = r2 (precomputed)
289 	 * xxxxxxxx  00000000  00000000  00000000  mod g = r3 (precomputed)
290 	 * xxxxxxxx  yyyyyyyy  zzzzzzzz  tttttttt  mod g = r0^r1^r2^r3
291 	 */
292 	while (mlen--) {
293 		/* input data is read in big-endian format */
294 		w = cpu_to_be32(*pdata++);
295 		if (bch->swap_bits)
296 			w = (u32)swap_bits(bch, w) |
297 			    ((u32)swap_bits(bch, w >> 8) << 8) |
298 			    ((u32)swap_bits(bch, w >> 16) << 16) |
299 			    ((u32)swap_bits(bch, w >> 24) << 24);
300 		w ^= r[0];
301 		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
302 		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
303 		p2 = tab2 + (l+1)*((w >> 16) & 0xff);
304 		p3 = tab3 + (l+1)*((w >> 24) & 0xff);
305 
306 		for (i = 0; i < l; i++)
307 			r[i] = r[i+1]^p0[i]^p1[i]^p2[i]^p3[i];
308 
309 		r[l] = p0[l]^p1[l]^p2[l]^p3[l];
310 	}
311 	memcpy(bch->ecc_buf, r, r_bytes);
312 
313 	/* process last unaligned bytes */
314 	if (len)
315 		bch_encode_unaligned(bch, data, len, bch->ecc_buf);
316 
317 	/* store ecc parity bytes into original parity buffer */
318 	if (ecc)
319 		store_ecc8(bch, ecc, bch->ecc_buf);
320 }
321 EXPORT_SYMBOL_GPL(bch_encode);
322 
323 static inline int modulo(struct bch_control *bch, unsigned int v)
324 {
325 	const unsigned int n = GF_N(bch);
326 	while (v >= n) {
327 		v -= n;
328 		v = (v & n) + (v >> GF_M(bch));
329 	}
330 	return v;
331 }
332 
333 /*
334  * shorter and faster modulo function, only works when v < 2N.
335  */
336 static inline int mod_s(struct bch_control *bch, unsigned int v)
337 {
338 	const unsigned int n = GF_N(bch);
339 	return (v < n) ? v : v-n;
340 }
341 
342 static inline int deg(unsigned int poly)
343 {
344 	/* polynomial degree is the most-significant bit index */
345 	return fls(poly)-1;
346 }
347 
348 static inline int parity(unsigned int x)
349 {
350 	/*
351 	 * public domain code snippet, lifted from
352 	 * http://www-graphics.stanford.edu/~seander/bithacks.html
353 	 */
354 	x ^= x >> 1;
355 	x ^= x >> 2;
356 	x = (x & 0x11111111U) * 0x11111111U;
357 	return (x >> 28) & 1;
358 }
359 
360 /* Galois field basic operations: multiply, divide, inverse, etc. */
361 
362 static inline unsigned int gf_mul(struct bch_control *bch, unsigned int a,
363 				  unsigned int b)
364 {
365 	return (a && b) ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
366 					       bch->a_log_tab[b])] : 0;
367 }
368 
369 static inline unsigned int gf_sqr(struct bch_control *bch, unsigned int a)
370 {
371 	return a ? bch->a_pow_tab[mod_s(bch, 2*bch->a_log_tab[a])] : 0;
372 }
373 
374 static inline unsigned int gf_div(struct bch_control *bch, unsigned int a,
375 				  unsigned int b)
376 {
377 	return a ? bch->a_pow_tab[mod_s(bch, bch->a_log_tab[a]+
378 					GF_N(bch)-bch->a_log_tab[b])] : 0;
379 }
380 
381 static inline unsigned int gf_inv(struct bch_control *bch, unsigned int a)
382 {
383 	return bch->a_pow_tab[GF_N(bch)-bch->a_log_tab[a]];
384 }
385 
386 static inline unsigned int a_pow(struct bch_control *bch, int i)
387 {
388 	return bch->a_pow_tab[modulo(bch, i)];
389 }
390 
391 static inline int a_log(struct bch_control *bch, unsigned int x)
392 {
393 	return bch->a_log_tab[x];
394 }
395 
396 static inline int a_ilog(struct bch_control *bch, unsigned int x)
397 {
398 	return mod_s(bch, GF_N(bch)-bch->a_log_tab[x]);
399 }
400 
401 /*
402  * compute 2t syndromes of ecc polynomial, i.e. ecc(a^j) for j=1..2t
403  */
404 static void compute_syndromes(struct bch_control *bch, uint32_t *ecc,
405 			      unsigned int *syn)
406 {
407 	int i, j, s;
408 	unsigned int m;
409 	uint32_t poly;
410 	const int t = GF_T(bch);
411 
412 	s = bch->ecc_bits;
413 
414 	/* make sure extra bits in last ecc word are cleared */
415 	m = ((unsigned int)s) & 31;
416 	if (m)
417 		ecc[s/32] &= ~((1u << (32-m))-1);
418 	memset(syn, 0, 2*t*sizeof(*syn));
419 
420 	/* compute v(a^j) for j=1 .. 2t-1 */
421 	do {
422 		poly = *ecc++;
423 		s -= 32;
424 		while (poly) {
425 			i = deg(poly);
426 			for (j = 0; j < 2*t; j += 2)
427 				syn[j] ^= a_pow(bch, (j+1)*(i+s));
428 
429 			poly ^= (1 << i);
430 		}
431 	} while (s > 0);
432 
433 	/* v(a^(2j)) = v(a^j)^2 */
434 	for (j = 0; j < t; j++)
435 		syn[2*j+1] = gf_sqr(bch, syn[j]);
436 }
437 
438 static void gf_poly_copy(struct gf_poly *dst, struct gf_poly *src)
439 {
440 	memcpy(dst, src, GF_POLY_SZ(src->deg));
441 }
442 
443 static int compute_error_locator_polynomial(struct bch_control *bch,
444 					    const unsigned int *syn)
445 {
446 	const unsigned int t = GF_T(bch);
447 	const unsigned int n = GF_N(bch);
448 	unsigned int i, j, tmp, l, pd = 1, d = syn[0];
449 	struct gf_poly *elp = bch->elp;
450 	struct gf_poly *pelp = bch->poly_2t[0];
451 	struct gf_poly *elp_copy = bch->poly_2t[1];
452 	int k, pp = -1;
453 
454 	memset(pelp, 0, GF_POLY_SZ(2*t));
455 	memset(elp, 0, GF_POLY_SZ(2*t));
456 
457 	pelp->deg = 0;
458 	pelp->c[0] = 1;
459 	elp->deg = 0;
460 	elp->c[0] = 1;
461 
462 	/* use simplified binary Berlekamp-Massey algorithm */
463 	for (i = 0; (i < t) && (elp->deg <= t); i++) {
464 		if (d) {
465 			k = 2*i-pp;
466 			gf_poly_copy(elp_copy, elp);
467 			/* e[i+1](X) = e[i](X)+di*dp^-1*X^2(i-p)*e[p](X) */
468 			tmp = a_log(bch, d)+n-a_log(bch, pd);
469 			for (j = 0; j <= pelp->deg; j++) {
470 				if (pelp->c[j]) {
471 					l = a_log(bch, pelp->c[j]);
472 					elp->c[j+k] ^= a_pow(bch, tmp+l);
473 				}
474 			}
475 			/* compute l[i+1] = max(l[i]->c[l[p]+2*(i-p]) */
476 			tmp = pelp->deg+k;
477 			if (tmp > elp->deg) {
478 				elp->deg = tmp;
479 				gf_poly_copy(pelp, elp_copy);
480 				pd = d;
481 				pp = 2*i;
482 			}
483 		}
484 		/* di+1 = S(2i+3)+elp[i+1].1*S(2i+2)+...+elp[i+1].lS(2i+3-l) */
485 		if (i < t-1) {
486 			d = syn[2*i+2];
487 			for (j = 1; j <= elp->deg; j++)
488 				d ^= gf_mul(bch, elp->c[j], syn[2*i+2-j]);
489 		}
490 	}
491 	dbg("elp=%s\n", gf_poly_str(elp));
492 	return (elp->deg > t) ? -1 : (int)elp->deg;
493 }
494 
495 /*
496  * solve a m x m linear system in GF(2) with an expected number of solutions,
497  * and return the number of found solutions
498  */
499 static int solve_linear_system(struct bch_control *bch, unsigned int *rows,
500 			       unsigned int *sol, int nsol)
501 {
502 	const int m = GF_M(bch);
503 	unsigned int tmp, mask;
504 	int rem, c, r, p, k, param[BCH_MAX_M];
505 
506 	k = 0;
507 	mask = 1 << m;
508 
509 	/* Gaussian elimination */
510 	for (c = 0; c < m; c++) {
511 		rem = 0;
512 		p = c-k;
513 		/* find suitable row for elimination */
514 		for (r = p; r < m; r++) {
515 			if (rows[r] & mask) {
516 				if (r != p) {
517 					tmp = rows[r];
518 					rows[r] = rows[p];
519 					rows[p] = tmp;
520 				}
521 				rem = r+1;
522 				break;
523 			}
524 		}
525 		if (rem) {
526 			/* perform elimination on remaining rows */
527 			tmp = rows[p];
528 			for (r = rem; r < m; r++) {
529 				if (rows[r] & mask)
530 					rows[r] ^= tmp;
531 			}
532 		} else {
533 			/* elimination not needed, store defective row index */
534 			param[k++] = c;
535 		}
536 		mask >>= 1;
537 	}
538 	/* rewrite system, inserting fake parameter rows */
539 	if (k > 0) {
540 		p = k;
541 		for (r = m-1; r >= 0; r--) {
542 			if ((r > m-1-k) && rows[r])
543 				/* system has no solution */
544 				return 0;
545 
546 			rows[r] = (p && (r == param[p-1])) ?
547 				p--, 1u << (m-r) : rows[r-p];
548 		}
549 	}
550 
551 	if (nsol != (1 << k))
552 		/* unexpected number of solutions */
553 		return 0;
554 
555 	for (p = 0; p < nsol; p++) {
556 		/* set parameters for p-th solution */
557 		for (c = 0; c < k; c++)
558 			rows[param[c]] = (rows[param[c]] & ~1)|((p >> c) & 1);
559 
560 		/* compute unique solution */
561 		tmp = 0;
562 		for (r = m-1; r >= 0; r--) {
563 			mask = rows[r] & (tmp|1);
564 			tmp |= parity(mask) << (m-r);
565 		}
566 		sol[p] = tmp >> 1;
567 	}
568 	return nsol;
569 }
570 
571 /*
572  * this function builds and solves a linear system for finding roots of a degree
573  * 4 affine monic polynomial X^4+aX^2+bX+c over GF(2^m).
574  */
575 static int find_affine4_roots(struct bch_control *bch, unsigned int a,
576 			      unsigned int b, unsigned int c,
577 			      unsigned int *roots)
578 {
579 	int i, j, k;
580 	const int m = GF_M(bch);
581 	unsigned int mask = 0xff, t, rows[16] = {0,};
582 
583 	j = a_log(bch, b);
584 	k = a_log(bch, a);
585 	rows[0] = c;
586 
587 	/* build linear system to solve X^4+aX^2+bX+c = 0 */
588 	for (i = 0; i < m; i++) {
589 		rows[i+1] = bch->a_pow_tab[4*i]^
590 			(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
591 			(b ? bch->a_pow_tab[mod_s(bch, j)] : 0);
592 		j++;
593 		k += 2;
594 	}
595 	/*
596 	 * transpose 16x16 matrix before passing it to linear solver
597 	 * warning: this code assumes m < 16
598 	 */
599 	for (j = 8; j != 0; j >>= 1, mask ^= (mask << j)) {
600 		for (k = 0; k < 16; k = (k+j+1) & ~j) {
601 			t = ((rows[k] >> j)^rows[k+j]) & mask;
602 			rows[k] ^= (t << j);
603 			rows[k+j] ^= t;
604 		}
605 	}
606 	return solve_linear_system(bch, rows, roots, 4);
607 }
608 
609 /*
610  * compute root r of a degree 1 polynomial over GF(2^m) (returned as log(1/r))
611  */
612 static int find_poly_deg1_roots(struct bch_control *bch, struct gf_poly *poly,
613 				unsigned int *roots)
614 {
615 	int n = 0;
616 
617 	if (poly->c[0])
618 		/* poly[X] = bX+c with c!=0, root=c/b */
619 		roots[n++] = mod_s(bch, GF_N(bch)-bch->a_log_tab[poly->c[0]]+
620 				   bch->a_log_tab[poly->c[1]]);
621 	return n;
622 }
623 
624 /*
625  * compute roots of a degree 2 polynomial over GF(2^m)
626  */
627 static int find_poly_deg2_roots(struct bch_control *bch, struct gf_poly *poly,
628 				unsigned int *roots)
629 {
630 	int n = 0, i, l0, l1, l2;
631 	unsigned int u, v, r;
632 
633 	if (poly->c[0] && poly->c[1]) {
634 
635 		l0 = bch->a_log_tab[poly->c[0]];
636 		l1 = bch->a_log_tab[poly->c[1]];
637 		l2 = bch->a_log_tab[poly->c[2]];
638 
639 		/* using z=a/bX, transform aX^2+bX+c into z^2+z+u (u=ac/b^2) */
640 		u = a_pow(bch, l0+l2+2*(GF_N(bch)-l1));
641 		/*
642 		 * let u = sum(li.a^i) i=0..m-1; then compute r = sum(li.xi):
643 		 * r^2+r = sum(li.(xi^2+xi)) = sum(li.(a^i+Tr(a^i).a^k)) =
644 		 * u + sum(li.Tr(a^i).a^k) = u+a^k.Tr(sum(li.a^i)) = u+a^k.Tr(u)
645 		 * i.e. r and r+1 are roots iff Tr(u)=0
646 		 */
647 		r = 0;
648 		v = u;
649 		while (v) {
650 			i = deg(v);
651 			r ^= bch->xi_tab[i];
652 			v ^= (1 << i);
653 		}
654 		/* verify root */
655 		if ((gf_sqr(bch, r)^r) == u) {
656 			/* reverse z=a/bX transformation and compute log(1/r) */
657 			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
658 					    bch->a_log_tab[r]+l2);
659 			roots[n++] = modulo(bch, 2*GF_N(bch)-l1-
660 					    bch->a_log_tab[r^1]+l2);
661 		}
662 	}
663 	return n;
664 }
665 
666 /*
667  * compute roots of a degree 3 polynomial over GF(2^m)
668  */
669 static int find_poly_deg3_roots(struct bch_control *bch, struct gf_poly *poly,
670 				unsigned int *roots)
671 {
672 	int i, n = 0;
673 	unsigned int a, b, c, a2, b2, c2, e3, tmp[4];
674 
675 	if (poly->c[0]) {
676 		/* transform polynomial into monic X^3 + a2X^2 + b2X + c2 */
677 		e3 = poly->c[3];
678 		c2 = gf_div(bch, poly->c[0], e3);
679 		b2 = gf_div(bch, poly->c[1], e3);
680 		a2 = gf_div(bch, poly->c[2], e3);
681 
682 		/* (X+a2)(X^3+a2X^2+b2X+c2) = X^4+aX^2+bX+c (affine) */
683 		c = gf_mul(bch, a2, c2);           /* c = a2c2      */
684 		b = gf_mul(bch, a2, b2)^c2;        /* b = a2b2 + c2 */
685 		a = gf_sqr(bch, a2)^b2;            /* a = a2^2 + b2 */
686 
687 		/* find the 4 roots of this affine polynomial */
688 		if (find_affine4_roots(bch, a, b, c, tmp) == 4) {
689 			/* remove a2 from final list of roots */
690 			for (i = 0; i < 4; i++) {
691 				if (tmp[i] != a2)
692 					roots[n++] = a_ilog(bch, tmp[i]);
693 			}
694 		}
695 	}
696 	return n;
697 }
698 
699 /*
700  * compute roots of a degree 4 polynomial over GF(2^m)
701  */
702 static int find_poly_deg4_roots(struct bch_control *bch, struct gf_poly *poly,
703 				unsigned int *roots)
704 {
705 	int i, l, n = 0;
706 	unsigned int a, b, c, d, e = 0, f, a2, b2, c2, e4;
707 
708 	if (poly->c[0] == 0)
709 		return 0;
710 
711 	/* transform polynomial into monic X^4 + aX^3 + bX^2 + cX + d */
712 	e4 = poly->c[4];
713 	d = gf_div(bch, poly->c[0], e4);
714 	c = gf_div(bch, poly->c[1], e4);
715 	b = gf_div(bch, poly->c[2], e4);
716 	a = gf_div(bch, poly->c[3], e4);
717 
718 	/* use Y=1/X transformation to get an affine polynomial */
719 	if (a) {
720 		/* first, eliminate cX by using z=X+e with ae^2+c=0 */
721 		if (c) {
722 			/* compute e such that e^2 = c/a */
723 			f = gf_div(bch, c, a);
724 			l = a_log(bch, f);
725 			l += (l & 1) ? GF_N(bch) : 0;
726 			e = a_pow(bch, l/2);
727 			/*
728 			 * use transformation z=X+e:
729 			 * z^4+e^4 + a(z^3+ez^2+e^2z+e^3) + b(z^2+e^2) +cz+ce+d
730 			 * z^4 + az^3 + (ae+b)z^2 + (ae^2+c)z+e^4+be^2+ae^3+ce+d
731 			 * z^4 + az^3 + (ae+b)z^2 + e^4+be^2+d
732 			 * z^4 + az^3 +     b'z^2 + d'
733 			 */
734 			d = a_pow(bch, 2*l)^gf_mul(bch, b, f)^d;
735 			b = gf_mul(bch, a, e)^b;
736 		}
737 		/* now, use Y=1/X to get Y^4 + b/dY^2 + a/dY + 1/d */
738 		if (d == 0)
739 			/* assume all roots have multiplicity 1 */
740 			return 0;
741 
742 		c2 = gf_inv(bch, d);
743 		b2 = gf_div(bch, a, d);
744 		a2 = gf_div(bch, b, d);
745 	} else {
746 		/* polynomial is already affine */
747 		c2 = d;
748 		b2 = c;
749 		a2 = b;
750 	}
751 	/* find the 4 roots of this affine polynomial */
752 	if (find_affine4_roots(bch, a2, b2, c2, roots) == 4) {
753 		for (i = 0; i < 4; i++) {
754 			/* post-process roots (reverse transformations) */
755 			f = a ? gf_inv(bch, roots[i]) : roots[i];
756 			roots[i] = a_ilog(bch, f^e);
757 		}
758 		n = 4;
759 	}
760 	return n;
761 }
762 
763 /*
764  * build monic, log-based representation of a polynomial
765  */
766 static void gf_poly_logrep(struct bch_control *bch,
767 			   const struct gf_poly *a, int *rep)
768 {
769 	int i, d = a->deg, l = GF_N(bch)-a_log(bch, a->c[a->deg]);
770 
771 	/* represent 0 values with -1; warning, rep[d] is not set to 1 */
772 	for (i = 0; i < d; i++)
773 		rep[i] = a->c[i] ? mod_s(bch, a_log(bch, a->c[i])+l) : -1;
774 }
775 
776 /*
777  * compute polynomial Euclidean division remainder in GF(2^m)[X]
778  */
779 static void gf_poly_mod(struct bch_control *bch, struct gf_poly *a,
780 			const struct gf_poly *b, int *rep)
781 {
782 	int la, p, m;
783 	unsigned int i, j, *c = a->c;
784 	const unsigned int d = b->deg;
785 
786 	if (a->deg < d)
787 		return;
788 
789 	/* reuse or compute log representation of denominator */
790 	if (!rep) {
791 		rep = bch->cache;
792 		gf_poly_logrep(bch, b, rep);
793 	}
794 
795 	for (j = a->deg; j >= d; j--) {
796 		if (c[j]) {
797 			la = a_log(bch, c[j]);
798 			p = j-d;
799 			for (i = 0; i < d; i++, p++) {
800 				m = rep[i];
801 				if (m >= 0)
802 					c[p] ^= bch->a_pow_tab[mod_s(bch,
803 								     m+la)];
804 			}
805 		}
806 	}
807 	a->deg = d-1;
808 	while (!c[a->deg] && a->deg)
809 		a->deg--;
810 }
811 
812 /*
813  * compute polynomial Euclidean division quotient in GF(2^m)[X]
814  */
815 static void gf_poly_div(struct bch_control *bch, struct gf_poly *a,
816 			const struct gf_poly *b, struct gf_poly *q)
817 {
818 	if (a->deg >= b->deg) {
819 		q->deg = a->deg-b->deg;
820 		/* compute a mod b (modifies a) */
821 		gf_poly_mod(bch, a, b, NULL);
822 		/* quotient is stored in upper part of polynomial a */
823 		memcpy(q->c, &a->c[b->deg], (1+q->deg)*sizeof(unsigned int));
824 	} else {
825 		q->deg = 0;
826 		q->c[0] = 0;
827 	}
828 }
829 
830 /*
831  * compute polynomial GCD (Greatest Common Divisor) in GF(2^m)[X]
832  */
833 static struct gf_poly *gf_poly_gcd(struct bch_control *bch, struct gf_poly *a,
834 				   struct gf_poly *b)
835 {
836 	struct gf_poly *tmp;
837 
838 	dbg("gcd(%s,%s)=", gf_poly_str(a), gf_poly_str(b));
839 
840 	if (a->deg < b->deg) {
841 		tmp = b;
842 		b = a;
843 		a = tmp;
844 	}
845 
846 	while (b->deg > 0) {
847 		gf_poly_mod(bch, a, b, NULL);
848 		tmp = b;
849 		b = a;
850 		a = tmp;
851 	}
852 
853 	dbg("%s\n", gf_poly_str(a));
854 
855 	return a;
856 }
857 
858 /*
859  * Given a polynomial f and an integer k, compute Tr(a^kX) mod f
860  * This is used in Berlekamp Trace algorithm for splitting polynomials
861  */
862 static void compute_trace_bk_mod(struct bch_control *bch, int k,
863 				 const struct gf_poly *f, struct gf_poly *z,
864 				 struct gf_poly *out)
865 {
866 	const int m = GF_M(bch);
867 	int i, j;
868 
869 	/* z contains z^2j mod f */
870 	z->deg = 1;
871 	z->c[0] = 0;
872 	z->c[1] = bch->a_pow_tab[k];
873 
874 	out->deg = 0;
875 	memset(out, 0, GF_POLY_SZ(f->deg));
876 
877 	/* compute f log representation only once */
878 	gf_poly_logrep(bch, f, bch->cache);
879 
880 	for (i = 0; i < m; i++) {
881 		/* add a^(k*2^i)(z^(2^i) mod f) and compute (z^(2^i) mod f)^2 */
882 		for (j = z->deg; j >= 0; j--) {
883 			out->c[j] ^= z->c[j];
884 			z->c[2*j] = gf_sqr(bch, z->c[j]);
885 			z->c[2*j+1] = 0;
886 		}
887 		if (z->deg > out->deg)
888 			out->deg = z->deg;
889 
890 		if (i < m-1) {
891 			z->deg *= 2;
892 			/* z^(2(i+1)) mod f = (z^(2^i) mod f)^2 mod f */
893 			gf_poly_mod(bch, z, f, bch->cache);
894 		}
895 	}
896 	while (!out->c[out->deg] && out->deg)
897 		out->deg--;
898 
899 	dbg("Tr(a^%d.X) mod f = %s\n", k, gf_poly_str(out));
900 }
901 
902 /*
903  * factor a polynomial using Berlekamp Trace algorithm (BTA)
904  */
905 static void factor_polynomial(struct bch_control *bch, int k, struct gf_poly *f,
906 			      struct gf_poly **g, struct gf_poly **h)
907 {
908 	struct gf_poly *f2 = bch->poly_2t[0];
909 	struct gf_poly *q  = bch->poly_2t[1];
910 	struct gf_poly *tk = bch->poly_2t[2];
911 	struct gf_poly *z  = bch->poly_2t[3];
912 	struct gf_poly *gcd;
913 
914 	dbg("factoring %s...\n", gf_poly_str(f));
915 
916 	*g = f;
917 	*h = NULL;
918 
919 	/* tk = Tr(a^k.X) mod f */
920 	compute_trace_bk_mod(bch, k, f, z, tk);
921 
922 	if (tk->deg > 0) {
923 		/* compute g = gcd(f, tk) (destructive operation) */
924 		gf_poly_copy(f2, f);
925 		gcd = gf_poly_gcd(bch, f2, tk);
926 		if (gcd->deg < f->deg) {
927 			/* compute h=f/gcd(f,tk); this will modify f and q */
928 			gf_poly_div(bch, f, gcd, q);
929 			/* store g and h in-place (clobbering f) */
930 			*h = &((struct gf_poly_deg1 *)f)[gcd->deg].poly;
931 			gf_poly_copy(*g, gcd);
932 			gf_poly_copy(*h, q);
933 		}
934 	}
935 }
936 
937 /*
938  * find roots of a polynomial, using BTZ algorithm; see the beginning of this
939  * file for details
940  */
941 static int find_poly_roots(struct bch_control *bch, unsigned int k,
942 			   struct gf_poly *poly, unsigned int *roots)
943 {
944 	int cnt;
945 	struct gf_poly *f1, *f2;
946 
947 	switch (poly->deg) {
948 		/* handle low degree polynomials with ad hoc techniques */
949 	case 1:
950 		cnt = find_poly_deg1_roots(bch, poly, roots);
951 		break;
952 	case 2:
953 		cnt = find_poly_deg2_roots(bch, poly, roots);
954 		break;
955 	case 3:
956 		cnt = find_poly_deg3_roots(bch, poly, roots);
957 		break;
958 	case 4:
959 		cnt = find_poly_deg4_roots(bch, poly, roots);
960 		break;
961 	default:
962 		/* factor polynomial using Berlekamp Trace Algorithm (BTA) */
963 		cnt = 0;
964 		if (poly->deg && (k <= GF_M(bch))) {
965 			factor_polynomial(bch, k, poly, &f1, &f2);
966 			if (f1)
967 				cnt += find_poly_roots(bch, k+1, f1, roots);
968 			if (f2)
969 				cnt += find_poly_roots(bch, k+1, f2, roots+cnt);
970 		}
971 		break;
972 	}
973 	return cnt;
974 }
975 
976 #if defined(USE_CHIEN_SEARCH)
977 /*
978  * exhaustive root search (Chien) implementation - not used, included only for
979  * reference/comparison tests
980  */
981 static int chien_search(struct bch_control *bch, unsigned int len,
982 			struct gf_poly *p, unsigned int *roots)
983 {
984 	int m;
985 	unsigned int i, j, syn, syn0, count = 0;
986 	const unsigned int k = 8*len+bch->ecc_bits;
987 
988 	/* use a log-based representation of polynomial */
989 	gf_poly_logrep(bch, p, bch->cache);
990 	bch->cache[p->deg] = 0;
991 	syn0 = gf_div(bch, p->c[0], p->c[p->deg]);
992 
993 	for (i = GF_N(bch)-k+1; i <= GF_N(bch); i++) {
994 		/* compute elp(a^i) */
995 		for (j = 1, syn = syn0; j <= p->deg; j++) {
996 			m = bch->cache[j];
997 			if (m >= 0)
998 				syn ^= a_pow(bch, m+j*i);
999 		}
1000 		if (syn == 0) {
1001 			roots[count++] = GF_N(bch)-i;
1002 			if (count == p->deg)
1003 				break;
1004 		}
1005 	}
1006 	return (count == p->deg) ? count : 0;
1007 }
1008 #define find_poly_roots(_p, _k, _elp, _loc) chien_search(_p, len, _elp, _loc)
1009 #endif /* USE_CHIEN_SEARCH */
1010 
1011 /**
1012  * bch_decode - decode received codeword and find bit error locations
1013  * @bch:      BCH control structure
1014  * @data:     received data, ignored if @calc_ecc is provided
1015  * @len:      data length in bytes, must always be provided
1016  * @recv_ecc: received ecc, if NULL then assume it was XORed in @calc_ecc
1017  * @calc_ecc: calculated ecc, if NULL then calc_ecc is computed from @data
1018  * @syn:      hw computed syndrome data (if NULL, syndrome is calculated)
1019  * @errloc:   output array of error locations
1020  *
1021  * Returns:
1022  *  The number of errors found, or -EBADMSG if decoding failed, or -EINVAL if
1023  *  invalid parameters were provided
1024  *
1025  * Depending on the available hw BCH support and the need to compute @calc_ecc
1026  * separately (using bch_encode()), this function should be called with one of
1027  * the following parameter configurations -
1028  *
1029  * by providing @data and @recv_ecc only:
1030  *   bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc)
1031  *
1032  * by providing @recv_ecc and @calc_ecc:
1033  *   bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc)
1034  *
1035  * by providing ecc = recv_ecc XOR calc_ecc:
1036  *   bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc)
1037  *
1038  * by providing syndrome results @syn:
1039  *   bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc)
1040  *
1041  * Once bch_decode() has successfully returned with a positive value, error
1042  * locations returned in array @errloc should be interpreted as follows -
1043  *
1044  * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for
1045  * data correction)
1046  *
1047  * if (errloc[n] < 8*len), then n-th error is located in data and can be
1048  * corrected with statement data[errloc[n]/8] ^= 1 << (errloc[n] % 8);
1049  *
1050  * Note that this function does not perform any data correction by itself, it
1051  * merely indicates error locations.
1052  */
1053 int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len,
1054 	       const uint8_t *recv_ecc, const uint8_t *calc_ecc,
1055 	       const unsigned int *syn, unsigned int *errloc)
1056 {
1057 	const unsigned int ecc_words = BCH_ECC_WORDS(bch);
1058 	unsigned int nbits;
1059 	int i, err, nroots;
1060 	uint32_t sum;
1061 
1062 	/* sanity check: make sure data length can be handled */
1063 	if (8*len > (bch->n-bch->ecc_bits))
1064 		return -EINVAL;
1065 
1066 	/* if caller does not provide syndromes, compute them */
1067 	if (!syn) {
1068 		if (!calc_ecc) {
1069 			/* compute received data ecc into an internal buffer */
1070 			if (!data || !recv_ecc)
1071 				return -EINVAL;
1072 			bch_encode(bch, data, len, NULL);
1073 		} else {
1074 			/* load provided calculated ecc */
1075 			load_ecc8(bch, bch->ecc_buf, calc_ecc);
1076 		}
1077 		/* load received ecc or assume it was XORed in calc_ecc */
1078 		if (recv_ecc) {
1079 			load_ecc8(bch, bch->ecc_buf2, recv_ecc);
1080 			/* XOR received and calculated ecc */
1081 			for (i = 0, sum = 0; i < (int)ecc_words; i++) {
1082 				bch->ecc_buf[i] ^= bch->ecc_buf2[i];
1083 				sum |= bch->ecc_buf[i];
1084 			}
1085 			if (!sum)
1086 				/* no error found */
1087 				return 0;
1088 		}
1089 		compute_syndromes(bch, bch->ecc_buf, bch->syn);
1090 		syn = bch->syn;
1091 	}
1092 
1093 	err = compute_error_locator_polynomial(bch, syn);
1094 	if (err > 0) {
1095 		nroots = find_poly_roots(bch, 1, bch->elp, errloc);
1096 		if (err != nroots)
1097 			err = -1;
1098 	}
1099 	if (err > 0) {
1100 		/* post-process raw error locations for easier correction */
1101 		nbits = (len*8)+bch->ecc_bits;
1102 		for (i = 0; i < err; i++) {
1103 			if (errloc[i] >= nbits) {
1104 				err = -1;
1105 				break;
1106 			}
1107 			errloc[i] = nbits-1-errloc[i];
1108 			if (!bch->swap_bits)
1109 				errloc[i] = (errloc[i] & ~7) |
1110 					    (7-(errloc[i] & 7));
1111 		}
1112 	}
1113 	return (err >= 0) ? err : -EBADMSG;
1114 }
1115 EXPORT_SYMBOL_GPL(bch_decode);
1116 
1117 /*
1118  * generate Galois field lookup tables
1119  */
1120 static int build_gf_tables(struct bch_control *bch, unsigned int poly)
1121 {
1122 	unsigned int i, x = 1;
1123 	const unsigned int k = 1 << deg(poly);
1124 
1125 	/* primitive polynomial must be of degree m */
1126 	if (k != (1u << GF_M(bch)))
1127 		return -1;
1128 
1129 	for (i = 0; i < GF_N(bch); i++) {
1130 		bch->a_pow_tab[i] = x;
1131 		bch->a_log_tab[x] = i;
1132 		if (i && (x == 1))
1133 			/* polynomial is not primitive (a^i=1 with 0<i<2^m-1) */
1134 			return -1;
1135 		x <<= 1;
1136 		if (x & k)
1137 			x ^= poly;
1138 	}
1139 	bch->a_pow_tab[GF_N(bch)] = 1;
1140 	bch->a_log_tab[0] = 0;
1141 
1142 	return 0;
1143 }
1144 
1145 /*
1146  * compute generator polynomial remainder tables for fast encoding
1147  */
1148 static void build_mod8_tables(struct bch_control *bch, const uint32_t *g)
1149 {
1150 	int i, j, b, d;
1151 	uint32_t data, hi, lo, *tab;
1152 	const int l = BCH_ECC_WORDS(bch);
1153 	const int plen = DIV_ROUND_UP(bch->ecc_bits+1, 32);
1154 	const int ecclen = DIV_ROUND_UP(bch->ecc_bits, 32);
1155 
1156 	memset(bch->mod8_tab, 0, 4*256*l*sizeof(*bch->mod8_tab));
1157 
1158 	for (i = 0; i < 256; i++) {
1159 		/* p(X)=i is a small polynomial of weight <= 8 */
1160 		for (b = 0; b < 4; b++) {
1161 			/* we want to compute (p(X).X^(8*b+deg(g))) mod g(X) */
1162 			tab = bch->mod8_tab + (b*256+i)*l;
1163 			data = i << (8*b);
1164 			while (data) {
1165 				d = deg(data);
1166 				/* subtract X^d.g(X) from p(X).X^(8*b+deg(g)) */
1167 				data ^= g[0] >> (31-d);
1168 				for (j = 0; j < ecclen; j++) {
1169 					hi = (d < 31) ? g[j] << (d+1) : 0;
1170 					lo = (j+1 < plen) ?
1171 						g[j+1] >> (31-d) : 0;
1172 					tab[j] ^= hi|lo;
1173 				}
1174 			}
1175 		}
1176 	}
1177 }
1178 
1179 /*
1180  * build a base for factoring degree 2 polynomials
1181  */
1182 static int build_deg2_base(struct bch_control *bch)
1183 {
1184 	const int m = GF_M(bch);
1185 	int i, j, r;
1186 	unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M];
1187 
1188 	/* find k s.t. Tr(a^k) = 1 and 0 <= k < m */
1189 	for (i = 0; i < m; i++) {
1190 		for (j = 0, sum = 0; j < m; j++)
1191 			sum ^= a_pow(bch, i*(1 << j));
1192 
1193 		if (sum) {
1194 			ak = bch->a_pow_tab[i];
1195 			break;
1196 		}
1197 	}
1198 	/* find xi, i=0..m-1 such that xi^2+xi = a^i+Tr(a^i).a^k */
1199 	remaining = m;
1200 	memset(xi, 0, sizeof(xi));
1201 
1202 	for (x = 0; (x <= GF_N(bch)) && remaining; x++) {
1203 		y = gf_sqr(bch, x)^x;
1204 		for (i = 0; i < 2; i++) {
1205 			r = a_log(bch, y);
1206 			if (y && (r < m) && !xi[r]) {
1207 				bch->xi_tab[r] = x;
1208 				xi[r] = 1;
1209 				remaining--;
1210 				dbg("x%d = %x\n", r, x);
1211 				break;
1212 			}
1213 			y ^= ak;
1214 		}
1215 	}
1216 	/* should not happen but check anyway */
1217 	return remaining ? -1 : 0;
1218 }
1219 
1220 static void *bch_alloc(size_t size, int *err)
1221 {
1222 	void *ptr;
1223 
1224 	ptr = kmalloc(size, GFP_KERNEL);
1225 	if (ptr == NULL)
1226 		*err = 1;
1227 	return ptr;
1228 }
1229 
1230 /*
1231  * compute generator polynomial for given (m,t) parameters.
1232  */
1233 static uint32_t *compute_generator_polynomial(struct bch_control *bch)
1234 {
1235 	const unsigned int m = GF_M(bch);
1236 	const unsigned int t = GF_T(bch);
1237 	int n, err = 0;
1238 	unsigned int i, j, nbits, r, word, *roots;
1239 	struct gf_poly *g;
1240 	uint32_t *genpoly;
1241 
1242 	g = bch_alloc(GF_POLY_SZ(m*t), &err);
1243 	roots = bch_alloc((bch->n+1)*sizeof(*roots), &err);
1244 	genpoly = bch_alloc(DIV_ROUND_UP(m*t+1, 32)*sizeof(*genpoly), &err);
1245 
1246 	if (err) {
1247 		kfree(genpoly);
1248 		genpoly = NULL;
1249 		goto finish;
1250 	}
1251 
1252 	/* enumerate all roots of g(X) */
1253 	memset(roots , 0, (bch->n+1)*sizeof(*roots));
1254 	for (i = 0; i < t; i++) {
1255 		for (j = 0, r = 2*i+1; j < m; j++) {
1256 			roots[r] = 1;
1257 			r = mod_s(bch, 2*r);
1258 		}
1259 	}
1260 	/* build generator polynomial g(X) */
1261 	g->deg = 0;
1262 	g->c[0] = 1;
1263 	for (i = 0; i < GF_N(bch); i++) {
1264 		if (roots[i]) {
1265 			/* multiply g(X) by (X+root) */
1266 			r = bch->a_pow_tab[i];
1267 			g->c[g->deg+1] = 1;
1268 			for (j = g->deg; j > 0; j--)
1269 				g->c[j] = gf_mul(bch, g->c[j], r)^g->c[j-1];
1270 
1271 			g->c[0] = gf_mul(bch, g->c[0], r);
1272 			g->deg++;
1273 		}
1274 	}
1275 	/* store left-justified binary representation of g(X) */
1276 	n = g->deg+1;
1277 	i = 0;
1278 
1279 	while (n > 0) {
1280 		nbits = (n > 32) ? 32 : n;
1281 		for (j = 0, word = 0; j < nbits; j++) {
1282 			if (g->c[n-1-j])
1283 				word |= 1u << (31-j);
1284 		}
1285 		genpoly[i++] = word;
1286 		n -= nbits;
1287 	}
1288 	bch->ecc_bits = g->deg;
1289 
1290 finish:
1291 	kfree(g);
1292 	kfree(roots);
1293 
1294 	return genpoly;
1295 }
1296 
1297 /**
1298  * bch_init - initialize a BCH encoder/decoder
1299  * @m:          Galois field order, should be in the range 5-15
1300  * @t:          maximum error correction capability, in bits
1301  * @prim_poly:  user-provided primitive polynomial (or 0 to use default)
1302  * @swap_bits:  swap bits within data and syndrome bytes
1303  *
1304  * Returns:
1305  *  a newly allocated BCH control structure if successful, NULL otherwise
1306  *
1307  * This initialization can take some time, as lookup tables are built for fast
1308  * encoding/decoding; make sure not to call this function from a time critical
1309  * path. Usually, bch_init() should be called on module/driver init and
1310  * bch_free() should be called to release memory on exit.
1311  *
1312  * You may provide your own primitive polynomial of degree @m in argument
1313  * @prim_poly, or let bch_init() use its default polynomial.
1314  *
1315  * Once bch_init() has successfully returned a pointer to a newly allocated
1316  * BCH control structure, ecc length in bytes is given by member @ecc_bytes of
1317  * the structure.
1318  */
1319 struct bch_control *bch_init(int m, int t, unsigned int prim_poly,
1320 			     bool swap_bits)
1321 {
1322 	int err = 0;
1323 	unsigned int i, words;
1324 	uint32_t *genpoly;
1325 	struct bch_control *bch = NULL;
1326 
1327 	const int min_m = 5;
1328 
1329 	/* default primitive polynomials */
1330 	static const unsigned int prim_poly_tab[] = {
1331 		0x25, 0x43, 0x83, 0x11d, 0x211, 0x409, 0x805, 0x1053, 0x201b,
1332 		0x402b, 0x8003,
1333 	};
1334 
1335 #if defined(CONFIG_BCH_CONST_PARAMS)
1336 	if ((m != (CONFIG_BCH_CONST_M)) || (t != (CONFIG_BCH_CONST_T))) {
1337 		printk(KERN_ERR "bch encoder/decoder was configured to support "
1338 		       "parameters m=%d, t=%d only!\n",
1339 		       CONFIG_BCH_CONST_M, CONFIG_BCH_CONST_T);
1340 		goto fail;
1341 	}
1342 #endif
1343 	if ((m < min_m) || (m > BCH_MAX_M))
1344 		/*
1345 		 * values of m greater than 15 are not currently supported;
1346 		 * supporting m > 15 would require changing table base type
1347 		 * (uint16_t) and a small patch in matrix transposition
1348 		 */
1349 		goto fail;
1350 
1351 	if (t > BCH_MAX_T)
1352 		/*
1353 		 * we can support larger than 64 bits if necessary, at the
1354 		 * cost of higher stack usage.
1355 		 */
1356 		goto fail;
1357 
1358 	/* sanity checks */
1359 	if ((t < 1) || (m*t >= ((1 << m)-1)))
1360 		/* invalid t value */
1361 		goto fail;
1362 
1363 	/* select a primitive polynomial for generating GF(2^m) */
1364 	if (prim_poly == 0)
1365 		prim_poly = prim_poly_tab[m-min_m];
1366 
1367 	bch = kzalloc(sizeof(*bch), GFP_KERNEL);
1368 	if (bch == NULL)
1369 		goto fail;
1370 
1371 	bch->m = m;
1372 	bch->t = t;
1373 	bch->n = (1 << m)-1;
1374 	words  = DIV_ROUND_UP(m*t, 32);
1375 	bch->ecc_bytes = DIV_ROUND_UP(m*t, 8);
1376 	bch->a_pow_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_pow_tab), &err);
1377 	bch->a_log_tab = bch_alloc((1+bch->n)*sizeof(*bch->a_log_tab), &err);
1378 	bch->mod8_tab  = bch_alloc(words*1024*sizeof(*bch->mod8_tab), &err);
1379 	bch->ecc_buf   = bch_alloc(words*sizeof(*bch->ecc_buf), &err);
1380 	bch->ecc_buf2  = bch_alloc(words*sizeof(*bch->ecc_buf2), &err);
1381 	bch->xi_tab    = bch_alloc(m*sizeof(*bch->xi_tab), &err);
1382 	bch->syn       = bch_alloc(2*t*sizeof(*bch->syn), &err);
1383 	bch->cache     = bch_alloc(2*t*sizeof(*bch->cache), &err);
1384 	bch->elp       = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err);
1385 	bch->swap_bits = swap_bits;
1386 
1387 	for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1388 		bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err);
1389 
1390 	if (err)
1391 		goto fail;
1392 
1393 	err = build_gf_tables(bch, prim_poly);
1394 	if (err)
1395 		goto fail;
1396 
1397 	/* use generator polynomial for computing encoding tables */
1398 	genpoly = compute_generator_polynomial(bch);
1399 	if (genpoly == NULL)
1400 		goto fail;
1401 
1402 	build_mod8_tables(bch, genpoly);
1403 	kfree(genpoly);
1404 
1405 	err = build_deg2_base(bch);
1406 	if (err)
1407 		goto fail;
1408 
1409 	return bch;
1410 
1411 fail:
1412 	bch_free(bch);
1413 	return NULL;
1414 }
1415 EXPORT_SYMBOL_GPL(bch_init);
1416 
1417 /**
1418  *  bch_free - free the BCH control structure
1419  *  @bch:    BCH control structure to release
1420  */
1421 void bch_free(struct bch_control *bch)
1422 {
1423 	unsigned int i;
1424 
1425 	if (bch) {
1426 		kfree(bch->a_pow_tab);
1427 		kfree(bch->a_log_tab);
1428 		kfree(bch->mod8_tab);
1429 		kfree(bch->ecc_buf);
1430 		kfree(bch->ecc_buf2);
1431 		kfree(bch->xi_tab);
1432 		kfree(bch->syn);
1433 		kfree(bch->cache);
1434 		kfree(bch->elp);
1435 
1436 		for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++)
1437 			kfree(bch->poly_2t[i]);
1438 
1439 		kfree(bch);
1440 	}
1441 }
1442 EXPORT_SYMBOL_GPL(bch_free);
1443 
1444 MODULE_LICENSE("GPL");
1445 MODULE_AUTHOR("Ivan Djelic <ivan.djelic@parrot.com>");
1446 MODULE_DESCRIPTION("Binary BCH encoder/decoder");
1447