xref: /openbmc/linux/drivers/mtd/nand/ecc-sw-hamming.c (revision f7af616c632ee2ac3af0876fe33bf9e0232e665a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * This file contains an ECC algorithm that detects and corrects 1 bit
4  * errors in a 256 byte block of data.
5  *
6  * Copyright © 2008 Koninklijke Philips Electronics NV.
7  *                  Author: Frans Meulenbroeks
8  *
9  * Completely replaces the previous ECC implementation which was written by:
10  *   Steven J. Hill (sjhill@realitydiluted.com)
11  *   Thomas Gleixner (tglx@linutronix.de)
12  *
13  * Information on how this algorithm works and how it was developed
14  * can be found in Documentation/driver-api/mtd/nand_ecc.rst
15  */
16 
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/mtd/nand.h>
21 #include <linux/mtd/nand-ecc-sw-hamming.h>
22 #include <linux/slab.h>
23 #include <asm/byteorder.h>
24 
/*
 * invparity[v] holds the inverted parity of the byte value v: it is 1 when
 * v has an even number of bits set and 0 when that number is odd.
 *
 * The table is generated from its recursive structure rather than spelled
 * out: every additional bit set in the index flips the parity.
 */
#define INVPARITY_2(p)	(p), (p) ^ 1, (p) ^ 1, (p)
#define INVPARITY_4(p)	INVPARITY_2(p), INVPARITY_2((p) ^ 1), \
			INVPARITY_2((p) ^ 1), INVPARITY_2(p)
#define INVPARITY_6(p)	INVPARITY_4(p), INVPARITY_4((p) ^ 1), \
			INVPARITY_4((p) ^ 1), INVPARITY_4(p)
static const char invparity[256] = {
	INVPARITY_6(1), INVPARITY_6(0), INVPARITY_6(0), INVPARITY_6(1)
};
#undef INVPARITY_6
#undef INVPARITY_4
#undef INVPARITY_2
49 
/*
 * bitsperbyte[v] is the number of bits set in the byte value v.
 * It is only used while testing and repairing parity; a precalculated
 * table is slightly faster than counting the bits each time.
 *
 * The entries are generated: each extra bit set in the index adds one.
 */
#define BITCOUNT_2(n)	(n), (n) + 1, (n) + 1, (n) + 2
#define BITCOUNT_4(n)	BITCOUNT_2(n), BITCOUNT_2((n) + 1), \
			BITCOUNT_2((n) + 1), BITCOUNT_2((n) + 2)
#define BITCOUNT_6(n)	BITCOUNT_4(n), BITCOUNT_4((n) + 1), \
			BITCOUNT_4((n) + 1), BITCOUNT_4((n) + 2)
static const char bitsperbyte[256] = {
	BITCOUNT_6(0), BITCOUNT_6(1), BITCOUNT_6(1), BITCOUNT_6(2)
};
#undef BITCOUNT_6
#undef BITCOUNT_4
#undef BITCOUNT_2
73 
/*
 * addressbits[v] gathers the odd-numbered bits of v (bits 1, 3, 5 and 7)
 * into a 4-bit value: bit 1 becomes bit 0, bit 3 becomes bit 1, bit 5
 * becomes bit 2 and bit 7 becomes bit 3.
 *
 * It filters, out of the xor-ed ECC data, the bits that identify the
 * faulty location, and is only used when repairing an error; see the
 * comments in ecc_sw_hamming_correct() for more details.
 */
#define ADDRBITS_2(a)	(a), (a), (a) + 1, (a) + 1
#define ADDRBITS_4(a)	ADDRBITS_2(a), ADDRBITS_2(a), \
			ADDRBITS_2((a) + 2), ADDRBITS_2((a) + 2)
#define ADDRBITS_6(a)	ADDRBITS_4(a), ADDRBITS_4(a), \
			ADDRBITS_4((a) + 4), ADDRBITS_4((a) + 4)
static const char addressbits[256] = {
	ADDRBITS_6(0), ADDRBITS_6(0), ADDRBITS_6(8), ADDRBITS_6(8)
};
#undef ADDRBITS_6
#undef ADDRBITS_4
#undef ADDRBITS_2
114 
115 int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
116 			     unsigned char *code, bool sm_order)
117 {
118 	const u32 *bp = (uint32_t *)buf;
119 	const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
120 	/* current value in buffer */
121 	u32 cur;
122 	/* rp0..rp17 are the various accumulated parities (per byte) */
123 	u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
124 		rp13, rp14, rp15, rp16, rp17;
125 	/* Cumulative parity for all data */
126 	u32 par;
127 	/* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
128 	u32 tmppar;
129 	int i;
130 
131 	par = 0;
132 	rp4 = 0;
133 	rp6 = 0;
134 	rp8 = 0;
135 	rp10 = 0;
136 	rp12 = 0;
137 	rp14 = 0;
138 	rp16 = 0;
139 	rp17 = 0;
140 
141 	/*
142 	 * The loop is unrolled a number of times;
143 	 * This avoids if statements to decide on which rp value to update
144 	 * Also we process the data by longwords.
145 	 * Note: passing unaligned data might give a performance penalty.
146 	 * It is assumed that the buffers are aligned.
147 	 * tmppar is the cumulative sum of this iteration.
148 	 * needed for calculating rp12, rp14, rp16 and par
149 	 * also used as a performance improvement for rp6, rp8 and rp10
150 	 */
151 	for (i = 0; i < eccsize_mult << 2; i++) {
152 		cur = *bp++;
153 		tmppar = cur;
154 		rp4 ^= cur;
155 		cur = *bp++;
156 		tmppar ^= cur;
157 		rp6 ^= tmppar;
158 		cur = *bp++;
159 		tmppar ^= cur;
160 		rp4 ^= cur;
161 		cur = *bp++;
162 		tmppar ^= cur;
163 		rp8 ^= tmppar;
164 
165 		cur = *bp++;
166 		tmppar ^= cur;
167 		rp4 ^= cur;
168 		rp6 ^= cur;
169 		cur = *bp++;
170 		tmppar ^= cur;
171 		rp6 ^= cur;
172 		cur = *bp++;
173 		tmppar ^= cur;
174 		rp4 ^= cur;
175 		cur = *bp++;
176 		tmppar ^= cur;
177 		rp10 ^= tmppar;
178 
179 		cur = *bp++;
180 		tmppar ^= cur;
181 		rp4 ^= cur;
182 		rp6 ^= cur;
183 		rp8 ^= cur;
184 		cur = *bp++;
185 		tmppar ^= cur;
186 		rp6 ^= cur;
187 		rp8 ^= cur;
188 		cur = *bp++;
189 		tmppar ^= cur;
190 		rp4 ^= cur;
191 		rp8 ^= cur;
192 		cur = *bp++;
193 		tmppar ^= cur;
194 		rp8 ^= cur;
195 
196 		cur = *bp++;
197 		tmppar ^= cur;
198 		rp4 ^= cur;
199 		rp6 ^= cur;
200 		cur = *bp++;
201 		tmppar ^= cur;
202 		rp6 ^= cur;
203 		cur = *bp++;
204 		tmppar ^= cur;
205 		rp4 ^= cur;
206 		cur = *bp++;
207 		tmppar ^= cur;
208 
209 		par ^= tmppar;
210 		if ((i & 0x1) == 0)
211 			rp12 ^= tmppar;
212 		if ((i & 0x2) == 0)
213 			rp14 ^= tmppar;
214 		if (eccsize_mult == 2 && (i & 0x4) == 0)
215 			rp16 ^= tmppar;
216 	}
217 
218 	/*
219 	 * handle the fact that we use longword operations
220 	 * we'll bring rp4..rp14..rp16 back to single byte entities by
221 	 * shifting and xoring first fold the upper and lower 16 bits,
222 	 * then the upper and lower 8 bits.
223 	 */
224 	rp4 ^= (rp4 >> 16);
225 	rp4 ^= (rp4 >> 8);
226 	rp4 &= 0xff;
227 	rp6 ^= (rp6 >> 16);
228 	rp6 ^= (rp6 >> 8);
229 	rp6 &= 0xff;
230 	rp8 ^= (rp8 >> 16);
231 	rp8 ^= (rp8 >> 8);
232 	rp8 &= 0xff;
233 	rp10 ^= (rp10 >> 16);
234 	rp10 ^= (rp10 >> 8);
235 	rp10 &= 0xff;
236 	rp12 ^= (rp12 >> 16);
237 	rp12 ^= (rp12 >> 8);
238 	rp12 &= 0xff;
239 	rp14 ^= (rp14 >> 16);
240 	rp14 ^= (rp14 >> 8);
241 	rp14 &= 0xff;
242 	if (eccsize_mult == 2) {
243 		rp16 ^= (rp16 >> 16);
244 		rp16 ^= (rp16 >> 8);
245 		rp16 &= 0xff;
246 	}
247 
248 	/*
249 	 * we also need to calculate the row parity for rp0..rp3
250 	 * This is present in par, because par is now
251 	 * rp3 rp3 rp2 rp2 in little endian and
252 	 * rp2 rp2 rp3 rp3 in big endian
253 	 * as well as
254 	 * rp1 rp0 rp1 rp0 in little endian and
255 	 * rp0 rp1 rp0 rp1 in big endian
256 	 * First calculate rp2 and rp3
257 	 */
258 #ifdef __BIG_ENDIAN
259 	rp2 = (par >> 16);
260 	rp2 ^= (rp2 >> 8);
261 	rp2 &= 0xff;
262 	rp3 = par & 0xffff;
263 	rp3 ^= (rp3 >> 8);
264 	rp3 &= 0xff;
265 #else
266 	rp3 = (par >> 16);
267 	rp3 ^= (rp3 >> 8);
268 	rp3 &= 0xff;
269 	rp2 = par & 0xffff;
270 	rp2 ^= (rp2 >> 8);
271 	rp2 &= 0xff;
272 #endif
273 
274 	/* reduce par to 16 bits then calculate rp1 and rp0 */
275 	par ^= (par >> 16);
276 #ifdef __BIG_ENDIAN
277 	rp0 = (par >> 8) & 0xff;
278 	rp1 = (par & 0xff);
279 #else
280 	rp1 = (par >> 8) & 0xff;
281 	rp0 = (par & 0xff);
282 #endif
283 
284 	/* finally reduce par to 8 bits */
285 	par ^= (par >> 8);
286 	par &= 0xff;
287 
288 	/*
289 	 * and calculate rp5..rp15..rp17
290 	 * note that par = rp4 ^ rp5 and due to the commutative property
291 	 * of the ^ operator we can say:
292 	 * rp5 = (par ^ rp4);
293 	 * The & 0xff seems superfluous, but benchmarking learned that
294 	 * leaving it out gives slightly worse results. No idea why, probably
295 	 * it has to do with the way the pipeline in pentium is organized.
296 	 */
297 	rp5 = (par ^ rp4) & 0xff;
298 	rp7 = (par ^ rp6) & 0xff;
299 	rp9 = (par ^ rp8) & 0xff;
300 	rp11 = (par ^ rp10) & 0xff;
301 	rp13 = (par ^ rp12) & 0xff;
302 	rp15 = (par ^ rp14) & 0xff;
303 	if (eccsize_mult == 2)
304 		rp17 = (par ^ rp16) & 0xff;
305 
306 	/*
307 	 * Finally calculate the ECC bits.
308 	 * Again here it might seem that there are performance optimisations
309 	 * possible, but benchmarks showed that on the system this is developed
310 	 * the code below is the fastest
311 	 */
312 	if (sm_order) {
313 		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
314 			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
315 			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
316 			  (invparity[rp1] << 1) | (invparity[rp0]);
317 		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
318 			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
319 			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
320 			  (invparity[rp9] << 1) | (invparity[rp8]);
321 	} else {
322 		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
323 			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
324 			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
325 			  (invparity[rp1] << 1) | (invparity[rp0]);
326 		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
327 			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
328 			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
329 			  (invparity[rp9] << 1) | (invparity[rp8]);
330 	}
331 
332 	if (eccsize_mult == 1)
333 		code[2] =
334 		    (invparity[par & 0xf0] << 7) |
335 		    (invparity[par & 0x0f] << 6) |
336 		    (invparity[par & 0xcc] << 5) |
337 		    (invparity[par & 0x33] << 4) |
338 		    (invparity[par & 0xaa] << 3) |
339 		    (invparity[par & 0x55] << 2) |
340 		    3;
341 	else
342 		code[2] =
343 		    (invparity[par & 0xf0] << 7) |
344 		    (invparity[par & 0x0f] << 6) |
345 		    (invparity[par & 0xcc] << 5) |
346 		    (invparity[par & 0x33] << 4) |
347 		    (invparity[par & 0xaa] << 3) |
348 		    (invparity[par & 0x55] << 2) |
349 		    (invparity[rp17] << 1) |
350 		    (invparity[rp16] << 0);
351 
352 	return 0;
353 }
354 EXPORT_SYMBOL(ecc_sw_hamming_calculate);
355 
356 /**
357  * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
358  * @nand: NAND device
359  * @buf: Input buffer with raw data
360  * @code: Output buffer with ECC
361  */
362 int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
363 				  const unsigned char *buf, unsigned char *code)
364 {
365 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
366 	unsigned int step_size = nand->ecc.ctx.conf.step_size;
367 
368 	return ecc_sw_hamming_calculate(buf, step_size, code,
369 					engine_conf->sm_order);
370 }
371 EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
372 
373 int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
374 			   unsigned char *calc_ecc, unsigned int step_size,
375 			   bool sm_order)
376 {
377 	const u32 eccsize_mult = step_size >> 8;
378 	unsigned char b0, b1, b2, bit_addr;
379 	unsigned int byte_addr;
380 
381 	/*
382 	 * b0 to b2 indicate which bit is faulty (if any)
383 	 * we might need the xor result  more than once,
384 	 * so keep them in a local var
385 	*/
386 	if (sm_order) {
387 		b0 = read_ecc[0] ^ calc_ecc[0];
388 		b1 = read_ecc[1] ^ calc_ecc[1];
389 	} else {
390 		b0 = read_ecc[1] ^ calc_ecc[1];
391 		b1 = read_ecc[0] ^ calc_ecc[0];
392 	}
393 
394 	b2 = read_ecc[2] ^ calc_ecc[2];
395 
396 	/* check if there are any bitfaults */
397 
398 	/* repeated if statements are slightly more efficient than switch ... */
399 	/* ordered in order of likelihood */
400 
401 	if ((b0 | b1 | b2) == 0)
402 		return 0;	/* no error */
403 
404 	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
405 	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
406 	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
407 	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
408 	/* single bit error */
409 		/*
410 		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
411 		 * byte, cp 5/3/1 indicate the faulty bit.
412 		 * A lookup table (called addressbits) is used to filter
413 		 * the bits from the byte they are in.
414 		 * A marginal optimisation is possible by having three
415 		 * different lookup tables.
416 		 * One as we have now (for b0), one for b2
417 		 * (that would avoid the >> 1), and one for b1 (with all values
418 		 * << 4). However it was felt that introducing two more tables
419 		 * hardly justify the gain.
420 		 *
421 		 * The b2 shift is there to get rid of the lowest two bits.
422 		 * We could also do addressbits[b2] >> 1 but for the
423 		 * performance it does not make any difference
424 		 */
425 		if (eccsize_mult == 1)
426 			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
427 		else
428 			byte_addr = (addressbits[b2 & 0x3] << 8) +
429 				    (addressbits[b1] << 4) + addressbits[b0];
430 		bit_addr = addressbits[b2 >> 2];
431 		/* flip the bit */
432 		buf[byte_addr] ^= (1 << bit_addr);
433 		return 1;
434 
435 	}
436 	/* count nr of bits; use table lookup, faster than calculating it */
437 	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
438 		return 1;	/* error in ECC data; no action needed */
439 
440 	pr_err("%s: uncorrectable ECC error\n", __func__);
441 	return -EBADMSG;
442 }
443 EXPORT_SYMBOL(ecc_sw_hamming_correct);
444 
445 /**
446  * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
447  * @nand: NAND device
448  * @buf: Raw data read from the chip
449  * @read_ecc: ECC bytes read from the chip
450  * @calc_ecc: ECC calculated from the raw data
451  *
452  * Detect and correct up to 1 bit error per 256/512-byte block.
453  */
454 int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
455 				unsigned char *read_ecc,
456 				unsigned char *calc_ecc)
457 {
458 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
459 	unsigned int step_size = nand->ecc.ctx.conf.step_size;
460 
461 	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
462 				      engine_conf->sm_order);
463 }
464 EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
465 
466 int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
467 {
468 	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
469 	struct nand_ecc_sw_hamming_conf *engine_conf;
470 	struct mtd_info *mtd = nanddev_to_mtd(nand);
471 	int ret;
472 
473 	if (!mtd->ooblayout) {
474 		switch (mtd->oobsize) {
475 		case 8:
476 		case 16:
477 			mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
478 			break;
479 		case 64:
480 		case 128:
481 			mtd_set_ooblayout(mtd,
482 					  nand_get_large_page_hamming_ooblayout());
483 			break;
484 		default:
485 			return -ENOTSUPP;
486 		}
487 	}
488 
489 	conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
490 	conf->algo = NAND_ECC_ALGO_HAMMING;
491 	conf->step_size = nand->ecc.user_conf.step_size;
492 	conf->strength = 1;
493 
494 	/* Use the strongest configuration by default */
495 	if (conf->step_size != 256 && conf->step_size != 512)
496 		conf->step_size = 256;
497 
498 	engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
499 	if (!engine_conf)
500 		return -ENOMEM;
501 
502 	ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
503 	if (ret)
504 		goto free_engine_conf;
505 
506 	engine_conf->code_size = 3;
507 	engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
508 	engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
509 	if (!engine_conf->calc_buf || !engine_conf->code_buf) {
510 		ret = -ENOMEM;
511 		goto free_bufs;
512 	}
513 
514 	nand->ecc.ctx.priv = engine_conf;
515 	nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
516 	nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
517 
518 	return 0;
519 
520 free_bufs:
521 	nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
522 	kfree(engine_conf->calc_buf);
523 	kfree(engine_conf->code_buf);
524 free_engine_conf:
525 	kfree(engine_conf);
526 
527 	return ret;
528 }
529 EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
530 
531 void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
532 {
533 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
534 
535 	if (engine_conf) {
536 		nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
537 		kfree(engine_conf->calc_buf);
538 		kfree(engine_conf->code_buf);
539 		kfree(engine_conf);
540 	}
541 }
542 EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
543 
544 static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
545 					      struct nand_page_io_req *req)
546 {
547 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
548 	struct mtd_info *mtd = nanddev_to_mtd(nand);
549 	int eccsize = nand->ecc.ctx.conf.step_size;
550 	int eccbytes = engine_conf->code_size;
551 	int eccsteps = nand->ecc.ctx.nsteps;
552 	int total = nand->ecc.ctx.total;
553 	u8 *ecccalc = engine_conf->calc_buf;
554 	const u8 *data;
555 	int i;
556 
557 	/* Nothing to do for a raw operation */
558 	if (req->mode == MTD_OPS_RAW)
559 		return 0;
560 
561 	/* This engine does not provide BBM/free OOB bytes protection */
562 	if (!req->datalen)
563 		return 0;
564 
565 	nand_ecc_tweak_req(&engine_conf->req_ctx, req);
566 
567 	/* No more preparation for page read */
568 	if (req->type == NAND_PAGE_READ)
569 		return 0;
570 
571 	/* Preparation for page write: derive the ECC bytes and place them */
572 	for (i = 0, data = req->databuf.out;
573 	     eccsteps;
574 	     eccsteps--, i += eccbytes, data += eccsize)
575 		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
576 
577 	return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
578 					  0, total);
579 }
580 
581 static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
582 					     struct nand_page_io_req *req)
583 {
584 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
585 	struct mtd_info *mtd = nanddev_to_mtd(nand);
586 	int eccsize = nand->ecc.ctx.conf.step_size;
587 	int total = nand->ecc.ctx.total;
588 	int eccbytes = engine_conf->code_size;
589 	int eccsteps = nand->ecc.ctx.nsteps;
590 	u8 *ecccalc = engine_conf->calc_buf;
591 	u8 *ecccode = engine_conf->code_buf;
592 	unsigned int max_bitflips = 0;
593 	u8 *data = req->databuf.in;
594 	int i, ret;
595 
596 	/* Nothing to do for a raw operation */
597 	if (req->mode == MTD_OPS_RAW)
598 		return 0;
599 
600 	/* This engine does not provide BBM/free OOB bytes protection */
601 	if (!req->datalen)
602 		return 0;
603 
604 	/* No more preparation for page write */
605 	if (req->type == NAND_PAGE_WRITE) {
606 		nand_ecc_restore_req(&engine_conf->req_ctx, req);
607 		return 0;
608 	}
609 
610 	/* Finish a page read: retrieve the (raw) ECC bytes*/
611 	ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
612 					 total);
613 	if (ret)
614 		return ret;
615 
616 	/* Calculate the ECC bytes */
617 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
618 		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
619 
620 	/* Finish a page read: compare and correct */
621 	for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
622 	     eccsteps;
623 	     eccsteps--, i += eccbytes, data += eccsize) {
624 		int stat =  nand_ecc_sw_hamming_correct(nand, data,
625 							&ecccode[i],
626 							&ecccalc[i]);
627 		if (stat < 0) {
628 			mtd->ecc_stats.failed++;
629 		} else {
630 			mtd->ecc_stats.corrected += stat;
631 			max_bitflips = max_t(unsigned int, max_bitflips, stat);
632 		}
633 	}
634 
635 	nand_ecc_restore_req(&engine_conf->req_ctx, req);
636 
637 	return max_bitflips;
638 }
639 
640 static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
641 	.init_ctx = nand_ecc_sw_hamming_init_ctx,
642 	.cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
643 	.prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
644 	.finish_io_req = nand_ecc_sw_hamming_finish_io_req,
645 };
646 
647 static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
648 	.ops = &nand_ecc_sw_hamming_engine_ops,
649 };
650 
651 struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
652 {
653 	return &nand_ecc_sw_hamming_engine;
654 }
655 EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
656 
657 MODULE_LICENSE("GPL");
658 MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
659 MODULE_DESCRIPTION("NAND software Hamming ECC support");
660