xref: /openbmc/linux/drivers/mtd/nand/ecc-sw-hamming.c (revision c64d01b3ceba873aa8e8605598cec4a6bc6d1601)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * This file contains an ECC algorithm that detects and corrects 1 bit
4  * errors in a 256 byte block of data.
5  *
6  * Copyright © 2008 Koninklijke Philips Electronics NV.
7  *                  Author: Frans Meulenbroeks
8  *
9  * Completely replaces the previous ECC implementation which was written by:
10  *   Steven J. Hill (sjhill@realitydiluted.com)
11  *   Thomas Gleixner (tglx@linutronix.de)
12  *
13  * Information on how this algorithm works and how it was developed
14  * can be found in Documentation/driver-api/mtd/nand_ecc.rst
15  */
16 
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/mtd/nand.h>
21 #include <linux/mtd/nand-ecc-sw-hamming.h>
22 #include <linux/slab.h>
23 #include <asm/byteorder.h>
24 
/*
 * invparity is a 256 byte table that contains the inverted parity
 * for each byte value. So if the number of set bits in a byte is even,
 * the array element is 1, and when the number of set bits is odd
 * the array element is 0
 * (e.g. invparity[0x00] == 1, invparity[0x01] == 0, invparity[0x03] == 1).
 */
static const char invparity[256] = {
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
};
49 
/*
 * bitsperbyte contains the number of set bits for each byte value
 * (e.g. bitsperbyte[0x00] == 0, bitsperbyte[0x03] == 2,
 * bitsperbyte[0xff] == 8).
 * This is only used for testing and repairing parity
 * (a precalculated value slightly improves performance).
 */
static const char bitsperbyte[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
73 
/*
 * addressbits is a lookup table to filter out the bits from the xor-ed
 * ECC data that identify the faulty location.
 * The entry for index i packs bits 1, 3, 5 and 7 of i into bits 0, 1, 2
 * and 3 of the result (e.g. addressbits[0x02] == 0x01,
 * addressbits[0x80] == 0x08, addressbits[0xaa] == 0x0f); the even bits
 * of the index are ignored.
 * This is only used for repairing parity,
 * see the comments in ecc_sw_hamming_correct for more details.
 */
static const char addressbits[256] = {
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
};
114 
115 int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
116 			     unsigned char *code, bool sm_order)
117 {
118 	const u32 *bp = (uint32_t *)buf;
119 	const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
120 	/* current value in buffer */
121 	u32 cur;
122 	/* rp0..rp17 are the various accumulated parities (per byte) */
123 	u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
124 		rp13, rp14, rp15, rp16, rp17;
125 	/* Cumulative parity for all data */
126 	u32 par;
127 	/* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
128 	u32 tmppar;
129 	int i;
130 
131 	par = 0;
132 	rp4 = 0;
133 	rp6 = 0;
134 	rp8 = 0;
135 	rp10 = 0;
136 	rp12 = 0;
137 	rp14 = 0;
138 	rp16 = 0;
139 	rp17 = 0;
140 
141 	/*
142 	 * The loop is unrolled a number of times;
143 	 * This avoids if statements to decide on which rp value to update
144 	 * Also we process the data by longwords.
145 	 * Note: passing unaligned data might give a performance penalty.
146 	 * It is assumed that the buffers are aligned.
147 	 * tmppar is the cumulative sum of this iteration.
148 	 * needed for calculating rp12, rp14, rp16 and par
149 	 * also used as a performance improvement for rp6, rp8 and rp10
150 	 */
151 	for (i = 0; i < eccsize_mult << 2; i++) {
152 		cur = *bp++;
153 		tmppar = cur;
154 		rp4 ^= cur;
155 		cur = *bp++;
156 		tmppar ^= cur;
157 		rp6 ^= tmppar;
158 		cur = *bp++;
159 		tmppar ^= cur;
160 		rp4 ^= cur;
161 		cur = *bp++;
162 		tmppar ^= cur;
163 		rp8 ^= tmppar;
164 
165 		cur = *bp++;
166 		tmppar ^= cur;
167 		rp4 ^= cur;
168 		rp6 ^= cur;
169 		cur = *bp++;
170 		tmppar ^= cur;
171 		rp6 ^= cur;
172 		cur = *bp++;
173 		tmppar ^= cur;
174 		rp4 ^= cur;
175 		cur = *bp++;
176 		tmppar ^= cur;
177 		rp10 ^= tmppar;
178 
179 		cur = *bp++;
180 		tmppar ^= cur;
181 		rp4 ^= cur;
182 		rp6 ^= cur;
183 		rp8 ^= cur;
184 		cur = *bp++;
185 		tmppar ^= cur;
186 		rp6 ^= cur;
187 		rp8 ^= cur;
188 		cur = *bp++;
189 		tmppar ^= cur;
190 		rp4 ^= cur;
191 		rp8 ^= cur;
192 		cur = *bp++;
193 		tmppar ^= cur;
194 		rp8 ^= cur;
195 
196 		cur = *bp++;
197 		tmppar ^= cur;
198 		rp4 ^= cur;
199 		rp6 ^= cur;
200 		cur = *bp++;
201 		tmppar ^= cur;
202 		rp6 ^= cur;
203 		cur = *bp++;
204 		tmppar ^= cur;
205 		rp4 ^= cur;
206 		cur = *bp++;
207 		tmppar ^= cur;
208 
209 		par ^= tmppar;
210 		if ((i & 0x1) == 0)
211 			rp12 ^= tmppar;
212 		if ((i & 0x2) == 0)
213 			rp14 ^= tmppar;
214 		if (eccsize_mult == 2 && (i & 0x4) == 0)
215 			rp16 ^= tmppar;
216 	}
217 
218 	/*
219 	 * handle the fact that we use longword operations
220 	 * we'll bring rp4..rp14..rp16 back to single byte entities by
221 	 * shifting and xoring first fold the upper and lower 16 bits,
222 	 * then the upper and lower 8 bits.
223 	 */
224 	rp4 ^= (rp4 >> 16);
225 	rp4 ^= (rp4 >> 8);
226 	rp4 &= 0xff;
227 	rp6 ^= (rp6 >> 16);
228 	rp6 ^= (rp6 >> 8);
229 	rp6 &= 0xff;
230 	rp8 ^= (rp8 >> 16);
231 	rp8 ^= (rp8 >> 8);
232 	rp8 &= 0xff;
233 	rp10 ^= (rp10 >> 16);
234 	rp10 ^= (rp10 >> 8);
235 	rp10 &= 0xff;
236 	rp12 ^= (rp12 >> 16);
237 	rp12 ^= (rp12 >> 8);
238 	rp12 &= 0xff;
239 	rp14 ^= (rp14 >> 16);
240 	rp14 ^= (rp14 >> 8);
241 	rp14 &= 0xff;
242 	if (eccsize_mult == 2) {
243 		rp16 ^= (rp16 >> 16);
244 		rp16 ^= (rp16 >> 8);
245 		rp16 &= 0xff;
246 	}
247 
248 	/*
249 	 * we also need to calculate the row parity for rp0..rp3
250 	 * This is present in par, because par is now
251 	 * rp3 rp3 rp2 rp2 in little endian and
252 	 * rp2 rp2 rp3 rp3 in big endian
253 	 * as well as
254 	 * rp1 rp0 rp1 rp0 in little endian and
255 	 * rp0 rp1 rp0 rp1 in big endian
256 	 * First calculate rp2 and rp3
257 	 */
258 #ifdef __BIG_ENDIAN
259 	rp2 = (par >> 16);
260 	rp2 ^= (rp2 >> 8);
261 	rp2 &= 0xff;
262 	rp3 = par & 0xffff;
263 	rp3 ^= (rp3 >> 8);
264 	rp3 &= 0xff;
265 #else
266 	rp3 = (par >> 16);
267 	rp3 ^= (rp3 >> 8);
268 	rp3 &= 0xff;
269 	rp2 = par & 0xffff;
270 	rp2 ^= (rp2 >> 8);
271 	rp2 &= 0xff;
272 #endif
273 
274 	/* reduce par to 16 bits then calculate rp1 and rp0 */
275 	par ^= (par >> 16);
276 #ifdef __BIG_ENDIAN
277 	rp0 = (par >> 8) & 0xff;
278 	rp1 = (par & 0xff);
279 #else
280 	rp1 = (par >> 8) & 0xff;
281 	rp0 = (par & 0xff);
282 #endif
283 
284 	/* finally reduce par to 8 bits */
285 	par ^= (par >> 8);
286 	par &= 0xff;
287 
288 	/*
289 	 * and calculate rp5..rp15..rp17
290 	 * note that par = rp4 ^ rp5 and due to the commutative property
291 	 * of the ^ operator we can say:
292 	 * rp5 = (par ^ rp4);
293 	 * The & 0xff seems superfluous, but benchmarking learned that
294 	 * leaving it out gives slightly worse results. No idea why, probably
295 	 * it has to do with the way the pipeline in pentium is organized.
296 	 */
297 	rp5 = (par ^ rp4) & 0xff;
298 	rp7 = (par ^ rp6) & 0xff;
299 	rp9 = (par ^ rp8) & 0xff;
300 	rp11 = (par ^ rp10) & 0xff;
301 	rp13 = (par ^ rp12) & 0xff;
302 	rp15 = (par ^ rp14) & 0xff;
303 	if (eccsize_mult == 2)
304 		rp17 = (par ^ rp16) & 0xff;
305 
306 	/*
307 	 * Finally calculate the ECC bits.
308 	 * Again here it might seem that there are performance optimisations
309 	 * possible, but benchmarks showed that on the system this is developed
310 	 * the code below is the fastest
311 	 */
312 	if (sm_order) {
313 		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
314 			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
315 			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
316 			  (invparity[rp1] << 1) | (invparity[rp0]);
317 		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
318 			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
319 			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
320 			  (invparity[rp9] << 1) | (invparity[rp8]);
321 	} else {
322 		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
323 			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
324 			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
325 			  (invparity[rp1] << 1) | (invparity[rp0]);
326 		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
327 			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
328 			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
329 			  (invparity[rp9] << 1) | (invparity[rp8]);
330 	}
331 
332 	if (eccsize_mult == 1)
333 		code[2] =
334 		    (invparity[par & 0xf0] << 7) |
335 		    (invparity[par & 0x0f] << 6) |
336 		    (invparity[par & 0xcc] << 5) |
337 		    (invparity[par & 0x33] << 4) |
338 		    (invparity[par & 0xaa] << 3) |
339 		    (invparity[par & 0x55] << 2) |
340 		    3;
341 	else
342 		code[2] =
343 		    (invparity[par & 0xf0] << 7) |
344 		    (invparity[par & 0x0f] << 6) |
345 		    (invparity[par & 0xcc] << 5) |
346 		    (invparity[par & 0x33] << 4) |
347 		    (invparity[par & 0xaa] << 3) |
348 		    (invparity[par & 0x55] << 2) |
349 		    (invparity[rp17] << 1) |
350 		    (invparity[rp16] << 0);
351 
352 	return 0;
353 }
354 EXPORT_SYMBOL(ecc_sw_hamming_calculate);
355 
356 /**
357  * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
358  * @nand: NAND device
359  * @buf: Input buffer with raw data
360  * @code: Output buffer with ECC
361  */
362 int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
363 				  const unsigned char *buf, unsigned char *code)
364 {
365 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
366 	unsigned int step_size = nand->ecc.ctx.conf.step_size;
367 	bool sm_order = engine_conf ? engine_conf->sm_order : false;
368 
369 	return ecc_sw_hamming_calculate(buf, step_size, code, sm_order);
370 }
371 EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
372 
373 int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
374 			   unsigned char *calc_ecc, unsigned int step_size,
375 			   bool sm_order)
376 {
377 	const u32 eccsize_mult = step_size >> 8;
378 	unsigned char b0, b1, b2, bit_addr;
379 	unsigned int byte_addr;
380 
381 	/*
382 	 * b0 to b2 indicate which bit is faulty (if any)
383 	 * we might need the xor result  more than once,
384 	 * so keep them in a local var
385 	*/
386 	if (sm_order) {
387 		b0 = read_ecc[0] ^ calc_ecc[0];
388 		b1 = read_ecc[1] ^ calc_ecc[1];
389 	} else {
390 		b0 = read_ecc[1] ^ calc_ecc[1];
391 		b1 = read_ecc[0] ^ calc_ecc[0];
392 	}
393 
394 	b2 = read_ecc[2] ^ calc_ecc[2];
395 
396 	/* check if there are any bitfaults */
397 
398 	/* repeated if statements are slightly more efficient than switch ... */
399 	/* ordered in order of likelihood */
400 
401 	if ((b0 | b1 | b2) == 0)
402 		return 0;	/* no error */
403 
404 	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
405 	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
406 	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
407 	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
408 	/* single bit error */
409 		/*
410 		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
411 		 * byte, cp 5/3/1 indicate the faulty bit.
412 		 * A lookup table (called addressbits) is used to filter
413 		 * the bits from the byte they are in.
414 		 * A marginal optimisation is possible by having three
415 		 * different lookup tables.
416 		 * One as we have now (for b0), one for b2
417 		 * (that would avoid the >> 1), and one for b1 (with all values
418 		 * << 4). However it was felt that introducing two more tables
419 		 * hardly justify the gain.
420 		 *
421 		 * The b2 shift is there to get rid of the lowest two bits.
422 		 * We could also do addressbits[b2] >> 1 but for the
423 		 * performance it does not make any difference
424 		 */
425 		if (eccsize_mult == 1)
426 			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
427 		else
428 			byte_addr = (addressbits[b2 & 0x3] << 8) +
429 				    (addressbits[b1] << 4) + addressbits[b0];
430 		bit_addr = addressbits[b2 >> 2];
431 		/* flip the bit */
432 		buf[byte_addr] ^= (1 << bit_addr);
433 		return 1;
434 
435 	}
436 	/* count nr of bits; use table lookup, faster than calculating it */
437 	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
438 		return 1;	/* error in ECC data; no action needed */
439 
440 	pr_err("%s: uncorrectable ECC error\n", __func__);
441 	return -EBADMSG;
442 }
443 EXPORT_SYMBOL(ecc_sw_hamming_correct);
444 
445 /**
446  * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
447  * @nand: NAND device
448  * @buf: Raw data read from the chip
449  * @read_ecc: ECC bytes read from the chip
450  * @calc_ecc: ECC calculated from the raw data
451  *
452  * Detect and correct up to 1 bit error per 256/512-byte block.
453  */
454 int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
455 				unsigned char *read_ecc,
456 				unsigned char *calc_ecc)
457 {
458 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
459 	unsigned int step_size = nand->ecc.ctx.conf.step_size;
460 	bool sm_order = engine_conf ? engine_conf->sm_order : false;
461 
462 	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
463 				      sm_order);
464 }
465 EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
466 
467 int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
468 {
469 	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
470 	struct nand_ecc_sw_hamming_conf *engine_conf;
471 	struct mtd_info *mtd = nanddev_to_mtd(nand);
472 	int ret;
473 
474 	if (!mtd->ooblayout) {
475 		switch (mtd->oobsize) {
476 		case 8:
477 		case 16:
478 			mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
479 			break;
480 		case 64:
481 		case 128:
482 			mtd_set_ooblayout(mtd,
483 					  nand_get_large_page_hamming_ooblayout());
484 			break;
485 		default:
486 			return -ENOTSUPP;
487 		}
488 	}
489 
490 	conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
491 	conf->algo = NAND_ECC_ALGO_HAMMING;
492 	conf->step_size = nand->ecc.user_conf.step_size;
493 	conf->strength = 1;
494 
495 	/* Use the strongest configuration by default */
496 	if (conf->step_size != 256 && conf->step_size != 512)
497 		conf->step_size = 256;
498 
499 	engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
500 	if (!engine_conf)
501 		return -ENOMEM;
502 
503 	ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
504 	if (ret)
505 		goto free_engine_conf;
506 
507 	engine_conf->code_size = 3;
508 	engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
509 	engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
510 	if (!engine_conf->calc_buf || !engine_conf->code_buf) {
511 		ret = -ENOMEM;
512 		goto free_bufs;
513 	}
514 
515 	nand->ecc.ctx.priv = engine_conf;
516 	nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
517 	nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
518 
519 	return 0;
520 
521 free_bufs:
522 	nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
523 	kfree(engine_conf->calc_buf);
524 	kfree(engine_conf->code_buf);
525 free_engine_conf:
526 	kfree(engine_conf);
527 
528 	return ret;
529 }
530 EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
531 
532 void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
533 {
534 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
535 
536 	if (engine_conf) {
537 		nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
538 		kfree(engine_conf->calc_buf);
539 		kfree(engine_conf->code_buf);
540 		kfree(engine_conf);
541 	}
542 }
543 EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
544 
545 static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
546 					      struct nand_page_io_req *req)
547 {
548 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
549 	struct mtd_info *mtd = nanddev_to_mtd(nand);
550 	int eccsize = nand->ecc.ctx.conf.step_size;
551 	int eccbytes = engine_conf->code_size;
552 	int eccsteps = nand->ecc.ctx.nsteps;
553 	int total = nand->ecc.ctx.total;
554 	u8 *ecccalc = engine_conf->calc_buf;
555 	const u8 *data;
556 	int i;
557 
558 	/* Nothing to do for a raw operation */
559 	if (req->mode == MTD_OPS_RAW)
560 		return 0;
561 
562 	/* This engine does not provide BBM/free OOB bytes protection */
563 	if (!req->datalen)
564 		return 0;
565 
566 	nand_ecc_tweak_req(&engine_conf->req_ctx, req);
567 
568 	/* No more preparation for page read */
569 	if (req->type == NAND_PAGE_READ)
570 		return 0;
571 
572 	/* Preparation for page write: derive the ECC bytes and place them */
573 	for (i = 0, data = req->databuf.out;
574 	     eccsteps;
575 	     eccsteps--, i += eccbytes, data += eccsize)
576 		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
577 
578 	return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
579 					  0, total);
580 }
581 
582 static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
583 					     struct nand_page_io_req *req)
584 {
585 	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
586 	struct mtd_info *mtd = nanddev_to_mtd(nand);
587 	int eccsize = nand->ecc.ctx.conf.step_size;
588 	int total = nand->ecc.ctx.total;
589 	int eccbytes = engine_conf->code_size;
590 	int eccsteps = nand->ecc.ctx.nsteps;
591 	u8 *ecccalc = engine_conf->calc_buf;
592 	u8 *ecccode = engine_conf->code_buf;
593 	unsigned int max_bitflips = 0;
594 	u8 *data = req->databuf.in;
595 	int i, ret;
596 
597 	/* Nothing to do for a raw operation */
598 	if (req->mode == MTD_OPS_RAW)
599 		return 0;
600 
601 	/* This engine does not provide BBM/free OOB bytes protection */
602 	if (!req->datalen)
603 		return 0;
604 
605 	/* No more preparation for page write */
606 	if (req->type == NAND_PAGE_WRITE) {
607 		nand_ecc_restore_req(&engine_conf->req_ctx, req);
608 		return 0;
609 	}
610 
611 	/* Finish a page read: retrieve the (raw) ECC bytes*/
612 	ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
613 					 total);
614 	if (ret)
615 		return ret;
616 
617 	/* Calculate the ECC bytes */
618 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
619 		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
620 
621 	/* Finish a page read: compare and correct */
622 	for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
623 	     eccsteps;
624 	     eccsteps--, i += eccbytes, data += eccsize) {
625 		int stat =  nand_ecc_sw_hamming_correct(nand, data,
626 							&ecccode[i],
627 							&ecccalc[i]);
628 		if (stat < 0) {
629 			mtd->ecc_stats.failed++;
630 		} else {
631 			mtd->ecc_stats.corrected += stat;
632 			max_bitflips = max_t(unsigned int, max_bitflips, stat);
633 		}
634 	}
635 
636 	nand_ecc_restore_req(&engine_conf->req_ctx, req);
637 
638 	return max_bitflips;
639 }
640 
/* Callbacks of the software Hamming ECC engine, all implemented in this file */
static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
	.init_ctx = nand_ecc_sw_hamming_init_ctx,
	.cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
	.prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
	.finish_io_req = nand_ecc_sw_hamming_finish_io_req,
};
647 
/*
 * The software Hamming ECC engine instance handed out by
 * nand_ecc_sw_hamming_get_engine(); only its operations are set here.
 */
static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
	.ops = &nand_ecc_sw_hamming_engine_ops,
};
651 
/**
 * nand_ecc_sw_hamming_get_engine - Get the software Hamming ECC engine
 *
 * Return: pointer to the engine instance defined in this file; every call
 * returns the same object.
 */
struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
{
	return &nand_ecc_sw_hamming_engine;
}
EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
657 
/* Module metadata: license, author and description */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
MODULE_DESCRIPTION("NAND software Hamming ECC support");
661