xref: /openbmc/linux/drivers/mtd/tests/nandbiterrs.c (revision 10c1d542c7e871865bca381842fd04a92d2b95ec)
1 /*
2  * Copyright © 2012 NetCommWireless
3  * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
4  *
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
7  *
8  * There are two test modes:
9  *
10  *	0 - artificially inserting bit errors until the ECC fails
11  *	    This is the default method and fairly quick. It should
12  *	    be independent of the quality of the FLASH.
13  *
14  *	1 - re-writing the same pattern repeatedly until the ECC fails.
15  *	    This method relies on the physics of NAND FLASH to eventually
16  *	    generate '0' bits if '1' has been written sufficient times.
17  *	    Depending on the NAND, the first bit errors will appear after
18  *	    1000 or more writes and then will usually snowball, reaching the
19  *	    limits of the ECC quickly.
20  *
21  *	    The test stops after 10000 cycles, should your FLASH be
22  *	    exceptionally good and not generate bit errors before that. Try
23  *	    a different page in that case.
24  *
25  * Please note that neither of these tests will significantly 'use up' any
26  * FLASH endurance. Only a maximum of two erase operations will be performed.
27  *
28  *
29  * This program is free software; you can redistribute it and/or modify it
30  * under the terms of the GNU General Public License version 2 as published by
31  * the Free Software Foundation.
32  *
33  * This program is distributed in the hope that it will be useful, but WITHOUT
34  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
35  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
36  * more details.
37  *
38  * You should have received a copy of the GNU General Public License along with
39  * this program; see the file COPYING. If not, write to the Free Software
40  * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41  */
42 
/* Every pr_*() message emitted from this file is prefixed with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
44 
45 #include <linux/init.h>
46 #include <linux/module.h>
47 #include <linux/moduleparam.h>
48 #include <linux/mtd/mtd.h>
49 #include <linux/err.h>
50 #include <linux/mtd/rawnand.h>
51 #include <linux/slab.h>
52 #include "mtd_test.h"
53 
54 static int dev;
55 module_param(dev, int, S_IRUGO);
56 MODULE_PARM_DESC(dev, "MTD device number to use");
57 
58 static unsigned page_offset;
59 module_param(page_offset, uint, S_IRUGO);
60 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
61 
62 static unsigned seed;
63 module_param(seed, uint, S_IRUGO);
64 MODULE_PARM_DESC(seed, "Random seed");
65 
66 static int mode;
67 module_param(mode, int, S_IRUGO);
68 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
69 
70 static unsigned max_overwrite = 10000;
71 
72 static loff_t   offset;     /* Offset of the page we're using. */
73 static unsigned eraseblock; /* Eraseblock number for our page. */
74 
75 /* We assume that the ECC can correct up to a certain number
76  * of biterrors per subpage. */
77 static unsigned subsize;  /* Size of subpages */
78 static unsigned subcount; /* Number of subpages per page */
79 
80 static struct mtd_info *mtd;   /* MTD device */
81 
82 static uint8_t *wbuffer; /* One page write / compare buffer */
83 static uint8_t *rbuffer; /* One page read buffer */
84 
/*
 * Deterministic 'random' byte for a given position.
 *
 * The offset is XORed with a fixed constant, folded onto itself with
 * three xor-shift steps, and the low byte of the result is returned
 * with its bit order mirrored.
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	unsigned char byte;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;
	byte = mixed & 0xFF;

	/* Mirror the byte: swap nibbles, then bit pairs, then single bits. */
	byte = ((byte >> 4) & 0x0F) | ((byte << 4) & 0xF0);
	byte = ((byte >> 2) & 0x33) | ((byte << 2) & 0xCC);
	byte = ((byte >> 1) & 0x55) | ((byte << 1) & 0xAA);

	return byte;
}
101 
102 /* Writes wbuffer to page */
103 static int write_page(int log)
104 {
105 	if (log)
106 		pr_info("write_page\n");
107 
108 	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
109 }
110 
111 /* Re-writes the data area while leaving the OOB alone. */
112 static int rewrite_page(int log)
113 {
114 	int err = 0;
115 	struct mtd_oob_ops ops;
116 
117 	if (log)
118 		pr_info("rewrite page\n");
119 
120 	ops.mode      = MTD_OPS_RAW; /* No ECC */
121 	ops.len       = mtd->writesize;
122 	ops.retlen    = 0;
123 	ops.ooblen    = 0;
124 	ops.oobretlen = 0;
125 	ops.ooboffs   = 0;
126 	ops.datbuf    = wbuffer;
127 	ops.oobbuf    = NULL;
128 
129 	err = mtd_write_oob(mtd, offset, &ops);
130 	if (err || ops.retlen != mtd->writesize) {
131 		pr_err("error: write_oob failed (%d)\n", err);
132 		if (!err)
133 			err = -EIO;
134 	}
135 
136 	return err;
137 }
138 
139 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
140  * or error (<0) */
141 static int read_page(int log)
142 {
143 	int err = 0;
144 	size_t read;
145 	struct mtd_ecc_stats oldstats;
146 
147 	if (log)
148 		pr_info("read_page\n");
149 
150 	/* Saving last mtd stats */
151 	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
152 
153 	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
154 	if (!err || err == -EUCLEAN)
155 		err = mtd->ecc_stats.corrected - oldstats.corrected;
156 
157 	if (err < 0 || read != mtd->writesize) {
158 		pr_err("error: read failed at %#llx\n", (long long)offset);
159 		if (err >= 0)
160 			err = -EIO;
161 	}
162 
163 	return err;
164 }
165 
166 /* Verifies rbuffer against random sequence */
167 static int verify_page(int log)
168 {
169 	unsigned i, errs = 0;
170 
171 	if (log)
172 		pr_info("verify_page\n");
173 
174 	for (i = 0; i < mtd->writesize; i++) {
175 		if (rbuffer[i] != hash(i+seed)) {
176 			pr_err("Error: page offset %u, expected %02x, got %02x\n",
177 				i, hash(i+seed), rbuffer[i]);
178 			errs++;
179 		}
180 	}
181 
182 	if (errs)
183 		return -EIO;
184 	else
185 		return 0;
186 }
187 
188 #define CBIT(v, n) ((v) & (1 << (n)))
189 #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
190 
191 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
192  * and sets it to '0'. */
193 static int insert_biterror(unsigned byte)
194 {
195 	int bit;
196 
197 	while (byte < mtd->writesize) {
198 		for (bit = 7; bit >= 0; bit--) {
199 			if (CBIT(wbuffer[byte], bit)) {
200 				BCLR(wbuffer[byte], bit);
201 				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
202 				return 0;
203 			}
204 		}
205 		byte++;
206 	}
207 	pr_err("biterror: Failed to find a '1' bit\n");
208 	return -EIO;
209 }
210 
211 /* Writes 'random' data to page and then introduces deliberate bit
212  * errors into the page, while verifying each step. */
213 static int incremental_errors_test(void)
214 {
215 	int err = 0;
216 	unsigned i;
217 	unsigned errs_per_subpage = 0;
218 
219 	pr_info("incremental biterrors test\n");
220 
221 	for (i = 0; i < mtd->writesize; i++)
222 		wbuffer[i] = hash(i+seed);
223 
224 	err = write_page(1);
225 	if (err)
226 		goto exit;
227 
228 	while (1) {
229 
230 		err = rewrite_page(1);
231 		if (err)
232 			goto exit;
233 
234 		err = read_page(1);
235 		if (err > 0)
236 			pr_info("Read reported %d corrected bit errors\n", err);
237 		if (err < 0) {
238 			pr_err("After %d biterrors per subpage, read reported error %d\n",
239 				errs_per_subpage, err);
240 			err = 0;
241 			goto exit;
242 		}
243 
244 		err = verify_page(1);
245 		if (err) {
246 			pr_err("ECC failure, read data is incorrect despite read success\n");
247 			goto exit;
248 		}
249 
250 		pr_info("Successfully corrected %d bit errors per subpage\n",
251 			errs_per_subpage);
252 
253 		for (i = 0; i < subcount; i++) {
254 			err = insert_biterror(i * subsize);
255 			if (err < 0)
256 				goto exit;
257 		}
258 		errs_per_subpage++;
259 	}
260 
261 exit:
262 	return err;
263 }
264 
265 
266 /* Writes 'random' data to page and then re-writes that same data repeatedly.
267    This eventually develops bit errors (bits written as '1' will slowly become
268    '0'), which are corrected as far as the ECC is capable of. */
269 static int overwrite_test(void)
270 {
271 	int err = 0;
272 	unsigned i;
273 	unsigned max_corrected = 0;
274 	unsigned opno = 0;
275 	/* We don't expect more than this many correctable bit errors per
276 	 * page. */
277 	#define MAXBITS 512
278 	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
279 
280 	memset(bitstats, 0, sizeof(bitstats));
281 
282 	pr_info("overwrite biterrors test\n");
283 
284 	for (i = 0; i < mtd->writesize; i++)
285 		wbuffer[i] = hash(i+seed);
286 
287 	err = write_page(1);
288 	if (err)
289 		goto exit;
290 
291 	while (opno < max_overwrite) {
292 
293 		err = write_page(0);
294 		if (err)
295 			break;
296 
297 		err = read_page(0);
298 		if (err >= 0) {
299 			if (err >= MAXBITS) {
300 				pr_info("Implausible number of bit errors corrected\n");
301 				err = -EIO;
302 				break;
303 			}
304 			bitstats[err]++;
305 			if (err > max_corrected) {
306 				max_corrected = err;
307 				pr_info("Read reported %d corrected bit errors\n",
308 					err);
309 			}
310 		} else { /* err < 0 */
311 			pr_info("Read reported error %d\n", err);
312 			err = 0;
313 			break;
314 		}
315 
316 		err = verify_page(0);
317 		if (err) {
318 			bitstats[max_corrected] = opno;
319 			pr_info("ECC failure, read data is incorrect despite read success\n");
320 			break;
321 		}
322 
323 		err = mtdtest_relax();
324 		if (err)
325 			break;
326 
327 		opno++;
328 	}
329 
330 	/* At this point bitstats[0] contains the number of ops with no bit
331 	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
332 	pr_info("Bit error histogram (%d operations total):\n", opno);
333 	for (i = 0; i < max_corrected; i++)
334 		pr_info("Page reads with %3d corrected bit errors: %d\n",
335 			i, bitstats[i]);
336 
337 exit:
338 	return err;
339 }
340 
341 static int __init mtd_nandbiterrs_init(void)
342 {
343 	int err = 0;
344 
345 	printk("\n");
346 	printk(KERN_INFO "==================================================\n");
347 	pr_info("MTD device: %d\n", dev);
348 
349 	mtd = get_mtd_device(NULL, dev);
350 	if (IS_ERR(mtd)) {
351 		err = PTR_ERR(mtd);
352 		pr_err("error: cannot get MTD device\n");
353 		goto exit_mtddev;
354 	}
355 
356 	if (!mtd_type_is_nand(mtd)) {
357 		pr_info("this test requires NAND flash\n");
358 		err = -ENODEV;
359 		goto exit_nand;
360 	}
361 
362 	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
363 		(unsigned long long)mtd->size, mtd->erasesize,
364 		mtd->writesize, mtd->oobsize);
365 
366 	subsize  = mtd->writesize >> mtd->subpage_sft;
367 	subcount = mtd->writesize / subsize;
368 
369 	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
370 
371 	offset     = (loff_t)page_offset * mtd->writesize;
372 	eraseblock = mtd_div_by_eb(offset, mtd);
373 
374 	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
375 		page_offset, offset, eraseblock);
376 
377 	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
378 	if (!wbuffer) {
379 		err = -ENOMEM;
380 		goto exit_wbuffer;
381 	}
382 
383 	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
384 	if (!rbuffer) {
385 		err = -ENOMEM;
386 		goto exit_rbuffer;
387 	}
388 
389 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
390 	if (err)
391 		goto exit_error;
392 
393 	if (mode == 0)
394 		err = incremental_errors_test();
395 	else
396 		err = overwrite_test();
397 
398 	if (err)
399 		goto exit_error;
400 
401 	/* We leave the block un-erased in case of test failure. */
402 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
403 	if (err)
404 		goto exit_error;
405 
406 	err = -EIO;
407 	pr_info("finished successfully.\n");
408 	printk(KERN_INFO "==================================================\n");
409 
410 exit_error:
411 	kfree(rbuffer);
412 exit_rbuffer:
413 	kfree(wbuffer);
414 exit_wbuffer:
415 	/* Nothing */
416 exit_nand:
417 	put_mtd_device(mtd);
418 exit_mtddev:
419 	return err;
420 }
421 
422 static void __exit mtd_nandbiterrs_exit(void)
423 {
424 	return;
425 }
426 
427 module_init(mtd_nandbiterrs_init);
428 module_exit(mtd_nandbiterrs_exit);
429 
430 MODULE_DESCRIPTION("NAND bit error recovery test");
431 MODULE_AUTHOR("Iwo Mergler");
432 MODULE_LICENSE("GPL");
433