xref: /openbmc/linux/drivers/mtd/tests/nandbiterrs.c (revision a995c792)
1 /*
2  * Copyright © 2012 NetCommWireless
3  * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
4  *
5  * Test for multi-bit error recovery on a NAND page. This mostly tests the
6  * ECC controller / driver.
7  *
8  * There are two test modes:
9  *
10  *	0 - artificially inserting bit errors until the ECC fails
11  *	    This is the default method and fairly quick. It should
12  *	    be independent of the quality of the FLASH.
13  *
14  *	1 - re-writing the same pattern repeatedly until the ECC fails.
15  *	    This method relies on the physics of NAND FLASH to eventually
16  *	    generate '0' bits if '1' has been written sufficient times.
17  *	    Depending on the NAND, the first bit errors will appear after
18  *	    1000 or more writes and then will usually snowball, reaching the
19  *	    limits of the ECC quickly.
20  *
21  *	    The test stops after 10000 cycles, should your FLASH be
22  *	    exceptionally good and not generate bit errors before that. Try
23  *	    a different page in that case.
24  *
25  * Please note that neither of these tests will significantly 'use up' any
26  * FLASH endurance. Only a maximum of two erase operations will be performed.
27  *
28  *
29  * This program is free software; you can redistribute it and/or modify it
30  * under the terms of the GNU General Public License version 2 as published by
31  * the Free Software Foundation.
32  *
33  * This program is distributed in the hope that it will be useful, but WITHOUT
34  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
35  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
36  * more details.
37  *
38  * You should have received a copy of the GNU General Public License along with
39  * this program; see the file COPYING. If not, write to the Free Software
40  * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41  */
42 
43 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
44 
45 #include <linux/init.h>
46 #include <linux/module.h>
47 #include <linux/moduleparam.h>
48 #include <linux/mtd/mtd.h>
49 #include <linux/err.h>
50 #include <linux/mtd/nand.h>
51 #include <linux/slab.h>
52 
53 static int dev;
54 module_param(dev, int, S_IRUGO);
55 MODULE_PARM_DESC(dev, "MTD device number to use");
56 
57 static unsigned page_offset;
58 module_param(page_offset, uint, S_IRUGO);
59 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
60 
61 static unsigned seed;
62 module_param(seed, uint, S_IRUGO);
63 MODULE_PARM_DESC(seed, "Random seed");
64 
65 static int mode;
66 module_param(mode, int, S_IRUGO);
67 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
68 
69 static unsigned max_overwrite = 10000;
70 
71 static loff_t   offset;     /* Offset of the page we're using. */
72 static unsigned eraseblock; /* Eraseblock number for our page. */
73 
74 /* We assume that the ECC can correct up to a certain number
75  * of biterrors per subpage. */
76 static unsigned subsize;  /* Size of subpages */
77 static unsigned subcount; /* Number of subpages per page */
78 
79 static struct mtd_info *mtd;   /* MTD device */
80 
81 static uint8_t *wbuffer; /* One page write / compare buffer */
82 static uint8_t *rbuffer; /* One page read buffer */
83 
/*
 * Deterministic pseudo-random byte for a given offset.
 *
 * The offset is scrambled with a fixed XOR constant and three xor-shift
 * steps; the low byte of the result is then bit-reversed and returned.
 * The same offset always yields the same byte, so the expected page content
 * can be regenerated at verify time without keeping a copy.
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	unsigned char out;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;

	out = mixed & 0xFF;

	/* Bit reversal: swap nibbles, then bit pairs, then neighbouring bits. */
	out = ((out << 4) & 0xF0) | ((out >> 4) & 0x0F);
	out = ((out << 2) & 0xCC) | ((out >> 2) & 0x33);
	out = ((out << 1) & 0xAA) | ((out >> 1) & 0x55);

	return out;
}
100 
101 static int erase_block(void)
102 {
103 	int err;
104 	struct erase_info ei;
105 	loff_t addr = eraseblock * mtd->erasesize;
106 
107 	pr_info("erase_block\n");
108 
109 	memset(&ei, 0, sizeof(struct erase_info));
110 	ei.mtd  = mtd;
111 	ei.addr = addr;
112 	ei.len  = mtd->erasesize;
113 
114 	err = mtd_erase(mtd, &ei);
115 	if (err || ei.state == MTD_ERASE_FAILED) {
116 		pr_err("error %d while erasing\n", err);
117 		if (!err)
118 			err = -EIO;
119 		return err;
120 	}
121 
122 	return 0;
123 }
124 
125 /* Writes wbuffer to page */
126 static int write_page(int log)
127 {
128 	int err = 0;
129 	size_t written;
130 
131 	if (log)
132 		pr_info("write_page\n");
133 
134 	err = mtd_write(mtd, offset, mtd->writesize, &written, wbuffer);
135 	if (err || written != mtd->writesize) {
136 		pr_err("error: write failed at %#llx\n", (long long)offset);
137 		if (!err)
138 			err = -EIO;
139 	}
140 
141 	return err;
142 }
143 
144 /* Re-writes the data area while leaving the OOB alone. */
145 static int rewrite_page(int log)
146 {
147 	int err = 0;
148 	struct mtd_oob_ops ops;
149 
150 	if (log)
151 		pr_info("rewrite page\n");
152 
153 	ops.mode      = MTD_OPS_RAW; /* No ECC */
154 	ops.len       = mtd->writesize;
155 	ops.retlen    = 0;
156 	ops.ooblen    = 0;
157 	ops.oobretlen = 0;
158 	ops.ooboffs   = 0;
159 	ops.datbuf    = wbuffer;
160 	ops.oobbuf    = NULL;
161 
162 	err = mtd_write_oob(mtd, offset, &ops);
163 	if (err || ops.retlen != mtd->writesize) {
164 		pr_err("error: write_oob failed (%d)\n", err);
165 		if (!err)
166 			err = -EIO;
167 	}
168 
169 	return err;
170 }
171 
172 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
173  * or error (<0) */
174 static int read_page(int log)
175 {
176 	int err = 0;
177 	size_t read;
178 	struct mtd_ecc_stats oldstats;
179 
180 	if (log)
181 		pr_info("read_page\n");
182 
183 	/* Saving last mtd stats */
184 	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
185 
186 	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
187 	if (err == -EUCLEAN)
188 		err = mtd->ecc_stats.corrected - oldstats.corrected;
189 
190 	if (err < 0 || read != mtd->writesize) {
191 		pr_err("error: read failed at %#llx\n", (long long)offset);
192 		if (err >= 0)
193 			err = -EIO;
194 	}
195 
196 	return err;
197 }
198 
199 /* Verifies rbuffer against random sequence */
200 static int verify_page(int log)
201 {
202 	unsigned i, errs = 0;
203 
204 	if (log)
205 		pr_info("verify_page\n");
206 
207 	for (i = 0; i < mtd->writesize; i++) {
208 		if (rbuffer[i] != hash(i+seed)) {
209 			pr_err("Error: page offset %u, expected %02x, got %02x\n",
210 				i, hash(i+seed), rbuffer[i]);
211 			errs++;
212 		}
213 	}
214 
215 	if (errs)
216 		return -EIO;
217 	else
218 		return 0;
219 }
220 
221 #define CBIT(v, n) ((v) & (1 << (n)))
222 #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
223 
224 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
225  * and sets it to '0'. */
226 static int insert_biterror(unsigned byte)
227 {
228 	int bit;
229 
230 	while (byte < mtd->writesize) {
231 		for (bit = 7; bit >= 0; bit--) {
232 			if (CBIT(wbuffer[byte], bit)) {
233 				BCLR(wbuffer[byte], bit);
234 				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
235 				return 0;
236 			}
237 		}
238 		byte++;
239 	}
240 	pr_err("biterror: Failed to find a '1' bit\n");
241 	return -EIO;
242 }
243 
244 /* Writes 'random' data to page and then introduces deliberate bit
245  * errors into the page, while verifying each step. */
246 static int incremental_errors_test(void)
247 {
248 	int err = 0;
249 	unsigned i;
250 	unsigned errs_per_subpage = 0;
251 
252 	pr_info("incremental biterrors test\n");
253 
254 	for (i = 0; i < mtd->writesize; i++)
255 		wbuffer[i] = hash(i+seed);
256 
257 	err = write_page(1);
258 	if (err)
259 		goto exit;
260 
261 	while (1) {
262 
263 		err = rewrite_page(1);
264 		if (err)
265 			goto exit;
266 
267 		err = read_page(1);
268 		if (err > 0)
269 			pr_info("Read reported %d corrected bit errors\n", err);
270 		if (err < 0) {
271 			pr_err("After %d biterrors per subpage, read reported error %d\n",
272 				errs_per_subpage, err);
273 			err = 0;
274 			goto exit;
275 		}
276 
277 		err = verify_page(1);
278 		if (err) {
279 			pr_err("ECC failure, read data is incorrect despite read success\n");
280 			goto exit;
281 		}
282 
283 		pr_info("Successfully corrected %d bit errors per subpage\n",
284 			errs_per_subpage);
285 
286 		for (i = 0; i < subcount; i++) {
287 			err = insert_biterror(i * subsize);
288 			if (err < 0)
289 				goto exit;
290 		}
291 		errs_per_subpage++;
292 	}
293 
294 exit:
295 	return err;
296 }
297 
298 
299 /* Writes 'random' data to page and then re-writes that same data repeatedly.
300    This eventually develops bit errors (bits written as '1' will slowly become
301    '0'), which are corrected as far as the ECC is capable of. */
302 static int overwrite_test(void)
303 {
304 	int err = 0;
305 	unsigned i;
306 	unsigned max_corrected = 0;
307 	unsigned opno = 0;
308 	/* We don't expect more than this many correctable bit errors per
309 	 * page. */
310 	#define MAXBITS 512
311 	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
312 
313 	memset(bitstats, 0, sizeof(bitstats));
314 
315 	pr_info("overwrite biterrors test\n");
316 
317 	for (i = 0; i < mtd->writesize; i++)
318 		wbuffer[i] = hash(i+seed);
319 
320 	err = write_page(1);
321 	if (err)
322 		goto exit;
323 
324 	while (opno < max_overwrite) {
325 
326 		err = rewrite_page(0);
327 		if (err)
328 			break;
329 
330 		err = read_page(0);
331 		if (err >= 0) {
332 			if (err >= MAXBITS) {
333 				pr_info("Implausible number of bit errors corrected\n");
334 				err = -EIO;
335 				break;
336 			}
337 			bitstats[err]++;
338 			if (err > max_corrected) {
339 				max_corrected = err;
340 				pr_info("Read reported %d corrected bit errors\n",
341 					err);
342 			}
343 		} else { /* err < 0 */
344 			pr_info("Read reported error %d\n", err);
345 			err = 0;
346 			break;
347 		}
348 
349 		err = verify_page(0);
350 		if (err) {
351 			bitstats[max_corrected] = opno;
352 			pr_info("ECC failure, read data is incorrect despite read success\n");
353 			break;
354 		}
355 
356 		opno++;
357 	}
358 
359 	/* At this point bitstats[0] contains the number of ops with no bit
360 	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
361 	pr_info("Bit error histogram (%d operations total):\n", opno);
362 	for (i = 0; i < max_corrected; i++)
363 		pr_info("Page reads with %3d corrected bit errors: %d\n",
364 			i, bitstats[i]);
365 
366 exit:
367 	return err;
368 }
369 
370 static int __init mtd_nandbiterrs_init(void)
371 {
372 	int err = 0;
373 
374 	printk("\n");
375 	printk(KERN_INFO "==================================================\n");
376 	pr_info("MTD device: %d\n", dev);
377 
378 	mtd = get_mtd_device(NULL, dev);
379 	if (IS_ERR(mtd)) {
380 		err = PTR_ERR(mtd);
381 		pr_err("error: cannot get MTD device\n");
382 		goto exit_mtddev;
383 	}
384 
385 	if (mtd->type != MTD_NANDFLASH) {
386 		pr_info("this test requires NAND flash\n");
387 		err = -ENODEV;
388 		goto exit_nand;
389 	}
390 
391 	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
392 		(unsigned long long)mtd->size, mtd->erasesize,
393 		mtd->writesize, mtd->oobsize);
394 
395 	subsize  = mtd->writesize >> mtd->subpage_sft;
396 	subcount = mtd->writesize / subsize;
397 
398 	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
399 
400 	offset     = page_offset * mtd->writesize;
401 	eraseblock = mtd_div_by_eb(offset, mtd);
402 
403 	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
404 		page_offset, offset, eraseblock);
405 
406 	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
407 	if (!wbuffer) {
408 		err = -ENOMEM;
409 		goto exit_wbuffer;
410 	}
411 
412 	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
413 	if (!rbuffer) {
414 		err = -ENOMEM;
415 		goto exit_rbuffer;
416 	}
417 
418 	err = erase_block();
419 	if (err)
420 		goto exit_error;
421 
422 	if (mode == 0)
423 		err = incremental_errors_test();
424 	else
425 		err = overwrite_test();
426 
427 	if (err)
428 		goto exit_error;
429 
430 	/* We leave the block un-erased in case of test failure. */
431 	err = erase_block();
432 	if (err)
433 		goto exit_error;
434 
435 	err = -EIO;
436 	pr_info("finished successfully.\n");
437 	printk(KERN_INFO "==================================================\n");
438 
439 exit_error:
440 	kfree(rbuffer);
441 exit_rbuffer:
442 	kfree(wbuffer);
443 exit_wbuffer:
444 	/* Nothing */
445 exit_nand:
446 	put_mtd_device(mtd);
447 exit_mtddev:
448 	return err;
449 }
450 
451 static void __exit mtd_nandbiterrs_exit(void)
452 {
453 	return;
454 }
455 
456 module_init(mtd_nandbiterrs_init);
457 module_exit(mtd_nandbiterrs_exit);
458 
459 MODULE_DESCRIPTION("NAND bit error recovery test");
460 MODULE_AUTHOR("Iwo Mergler");
461 MODULE_LICENSE("GPL");
462