xref: /openbmc/linux/drivers/mtd/tests/nandbiterrs.c (revision b34e08d5)
1 /*
2  * Copyright © 2012 NetCommWireless
3  * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
4  *
5  * Test for multi-bit error recovery on a NAND page. This mostly tests the
6  * ECC controller / driver.
7  *
8  * There are two test modes:
9  *
10  *	0 - artificially inserting bit errors until the ECC fails
11  *	    This is the default method and fairly quick. It should
12  *	    be independent of the quality of the FLASH.
13  *
14  *	1 - re-writing the same pattern repeatedly until the ECC fails.
15  *	    This method relies on the physics of NAND FLASH to eventually
16  *	    generate '0' bits if '1' has been written a sufficient number of times.
17  *	    Depending on the NAND, the first bit errors will appear after
18  *	    1000 or more writes and then will usually snowball, reaching the
19  *	    limits of the ECC quickly.
20  *
21  *	    The test stops after 10000 cycles, should your FLASH be
22  *	    exceptionally good and not generate bit errors before that. Try
23  *	    a different page in that case.
24  *
25  * Please note that neither of these tests will significantly 'use up' any
26  * FLASH endurance. Only a maximum of two erase operations will be performed.
27  *
28  *
29  * This program is free software; you can redistribute it and/or modify it
30  * under the terms of the GNU General Public License version 2 as published by
31  * the Free Software Foundation.
32  *
33  * This program is distributed in the hope that it will be useful, but WITHOUT
34  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
35  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
36  * more details.
37  *
38  * You should have received a copy of the GNU General Public License along with
39  * this program; see the file COPYING. If not, write to the Free Software
40  * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
41  */
42 
43 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
44 
45 #include <linux/init.h>
46 #include <linux/module.h>
47 #include <linux/moduleparam.h>
48 #include <linux/mtd/mtd.h>
49 #include <linux/err.h>
50 #include <linux/mtd/nand.h>
51 #include <linux/slab.h>
52 #include "mtd_test.h"
53 
54 static int dev;
55 module_param(dev, int, S_IRUGO);
56 MODULE_PARM_DESC(dev, "MTD device number to use");
57 
58 static unsigned page_offset;
59 module_param(page_offset, uint, S_IRUGO);
60 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
61 
62 static unsigned seed;
63 module_param(seed, uint, S_IRUGO);
64 MODULE_PARM_DESC(seed, "Random seed");
65 
66 static int mode;
67 module_param(mode, int, S_IRUGO);
68 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
69 
70 static unsigned max_overwrite = 10000;
71 
72 static loff_t   offset;     /* Offset of the page we're using. */
73 static unsigned eraseblock; /* Eraseblock number for our page. */
74 
75 /* We assume that the ECC can correct up to a certain number
76  * of biterrors per subpage. */
77 static unsigned subsize;  /* Size of subpages */
78 static unsigned subcount; /* Number of subpages per page */
79 
80 static struct mtd_info *mtd;   /* MTD device */
81 
82 static uint8_t *wbuffer; /* One page write / compare buffer */
83 static uint8_t *rbuffer; /* One page read buffer */
84 
/*
 * Deterministic pseudo-random byte for a given position.
 *
 * The position is XORed with a fixed constant, folded onto itself with
 * three right shifts, and the low byte of the result is returned with its
 * bit order reversed. The same input always yields the same byte.
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	uint8_t low;
	uint8_t reversed = 0;
	int bit;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;

	/* Mirror the low byte: bit 0 ends up in bit 7, bit 1 in bit 6, ... */
	low = mixed & 0xFF;
	for (bit = 0; bit < 8; bit++) {
		reversed = (uint8_t)((reversed << 1) | (low & 1));
		low >>= 1;
	}

	return reversed;
}
101 
102 /* Writes wbuffer to page */
103 static int write_page(int log)
104 {
105 	if (log)
106 		pr_info("write_page\n");
107 
108 	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
109 }
110 
111 /* Re-writes the data area while leaving the OOB alone. */
112 static int rewrite_page(int log)
113 {
114 	int err = 0;
115 	struct mtd_oob_ops ops;
116 
117 	if (log)
118 		pr_info("rewrite page\n");
119 
120 	ops.mode      = MTD_OPS_RAW; /* No ECC */
121 	ops.len       = mtd->writesize;
122 	ops.retlen    = 0;
123 	ops.ooblen    = 0;
124 	ops.oobretlen = 0;
125 	ops.ooboffs   = 0;
126 	ops.datbuf    = wbuffer;
127 	ops.oobbuf    = NULL;
128 
129 	err = mtd_write_oob(mtd, offset, &ops);
130 	if (err || ops.retlen != mtd->writesize) {
131 		pr_err("error: write_oob failed (%d)\n", err);
132 		if (!err)
133 			err = -EIO;
134 	}
135 
136 	return err;
137 }
138 
139 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
140  * or error (<0) */
141 static int read_page(int log)
142 {
143 	int err = 0;
144 	size_t read;
145 	struct mtd_ecc_stats oldstats;
146 
147 	if (log)
148 		pr_info("read_page\n");
149 
150 	/* Saving last mtd stats */
151 	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
152 
153 	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
154 	if (err == -EUCLEAN)
155 		err = mtd->ecc_stats.corrected - oldstats.corrected;
156 
157 	if (err < 0 || read != mtd->writesize) {
158 		pr_err("error: read failed at %#llx\n", (long long)offset);
159 		if (err >= 0)
160 			err = -EIO;
161 	}
162 
163 	return err;
164 }
165 
166 /* Verifies rbuffer against random sequence */
167 static int verify_page(int log)
168 {
169 	unsigned i, errs = 0;
170 
171 	if (log)
172 		pr_info("verify_page\n");
173 
174 	for (i = 0; i < mtd->writesize; i++) {
175 		if (rbuffer[i] != hash(i+seed)) {
176 			pr_err("Error: page offset %u, expected %02x, got %02x\n",
177 				i, hash(i+seed), rbuffer[i]);
178 			errs++;
179 		}
180 	}
181 
182 	if (errs)
183 		return -EIO;
184 	else
185 		return 0;
186 }
187 
188 #define CBIT(v, n) ((v) & (1 << (n)))
189 #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
190 
191 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
192  * and sets it to '0'. */
193 static int insert_biterror(unsigned byte)
194 {
195 	int bit;
196 
197 	while (byte < mtd->writesize) {
198 		for (bit = 7; bit >= 0; bit--) {
199 			if (CBIT(wbuffer[byte], bit)) {
200 				BCLR(wbuffer[byte], bit);
201 				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
202 				return 0;
203 			}
204 		}
205 		byte++;
206 	}
207 	pr_err("biterror: Failed to find a '1' bit\n");
208 	return -EIO;
209 }
210 
211 /* Writes 'random' data to page and then introduces deliberate bit
212  * errors into the page, while verifying each step. */
213 static int incremental_errors_test(void)
214 {
215 	int err = 0;
216 	unsigned i;
217 	unsigned errs_per_subpage = 0;
218 
219 	pr_info("incremental biterrors test\n");
220 
221 	for (i = 0; i < mtd->writesize; i++)
222 		wbuffer[i] = hash(i+seed);
223 
224 	err = write_page(1);
225 	if (err)
226 		goto exit;
227 
228 	while (1) {
229 
230 		err = rewrite_page(1);
231 		if (err)
232 			goto exit;
233 
234 		err = read_page(1);
235 		if (err > 0)
236 			pr_info("Read reported %d corrected bit errors\n", err);
237 		if (err < 0) {
238 			pr_err("After %d biterrors per subpage, read reported error %d\n",
239 				errs_per_subpage, err);
240 			err = 0;
241 			goto exit;
242 		}
243 
244 		err = verify_page(1);
245 		if (err) {
246 			pr_err("ECC failure, read data is incorrect despite read success\n");
247 			goto exit;
248 		}
249 
250 		pr_info("Successfully corrected %d bit errors per subpage\n",
251 			errs_per_subpage);
252 
253 		for (i = 0; i < subcount; i++) {
254 			err = insert_biterror(i * subsize);
255 			if (err < 0)
256 				goto exit;
257 		}
258 		errs_per_subpage++;
259 	}
260 
261 exit:
262 	return err;
263 }
264 
265 
266 /* Writes 'random' data to page and then re-writes that same data repeatedly.
267    This eventually develops bit errors (bits written as '1' will slowly become
268    '0'), which are corrected as far as the ECC is capable of. */
269 static int overwrite_test(void)
270 {
271 	int err = 0;
272 	unsigned i;
273 	unsigned max_corrected = 0;
274 	unsigned opno = 0;
275 	/* We don't expect more than this many correctable bit errors per
276 	 * page. */
277 	#define MAXBITS 512
278 	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
279 
280 	memset(bitstats, 0, sizeof(bitstats));
281 
282 	pr_info("overwrite biterrors test\n");
283 
284 	for (i = 0; i < mtd->writesize; i++)
285 		wbuffer[i] = hash(i+seed);
286 
287 	err = write_page(1);
288 	if (err)
289 		goto exit;
290 
291 	while (opno < max_overwrite) {
292 
293 		err = rewrite_page(0);
294 		if (err)
295 			break;
296 
297 		err = read_page(0);
298 		if (err >= 0) {
299 			if (err >= MAXBITS) {
300 				pr_info("Implausible number of bit errors corrected\n");
301 				err = -EIO;
302 				break;
303 			}
304 			bitstats[err]++;
305 			if (err > max_corrected) {
306 				max_corrected = err;
307 				pr_info("Read reported %d corrected bit errors\n",
308 					err);
309 			}
310 		} else { /* err < 0 */
311 			pr_info("Read reported error %d\n", err);
312 			err = 0;
313 			break;
314 		}
315 
316 		err = verify_page(0);
317 		if (err) {
318 			bitstats[max_corrected] = opno;
319 			pr_info("ECC failure, read data is incorrect despite read success\n");
320 			break;
321 		}
322 
323 		opno++;
324 	}
325 
326 	/* At this point bitstats[0] contains the number of ops with no bit
327 	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
328 	pr_info("Bit error histogram (%d operations total):\n", opno);
329 	for (i = 0; i < max_corrected; i++)
330 		pr_info("Page reads with %3d corrected bit errors: %d\n",
331 			i, bitstats[i]);
332 
333 exit:
334 	return err;
335 }
336 
337 static int __init mtd_nandbiterrs_init(void)
338 {
339 	int err = 0;
340 
341 	printk("\n");
342 	printk(KERN_INFO "==================================================\n");
343 	pr_info("MTD device: %d\n", dev);
344 
345 	mtd = get_mtd_device(NULL, dev);
346 	if (IS_ERR(mtd)) {
347 		err = PTR_ERR(mtd);
348 		pr_err("error: cannot get MTD device\n");
349 		goto exit_mtddev;
350 	}
351 
352 	if (!mtd_type_is_nand(mtd)) {
353 		pr_info("this test requires NAND flash\n");
354 		err = -ENODEV;
355 		goto exit_nand;
356 	}
357 
358 	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
359 		(unsigned long long)mtd->size, mtd->erasesize,
360 		mtd->writesize, mtd->oobsize);
361 
362 	subsize  = mtd->writesize >> mtd->subpage_sft;
363 	subcount = mtd->writesize / subsize;
364 
365 	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
366 
367 	offset     = page_offset * mtd->writesize;
368 	eraseblock = mtd_div_by_eb(offset, mtd);
369 
370 	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
371 		page_offset, offset, eraseblock);
372 
373 	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
374 	if (!wbuffer) {
375 		err = -ENOMEM;
376 		goto exit_wbuffer;
377 	}
378 
379 	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
380 	if (!rbuffer) {
381 		err = -ENOMEM;
382 		goto exit_rbuffer;
383 	}
384 
385 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
386 	if (err)
387 		goto exit_error;
388 
389 	if (mode == 0)
390 		err = incremental_errors_test();
391 	else
392 		err = overwrite_test();
393 
394 	if (err)
395 		goto exit_error;
396 
397 	/* We leave the block un-erased in case of test failure. */
398 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
399 	if (err)
400 		goto exit_error;
401 
402 	err = -EIO;
403 	pr_info("finished successfully.\n");
404 	printk(KERN_INFO "==================================================\n");
405 
406 exit_error:
407 	kfree(rbuffer);
408 exit_rbuffer:
409 	kfree(wbuffer);
410 exit_wbuffer:
411 	/* Nothing */
412 exit_nand:
413 	put_mtd_device(mtd);
414 exit_mtddev:
415 	return err;
416 }
417 
418 static void __exit mtd_nandbiterrs_exit(void)
419 {
420 	return;
421 }
422 
423 module_init(mtd_nandbiterrs_init);
424 module_exit(mtd_nandbiterrs_exit);
425 
426 MODULE_DESCRIPTION("NAND bit error recovery test");
427 MODULE_AUTHOR("Iwo Mergler");
428 MODULE_LICENSE("GPL");
429