xref: /openbmc/linux/drivers/mtd/tests/nandbiterrs.c (revision 745df179)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright © 2012 NetCommWireless
4  * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
5  *
6  * Test for multi-bit error recovery on a NAND page. This mostly tests the
7  * ECC controller / driver.
8  *
9  * There are two test modes:
10  *
11  *	0 - artificially inserting bit errors until the ECC fails
12  *	    This is the default method and fairly quick. It should
13  *	    be independent of the quality of the FLASH.
14  *
15  *	1 - re-writing the same pattern repeatedly until the ECC fails.
16  *	    This method relies on the physics of NAND FLASH to eventually
17  *	    generate '0' bits if '1' has been written sufficient times.
18  *	    Depending on the NAND, the first bit errors will appear after
19  *	    1000 or more writes and then will usually snowball, reaching the
20  *	    limits of the ECC quickly.
21  *
22  *	    The test stops after 10000 cycles, should your FLASH be
23  *	    exceptionally good and not generate bit errors before that. Try
24  *	    a different page in that case.
25  *
26  * Please note that neither of these tests will significantly 'use up' any
27  * FLASH endurance. Only a maximum of two erase operations will be performed.
28  */
29 
30 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
31 
32 #include <linux/init.h>
33 #include <linux/module.h>
34 #include <linux/moduleparam.h>
35 #include <linux/mtd/mtd.h>
36 #include <linux/err.h>
37 #include <linux/mtd/rawnand.h>
38 #include <linux/slab.h>
39 #include "mtd_test.h"
40 
41 static int dev;
42 module_param(dev, int, S_IRUGO);
43 MODULE_PARM_DESC(dev, "MTD device number to use");
44 
45 static unsigned page_offset;
46 module_param(page_offset, uint, S_IRUGO);
47 MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
48 
49 static unsigned seed;
50 module_param(seed, uint, S_IRUGO);
51 MODULE_PARM_DESC(seed, "Random seed");
52 
53 static int mode;
54 module_param(mode, int, S_IRUGO);
55 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
56 
57 static unsigned max_overwrite = 10000;
58 
59 static loff_t   offset;     /* Offset of the page we're using. */
60 static unsigned eraseblock; /* Eraseblock number for our page. */
61 
62 /* We assume that the ECC can correct up to a certain number
63  * of biterrors per subpage. */
64 static unsigned subsize;  /* Size of subpages */
65 static unsigned subcount; /* Number of subpages per page */
66 
67 static struct mtd_info *mtd;   /* MTD device */
68 
69 static uint8_t *wbuffer; /* One page write / compare buffer */
70 static uint8_t *rbuffer; /* One page read buffer */
71 
/*
 * 'Random' byte for a known offset.
 *
 * The offset is XORed with a fixed constant, folded by three xor-shift
 * steps, truncated to its low eight bits and finally bit-reversed. The
 * result depends only on the argument, so the same offset always produces
 * the same byte.
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	unsigned char byte;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;
	byte = mixed & 0xFF;

	/* Reverse the bit order: swap nibbles, then bit pairs, then neighbours. */
	byte = ((byte << 4) & 0xF0) | ((byte >> 4) & 0x0F);
	byte = ((byte << 2) & 0xCC) | ((byte >> 2) & 0x33);
	byte = ((byte << 1) & 0xAA) | ((byte >> 1) & 0x55);

	return byte;
}
88 
89 /* Writes wbuffer to page */
write_page(int log)90 static int write_page(int log)
91 {
92 	if (log)
93 		pr_info("write_page\n");
94 
95 	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
96 }
97 
98 /* Re-writes the data area while leaving the OOB alone. */
rewrite_page(int log)99 static int rewrite_page(int log)
100 {
101 	int err = 0;
102 	struct mtd_oob_ops ops = { };
103 
104 	if (log)
105 		pr_info("rewrite page\n");
106 
107 	ops.mode      = MTD_OPS_RAW; /* No ECC */
108 	ops.len       = mtd->writesize;
109 	ops.retlen    = 0;
110 	ops.ooblen    = 0;
111 	ops.oobretlen = 0;
112 	ops.ooboffs   = 0;
113 	ops.datbuf    = wbuffer;
114 	ops.oobbuf    = NULL;
115 
116 	err = mtd_write_oob(mtd, offset, &ops);
117 	if (err || ops.retlen != mtd->writesize) {
118 		pr_err("error: write_oob failed (%d)\n", err);
119 		if (!err)
120 			err = -EIO;
121 	}
122 
123 	return err;
124 }
125 
126 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
127  * or error (<0) */
read_page(int log)128 static int read_page(int log)
129 {
130 	int err = 0;
131 	size_t read;
132 	struct mtd_ecc_stats oldstats;
133 
134 	if (log)
135 		pr_info("read_page\n");
136 
137 	/* Saving last mtd stats */
138 	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
139 
140 	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
141 	if (!err || err == -EUCLEAN)
142 		err = mtd->ecc_stats.corrected - oldstats.corrected;
143 
144 	if (err < 0 || read != mtd->writesize) {
145 		pr_err("error: read failed at %#llx\n", (long long)offset);
146 		if (err >= 0)
147 			err = -EIO;
148 	}
149 
150 	return err;
151 }
152 
153 /* Verifies rbuffer against random sequence */
verify_page(int log)154 static int verify_page(int log)
155 {
156 	unsigned i, errs = 0;
157 
158 	if (log)
159 		pr_info("verify_page\n");
160 
161 	for (i = 0; i < mtd->writesize; i++) {
162 		if (rbuffer[i] != hash(i+seed)) {
163 			pr_err("Error: page offset %u, expected %02x, got %02x\n",
164 				i, hash(i+seed), rbuffer[i]);
165 			errs++;
166 		}
167 	}
168 
169 	if (errs)
170 		return -EIO;
171 	else
172 		return 0;
173 }
174 
/* Test bit n of v: yields the masked bit (non-zero if set, 0 if clear). */
#define CBIT(v, n) ((1 << (n)) & (v))
/* Clear bit n of v in place; v must be a side-effect-free lvalue. */
#define BCLR(v, n) ((v) &= ~(1 << (n)))
177 
178 /* Finds the first '1' bit in wbuffer starting at offset 'byte'
179  * and sets it to '0'. */
insert_biterror(unsigned byte)180 static int insert_biterror(unsigned byte)
181 {
182 	int bit;
183 
184 	while (byte < mtd->writesize) {
185 		for (bit = 7; bit >= 0; bit--) {
186 			if (CBIT(wbuffer[byte], bit)) {
187 				BCLR(wbuffer[byte], bit);
188 				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
189 				return 0;
190 			}
191 		}
192 		byte++;
193 	}
194 	pr_err("biterror: Failed to find a '1' bit\n");
195 	return -EIO;
196 }
197 
198 /* Writes 'random' data to page and then introduces deliberate bit
199  * errors into the page, while verifying each step. */
incremental_errors_test(void)200 static int incremental_errors_test(void)
201 {
202 	int err = 0;
203 	unsigned i;
204 	unsigned errs_per_subpage = 0;
205 
206 	pr_info("incremental biterrors test\n");
207 
208 	for (i = 0; i < mtd->writesize; i++)
209 		wbuffer[i] = hash(i+seed);
210 
211 	err = write_page(1);
212 	if (err)
213 		goto exit;
214 
215 	while (1) {
216 
217 		err = rewrite_page(1);
218 		if (err)
219 			goto exit;
220 
221 		err = read_page(1);
222 		if (err > 0)
223 			pr_info("Read reported %d corrected bit errors\n", err);
224 		if (err < 0) {
225 			pr_err("After %d biterrors per subpage, read reported error %d\n",
226 				errs_per_subpage, err);
227 			err = 0;
228 			goto exit;
229 		}
230 
231 		err = verify_page(1);
232 		if (err) {
233 			pr_err("ECC failure, read data is incorrect despite read success\n");
234 			goto exit;
235 		}
236 
237 		pr_info("Successfully corrected %d bit errors per subpage\n",
238 			errs_per_subpage);
239 
240 		for (i = 0; i < subcount; i++) {
241 			err = insert_biterror(i * subsize);
242 			if (err < 0)
243 				goto exit;
244 		}
245 		errs_per_subpage++;
246 	}
247 
248 exit:
249 	return err;
250 }
251 
252 
253 /* Writes 'random' data to page and then re-writes that same data repeatedly.
254    This eventually develops bit errors (bits written as '1' will slowly become
255    '0'), which are corrected as far as the ECC is capable of. */
overwrite_test(void)256 static int overwrite_test(void)
257 {
258 	int err = 0;
259 	unsigned i;
260 	unsigned max_corrected = 0;
261 	unsigned opno = 0;
262 	/* We don't expect more than this many correctable bit errors per
263 	 * page. */
264 	#define MAXBITS 512
265 	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
266 
267 	memset(bitstats, 0, sizeof(bitstats));
268 
269 	pr_info("overwrite biterrors test\n");
270 
271 	for (i = 0; i < mtd->writesize; i++)
272 		wbuffer[i] = hash(i+seed);
273 
274 	err = write_page(1);
275 	if (err)
276 		goto exit;
277 
278 	while (opno < max_overwrite) {
279 
280 		err = write_page(0);
281 		if (err)
282 			break;
283 
284 		err = read_page(0);
285 		if (err >= 0) {
286 			if (err >= MAXBITS) {
287 				pr_info("Implausible number of bit errors corrected\n");
288 				err = -EIO;
289 				break;
290 			}
291 			bitstats[err]++;
292 			if (err > max_corrected) {
293 				max_corrected = err;
294 				pr_info("Read reported %d corrected bit errors\n",
295 					err);
296 			}
297 		} else { /* err < 0 */
298 			pr_info("Read reported error %d\n", err);
299 			err = 0;
300 			break;
301 		}
302 
303 		err = verify_page(0);
304 		if (err) {
305 			bitstats[max_corrected] = opno;
306 			pr_info("ECC failure, read data is incorrect despite read success\n");
307 			break;
308 		}
309 
310 		err = mtdtest_relax();
311 		if (err)
312 			break;
313 
314 		opno++;
315 	}
316 
317 	/* At this point bitstats[0] contains the number of ops with no bit
318 	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
319 	pr_info("Bit error histogram (%d operations total):\n", opno);
320 	for (i = 0; i < max_corrected; i++)
321 		pr_info("Page reads with %3d corrected bit errors: %d\n",
322 			i, bitstats[i]);
323 
324 exit:
325 	return err;
326 }
327 
mtd_nandbiterrs_init(void)328 static int __init mtd_nandbiterrs_init(void)
329 {
330 	int err = 0;
331 
332 	printk("\n");
333 	printk(KERN_INFO "==================================================\n");
334 	pr_info("MTD device: %d\n", dev);
335 
336 	mtd = get_mtd_device(NULL, dev);
337 	if (IS_ERR(mtd)) {
338 		err = PTR_ERR(mtd);
339 		pr_err("error: cannot get MTD device\n");
340 		goto exit_mtddev;
341 	}
342 
343 	if (!mtd_type_is_nand(mtd)) {
344 		pr_info("this test requires NAND flash\n");
345 		err = -ENODEV;
346 		goto exit_nand;
347 	}
348 
349 	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
350 		(unsigned long long)mtd->size, mtd->erasesize,
351 		mtd->writesize, mtd->oobsize);
352 
353 	subsize  = mtd->writesize >> mtd->subpage_sft;
354 	subcount = mtd->writesize / subsize;
355 
356 	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
357 
358 	offset     = (loff_t)page_offset * mtd->writesize;
359 	eraseblock = mtd_div_by_eb(offset, mtd);
360 
361 	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
362 		page_offset, offset, eraseblock);
363 
364 	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
365 	if (!wbuffer) {
366 		err = -ENOMEM;
367 		goto exit_wbuffer;
368 	}
369 
370 	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
371 	if (!rbuffer) {
372 		err = -ENOMEM;
373 		goto exit_rbuffer;
374 	}
375 
376 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
377 	if (err)
378 		goto exit_error;
379 
380 	if (mode == 0)
381 		err = incremental_errors_test();
382 	else
383 		err = overwrite_test();
384 
385 	if (err)
386 		goto exit_error;
387 
388 	/* We leave the block un-erased in case of test failure. */
389 	err = mtdtest_erase_eraseblock(mtd, eraseblock);
390 	if (err)
391 		goto exit_error;
392 
393 	err = -EIO;
394 	pr_info("finished successfully.\n");
395 	printk(KERN_INFO "==================================================\n");
396 
397 exit_error:
398 	kfree(rbuffer);
399 exit_rbuffer:
400 	kfree(wbuffer);
401 exit_wbuffer:
402 	/* Nothing */
403 exit_nand:
404 	put_mtd_device(mtd);
405 exit_mtddev:
406 	return err;
407 }
408 
mtd_nandbiterrs_exit(void)409 static void __exit mtd_nandbiterrs_exit(void)
410 {
411 	return;
412 }
413 
414 module_init(mtd_nandbiterrs_init);
415 module_exit(mtd_nandbiterrs_exit);
416 
417 MODULE_DESCRIPTION("NAND bit error recovery test");
418 MODULE_AUTHOR("Iwo Mergler");
419 MODULE_LICENSE("GPL");
420