xref: /openbmc/linux/drivers/mtd/tests/nandbiterrs.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * Copyright © 2012 NetCommWireless
4   * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
5   *
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
8   *
9   * There are two test modes:
10   *
11   *	0 - artificially inserting bit errors until the ECC fails
12   *	    This is the default method and fairly quick. It should
13   *	    be independent of the quality of the FLASH.
14   *
15   *	1 - re-writing the same pattern repeatedly until the ECC fails.
16   *	    This method relies on the physics of NAND FLASH to eventually
17   *	    generate '0' bits if '1' has been written sufficient times.
18   *	    Depending on the NAND, the first bit errors will appear after
19   *	    1000 or more writes and then will usually snowball, reaching the
20   *	    limits of the ECC quickly.
21   *
22   *	    The test stops after 10000 cycles, should your FLASH be
23   *	    exceptionally good and not generate bit errors before that. Try
24   *	    a different page in that case.
25   *
26   * Please note that neither of these tests will significantly 'use up' any
27   * FLASH endurance. Only a maximum of two erase operations will be performed.
28   */
29  
30  #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
31  
32  #include <linux/init.h>
33  #include <linux/module.h>
34  #include <linux/moduleparam.h>
35  #include <linux/mtd/mtd.h>
36  #include <linux/err.h>
37  #include <linux/mtd/rawnand.h>
38  #include <linux/slab.h>
39  #include "mtd_test.h"
40  
41  static int dev;
42  module_param(dev, int, S_IRUGO);
43  MODULE_PARM_DESC(dev, "MTD device number to use");
44  
45  static unsigned page_offset;
46  module_param(page_offset, uint, S_IRUGO);
47  MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
48  
49  static unsigned seed;
50  module_param(seed, uint, S_IRUGO);
51  MODULE_PARM_DESC(seed, "Random seed");
52  
53  static int mode;
54  module_param(mode, int, S_IRUGO);
55  MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
56  
57  static unsigned max_overwrite = 10000;
58  
59  static loff_t   offset;     /* Offset of the page we're using. */
60  static unsigned eraseblock; /* Eraseblock number for our page. */
61  
62  /* We assume that the ECC can correct up to a certain number
63   * of biterrors per subpage. */
64  static unsigned subsize;  /* Size of subpages */
65  static unsigned subcount; /* Number of subpages per page */
66  
67  static struct mtd_info *mtd;   /* MTD device */
68  
69  static uint8_t *wbuffer; /* One page write / compare buffer */
70  static uint8_t *rbuffer; /* One page read buffer */
71  
/*
 * Deterministic 'random' byte for a given offset.
 *
 * The offset is XORed with a fixed constant and run through three
 * xor-shift steps; the low byte of the result is then returned with its
 * bit order reversed (bit 0 becomes bit 7 and so on).
 */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	unsigned char low;
	unsigned char reversed = 0;
	int bit;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;
	low = mixed & 0xFF;

	/* Mirror the byte: bit k of 'low' lands on bit (7 - k). */
	for (bit = 0; bit < 8; bit++) {
		if (low & (1u << bit))
			reversed |= 0x80u >> bit;
	}

	return reversed;
}
88  
89  /* Writes wbuffer to page */
write_page(int log)90  static int write_page(int log)
91  {
92  	if (log)
93  		pr_info("write_page\n");
94  
95  	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
96  }
97  
98  /* Re-writes the data area while leaving the OOB alone. */
rewrite_page(int log)99  static int rewrite_page(int log)
100  {
101  	int err = 0;
102  	struct mtd_oob_ops ops = { };
103  
104  	if (log)
105  		pr_info("rewrite page\n");
106  
107  	ops.mode      = MTD_OPS_RAW; /* No ECC */
108  	ops.len       = mtd->writesize;
109  	ops.retlen    = 0;
110  	ops.ooblen    = 0;
111  	ops.oobretlen = 0;
112  	ops.ooboffs   = 0;
113  	ops.datbuf    = wbuffer;
114  	ops.oobbuf    = NULL;
115  
116  	err = mtd_write_oob(mtd, offset, &ops);
117  	if (err || ops.retlen != mtd->writesize) {
118  		pr_err("error: write_oob failed (%d)\n", err);
119  		if (!err)
120  			err = -EIO;
121  	}
122  
123  	return err;
124  }
125  
126  /* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
127   * or error (<0) */
read_page(int log)128  static int read_page(int log)
129  {
130  	int err = 0;
131  	size_t read;
132  	struct mtd_ecc_stats oldstats;
133  
134  	if (log)
135  		pr_info("read_page\n");
136  
137  	/* Saving last mtd stats */
138  	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
139  
140  	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
141  	if (!err || err == -EUCLEAN)
142  		err = mtd->ecc_stats.corrected - oldstats.corrected;
143  
144  	if (err < 0 || read != mtd->writesize) {
145  		pr_err("error: read failed at %#llx\n", (long long)offset);
146  		if (err >= 0)
147  			err = -EIO;
148  	}
149  
150  	return err;
151  }
152  
153  /* Verifies rbuffer against random sequence */
verify_page(int log)154  static int verify_page(int log)
155  {
156  	unsigned i, errs = 0;
157  
158  	if (log)
159  		pr_info("verify_page\n");
160  
161  	for (i = 0; i < mtd->writesize; i++) {
162  		if (rbuffer[i] != hash(i+seed)) {
163  			pr_err("Error: page offset %u, expected %02x, got %02x\n",
164  				i, hash(i+seed), rbuffer[i]);
165  			errs++;
166  		}
167  	}
168  
169  	if (errs)
170  		return -EIO;
171  	else
172  		return 0;
173  }
174  
175  #define CBIT(v, n) ((v) & (1 << (n)))
176  #define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
177  
178  /* Finds the first '1' bit in wbuffer starting at offset 'byte'
179   * and sets it to '0'. */
insert_biterror(unsigned byte)180  static int insert_biterror(unsigned byte)
181  {
182  	int bit;
183  
184  	while (byte < mtd->writesize) {
185  		for (bit = 7; bit >= 0; bit--) {
186  			if (CBIT(wbuffer[byte], bit)) {
187  				BCLR(wbuffer[byte], bit);
188  				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
189  				return 0;
190  			}
191  		}
192  		byte++;
193  	}
194  	pr_err("biterror: Failed to find a '1' bit\n");
195  	return -EIO;
196  }
197  
198  /* Writes 'random' data to page and then introduces deliberate bit
199   * errors into the page, while verifying each step. */
incremental_errors_test(void)200  static int incremental_errors_test(void)
201  {
202  	int err = 0;
203  	unsigned i;
204  	unsigned errs_per_subpage = 0;
205  
206  	pr_info("incremental biterrors test\n");
207  
208  	for (i = 0; i < mtd->writesize; i++)
209  		wbuffer[i] = hash(i+seed);
210  
211  	err = write_page(1);
212  	if (err)
213  		goto exit;
214  
215  	while (1) {
216  
217  		err = rewrite_page(1);
218  		if (err)
219  			goto exit;
220  
221  		err = read_page(1);
222  		if (err > 0)
223  			pr_info("Read reported %d corrected bit errors\n", err);
224  		if (err < 0) {
225  			pr_err("After %d biterrors per subpage, read reported error %d\n",
226  				errs_per_subpage, err);
227  			err = 0;
228  			goto exit;
229  		}
230  
231  		err = verify_page(1);
232  		if (err) {
233  			pr_err("ECC failure, read data is incorrect despite read success\n");
234  			goto exit;
235  		}
236  
237  		pr_info("Successfully corrected %d bit errors per subpage\n",
238  			errs_per_subpage);
239  
240  		for (i = 0; i < subcount; i++) {
241  			err = insert_biterror(i * subsize);
242  			if (err < 0)
243  				goto exit;
244  		}
245  		errs_per_subpage++;
246  	}
247  
248  exit:
249  	return err;
250  }
251  
252  
253  /* Writes 'random' data to page and then re-writes that same data repeatedly.
254     This eventually develops bit errors (bits written as '1' will slowly become
255     '0'), which are corrected as far as the ECC is capable of. */
overwrite_test(void)256  static int overwrite_test(void)
257  {
258  	int err = 0;
259  	unsigned i;
260  	unsigned max_corrected = 0;
261  	unsigned opno = 0;
262  	/* We don't expect more than this many correctable bit errors per
263  	 * page. */
264  	#define MAXBITS 512
265  	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
266  
267  	memset(bitstats, 0, sizeof(bitstats));
268  
269  	pr_info("overwrite biterrors test\n");
270  
271  	for (i = 0; i < mtd->writesize; i++)
272  		wbuffer[i] = hash(i+seed);
273  
274  	err = write_page(1);
275  	if (err)
276  		goto exit;
277  
278  	while (opno < max_overwrite) {
279  
280  		err = write_page(0);
281  		if (err)
282  			break;
283  
284  		err = read_page(0);
285  		if (err >= 0) {
286  			if (err >= MAXBITS) {
287  				pr_info("Implausible number of bit errors corrected\n");
288  				err = -EIO;
289  				break;
290  			}
291  			bitstats[err]++;
292  			if (err > max_corrected) {
293  				max_corrected = err;
294  				pr_info("Read reported %d corrected bit errors\n",
295  					err);
296  			}
297  		} else { /* err < 0 */
298  			pr_info("Read reported error %d\n", err);
299  			err = 0;
300  			break;
301  		}
302  
303  		err = verify_page(0);
304  		if (err) {
305  			bitstats[max_corrected] = opno;
306  			pr_info("ECC failure, read data is incorrect despite read success\n");
307  			break;
308  		}
309  
310  		err = mtdtest_relax();
311  		if (err)
312  			break;
313  
314  		opno++;
315  	}
316  
317  	/* At this point bitstats[0] contains the number of ops with no bit
318  	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
319  	pr_info("Bit error histogram (%d operations total):\n", opno);
320  	for (i = 0; i < max_corrected; i++)
321  		pr_info("Page reads with %3d corrected bit errors: %d\n",
322  			i, bitstats[i]);
323  
324  exit:
325  	return err;
326  }
327  
mtd_nandbiterrs_init(void)328  static int __init mtd_nandbiterrs_init(void)
329  {
330  	int err = 0;
331  
332  	printk("\n");
333  	printk(KERN_INFO "==================================================\n");
334  	pr_info("MTD device: %d\n", dev);
335  
336  	mtd = get_mtd_device(NULL, dev);
337  	if (IS_ERR(mtd)) {
338  		err = PTR_ERR(mtd);
339  		pr_err("error: cannot get MTD device\n");
340  		goto exit_mtddev;
341  	}
342  
343  	if (!mtd_type_is_nand(mtd)) {
344  		pr_info("this test requires NAND flash\n");
345  		err = -ENODEV;
346  		goto exit_nand;
347  	}
348  
349  	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
350  		(unsigned long long)mtd->size, mtd->erasesize,
351  		mtd->writesize, mtd->oobsize);
352  
353  	subsize  = mtd->writesize >> mtd->subpage_sft;
354  	subcount = mtd->writesize / subsize;
355  
356  	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
357  
358  	offset     = (loff_t)page_offset * mtd->writesize;
359  	eraseblock = mtd_div_by_eb(offset, mtd);
360  
361  	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
362  		page_offset, offset, eraseblock);
363  
364  	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
365  	if (!wbuffer) {
366  		err = -ENOMEM;
367  		goto exit_wbuffer;
368  	}
369  
370  	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
371  	if (!rbuffer) {
372  		err = -ENOMEM;
373  		goto exit_rbuffer;
374  	}
375  
376  	err = mtdtest_erase_eraseblock(mtd, eraseblock);
377  	if (err)
378  		goto exit_error;
379  
380  	if (mode == 0)
381  		err = incremental_errors_test();
382  	else
383  		err = overwrite_test();
384  
385  	if (err)
386  		goto exit_error;
387  
388  	/* We leave the block un-erased in case of test failure. */
389  	err = mtdtest_erase_eraseblock(mtd, eraseblock);
390  	if (err)
391  		goto exit_error;
392  
393  	err = -EIO;
394  	pr_info("finished successfully.\n");
395  	printk(KERN_INFO "==================================================\n");
396  
397  exit_error:
398  	kfree(rbuffer);
399  exit_rbuffer:
400  	kfree(wbuffer);
401  exit_wbuffer:
402  	/* Nothing */
403  exit_nand:
404  	put_mtd_device(mtd);
405  exit_mtddev:
406  	return err;
407  }
408  
mtd_nandbiterrs_exit(void)409  static void __exit mtd_nandbiterrs_exit(void)
410  {
411  	return;
412  }
413  
414  module_init(mtd_nandbiterrs_init);
415  module_exit(mtd_nandbiterrs_exit);
416  
417  MODULE_DESCRIPTION("NAND bit error recovery test");
418  MODULE_AUTHOR("Iwo Mergler");
419  MODULE_LICENSE("GPL");
420