1 /* 2 * Copyright © 2012 NetCommWireless 3 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au> 4 * 5 * Test for multi-bit error recovery on a NAND page This mostly tests the 6 * ECC controller / driver. 7 * 8 * There are two test modes: 9 * 10 * 0 - artificially inserting bit errors until the ECC fails 11 * This is the default method and fairly quick. It should 12 * be independent of the quality of the FLASH. 13 * 14 * 1 - re-writing the same pattern repeatedly until the ECC fails. 15 * This method relies on the physics of NAND FLASH to eventually 16 * generate '0' bits if '1' has been written sufficient times. 17 * Depending on the NAND, the first bit errors will appear after 18 * 1000 or more writes and then will usually snowball, reaching the 19 * limits of the ECC quickly. 20 * 21 * The test stops after 10000 cycles, should your FLASH be 22 * exceptionally good and not generate bit errors before that. Try 23 * a different page in that case. 24 * 25 * Please note that neither of these tests will significantly 'use up' any 26 * FLASH endurance. Only a maximum of two erase operations will be performed. 27 * 28 * 29 * This program is free software; you can redistribute it and/or modify it 30 * under the terms of the GNU General Public License version 2 as published by 31 * the Free Software Foundation. 32 * 33 * This program is distributed in the hope that it will be useful, but WITHOUT 34 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 35 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 36 * more details. 37 * 38 * You should have received a copy of the GNU General Public License along with 39 * this program; see the file COPYING. If not, write to the Free Software 40 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 41 */ 42 43 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44 45 #include <linux/init.h> 46 #include <linux/module.h> 47 #include <linux/moduleparam.h> 48 #include <linux/mtd/mtd.h> 49 #include <linux/err.h> 50 #include <linux/mtd/nand.h> 51 #include <linux/slab.h> 52 #include "mtd_test.h" 53 54 static int dev; 55 module_param(dev, int, S_IRUGO); 56 MODULE_PARM_DESC(dev, "MTD device number to use"); 57 58 static unsigned page_offset; 59 module_param(page_offset, uint, S_IRUGO); 60 MODULE_PARM_DESC(page_offset, "Page number relative to dev start"); 61 62 static unsigned seed; 63 module_param(seed, uint, S_IRUGO); 64 MODULE_PARM_DESC(seed, "Random seed"); 65 66 static int mode; 67 module_param(mode, int, S_IRUGO); 68 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test"); 69 70 static unsigned max_overwrite = 10000; 71 72 static loff_t offset; /* Offset of the page we're using. */ 73 static unsigned eraseblock; /* Eraseblock number for our page. */ 74 75 /* We assume that the ECC can correct up to a certain number 76 * of biterrors per subpage. */ 77 static unsigned subsize; /* Size of subpages */ 78 static unsigned subcount; /* Number of subpages per page */ 79 80 static struct mtd_info *mtd; /* MTD device */ 81 82 static uint8_t *wbuffer; /* One page write / compare buffer */ 83 static uint8_t *rbuffer; /* One page read buffer */ 84 85 /* 'random' bytes from known offsets */ 86 static uint8_t hash(unsigned offset) 87 { 88 unsigned v = offset; 89 unsigned char c; 90 v ^= 0x7f7edfd3; 91 v = v ^ (v >> 3); 92 v = v ^ (v >> 5); 93 v = v ^ (v >> 13); 94 c = v & 0xFF; 95 /* Reverse bits of result. */ 96 c = (c & 0x0F) << 4 | (c & 0xF0) >> 4; 97 c = (c & 0x33) << 2 | (c & 0xCC) >> 2; 98 c = (c & 0x55) << 1 | (c & 0xAA) >> 1; 99 return c; 100 } 101 102 /* Writes wbuffer to page */ 103 static int write_page(int log) 104 { 105 if (log) 106 pr_info("write_page\n"); 107 108 return mtdtest_write(mtd, offset, mtd->writesize, wbuffer); 109 } 110 111 /* Re-writes the data area while leaving the OOB alone. */ 112 static int rewrite_page(int log) 113 { 114 int err = 0; 115 struct mtd_oob_ops ops; 116 117 if (log) 118 pr_info("rewrite page\n"); 119 120 ops.mode = MTD_OPS_RAW; /* No ECC */ 121 ops.len = mtd->writesize; 122 ops.retlen = 0; 123 ops.ooblen = 0; 124 ops.oobretlen = 0; 125 ops.ooboffs = 0; 126 ops.datbuf = wbuffer; 127 ops.oobbuf = NULL; 128 129 err = mtd_write_oob(mtd, offset, &ops); 130 if (err || ops.retlen != mtd->writesize) { 131 pr_err("error: write_oob failed (%d)\n", err); 132 if (!err) 133 err = -EIO; 134 } 135 136 return err; 137 } 138 139 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0) 140 * or error (<0) */ 141 static int read_page(int log) 142 { 143 int err = 0; 144 size_t read; 145 struct mtd_ecc_stats oldstats; 146 147 if (log) 148 pr_info("read_page\n"); 149 150 /* Saving last mtd stats */ 151 memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats)); 152 153 err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer); 154 if (err == -EUCLEAN) 155 err = mtd->ecc_stats.corrected - oldstats.corrected; 156 157 if (err < 0 || read != mtd->writesize) { 158 pr_err("error: read failed at %#llx\n", (long long)offset); 159 if (err >= 0) 160 err = -EIO; 161 } 162 163 return err; 164 } 165 166 /* Verifies rbuffer against random sequence */ 167 static int verify_page(int log) 168 { 169 unsigned i, errs = 0; 170 171 if (log) 172 pr_info("verify_page\n"); 173 174 for (i = 0; i < mtd->writesize; i++) { 175 if (rbuffer[i] != hash(i+seed)) { 176 pr_err("Error: page offset %u, expected %02x, got %02x\n", 177 i, hash(i+seed), rbuffer[i]); 178 errs++; 179 } 180 } 181 182 if (errs) 183 return -EIO; 184 else 185 return 0; 186 } 187 188 #define CBIT(v, n) ((v) & (1 << (n))) 189 #define BCLR(v, n) ((v) = (v) & ~(1 << (n))) 190 191 /* Finds the first '1' bit in wbuffer starting at offset 'byte' 192 * and sets it to '0'. */ 193 static int insert_biterror(unsigned byte) 194 { 195 int bit; 196 197 while (byte < mtd->writesize) { 198 for (bit = 7; bit >= 0; bit--) { 199 if (CBIT(wbuffer[byte], bit)) { 200 BCLR(wbuffer[byte], bit); 201 pr_info("Inserted biterror @ %u/%u\n", byte, bit); 202 return 0; 203 } 204 } 205 byte++; 206 } 207 pr_err("biterror: Failed to find a '1' bit\n"); 208 return -EIO; 209 } 210 211 /* Writes 'random' data to page and then introduces deliberate bit 212 * errors into the page, while verifying each step. */ 213 static int incremental_errors_test(void) 214 { 215 int err = 0; 216 unsigned i; 217 unsigned errs_per_subpage = 0; 218 219 pr_info("incremental biterrors test\n"); 220 221 for (i = 0; i < mtd->writesize; i++) 222 wbuffer[i] = hash(i+seed); 223 224 err = write_page(1); 225 if (err) 226 goto exit; 227 228 while (1) { 229 230 err = rewrite_page(1); 231 if (err) 232 goto exit; 233 234 err = read_page(1); 235 if (err > 0) 236 pr_info("Read reported %d corrected bit errors\n", err); 237 if (err < 0) { 238 pr_err("After %d biterrors per subpage, read reported error %d\n", 239 errs_per_subpage, err); 240 err = 0; 241 goto exit; 242 } 243 244 err = verify_page(1); 245 if (err) { 246 pr_err("ECC failure, read data is incorrect despite read success\n"); 247 goto exit; 248 } 249 250 pr_info("Successfully corrected %d bit errors per subpage\n", 251 errs_per_subpage); 252 253 for (i = 0; i < subcount; i++) { 254 err = insert_biterror(i * subsize); 255 if (err < 0) 256 goto exit; 257 } 258 errs_per_subpage++; 259 } 260 261 exit: 262 return err; 263 } 264 265 266 /* Writes 'random' data to page and then re-writes that same data repeatedly. 267 This eventually develops bit errors (bits written as '1' will slowly become 268 '0'), which are corrected as far as the ECC is capable of. */ 269 static int overwrite_test(void) 270 { 271 int err = 0; 272 unsigned i; 273 unsigned max_corrected = 0; 274 unsigned opno = 0; 275 /* We don't expect more than this many correctable bit errors per 276 * page. */ 277 #define MAXBITS 512 278 static unsigned bitstats[MAXBITS]; /* bit error histogram. */ 279 280 memset(bitstats, 0, sizeof(bitstats)); 281 282 pr_info("overwrite biterrors test\n"); 283 284 for (i = 0; i < mtd->writesize; i++) 285 wbuffer[i] = hash(i+seed); 286 287 err = write_page(1); 288 if (err) 289 goto exit; 290 291 while (opno < max_overwrite) { 292 293 err = rewrite_page(0); 294 if (err) 295 break; 296 297 err = read_page(0); 298 if (err >= 0) { 299 if (err >= MAXBITS) { 300 pr_info("Implausible number of bit errors corrected\n"); 301 err = -EIO; 302 break; 303 } 304 bitstats[err]++; 305 if (err > max_corrected) { 306 max_corrected = err; 307 pr_info("Read reported %d corrected bit errors\n", 308 err); 309 } 310 } else { /* err < 0 */ 311 pr_info("Read reported error %d\n", err); 312 err = 0; 313 break; 314 } 315 316 err = verify_page(0); 317 if (err) { 318 bitstats[max_corrected] = opno; 319 pr_info("ECC failure, read data is incorrect despite read success\n"); 320 break; 321 } 322 323 opno++; 324 } 325 326 /* At this point bitstats[0] contains the number of ops with no bit 327 * errors, bitstats[1] the number of ops with 1 bit error, etc. */ 328 pr_info("Bit error histogram (%d operations total):\n", opno); 329 for (i = 0; i < max_corrected; i++) 330 pr_info("Page reads with %3d corrected bit errors: %d\n", 331 i, bitstats[i]); 332 333 exit: 334 return err; 335 } 336 337 static int __init mtd_nandbiterrs_init(void) 338 { 339 int err = 0; 340 341 printk("\n"); 342 printk(KERN_INFO "==================================================\n"); 343 pr_info("MTD device: %d\n", dev); 344 345 mtd = get_mtd_device(NULL, dev); 346 if (IS_ERR(mtd)) { 347 err = PTR_ERR(mtd); 348 pr_err("error: cannot get MTD device\n"); 349 goto exit_mtddev; 350 } 351 352 if (!mtd_type_is_nand(mtd)) { 353 pr_info("this test requires NAND flash\n"); 354 err = -ENODEV; 355 goto exit_nand; 356 } 357 358 pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n", 359 (unsigned long long)mtd->size, mtd->erasesize, 360 mtd->writesize, mtd->oobsize); 361 362 subsize = mtd->writesize >> mtd->subpage_sft; 363 subcount = mtd->writesize / subsize; 364 365 pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize); 366 367 offset = page_offset * mtd->writesize; 368 eraseblock = mtd_div_by_eb(offset, mtd); 369 370 pr_info("Using page=%u, offset=%llu, eraseblock=%u\n", 371 page_offset, offset, eraseblock); 372 373 wbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 374 if (!wbuffer) { 375 err = -ENOMEM; 376 goto exit_wbuffer; 377 } 378 379 rbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 380 if (!rbuffer) { 381 err = -ENOMEM; 382 goto exit_rbuffer; 383 } 384 385 err = mtdtest_erase_eraseblock(mtd, eraseblock); 386 if (err) 387 goto exit_error; 388 389 if (mode == 0) 390 err = incremental_errors_test(); 391 else 392 err = overwrite_test(); 393 394 if (err) 395 goto exit_error; 396 397 /* We leave the block un-erased in case of test failure. */ 398 err = mtdtest_erase_eraseblock(mtd, eraseblock); 399 if (err) 400 goto exit_error; 401 402 err = -EIO; 403 pr_info("finished successfully.\n"); 404 printk(KERN_INFO "==================================================\n"); 405 406 exit_error: 407 kfree(rbuffer); 408 exit_rbuffer: 409 kfree(wbuffer); 410 exit_wbuffer: 411 /* Nothing */ 412 exit_nand: 413 put_mtd_device(mtd); 414 exit_mtddev: 415 return err; 416 } 417 418 static void __exit mtd_nandbiterrs_exit(void) 419 { 420 return; 421 } 422 423 module_init(mtd_nandbiterrs_init); 424 module_exit(mtd_nandbiterrs_exit); 425 426 MODULE_DESCRIPTION("NAND bit error recovery test"); 427 MODULE_AUTHOR("Iwo Mergler"); 428 MODULE_LICENSE("GPL"); 429