1 /* 2 * Copyright © 2012 NetCommWireless 3 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au> 4 * 5 * Test for multi-bit error recovery on a NAND page This mostly tests the 6 * ECC controller / driver. 7 * 8 * There are two test modes: 9 * 10 * 0 - artificially inserting bit errors until the ECC fails 11 * This is the default method and fairly quick. It should 12 * be independent of the quality of the FLASH. 13 * 14 * 1 - re-writing the same pattern repeatedly until the ECC fails. 15 * This method relies on the physics of NAND FLASH to eventually 16 * generate '0' bits if '1' has been written sufficient times. 17 * Depending on the NAND, the first bit errors will appear after 18 * 1000 or more writes and then will usually snowball, reaching the 19 * limits of the ECC quickly. 20 * 21 * The test stops after 10000 cycles, should your FLASH be 22 * exceptionally good and not generate bit errors before that. Try 23 * a different page in that case. 24 * 25 * Please note that neither of these tests will significantly 'use up' any 26 * FLASH endurance. Only a maximum of two erase operations will be performed. 27 * 28 * 29 * This program is free software; you can redistribute it and/or modify it 30 * under the terms of the GNU General Public License version 2 as published by 31 * the Free Software Foundation. 32 * 33 * This program is distributed in the hope that it will be useful, but WITHOUT 34 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 35 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 36 * more details. 37 * 38 * You should have received a copy of the GNU General Public License along with 39 * this program; see the file COPYING. If not, write to the Free Software 40 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 41 */ 42 43 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 44 45 #include <linux/init.h> 46 #include <linux/module.h> 47 #include <linux/moduleparam.h> 48 #include <linux/mtd/mtd.h> 49 #include <linux/err.h> 50 #include <linux/mtd/nand.h> 51 #include <linux/slab.h> 52 53 static int dev; 54 module_param(dev, int, S_IRUGO); 55 MODULE_PARM_DESC(dev, "MTD device number to use"); 56 57 static unsigned page_offset; 58 module_param(page_offset, uint, S_IRUGO); 59 MODULE_PARM_DESC(page_offset, "Page number relative to dev start"); 60 61 static unsigned seed; 62 module_param(seed, uint, S_IRUGO); 63 MODULE_PARM_DESC(seed, "Random seed"); 64 65 static int mode; 66 module_param(mode, int, S_IRUGO); 67 MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test"); 68 69 static unsigned max_overwrite = 10000; 70 71 static loff_t offset; /* Offset of the page we're using. */ 72 static unsigned eraseblock; /* Eraseblock number for our page. */ 73 74 /* We assume that the ECC can correct up to a certain number 75 * of biterrors per subpage. */ 76 static unsigned subsize; /* Size of subpages */ 77 static unsigned subcount; /* Number of subpages per page */ 78 79 static struct mtd_info *mtd; /* MTD device */ 80 81 static uint8_t *wbuffer; /* One page write / compare buffer */ 82 static uint8_t *rbuffer; /* One page read buffer */ 83 84 /* 'random' bytes from known offsets */ 85 static uint8_t hash(unsigned offset) 86 { 87 unsigned v = offset; 88 unsigned char c; 89 v ^= 0x7f7edfd3; 90 v = v ^ (v >> 3); 91 v = v ^ (v >> 5); 92 v = v ^ (v >> 13); 93 c = v & 0xFF; 94 /* Reverse bits of result. */ 95 c = (c & 0x0F) << 4 | (c & 0xF0) >> 4; 96 c = (c & 0x33) << 2 | (c & 0xCC) >> 2; 97 c = (c & 0x55) << 1 | (c & 0xAA) >> 1; 98 return c; 99 } 100 101 static int erase_block(void) 102 { 103 int err; 104 struct erase_info ei; 105 loff_t addr = eraseblock * mtd->erasesize; 106 107 pr_info("erase_block\n"); 108 109 memset(&ei, 0, sizeof(struct erase_info)); 110 ei.mtd = mtd; 111 ei.addr = addr; 112 ei.len = mtd->erasesize; 113 114 err = mtd_erase(mtd, &ei); 115 if (err || ei.state == MTD_ERASE_FAILED) { 116 pr_err("error %d while erasing\n", err); 117 if (!err) 118 err = -EIO; 119 return err; 120 } 121 122 return 0; 123 } 124 125 /* Writes wbuffer to page */ 126 static int write_page(int log) 127 { 128 int err = 0; 129 size_t written; 130 131 if (log) 132 pr_info("write_page\n"); 133 134 err = mtd_write(mtd, offset, mtd->writesize, &written, wbuffer); 135 if (err || written != mtd->writesize) { 136 pr_err("error: write failed at %#llx\n", (long long)offset); 137 if (!err) 138 err = -EIO; 139 } 140 141 return err; 142 } 143 144 /* Re-writes the data area while leaving the OOB alone. */ 145 static int rewrite_page(int log) 146 { 147 int err = 0; 148 struct mtd_oob_ops ops; 149 150 if (log) 151 pr_info("rewrite page\n"); 152 153 ops.mode = MTD_OPS_RAW; /* No ECC */ 154 ops.len = mtd->writesize; 155 ops.retlen = 0; 156 ops.ooblen = 0; 157 ops.oobretlen = 0; 158 ops.ooboffs = 0; 159 ops.datbuf = wbuffer; 160 ops.oobbuf = NULL; 161 162 err = mtd_write_oob(mtd, offset, &ops); 163 if (err || ops.retlen != mtd->writesize) { 164 pr_err("error: write_oob failed (%d)\n", err); 165 if (!err) 166 err = -EIO; 167 } 168 169 return err; 170 } 171 172 /* Reads page into rbuffer. Returns number of corrected bit errors (>=0) 173 * or error (<0) */ 174 static int read_page(int log) 175 { 176 int err = 0; 177 size_t read; 178 struct mtd_ecc_stats oldstats; 179 180 if (log) 181 pr_info("read_page\n"); 182 183 /* Saving last mtd stats */ 184 memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats)); 185 186 err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer); 187 if (err == -EUCLEAN) 188 err = mtd->ecc_stats.corrected - oldstats.corrected; 189 190 if (err < 0 || read != mtd->writesize) { 191 pr_err("error: read failed at %#llx\n", (long long)offset); 192 if (err >= 0) 193 err = -EIO; 194 } 195 196 return err; 197 } 198 199 /* Verifies rbuffer against random sequence */ 200 static int verify_page(int log) 201 { 202 unsigned i, errs = 0; 203 204 if (log) 205 pr_info("verify_page\n"); 206 207 for (i = 0; i < mtd->writesize; i++) { 208 if (rbuffer[i] != hash(i+seed)) { 209 pr_err("Error: page offset %u, expected %02x, got %02x\n", 210 i, hash(i+seed), rbuffer[i]); 211 errs++; 212 } 213 } 214 215 if (errs) 216 return -EIO; 217 else 218 return 0; 219 } 220 221 #define CBIT(v, n) ((v) & (1 << (n))) 222 #define BCLR(v, n) ((v) = (v) & ~(1 << (n))) 223 224 /* Finds the first '1' bit in wbuffer starting at offset 'byte' 225 * and sets it to '0'. */ 226 static int insert_biterror(unsigned byte) 227 { 228 int bit; 229 230 while (byte < mtd->writesize) { 231 for (bit = 7; bit >= 0; bit--) { 232 if (CBIT(wbuffer[byte], bit)) { 233 BCLR(wbuffer[byte], bit); 234 pr_info("Inserted biterror @ %u/%u\n", byte, bit); 235 return 0; 236 } 237 } 238 byte++; 239 } 240 pr_err("biterror: Failed to find a '1' bit\n"); 241 return -EIO; 242 } 243 244 /* Writes 'random' data to page and then introduces deliberate bit 245 * errors into the page, while verifying each step. */ 246 static int incremental_errors_test(void) 247 { 248 int err = 0; 249 unsigned i; 250 unsigned errs_per_subpage = 0; 251 252 pr_info("incremental biterrors test\n"); 253 254 for (i = 0; i < mtd->writesize; i++) 255 wbuffer[i] = hash(i+seed); 256 257 err = write_page(1); 258 if (err) 259 goto exit; 260 261 while (1) { 262 263 err = rewrite_page(1); 264 if (err) 265 goto exit; 266 267 err = read_page(1); 268 if (err > 0) 269 pr_info("Read reported %d corrected bit errors\n", err); 270 if (err < 0) { 271 pr_err("After %d biterrors per subpage, read reported error %d\n", 272 errs_per_subpage, err); 273 err = 0; 274 goto exit; 275 } 276 277 err = verify_page(1); 278 if (err) { 279 pr_err("ECC failure, read data is incorrect despite read success\n"); 280 goto exit; 281 } 282 283 pr_info("Successfully corrected %d bit errors per subpage\n", 284 errs_per_subpage); 285 286 for (i = 0; i < subcount; i++) { 287 err = insert_biterror(i * subsize); 288 if (err < 0) 289 goto exit; 290 } 291 errs_per_subpage++; 292 } 293 294 exit: 295 return err; 296 } 297 298 299 /* Writes 'random' data to page and then re-writes that same data repeatedly. 300 This eventually develops bit errors (bits written as '1' will slowly become 301 '0'), which are corrected as far as the ECC is capable of. */ 302 static int overwrite_test(void) 303 { 304 int err = 0; 305 unsigned i; 306 unsigned max_corrected = 0; 307 unsigned opno = 0; 308 /* We don't expect more than this many correctable bit errors per 309 * page. */ 310 #define MAXBITS 512 311 static unsigned bitstats[MAXBITS]; /* bit error histogram. */ 312 313 memset(bitstats, 0, sizeof(bitstats)); 314 315 pr_info("overwrite biterrors test\n"); 316 317 for (i = 0; i < mtd->writesize; i++) 318 wbuffer[i] = hash(i+seed); 319 320 err = write_page(1); 321 if (err) 322 goto exit; 323 324 while (opno < max_overwrite) { 325 326 err = rewrite_page(0); 327 if (err) 328 break; 329 330 err = read_page(0); 331 if (err >= 0) { 332 if (err >= MAXBITS) { 333 pr_info("Implausible number of bit errors corrected\n"); 334 err = -EIO; 335 break; 336 } 337 bitstats[err]++; 338 if (err > max_corrected) { 339 max_corrected = err; 340 pr_info("Read reported %d corrected bit errors\n", 341 err); 342 } 343 } else { /* err < 0 */ 344 pr_info("Read reported error %d\n", err); 345 err = 0; 346 break; 347 } 348 349 err = verify_page(0); 350 if (err) { 351 bitstats[max_corrected] = opno; 352 pr_info("ECC failure, read data is incorrect despite read success\n"); 353 break; 354 } 355 356 opno++; 357 } 358 359 /* At this point bitstats[0] contains the number of ops with no bit 360 * errors, bitstats[1] the number of ops with 1 bit error, etc. */ 361 pr_info("Bit error histogram (%d operations total):\n", opno); 362 for (i = 0; i < max_corrected; i++) 363 pr_info("Page reads with %3d corrected bit errors: %d\n", 364 i, bitstats[i]); 365 366 exit: 367 return err; 368 } 369 370 static int __init mtd_nandbiterrs_init(void) 371 { 372 int err = 0; 373 374 printk("\n"); 375 printk(KERN_INFO "==================================================\n"); 376 pr_info("MTD device: %d\n", dev); 377 378 mtd = get_mtd_device(NULL, dev); 379 if (IS_ERR(mtd)) { 380 err = PTR_ERR(mtd); 381 pr_err("error: cannot get MTD device\n"); 382 goto exit_mtddev; 383 } 384 385 if (mtd->type != MTD_NANDFLASH) { 386 pr_info("this test requires NAND flash\n"); 387 err = -ENODEV; 388 goto exit_nand; 389 } 390 391 pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n", 392 (unsigned long long)mtd->size, mtd->erasesize, 393 mtd->writesize, mtd->oobsize); 394 395 subsize = mtd->writesize >> mtd->subpage_sft; 396 subcount = mtd->writesize / subsize; 397 398 pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize); 399 400 offset = page_offset * mtd->writesize; 401 eraseblock = mtd_div_by_eb(offset, mtd); 402 403 pr_info("Using page=%u, offset=%llu, eraseblock=%u\n", 404 page_offset, offset, eraseblock); 405 406 wbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 407 if (!wbuffer) { 408 err = -ENOMEM; 409 goto exit_wbuffer; 410 } 411 412 rbuffer = kmalloc(mtd->writesize, GFP_KERNEL); 413 if (!rbuffer) { 414 err = -ENOMEM; 415 goto exit_rbuffer; 416 } 417 418 err = erase_block(); 419 if (err) 420 goto exit_error; 421 422 if (mode == 0) 423 err = incremental_errors_test(); 424 else 425 err = overwrite_test(); 426 427 if (err) 428 goto exit_error; 429 430 /* We leave the block un-erased in case of test failure. */ 431 err = erase_block(); 432 if (err) 433 goto exit_error; 434 435 err = -EIO; 436 pr_info("finished successfully.\n"); 437 printk(KERN_INFO "==================================================\n"); 438 439 exit_error: 440 kfree(rbuffer); 441 exit_rbuffer: 442 kfree(wbuffer); 443 exit_wbuffer: 444 /* Nothing */ 445 exit_nand: 446 put_mtd_device(mtd); 447 exit_mtddev: 448 return err; 449 } 450 451 static void __exit mtd_nandbiterrs_exit(void) 452 { 453 return; 454 } 455 456 module_init(mtd_nandbiterrs_init); 457 module_exit(mtd_nandbiterrs_exit); 458 459 MODULE_DESCRIPTION("NAND bit error recovery test"); 460 MODULE_AUTHOR("Iwo Mergler"); 461 MODULE_LICENSE("GPL"); 462