1 /* 2 * Swap block device support for MTDs 3 * Turns an MTD device into a swap device with block wear leveling 4 * 5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 6 * 7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 8 * 9 * Based on Richard Purdie's earlier implementation in 2007. Background 10 * support and lock-less operation written by Adrian Hunter. 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, but 17 * WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 * 02110-1301 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/mtd/mtd.h> 30 #include <linux/mtd/blktrans.h> 31 #include <linux/rbtree.h> 32 #include <linux/sched.h> 33 #include <linux/slab.h> 34 #include <linux/vmalloc.h> 35 #include <linux/genhd.h> 36 #include <linux/swap.h> 37 #include <linux/debugfs.h> 38 #include <linux/seq_file.h> 39 #include <linux/device.h> 40 #include <linux/math64.h> 41 42 #define MTDSWAP_PREFIX "mtdswap" 43 44 /* 45 * The number of free eraseblocks when GC should stop 46 */ 47 #define CLEAN_BLOCK_THRESHOLD 20 48 49 /* 50 * Number of free eraseblocks below which GC can also collect low frag 51 * blocks. 52 */ 53 #define LOW_FRAG_GC_TRESHOLD 5 54 55 /* 56 * Wear level cost amortization. We want to do wear leveling on the background 57 * without disturbing gc too much. This is made by defining max GC frequency. 58 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 59 * on the biggest wear difference rather than the biggest dirtiness. 60 * 61 * The lower freq2 should be chosen so that it makes sure the maximum erase 62 * difference will decrease even if a malicious application is deliberately 63 * trying to make erase differences large. 64 */ 65 #define MAX_ERASE_DIFF 4000 66 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 67 #define COLLECT_NONDIRTY_FREQ1 6 68 #define COLLECT_NONDIRTY_FREQ2 4 69 70 #define PAGE_UNDEF UINT_MAX 71 #define BLOCK_UNDEF UINT_MAX 72 #define BLOCK_ERROR (UINT_MAX - 1) 73 #define BLOCK_MAX (UINT_MAX - 2) 74 75 #define EBLOCK_BAD (1 << 0) 76 #define EBLOCK_NOMAGIC (1 << 1) 77 #define EBLOCK_BITFLIP (1 << 2) 78 #define EBLOCK_FAILED (1 << 3) 79 #define EBLOCK_READERR (1 << 4) 80 #define EBLOCK_IDX_SHIFT 5 81 82 struct swap_eb { 83 struct rb_node rb; 84 struct rb_root *root; 85 86 unsigned int flags; 87 unsigned int active_count; 88 unsigned int erase_count; 89 unsigned int pad; /* speeds up pointer decrement */ 90 }; 91 92 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 93 rb)->erase_count) 94 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 95 rb)->erase_count) 96 97 struct mtdswap_tree { 98 struct rb_root root; 99 unsigned int count; 100 }; 101 102 enum { 103 MTDSWAP_CLEAN, 104 MTDSWAP_USED, 105 MTDSWAP_LOWFRAG, 106 MTDSWAP_HIFRAG, 107 MTDSWAP_DIRTY, 108 MTDSWAP_BITFLIP, 109 MTDSWAP_FAILING, 110 MTDSWAP_TREE_CNT, 111 }; 112 113 struct mtdswap_dev { 114 struct mtd_blktrans_dev *mbd_dev; 115 struct mtd_info *mtd; 116 struct device *dev; 117 118 unsigned int *page_data; 119 unsigned int *revmap; 120 121 unsigned int eblks; 122 unsigned int spare_eblks; 123 unsigned int pages_per_eblk; 124 unsigned int max_erase_count; 125 struct swap_eb *eb_data; 126 127 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 128 129 unsigned long long sect_read_count; 130 unsigned long long sect_write_count; 131 unsigned long long mtd_write_count; 132 unsigned long long mtd_read_count; 133 unsigned long long discard_count; 134 unsigned long long discard_page_count; 135 136 unsigned int curr_write_pos; 137 struct swap_eb *curr_write; 138 139 char *page_buf; 140 char *oob_buf; 141 142 struct dentry *debugfs_root; 143 }; 144 145 struct mtdswap_oobdata { 146 __le16 magic; 147 __le32 count; 148 } __packed; 149 150 #define MTDSWAP_MAGIC_CLEAN 0x2095 151 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 152 #define MTDSWAP_TYPE_CLEAN 0 153 #define MTDSWAP_TYPE_DIRTY 1 154 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 155 156 #define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 157 #define MTDSWAP_IO_RETRIES 3 158 159 enum { 160 MTDSWAP_SCANNED_CLEAN, 161 MTDSWAP_SCANNED_DIRTY, 162 MTDSWAP_SCANNED_BITFLIP, 163 MTDSWAP_SCANNED_BAD, 164 }; 165 166 /* 167 * In the worst case mtdswap_writesect() has allocated the last clean 168 * page from the current block and is then pre-empted by the GC 169 * thread. The thread can consume a full erase block when moving a 170 * block. 171 */ 172 #define MIN_SPARE_EBLOCKS 2 173 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 174 175 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 176 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 177 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 178 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 179 180 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 181 182 static char partitions[128] = ""; 183 module_param_string(partitions, partitions, sizeof(partitions), 0444); 184 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 185 "partitions=\"1,3,5\""); 186 187 static unsigned int spare_eblocks = 10; 188 module_param(spare_eblocks, uint, 0444); 189 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 190 "garbage collection (default 10%)"); 191 192 static bool header; /* false */ 193 module_param(header, bool, 0444); 194 MODULE_PARM_DESC(header, 195 "Include builtin swap header (default 0, without header)"); 196 197 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 198 199 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 200 { 201 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 202 } 203 204 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 205 { 206 unsigned int oldidx; 207 struct mtdswap_tree *tp; 208 209 if (eb->root) { 210 tp = container_of(eb->root, struct mtdswap_tree, root); 211 oldidx = tp - &d->trees[0]; 212 213 d->trees[oldidx].count--; 214 rb_erase(&eb->rb, eb->root); 215 } 216 } 217 218 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 219 { 220 struct rb_node **p, *parent = NULL; 221 struct swap_eb *cur; 222 223 p = &root->rb_node; 224 while (*p) { 225 parent = *p; 226 cur = rb_entry(parent, struct swap_eb, rb); 227 if (eb->erase_count > cur->erase_count) 228 p = &(*p)->rb_right; 229 else 230 p = &(*p)->rb_left; 231 } 232 233 rb_link_node(&eb->rb, parent, p); 234 rb_insert_color(&eb->rb, root); 235 } 236 237 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 238 { 239 struct rb_root *root; 240 241 if (eb->root == &d->trees[idx].root) 242 return; 243 244 mtdswap_eb_detach(d, eb); 245 root = &d->trees[idx].root; 246 __mtdswap_rb_add(root, eb); 247 eb->root = root; 248 d->trees[idx].count++; 249 } 250 251 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 252 { 253 struct rb_node *p; 254 unsigned int i; 255 256 p = rb_first(root); 257 i = 0; 258 while (i < idx && p) { 259 p = rb_next(p); 260 i++; 261 } 262 263 return p; 264 } 265 266 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 267 { 268 int ret; 269 loff_t offset; 270 271 d->spare_eblks--; 272 eb->flags |= EBLOCK_BAD; 273 mtdswap_eb_detach(d, eb); 274 eb->root = NULL; 275 276 /* badblocks not supported */ 277 if (!mtd_can_have_bb(d->mtd)) 278 return 1; 279 280 offset = mtdswap_eb_offset(d, eb); 281 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 282 ret = mtd_block_markbad(d->mtd, offset); 283 284 if (ret) { 285 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 286 "error %d\n", offset, ret); 287 return ret; 288 } 289 290 return 1; 291 292 } 293 294 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 295 { 296 unsigned int marked = eb->flags & EBLOCK_FAILED; 297 struct swap_eb *curr_write = d->curr_write; 298 299 eb->flags |= EBLOCK_FAILED; 300 if (curr_write == eb) { 301 d->curr_write = NULL; 302 303 if (!marked && d->curr_write_pos != 0) { 304 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 305 return 0; 306 } 307 } 308 309 return mtdswap_handle_badblock(d, eb); 310 } 311 312 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 313 struct mtd_oob_ops *ops) 314 { 315 int ret = mtd_read_oob(d->mtd, from, ops); 316 317 if (mtd_is_bitflip(ret)) 318 return ret; 319 320 if (ret) { 321 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 322 ret, from); 323 return ret; 324 } 325 326 if (ops->oobretlen < ops->ooblen) { 327 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 328 "%zd) for block at %08llx\n", 329 ops->oobretlen, ops->ooblen, from); 330 return -EIO; 331 } 332 333 return 0; 334 } 335 336 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 337 { 338 struct mtdswap_oobdata *data, *data2; 339 int ret; 340 loff_t offset; 341 struct mtd_oob_ops ops; 342 343 offset = mtdswap_eb_offset(d, eb); 344 345 /* Check first if the block is bad. */ 346 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 347 return MTDSWAP_SCANNED_BAD; 348 349 ops.ooblen = 2 * d->mtd->oobavail; 350 ops.oobbuf = d->oob_buf; 351 ops.ooboffs = 0; 352 ops.datbuf = NULL; 353 ops.mode = MTD_OPS_AUTO_OOB; 354 355 ret = mtdswap_read_oob(d, offset, &ops); 356 357 if (ret && !mtd_is_bitflip(ret)) 358 return ret; 359 360 data = (struct mtdswap_oobdata *)d->oob_buf; 361 data2 = (struct mtdswap_oobdata *) 362 (d->oob_buf + d->mtd->oobavail); 363 364 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 365 eb->erase_count = le32_to_cpu(data->count); 366 if (mtd_is_bitflip(ret)) 367 ret = MTDSWAP_SCANNED_BITFLIP; 368 else { 369 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 370 ret = MTDSWAP_SCANNED_DIRTY; 371 else 372 ret = MTDSWAP_SCANNED_CLEAN; 373 } 374 } else { 375 eb->flags |= EBLOCK_NOMAGIC; 376 ret = MTDSWAP_SCANNED_DIRTY; 377 } 378 379 return ret; 380 } 381 382 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 383 u16 marker) 384 { 385 struct mtdswap_oobdata n; 386 int ret; 387 loff_t offset; 388 struct mtd_oob_ops ops; 389 390 ops.ooboffs = 0; 391 ops.oobbuf = (uint8_t *)&n; 392 ops.mode = MTD_OPS_AUTO_OOB; 393 ops.datbuf = NULL; 394 395 if (marker == MTDSWAP_TYPE_CLEAN) { 396 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 397 n.count = cpu_to_le32(eb->erase_count); 398 ops.ooblen = MTDSWAP_OOBSIZE; 399 offset = mtdswap_eb_offset(d, eb); 400 } else { 401 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 402 ops.ooblen = sizeof(n.magic); 403 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 404 } 405 406 ret = mtd_write_oob(d->mtd, offset, &ops); 407 408 if (ret) { 409 dev_warn(d->dev, "Write OOB failed for block at %08llx " 410 "error %d\n", offset, ret); 411 if (ret == -EIO || mtd_is_eccerr(ret)) 412 mtdswap_handle_write_error(d, eb); 413 return ret; 414 } 415 416 if (ops.oobretlen != ops.ooblen) { 417 dev_warn(d->dev, "Short OOB write for block at %08llx: " 418 "%zd not %zd\n", 419 offset, ops.oobretlen, ops.ooblen); 420 return ret; 421 } 422 423 return 0; 424 } 425 426 /* 427 * Are there any erase blocks without MAGIC_CLEAN header, presumably 428 * because power was cut off after erase but before header write? We 429 * need to guestimate the erase count. 430 */ 431 static void mtdswap_check_counts(struct mtdswap_dev *d) 432 { 433 struct rb_root hist_root = RB_ROOT; 434 struct rb_node *medrb; 435 struct swap_eb *eb; 436 unsigned int i, cnt, median; 437 438 cnt = 0; 439 for (i = 0; i < d->eblks; i++) { 440 eb = d->eb_data + i; 441 442 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 443 continue; 444 445 __mtdswap_rb_add(&hist_root, eb); 446 cnt++; 447 } 448 449 if (cnt == 0) 450 return; 451 452 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 453 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 454 455 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 456 457 for (i = 0; i < d->eblks; i++) { 458 eb = d->eb_data + i; 459 460 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 461 eb->erase_count = median; 462 463 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 464 continue; 465 466 rb_erase(&eb->rb, &hist_root); 467 } 468 } 469 470 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 471 { 472 int status; 473 unsigned int i, idx; 474 struct swap_eb *eb; 475 476 for (i = 0; i < d->eblks; i++) { 477 eb = d->eb_data + i; 478 479 status = mtdswap_read_markers(d, eb); 480 if (status < 0) 481 eb->flags |= EBLOCK_READERR; 482 else if (status == MTDSWAP_SCANNED_BAD) { 483 eb->flags |= EBLOCK_BAD; 484 continue; 485 } 486 487 switch (status) { 488 case MTDSWAP_SCANNED_CLEAN: 489 idx = MTDSWAP_CLEAN; 490 break; 491 case MTDSWAP_SCANNED_DIRTY: 492 case MTDSWAP_SCANNED_BITFLIP: 493 idx = MTDSWAP_DIRTY; 494 break; 495 default: 496 idx = MTDSWAP_FAILING; 497 } 498 499 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 500 } 501 502 mtdswap_check_counts(d); 503 504 for (i = 0; i < d->eblks; i++) { 505 eb = d->eb_data + i; 506 507 if (eb->flags & EBLOCK_BAD) 508 continue; 509 510 idx = eb->flags >> EBLOCK_IDX_SHIFT; 511 mtdswap_rb_add(d, eb, idx); 512 } 513 } 514 515 /* 516 * Place eblk into a tree corresponding to its number of active blocks 517 * it contains. 518 */ 519 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 520 { 521 unsigned int weight = eb->active_count; 522 unsigned int maxweight = d->pages_per_eblk; 523 524 if (eb == d->curr_write) 525 return; 526 527 if (eb->flags & EBLOCK_BITFLIP) 528 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 529 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 530 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 531 if (weight == maxweight) 532 mtdswap_rb_add(d, eb, MTDSWAP_USED); 533 else if (weight == 0) 534 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 535 else if (weight > (maxweight/2)) 536 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 537 else 538 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 539 } 540 541 542 static void mtdswap_erase_callback(struct erase_info *done) 543 { 544 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; 545 wake_up(wait_q); 546 } 547 548 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 549 { 550 struct mtd_info *mtd = d->mtd; 551 struct erase_info erase; 552 wait_queue_head_t wq; 553 unsigned int retries = 0; 554 int ret; 555 556 eb->erase_count++; 557 if (eb->erase_count > d->max_erase_count) 558 d->max_erase_count = eb->erase_count; 559 560 retry: 561 init_waitqueue_head(&wq); 562 memset(&erase, 0, sizeof(struct erase_info)); 563 564 erase.mtd = mtd; 565 erase.callback = mtdswap_erase_callback; 566 erase.addr = mtdswap_eb_offset(d, eb); 567 erase.len = mtd->erasesize; 568 erase.priv = (u_long)&wq; 569 570 ret = mtd_erase(mtd, &erase); 571 if (ret) { 572 if (retries++ < MTDSWAP_ERASE_RETRIES) { 573 dev_warn(d->dev, 574 "erase of erase block %#llx on %s failed", 575 erase.addr, mtd->name); 576 yield(); 577 goto retry; 578 } 579 580 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 581 erase.addr, mtd->name); 582 583 mtdswap_handle_badblock(d, eb); 584 return -EIO; 585 } 586 587 ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE || 588 erase.state == MTD_ERASE_FAILED); 589 if (ret) { 590 dev_err(d->dev, "Interrupted erase block %#llx erasure on %s\n", 591 erase.addr, mtd->name); 592 return -EINTR; 593 } 594 595 if (erase.state == MTD_ERASE_FAILED) { 596 if (retries++ < MTDSWAP_ERASE_RETRIES) { 597 dev_warn(d->dev, 598 "erase of erase block %#llx on %s failed", 599 erase.addr, mtd->name); 600 yield(); 601 goto retry; 602 } 603 604 mtdswap_handle_badblock(d, eb); 605 return -EIO; 606 } 607 608 return 0; 609 } 610 611 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 612 unsigned int *block) 613 { 614 int ret; 615 struct swap_eb *old_eb = d->curr_write; 616 struct rb_root *clean_root; 617 struct swap_eb *eb; 618 619 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 620 do { 621 if (TREE_EMPTY(d, CLEAN)) 622 return -ENOSPC; 623 624 clean_root = TREE_ROOT(d, CLEAN); 625 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 626 rb_erase(&eb->rb, clean_root); 627 eb->root = NULL; 628 TREE_COUNT(d, CLEAN)--; 629 630 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 631 } while (ret == -EIO || mtd_is_eccerr(ret)); 632 633 if (ret) 634 return ret; 635 636 d->curr_write_pos = 0; 637 d->curr_write = eb; 638 if (old_eb) 639 mtdswap_store_eb(d, old_eb); 640 } 641 642 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 643 d->curr_write_pos; 644 645 d->curr_write->active_count++; 646 d->revmap[*block] = page; 647 d->curr_write_pos++; 648 649 return 0; 650 } 651 652 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 653 { 654 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 655 d->pages_per_eblk - d->curr_write_pos; 656 } 657 658 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 659 { 660 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 661 } 662 663 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 664 unsigned int page, unsigned int *bp, int gc_context) 665 { 666 struct mtd_info *mtd = d->mtd; 667 struct swap_eb *eb; 668 size_t retlen; 669 loff_t writepos; 670 int ret; 671 672 retry: 673 if (!gc_context) 674 while (!mtdswap_enough_free_pages(d)) 675 if (mtdswap_gc(d, 0) > 0) 676 return -ENOSPC; 677 678 ret = mtdswap_map_free_block(d, page, bp); 679 eb = d->eb_data + (*bp / d->pages_per_eblk); 680 681 if (ret == -EIO || mtd_is_eccerr(ret)) { 682 d->curr_write = NULL; 683 eb->active_count--; 684 d->revmap[*bp] = PAGE_UNDEF; 685 goto retry; 686 } 687 688 if (ret < 0) 689 return ret; 690 691 writepos = (loff_t)*bp << PAGE_SHIFT; 692 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 693 if (ret == -EIO || mtd_is_eccerr(ret)) { 694 d->curr_write_pos--; 695 eb->active_count--; 696 d->revmap[*bp] = PAGE_UNDEF; 697 mtdswap_handle_write_error(d, eb); 698 goto retry; 699 } 700 701 if (ret < 0) { 702 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 703 ret, retlen); 704 goto err; 705 } 706 707 if (retlen != PAGE_SIZE) { 708 dev_err(d->dev, "Short write to MTD device: %zd written", 709 retlen); 710 ret = -EIO; 711 goto err; 712 } 713 714 return ret; 715 716 err: 717 d->curr_write_pos--; 718 eb->active_count--; 719 d->revmap[*bp] = PAGE_UNDEF; 720 721 return ret; 722 } 723 724 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 725 unsigned int *newblock) 726 { 727 struct mtd_info *mtd = d->mtd; 728 struct swap_eb *eb, *oldeb; 729 int ret; 730 size_t retlen; 731 unsigned int page, retries; 732 loff_t readpos; 733 734 page = d->revmap[oldblock]; 735 readpos = (loff_t) oldblock << PAGE_SHIFT; 736 retries = 0; 737 738 retry: 739 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 740 741 if (ret < 0 && !mtd_is_bitflip(ret)) { 742 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 743 oldeb->flags |= EBLOCK_READERR; 744 745 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 746 oldblock); 747 retries++; 748 if (retries < MTDSWAP_IO_RETRIES) 749 goto retry; 750 751 goto read_error; 752 } 753 754 if (retlen != PAGE_SIZE) { 755 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 756 oldblock); 757 ret = -EIO; 758 goto read_error; 759 } 760 761 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 762 if (ret < 0) { 763 d->page_data[page] = BLOCK_ERROR; 764 dev_err(d->dev, "Write error: %d\n", ret); 765 return ret; 766 } 767 768 eb = d->eb_data + *newblock / d->pages_per_eblk; 769 d->page_data[page] = *newblock; 770 d->revmap[oldblock] = PAGE_UNDEF; 771 eb = d->eb_data + oldblock / d->pages_per_eblk; 772 eb->active_count--; 773 774 return 0; 775 776 read_error: 777 d->page_data[page] = BLOCK_ERROR; 778 d->revmap[oldblock] = PAGE_UNDEF; 779 return ret; 780 } 781 782 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 783 { 784 unsigned int i, block, eblk_base, newblock; 785 int ret, errcode; 786 787 errcode = 0; 788 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 789 790 for (i = 0; i < d->pages_per_eblk; i++) { 791 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 792 return -ENOSPC; 793 794 block = eblk_base + i; 795 if (d->revmap[block] == PAGE_UNDEF) 796 continue; 797 798 ret = mtdswap_move_block(d, block, &newblock); 799 if (ret < 0 && !errcode) 800 errcode = ret; 801 } 802 803 return errcode; 804 } 805 806 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 807 { 808 int idx, stopat; 809 810 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_TRESHOLD) 811 stopat = MTDSWAP_LOWFRAG; 812 else 813 stopat = MTDSWAP_HIFRAG; 814 815 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 816 if (d->trees[idx].root.rb_node != NULL) 817 return idx; 818 819 return -1; 820 } 821 822 static int mtdswap_wlfreq(unsigned int maxdiff) 823 { 824 unsigned int h, x, y, dist, base; 825 826 /* 827 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 828 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 829 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 830 */ 831 832 dist = maxdiff - MAX_ERASE_DIFF; 833 if (dist > COLLECT_NONDIRTY_BASE) 834 dist = COLLECT_NONDIRTY_BASE; 835 836 /* 837 * Modelling the slop as right angular triangle with base 838 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 839 * equal to the ratio h/base. 840 */ 841 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 842 base = COLLECT_NONDIRTY_BASE; 843 844 x = dist - base; 845 y = (x * h + base / 2) / base; 846 847 return COLLECT_NONDIRTY_FREQ2 + y; 848 } 849 850 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 851 { 852 static unsigned int pick_cnt; 853 unsigned int i, idx = -1, wear, max; 854 struct rb_root *root; 855 856 max = 0; 857 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 858 root = &d->trees[i].root; 859 if (root->rb_node == NULL) 860 continue; 861 862 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 863 if (wear > max) { 864 max = wear; 865 idx = i; 866 } 867 } 868 869 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 870 pick_cnt = 0; 871 return idx; 872 } 873 874 pick_cnt++; 875 return -1; 876 } 877 878 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 879 unsigned int background) 880 { 881 int idx; 882 883 if (TREE_NONEMPTY(d, FAILING) && 884 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 885 return MTDSWAP_FAILING; 886 887 idx = mtdswap_choose_wl_tree(d); 888 if (idx >= MTDSWAP_CLEAN) 889 return idx; 890 891 return __mtdswap_choose_gc_tree(d); 892 } 893 894 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 895 unsigned int background) 896 { 897 struct rb_root *rp = NULL; 898 struct swap_eb *eb = NULL; 899 int idx; 900 901 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 902 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 903 return NULL; 904 905 idx = mtdswap_choose_gc_tree(d, background); 906 if (idx < 0) 907 return NULL; 908 909 rp = &d->trees[idx].root; 910 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 911 912 rb_erase(&eb->rb, rp); 913 eb->root = NULL; 914 d->trees[idx].count--; 915 return eb; 916 } 917 918 static unsigned int mtdswap_test_patt(unsigned int i) 919 { 920 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 921 } 922 923 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 924 struct swap_eb *eb) 925 { 926 struct mtd_info *mtd = d->mtd; 927 unsigned int test, i, j, patt, mtd_pages; 928 loff_t base, pos; 929 unsigned int *p1 = (unsigned int *)d->page_buf; 930 unsigned char *p2 = (unsigned char *)d->oob_buf; 931 struct mtd_oob_ops ops; 932 int ret; 933 934 ops.mode = MTD_OPS_AUTO_OOB; 935 ops.len = mtd->writesize; 936 ops.ooblen = mtd->oobavail; 937 ops.ooboffs = 0; 938 ops.datbuf = d->page_buf; 939 ops.oobbuf = d->oob_buf; 940 base = mtdswap_eb_offset(d, eb); 941 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 942 943 for (test = 0; test < 2; test++) { 944 pos = base; 945 for (i = 0; i < mtd_pages; i++) { 946 patt = mtdswap_test_patt(test + i); 947 memset(d->page_buf, patt, mtd->writesize); 948 memset(d->oob_buf, patt, mtd->oobavail); 949 ret = mtd_write_oob(mtd, pos, &ops); 950 if (ret) 951 goto error; 952 953 pos += mtd->writesize; 954 } 955 956 pos = base; 957 for (i = 0; i < mtd_pages; i++) { 958 ret = mtd_read_oob(mtd, pos, &ops); 959 if (ret) 960 goto error; 961 962 patt = mtdswap_test_patt(test + i); 963 for (j = 0; j < mtd->writesize/sizeof(int); j++) 964 if (p1[j] != patt) 965 goto error; 966 967 for (j = 0; j < mtd->oobavail; j++) 968 if (p2[j] != (unsigned char)patt) 969 goto error; 970 971 pos += mtd->writesize; 972 } 973 974 ret = mtdswap_erase_block(d, eb); 975 if (ret) 976 goto error; 977 } 978 979 eb->flags &= ~EBLOCK_READERR; 980 return 1; 981 982 error: 983 mtdswap_handle_badblock(d, eb); 984 return 0; 985 } 986 987 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 988 { 989 struct swap_eb *eb; 990 int ret; 991 992 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 993 return 1; 994 995 eb = mtdswap_pick_gc_eblk(d, background); 996 if (!eb) 997 return 1; 998 999 ret = mtdswap_gc_eblock(d, eb); 1000 if (ret == -ENOSPC) 1001 return 1; 1002 1003 if (eb->flags & EBLOCK_FAILED) { 1004 mtdswap_handle_badblock(d, eb); 1005 return 0; 1006 } 1007 1008 eb->flags &= ~EBLOCK_BITFLIP; 1009 ret = mtdswap_erase_block(d, eb); 1010 if ((eb->flags & EBLOCK_READERR) && 1011 (ret || !mtdswap_eblk_passes(d, eb))) 1012 return 0; 1013 1014 if (ret == 0) 1015 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 1016 1017 if (ret == 0) 1018 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 1019 else if (ret != -EIO && !mtd_is_eccerr(ret)) 1020 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 1021 1022 return 0; 1023 } 1024 1025 static void mtdswap_background(struct mtd_blktrans_dev *dev) 1026 { 1027 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1028 int ret; 1029 1030 while (1) { 1031 ret = mtdswap_gc(d, 1); 1032 if (ret || mtd_blktrans_cease_background(dev)) 1033 return; 1034 } 1035 } 1036 1037 static void mtdswap_cleanup(struct mtdswap_dev *d) 1038 { 1039 vfree(d->eb_data); 1040 vfree(d->revmap); 1041 vfree(d->page_data); 1042 kfree(d->oob_buf); 1043 kfree(d->page_buf); 1044 } 1045 1046 static int mtdswap_flush(struct mtd_blktrans_dev *dev) 1047 { 1048 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1049 1050 mtd_sync(d->mtd); 1051 return 0; 1052 } 1053 1054 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1055 { 1056 loff_t offset; 1057 unsigned int badcnt; 1058 1059 badcnt = 0; 1060 1061 if (mtd_can_have_bb(mtd)) 1062 for (offset = 0; offset < size; offset += mtd->erasesize) 1063 if (mtd_block_isbad(mtd, offset)) 1064 badcnt++; 1065 1066 return badcnt; 1067 } 1068 1069 static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1070 unsigned long page, char *buf) 1071 { 1072 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1073 unsigned int newblock, mapped; 1074 struct swap_eb *eb; 1075 int ret; 1076 1077 d->sect_write_count++; 1078 1079 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1080 return -ENOSPC; 1081 1082 if (header) { 1083 /* Ignore writes to the header page */ 1084 if (unlikely(page == 0)) 1085 return 0; 1086 1087 page--; 1088 } 1089 1090 mapped = d->page_data[page]; 1091 if (mapped <= BLOCK_MAX) { 1092 eb = d->eb_data + (mapped / d->pages_per_eblk); 1093 eb->active_count--; 1094 mtdswap_store_eb(d, eb); 1095 d->page_data[page] = BLOCK_UNDEF; 1096 d->revmap[mapped] = PAGE_UNDEF; 1097 } 1098 1099 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1100 d->mtd_write_count++; 1101 1102 if (ret < 0) 1103 return ret; 1104 1105 eb = d->eb_data + (newblock / d->pages_per_eblk); 1106 d->page_data[page] = newblock; 1107 1108 return 0; 1109 } 1110 1111 /* Provide a dummy swap header for the kernel */ 1112 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1113 { 1114 union swap_header *hd = (union swap_header *)(buf); 1115 1116 memset(buf, 0, PAGE_SIZE - 10); 1117 1118 hd->info.version = 1; 1119 hd->info.last_page = d->mbd_dev->size - 1; 1120 hd->info.nr_badpages = 0; 1121 1122 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1123 1124 return 0; 1125 } 1126 1127 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1128 unsigned long page, char *buf) 1129 { 1130 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1131 struct mtd_info *mtd = d->mtd; 1132 unsigned int realblock, retries; 1133 loff_t readpos; 1134 struct swap_eb *eb; 1135 size_t retlen; 1136 int ret; 1137 1138 d->sect_read_count++; 1139 1140 if (header) { 1141 if (unlikely(page == 0)) 1142 return mtdswap_auto_header(d, buf); 1143 1144 page--; 1145 } 1146 1147 realblock = d->page_data[page]; 1148 if (realblock > BLOCK_MAX) { 1149 memset(buf, 0x0, PAGE_SIZE); 1150 if (realblock == BLOCK_UNDEF) 1151 return 0; 1152 else 1153 return -EIO; 1154 } 1155 1156 eb = d->eb_data + (realblock / d->pages_per_eblk); 1157 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1158 1159 readpos = (loff_t)realblock << PAGE_SHIFT; 1160 retries = 0; 1161 1162 retry: 1163 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1164 1165 d->mtd_read_count++; 1166 if (mtd_is_bitflip(ret)) { 1167 eb->flags |= EBLOCK_BITFLIP; 1168 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1169 ret = 0; 1170 } 1171 1172 if (ret < 0) { 1173 dev_err(d->dev, "Read error %d\n", ret); 1174 eb->flags |= EBLOCK_READERR; 1175 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1176 retries++; 1177 if (retries < MTDSWAP_IO_RETRIES) 1178 goto retry; 1179 1180 return ret; 1181 } 1182 1183 if (retlen != PAGE_SIZE) { 1184 dev_err(d->dev, "Short read %zd\n", retlen); 1185 return -EIO; 1186 } 1187 1188 return 0; 1189 } 1190 1191 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1192 unsigned nr_pages) 1193 { 1194 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1195 unsigned long page; 1196 struct swap_eb *eb; 1197 unsigned int mapped; 1198 1199 d->discard_count++; 1200 1201 for (page = first; page < first + nr_pages; page++) { 1202 mapped = d->page_data[page]; 1203 if (mapped <= BLOCK_MAX) { 1204 eb = d->eb_data + (mapped / d->pages_per_eblk); 1205 eb->active_count--; 1206 mtdswap_store_eb(d, eb); 1207 d->page_data[page] = BLOCK_UNDEF; 1208 d->revmap[mapped] = PAGE_UNDEF; 1209 d->discard_page_count++; 1210 } else if (mapped == BLOCK_ERROR) { 1211 d->page_data[page] = BLOCK_UNDEF; 1212 d->discard_page_count++; 1213 } 1214 } 1215 1216 return 0; 1217 } 1218 1219 static int mtdswap_show(struct seq_file *s, void *data) 1220 { 1221 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1222 unsigned long sum; 1223 unsigned int count[MTDSWAP_TREE_CNT]; 1224 unsigned int min[MTDSWAP_TREE_CNT]; 1225 unsigned int max[MTDSWAP_TREE_CNT]; 1226 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1227 uint64_t use_size; 1228 char *name[] = {"clean", "used", "low", "high", "dirty", "bitflip", 1229 "failing"}; 1230 1231 mutex_lock(&d->mbd_dev->lock); 1232 1233 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1234 struct rb_root *root = &d->trees[i].root; 1235 1236 if (root->rb_node) { 1237 count[i] = d->trees[i].count; 1238 min[i] = rb_entry(rb_first(root), struct swap_eb, 1239 rb)->erase_count; 1240 max[i] = rb_entry(rb_last(root), struct swap_eb, 1241 rb)->erase_count; 1242 } else 1243 count[i] = 0; 1244 } 1245 1246 if (d->curr_write) { 1247 cw = 1; 1248 cwp = d->curr_write_pos; 1249 cwecount = d->curr_write->erase_count; 1250 } 1251 1252 sum = 0; 1253 for (i = 0; i < d->eblks; i++) 1254 sum += d->eb_data[i].erase_count; 1255 1256 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1257 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1258 1259 mapped = 0; 1260 pages = d->mbd_dev->size; 1261 for (i = 0; i < pages; i++) 1262 if (d->page_data[i] != BLOCK_UNDEF) 1263 mapped++; 1264 1265 mutex_unlock(&d->mbd_dev->lock); 1266 1267 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1268 if (!count[i]) 1269 continue; 1270 1271 if (min[i] != max[i]) 1272 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1273 "max %d times\n", 1274 name[i], count[i], min[i], max[i]); 1275 else 1276 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1277 "times\n", name[i], count[i], min[i]); 1278 } 1279 1280 if (bb_cnt) 1281 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1282 1283 if (cw) 1284 seq_printf(s, "current erase block: %u pages used, %u free, " 1285 "erased %u times\n", 1286 cwp, d->pages_per_eblk - cwp, cwecount); 1287 1288 seq_printf(s, "total erasures: %lu\n", sum); 1289 1290 seq_puts(s, "\n"); 1291 1292 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1293 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1294 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1295 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1296 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1297 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1298 1299 seq_puts(s, "\n"); 1300 seq_printf(s, "total pages: %u\n", pages); 1301 seq_printf(s, "pages mapped: %u\n", mapped); 1302 1303 return 0; 1304 } 1305 1306 static int mtdswap_open(struct inode *inode, struct file *file) 1307 { 1308 return single_open(file, mtdswap_show, inode->i_private); 1309 } 1310 1311 static const struct file_operations mtdswap_fops = { 1312 .open = mtdswap_open, 1313 .read = seq_read, 1314 .llseek = seq_lseek, 1315 .release = single_release, 1316 }; 1317 1318 static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1319 { 1320 struct gendisk *gd = d->mbd_dev->disk; 1321 struct device *dev = disk_to_dev(gd); 1322 1323 struct dentry *root; 1324 struct dentry *dent; 1325 1326 root = debugfs_create_dir(gd->disk_name, NULL); 1327 if (IS_ERR(root)) 1328 return 0; 1329 1330 if (!root) { 1331 dev_err(dev, "failed to initialize debugfs\n"); 1332 return -1; 1333 } 1334 1335 d->debugfs_root = root; 1336 1337 dent = debugfs_create_file("stats", S_IRUSR, root, d, 1338 &mtdswap_fops); 1339 if (!dent) { 1340 dev_err(d->dev, "debugfs_create_file failed\n"); 1341 debugfs_remove_recursive(root); 1342 d->debugfs_root = NULL; 1343 return -1; 1344 } 1345 1346 return 0; 1347 } 1348 1349 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1350 unsigned int spare_cnt) 1351 { 1352 struct mtd_info *mtd = d->mbd_dev->mtd; 1353 unsigned int i, eblk_bytes, pages, blocks; 1354 int ret = -ENOMEM; 1355 1356 d->mtd = mtd; 1357 d->eblks = eblocks; 1358 d->spare_eblks = spare_cnt; 1359 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1360 1361 pages = d->mbd_dev->size; 1362 blocks = eblocks * d->pages_per_eblk; 1363 1364 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1365 d->trees[i].root = RB_ROOT; 1366 1367 d->page_data = vmalloc(sizeof(int)*pages); 1368 if (!d->page_data) 1369 goto page_data_fail; 1370 1371 d->revmap = vmalloc(sizeof(int)*blocks); 1372 if (!d->revmap) 1373 goto revmap_fail; 1374 1375 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1376 d->eb_data = vzalloc(eblk_bytes); 1377 if (!d->eb_data) 1378 goto eb_data_fail; 1379 1380 for (i = 0; i < pages; i++) 1381 d->page_data[i] = BLOCK_UNDEF; 1382 1383 for (i = 0; i < blocks; i++) 1384 d->revmap[i] = PAGE_UNDEF; 1385 1386 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1387 if (!d->page_buf) 1388 goto page_buf_fail; 1389 1390 d->oob_buf = kmalloc(2 * mtd->oobavail, GFP_KERNEL); 1391 if (!d->oob_buf) 1392 goto oob_buf_fail; 1393 1394 mtdswap_scan_eblks(d); 1395 1396 return 0; 1397 1398 oob_buf_fail: 1399 kfree(d->page_buf); 1400 page_buf_fail: 1401 vfree(d->eb_data); 1402 eb_data_fail: 1403 vfree(d->revmap); 1404 revmap_fail: 1405 vfree(d->page_data); 1406 page_data_fail: 1407 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1408 return ret; 1409 } 1410 1411 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1412 { 1413 struct mtdswap_dev *d; 1414 struct mtd_blktrans_dev *mbd_dev; 1415 char *parts; 1416 char *this_opt; 1417 unsigned long part; 1418 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1419 uint64_t swap_size, use_size, size_limit; 1420 int ret; 1421 1422 parts = &partitions[0]; 1423 if (!*parts) 1424 return; 1425 1426 while ((this_opt = strsep(&parts, ",")) != NULL) { 1427 if (kstrtoul(this_opt, 0, &part) < 0) 1428 return; 1429 1430 if (mtd->index == part) 1431 break; 1432 } 1433 1434 if (mtd->index != part) 1435 return; 1436 1437 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1438 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1439 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1440 return; 1441 } 1442 1443 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1444 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1445 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1446 return; 1447 } 1448 1449 if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { 1450 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1451 "%d available, %zu needed.\n", 1452 MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); 1453 return; 1454 } 1455 1456 if (spare_eblocks > 100) 1457 spare_eblocks = 100; 1458 1459 use_size = mtd->size; 1460 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1461 1462 if (mtd->size > size_limit) { 1463 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1464 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1465 use_size = size_limit; 1466 } 1467 1468 eblocks = mtd_div_by_eb(use_size, mtd); 1469 use_size = (uint64_t)eblocks * mtd->erasesize; 1470 bad_blocks = mtdswap_badblocks(mtd, use_size); 1471 eavailable = eblocks - bad_blocks; 1472 1473 if (eavailable < MIN_ERASE_BLOCKS) { 1474 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1475 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1476 MIN_ERASE_BLOCKS); 1477 return; 1478 } 1479 1480 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1481 1482 if (spare_cnt < MIN_SPARE_EBLOCKS) 1483 spare_cnt = MIN_SPARE_EBLOCKS; 1484 1485 if (spare_cnt > eavailable - 1) 1486 spare_cnt = eavailable - 1; 1487 1488 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1489 (header ? PAGE_SIZE : 0); 1490 1491 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1492 "%u spare, %u bad blocks\n", 1493 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1494 1495 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1496 if (!d) 1497 return; 1498 1499 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1500 if (!mbd_dev) { 1501 kfree(d); 1502 return; 1503 } 1504 1505 d->mbd_dev = mbd_dev; 1506 mbd_dev->priv = d; 1507 1508 mbd_dev->mtd = mtd; 1509 mbd_dev->devnum = mtd->index; 1510 mbd_dev->size = swap_size >> PAGE_SHIFT; 1511 mbd_dev->tr = tr; 1512 1513 if (!(mtd->flags & MTD_WRITEABLE)) 1514 mbd_dev->readonly = 1; 1515 1516 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1517 goto init_failed; 1518 1519 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1520 goto cleanup; 1521 1522 d->dev = disk_to_dev(mbd_dev->disk); 1523 1524 ret = mtdswap_add_debugfs(d); 1525 if (ret < 0) 1526 goto debugfs_failed; 1527 1528 return; 1529 1530 debugfs_failed: 1531 del_mtd_blktrans_dev(mbd_dev); 1532 1533 cleanup: 1534 mtdswap_cleanup(d); 1535 1536 init_failed: 1537 kfree(mbd_dev); 1538 kfree(d); 1539 } 1540 1541 static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1542 { 1543 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1544 1545 debugfs_remove_recursive(d->debugfs_root); 1546 del_mtd_blktrans_dev(dev); 1547 mtdswap_cleanup(d); 1548 kfree(d); 1549 } 1550 1551 static struct mtd_blktrans_ops mtdswap_ops = { 1552 .name = "mtdswap", 1553 .major = 0, 1554 .part_bits = 0, 1555 .blksize = PAGE_SIZE, 1556 .flush = mtdswap_flush, 1557 .readsect = mtdswap_readsect, 1558 .writesect = mtdswap_writesect, 1559 .discard = mtdswap_discard, 1560 .background = mtdswap_background, 1561 .add_mtd = mtdswap_add_mtd, 1562 .remove_dev = mtdswap_remove_dev, 1563 .owner = THIS_MODULE, 1564 }; 1565 1566 static int __init mtdswap_modinit(void) 1567 { 1568 return register_mtd_blktrans(&mtdswap_ops); 1569 } 1570 1571 static void __exit mtdswap_modexit(void) 1572 { 1573 deregister_mtd_blktrans(&mtdswap_ops); 1574 } 1575 1576 module_init(mtdswap_modinit); 1577 module_exit(mtdswap_modexit); 1578 1579 1580 MODULE_LICENSE("GPL"); 1581 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1582 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1583 "swap space"); 1584