1 /* 2 * Swap block device support for MTDs 3 * Turns an MTD device into a swap device with block wear leveling 4 * 5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 6 * 7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 8 * 9 * Based on Richard Purdie's earlier implementation in 2007. Background 10 * support and lock-less operation written by Adrian Hunter. 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, but 17 * WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 * 02110-1301 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/mtd/mtd.h> 30 #include <linux/mtd/blktrans.h> 31 #include <linux/rbtree.h> 32 #include <linux/sched.h> 33 #include <linux/slab.h> 34 #include <linux/vmalloc.h> 35 #include <linux/genhd.h> 36 #include <linux/swap.h> 37 #include <linux/debugfs.h> 38 #include <linux/seq_file.h> 39 #include <linux/device.h> 40 #include <linux/math64.h> 41 42 #define MTDSWAP_PREFIX "mtdswap" 43 44 /* 45 * The number of free eraseblocks when GC should stop 46 */ 47 #define CLEAN_BLOCK_THRESHOLD 20 48 49 /* 50 * Number of free eraseblocks below which GC can also collect low frag 51 * blocks. 52 */ 53 #define LOW_FRAG_GC_THRESHOLD 5 54 55 /* 56 * Wear level cost amortization. We want to do wear leveling on the background 57 * without disturbing gc too much. This is made by defining max GC frequency. 
* Frequency value 6 means 1/6 of the GC passes will pick an erase block based
 * on the biggest wear difference rather than the biggest dirtiness.
 *
 * The lower freq2 should be chosen so that it makes sure the maximum erase
 * difference will decrease even if a malicious application is deliberately
 * trying to make erase differences large.
 */
#define MAX_ERASE_DIFF		4000
#define COLLECT_NONDIRTY_BASE	MAX_ERASE_DIFF
#define COLLECT_NONDIRTY_FREQ1	6
#define COLLECT_NONDIRTY_FREQ2	4

/*
 * Sentinels stored in the mapping tables. revmap[] entries use PAGE_UNDEF
 * for a flash page holding no logical page; page_data[] entries use
 * BLOCK_UNDEF for "never written / discarded" and BLOCK_ERROR for a page
 * whose contents were lost. Values up to BLOCK_MAX are real block numbers.
 */
#define PAGE_UNDEF		UINT_MAX
#define BLOCK_UNDEF		UINT_MAX
#define BLOCK_ERROR		(UINT_MAX - 1)
#define BLOCK_MAX		(UINT_MAX - 2)

/* Bits of swap_eb.flags */
#define EBLOCK_BAD		(1 << 0)
#define EBLOCK_NOMAGIC		(1 << 1)
#define EBLOCK_BITFLIP		(1 << 2)
#define EBLOCK_FAILED		(1 << 3)
#define EBLOCK_READERR		(1 << 4)
/* During the initial scan the target tree index is kept above the flag bits */
#define EBLOCK_IDX_SHIFT	5

/* Per erase block state; one entry per block in mtdswap_dev.eb_data[] */
struct swap_eb {
	struct rb_node rb;		/* node in the tree pointed to by root */
	struct rb_root *root;		/* tree currently holding the block, or NULL */

	unsigned int flags;		/* EBLOCK_* bits */
	unsigned int active_count;	/* pages in this block that are mapped */
	unsigned int erase_count;
	unsigned int pad;		/* speeds up pointer decrement */
};

/* Smallest / largest erase count in a tree; the tree must not be empty */
#define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \
				rb)->erase_count)
#define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \
				rb)->erase_count)

/* An rb-tree of erase blocks ordered by erase count, plus its node count */
struct mtdswap_tree {
	struct rb_root root;
	unsigned int count;
};

/* Indexes into mtdswap_dev.trees[] */
enum {
	MTDSWAP_CLEAN,
	MTDSWAP_USED,
	MTDSWAP_LOWFRAG,
	MTDSWAP_HIFRAG,
	MTDSWAP_DIRTY,
	MTDSWAP_BITFLIP,
	MTDSWAP_FAILING,
	MTDSWAP_TREE_CNT,
};

/* Driver state for one MTD device used as swap */
struct mtdswap_dev {
	struct mtd_blktrans_dev *mbd_dev;
	struct mtd_info *mtd;
	struct device *dev;

	unsigned int *page_data;	/* logical page -> flash block number */
	unsigned int *revmap;		/* flash block number -> logical page */

	unsigned int eblks;
	unsigned int spare_eblks;
	unsigned int pages_per_eblk;
	unsigned int max_erase_count;
	struct swap_eb *eb_data;

	struct mtdswap_tree trees[MTDSWAP_TREE_CNT];

	/* Statistics reported through debugfs */
	unsigned long long sect_read_count;
	unsigned long long sect_write_count;
	unsigned long long mtd_write_count;
	unsigned long long mtd_read_count;
	unsigned long long discard_count;
	unsigned long long discard_page_count;

	unsigned int curr_write_pos;	/* next free page in curr_write */
	struct swap_eb *curr_write;	/* erase block currently being filled */

	char *page_buf;
	char *oob_buf;
};

/* On-flash marker layout stored in the OOB area */
struct mtdswap_oobdata {
	__le16 magic;
	__le32 count;
} __packed;

#define MTDSWAP_MAGIC_CLEAN	0x2095
#define MTDSWAP_MAGIC_DIRTY	(MTDSWAP_MAGIC_CLEAN + 1)
#define MTDSWAP_TYPE_CLEAN	0
#define MTDSWAP_TYPE_DIRTY	1
#define MTDSWAP_OOBSIZE		sizeof(struct mtdswap_oobdata)

#define MTDSWAP_ERASE_RETRIES	3 /* Before marking erase block bad */
#define MTDSWAP_IO_RETRIES	3

/* Results of mtdswap_read_markers(); negative values are read errors */
enum {
	MTDSWAP_SCANNED_CLEAN,
	MTDSWAP_SCANNED_DIRTY,
	MTDSWAP_SCANNED_BITFLIP,
	MTDSWAP_SCANNED_BAD,
};

/*
 * In the worst case mtdswap_writesect() has allocated the last clean
 * page from the current block and is then pre-empted by the GC
 * thread. The thread can consume a full erase block when moving a
 * block.
169 */ 170 #define MIN_SPARE_EBLOCKS 2 171 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 172 173 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 174 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 175 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 176 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 177 178 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 179 180 static char partitions[128] = ""; 181 module_param_string(partitions, partitions, sizeof(partitions), 0444); 182 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 183 "partitions=\"1,3,5\""); 184 185 static unsigned int spare_eblocks = 10; 186 module_param(spare_eblocks, uint, 0444); 187 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 188 "garbage collection (default 10%)"); 189 190 static bool header; /* false */ 191 module_param(header, bool, 0444); 192 MODULE_PARM_DESC(header, 193 "Include builtin swap header (default 0, without header)"); 194 195 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 196 197 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 198 { 199 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 200 } 201 202 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 203 { 204 unsigned int oldidx; 205 struct mtdswap_tree *tp; 206 207 if (eb->root) { 208 tp = container_of(eb->root, struct mtdswap_tree, root); 209 oldidx = tp - &d->trees[0]; 210 211 d->trees[oldidx].count--; 212 rb_erase(&eb->rb, eb->root); 213 } 214 } 215 216 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 217 { 218 struct rb_node **p, *parent = NULL; 219 struct swap_eb *cur; 220 221 p = &root->rb_node; 222 while (*p) { 223 parent = *p; 224 cur = rb_entry(parent, struct swap_eb, rb); 225 if (eb->erase_count > cur->erase_count) 226 p = &(*p)->rb_right; 227 else 228 p = &(*p)->rb_left; 229 } 230 231 
rb_link_node(&eb->rb, parent, p); 232 rb_insert_color(&eb->rb, root); 233 } 234 235 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 236 { 237 struct rb_root *root; 238 239 if (eb->root == &d->trees[idx].root) 240 return; 241 242 mtdswap_eb_detach(d, eb); 243 root = &d->trees[idx].root; 244 __mtdswap_rb_add(root, eb); 245 eb->root = root; 246 d->trees[idx].count++; 247 } 248 249 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 250 { 251 struct rb_node *p; 252 unsigned int i; 253 254 p = rb_first(root); 255 i = 0; 256 while (i < idx && p) { 257 p = rb_next(p); 258 i++; 259 } 260 261 return p; 262 } 263 264 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 265 { 266 int ret; 267 loff_t offset; 268 269 d->spare_eblks--; 270 eb->flags |= EBLOCK_BAD; 271 mtdswap_eb_detach(d, eb); 272 eb->root = NULL; 273 274 /* badblocks not supported */ 275 if (!mtd_can_have_bb(d->mtd)) 276 return 1; 277 278 offset = mtdswap_eb_offset(d, eb); 279 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 280 ret = mtd_block_markbad(d->mtd, offset); 281 282 if (ret) { 283 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 284 "error %d\n", offset, ret); 285 return ret; 286 } 287 288 return 1; 289 290 } 291 292 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 293 { 294 unsigned int marked = eb->flags & EBLOCK_FAILED; 295 struct swap_eb *curr_write = d->curr_write; 296 297 eb->flags |= EBLOCK_FAILED; 298 if (curr_write == eb) { 299 d->curr_write = NULL; 300 301 if (!marked && d->curr_write_pos != 0) { 302 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 303 return 0; 304 } 305 } 306 307 return mtdswap_handle_badblock(d, eb); 308 } 309 310 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 311 struct mtd_oob_ops *ops) 312 { 313 int ret = mtd_read_oob(d->mtd, from, ops); 314 315 if (mtd_is_bitflip(ret)) 316 return ret; 317 318 if (ret) { 319 
dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 320 ret, from); 321 return ret; 322 } 323 324 if (ops->oobretlen < ops->ooblen) { 325 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 326 "%zd) for block at %08llx\n", 327 ops->oobretlen, ops->ooblen, from); 328 return -EIO; 329 } 330 331 return 0; 332 } 333 334 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 335 { 336 struct mtdswap_oobdata *data, *data2; 337 int ret; 338 loff_t offset; 339 struct mtd_oob_ops ops; 340 341 offset = mtdswap_eb_offset(d, eb); 342 343 /* Check first if the block is bad. */ 344 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 345 return MTDSWAP_SCANNED_BAD; 346 347 ops.ooblen = 2 * d->mtd->oobavail; 348 ops.oobbuf = d->oob_buf; 349 ops.ooboffs = 0; 350 ops.datbuf = NULL; 351 ops.mode = MTD_OPS_AUTO_OOB; 352 353 ret = mtdswap_read_oob(d, offset, &ops); 354 355 if (ret && !mtd_is_bitflip(ret)) 356 return ret; 357 358 data = (struct mtdswap_oobdata *)d->oob_buf; 359 data2 = (struct mtdswap_oobdata *) 360 (d->oob_buf + d->mtd->oobavail); 361 362 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 363 eb->erase_count = le32_to_cpu(data->count); 364 if (mtd_is_bitflip(ret)) 365 ret = MTDSWAP_SCANNED_BITFLIP; 366 else { 367 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 368 ret = MTDSWAP_SCANNED_DIRTY; 369 else 370 ret = MTDSWAP_SCANNED_CLEAN; 371 } 372 } else { 373 eb->flags |= EBLOCK_NOMAGIC; 374 ret = MTDSWAP_SCANNED_DIRTY; 375 } 376 377 return ret; 378 } 379 380 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 381 u16 marker) 382 { 383 struct mtdswap_oobdata n; 384 int ret; 385 loff_t offset; 386 struct mtd_oob_ops ops; 387 388 ops.ooboffs = 0; 389 ops.oobbuf = (uint8_t *)&n; 390 ops.mode = MTD_OPS_AUTO_OOB; 391 ops.datbuf = NULL; 392 393 if (marker == MTDSWAP_TYPE_CLEAN) { 394 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 395 n.count = cpu_to_le32(eb->erase_count); 396 ops.ooblen = 
MTDSWAP_OOBSIZE; 397 offset = mtdswap_eb_offset(d, eb); 398 } else { 399 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 400 ops.ooblen = sizeof(n.magic); 401 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 402 } 403 404 ret = mtd_write_oob(d->mtd, offset, &ops); 405 406 if (ret) { 407 dev_warn(d->dev, "Write OOB failed for block at %08llx " 408 "error %d\n", offset, ret); 409 if (ret == -EIO || mtd_is_eccerr(ret)) 410 mtdswap_handle_write_error(d, eb); 411 return ret; 412 } 413 414 if (ops.oobretlen != ops.ooblen) { 415 dev_warn(d->dev, "Short OOB write for block at %08llx: " 416 "%zd not %zd\n", 417 offset, ops.oobretlen, ops.ooblen); 418 return ret; 419 } 420 421 return 0; 422 } 423 424 /* 425 * Are there any erase blocks without MAGIC_CLEAN header, presumably 426 * because power was cut off after erase but before header write? We 427 * need to guestimate the erase count. 428 */ 429 static void mtdswap_check_counts(struct mtdswap_dev *d) 430 { 431 struct rb_root hist_root = RB_ROOT; 432 struct rb_node *medrb; 433 struct swap_eb *eb; 434 unsigned int i, cnt, median; 435 436 cnt = 0; 437 for (i = 0; i < d->eblks; i++) { 438 eb = d->eb_data + i; 439 440 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 441 continue; 442 443 __mtdswap_rb_add(&hist_root, eb); 444 cnt++; 445 } 446 447 if (cnt == 0) 448 return; 449 450 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 451 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 452 453 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 454 455 for (i = 0; i < d->eblks; i++) { 456 eb = d->eb_data + i; 457 458 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 459 eb->erase_count = median; 460 461 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 462 continue; 463 464 rb_erase(&eb->rb, &hist_root); 465 } 466 } 467 468 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 469 { 470 int status; 471 unsigned int i, idx; 472 struct swap_eb *eb; 473 474 for (i = 0; i < d->eblks; i++) { 475 eb = 
d->eb_data + i; 476 477 status = mtdswap_read_markers(d, eb); 478 if (status < 0) 479 eb->flags |= EBLOCK_READERR; 480 else if (status == MTDSWAP_SCANNED_BAD) { 481 eb->flags |= EBLOCK_BAD; 482 continue; 483 } 484 485 switch (status) { 486 case MTDSWAP_SCANNED_CLEAN: 487 idx = MTDSWAP_CLEAN; 488 break; 489 case MTDSWAP_SCANNED_DIRTY: 490 case MTDSWAP_SCANNED_BITFLIP: 491 idx = MTDSWAP_DIRTY; 492 break; 493 default: 494 idx = MTDSWAP_FAILING; 495 } 496 497 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 498 } 499 500 mtdswap_check_counts(d); 501 502 for (i = 0; i < d->eblks; i++) { 503 eb = d->eb_data + i; 504 505 if (eb->flags & EBLOCK_BAD) 506 continue; 507 508 idx = eb->flags >> EBLOCK_IDX_SHIFT; 509 mtdswap_rb_add(d, eb, idx); 510 } 511 } 512 513 /* 514 * Place eblk into a tree corresponding to its number of active blocks 515 * it contains. 516 */ 517 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 518 { 519 unsigned int weight = eb->active_count; 520 unsigned int maxweight = d->pages_per_eblk; 521 522 if (eb == d->curr_write) 523 return; 524 525 if (eb->flags & EBLOCK_BITFLIP) 526 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 527 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 528 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 529 if (weight == maxweight) 530 mtdswap_rb_add(d, eb, MTDSWAP_USED); 531 else if (weight == 0) 532 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 533 else if (weight > (maxweight/2)) 534 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 535 else 536 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 537 } 538 539 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 540 { 541 struct mtd_info *mtd = d->mtd; 542 struct erase_info erase; 543 unsigned int retries = 0; 544 int ret; 545 546 eb->erase_count++; 547 if (eb->erase_count > d->max_erase_count) 548 d->max_erase_count = eb->erase_count; 549 550 retry: 551 memset(&erase, 0, sizeof(struct erase_info)); 552 erase.addr = mtdswap_eb_offset(d, eb); 553 erase.len = mtd->erasesize; 554 555 ret 
= mtd_erase(mtd, &erase); 556 if (ret) { 557 if (retries++ < MTDSWAP_ERASE_RETRIES) { 558 dev_warn(d->dev, 559 "erase of erase block %#llx on %s failed", 560 erase.addr, mtd->name); 561 yield(); 562 goto retry; 563 } 564 565 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 566 erase.addr, mtd->name); 567 568 mtdswap_handle_badblock(d, eb); 569 return -EIO; 570 } 571 572 return 0; 573 } 574 575 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 576 unsigned int *block) 577 { 578 int ret; 579 struct swap_eb *old_eb = d->curr_write; 580 struct rb_root *clean_root; 581 struct swap_eb *eb; 582 583 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 584 do { 585 if (TREE_EMPTY(d, CLEAN)) 586 return -ENOSPC; 587 588 clean_root = TREE_ROOT(d, CLEAN); 589 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 590 rb_erase(&eb->rb, clean_root); 591 eb->root = NULL; 592 TREE_COUNT(d, CLEAN)--; 593 594 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 595 } while (ret == -EIO || mtd_is_eccerr(ret)); 596 597 if (ret) 598 return ret; 599 600 d->curr_write_pos = 0; 601 d->curr_write = eb; 602 if (old_eb) 603 mtdswap_store_eb(d, old_eb); 604 } 605 606 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 607 d->curr_write_pos; 608 609 d->curr_write->active_count++; 610 d->revmap[*block] = page; 611 d->curr_write_pos++; 612 613 return 0; 614 } 615 616 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 617 { 618 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 619 d->pages_per_eblk - d->curr_write_pos; 620 } 621 622 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 623 { 624 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 625 } 626 627 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 628 unsigned int page, unsigned int *bp, int gc_context) 629 { 630 struct mtd_info *mtd = d->mtd; 631 struct swap_eb *eb; 632 size_t retlen; 633 loff_t writepos; 634 int ret; 635 636 retry: 
637 if (!gc_context) 638 while (!mtdswap_enough_free_pages(d)) 639 if (mtdswap_gc(d, 0) > 0) 640 return -ENOSPC; 641 642 ret = mtdswap_map_free_block(d, page, bp); 643 eb = d->eb_data + (*bp / d->pages_per_eblk); 644 645 if (ret == -EIO || mtd_is_eccerr(ret)) { 646 d->curr_write = NULL; 647 eb->active_count--; 648 d->revmap[*bp] = PAGE_UNDEF; 649 goto retry; 650 } 651 652 if (ret < 0) 653 return ret; 654 655 writepos = (loff_t)*bp << PAGE_SHIFT; 656 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 657 if (ret == -EIO || mtd_is_eccerr(ret)) { 658 d->curr_write_pos--; 659 eb->active_count--; 660 d->revmap[*bp] = PAGE_UNDEF; 661 mtdswap_handle_write_error(d, eb); 662 goto retry; 663 } 664 665 if (ret < 0) { 666 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 667 ret, retlen); 668 goto err; 669 } 670 671 if (retlen != PAGE_SIZE) { 672 dev_err(d->dev, "Short write to MTD device: %zd written", 673 retlen); 674 ret = -EIO; 675 goto err; 676 } 677 678 return ret; 679 680 err: 681 d->curr_write_pos--; 682 eb->active_count--; 683 d->revmap[*bp] = PAGE_UNDEF; 684 685 return ret; 686 } 687 688 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 689 unsigned int *newblock) 690 { 691 struct mtd_info *mtd = d->mtd; 692 struct swap_eb *eb, *oldeb; 693 int ret; 694 size_t retlen; 695 unsigned int page, retries; 696 loff_t readpos; 697 698 page = d->revmap[oldblock]; 699 readpos = (loff_t) oldblock << PAGE_SHIFT; 700 retries = 0; 701 702 retry: 703 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 704 705 if (ret < 0 && !mtd_is_bitflip(ret)) { 706 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 707 oldeb->flags |= EBLOCK_READERR; 708 709 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 710 oldblock); 711 retries++; 712 if (retries < MTDSWAP_IO_RETRIES) 713 goto retry; 714 715 goto read_error; 716 } 717 718 if (retlen != PAGE_SIZE) { 719 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 720 oldblock); 721 ret = 
-EIO; 722 goto read_error; 723 } 724 725 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 726 if (ret < 0) { 727 d->page_data[page] = BLOCK_ERROR; 728 dev_err(d->dev, "Write error: %d\n", ret); 729 return ret; 730 } 731 732 eb = d->eb_data + *newblock / d->pages_per_eblk; 733 d->page_data[page] = *newblock; 734 d->revmap[oldblock] = PAGE_UNDEF; 735 eb = d->eb_data + oldblock / d->pages_per_eblk; 736 eb->active_count--; 737 738 return 0; 739 740 read_error: 741 d->page_data[page] = BLOCK_ERROR; 742 d->revmap[oldblock] = PAGE_UNDEF; 743 return ret; 744 } 745 746 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 747 { 748 unsigned int i, block, eblk_base, newblock; 749 int ret, errcode; 750 751 errcode = 0; 752 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 753 754 for (i = 0; i < d->pages_per_eblk; i++) { 755 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 756 return -ENOSPC; 757 758 block = eblk_base + i; 759 if (d->revmap[block] == PAGE_UNDEF) 760 continue; 761 762 ret = mtdswap_move_block(d, block, &newblock); 763 if (ret < 0 && !errcode) 764 errcode = ret; 765 } 766 767 return errcode; 768 } 769 770 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 771 { 772 int idx, stopat; 773 774 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD) 775 stopat = MTDSWAP_LOWFRAG; 776 else 777 stopat = MTDSWAP_HIFRAG; 778 779 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 780 if (d->trees[idx].root.rb_node != NULL) 781 return idx; 782 783 return -1; 784 } 785 786 static int mtdswap_wlfreq(unsigned int maxdiff) 787 { 788 unsigned int h, x, y, dist, base; 789 790 /* 791 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 792 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 793 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 
794 */ 795 796 dist = maxdiff - MAX_ERASE_DIFF; 797 if (dist > COLLECT_NONDIRTY_BASE) 798 dist = COLLECT_NONDIRTY_BASE; 799 800 /* 801 * Modelling the slop as right angular triangle with base 802 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 803 * equal to the ratio h/base. 804 */ 805 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 806 base = COLLECT_NONDIRTY_BASE; 807 808 x = dist - base; 809 y = (x * h + base / 2) / base; 810 811 return COLLECT_NONDIRTY_FREQ2 + y; 812 } 813 814 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 815 { 816 static unsigned int pick_cnt; 817 unsigned int i, idx = -1, wear, max; 818 struct rb_root *root; 819 820 max = 0; 821 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 822 root = &d->trees[i].root; 823 if (root->rb_node == NULL) 824 continue; 825 826 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 827 if (wear > max) { 828 max = wear; 829 idx = i; 830 } 831 } 832 833 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 834 pick_cnt = 0; 835 return idx; 836 } 837 838 pick_cnt++; 839 return -1; 840 } 841 842 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 843 unsigned int background) 844 { 845 int idx; 846 847 if (TREE_NONEMPTY(d, FAILING) && 848 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 849 return MTDSWAP_FAILING; 850 851 idx = mtdswap_choose_wl_tree(d); 852 if (idx >= MTDSWAP_CLEAN) 853 return idx; 854 855 return __mtdswap_choose_gc_tree(d); 856 } 857 858 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 859 unsigned int background) 860 { 861 struct rb_root *rp = NULL; 862 struct swap_eb *eb = NULL; 863 int idx; 864 865 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 866 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 867 return NULL; 868 869 idx = mtdswap_choose_gc_tree(d, background); 870 if (idx < 0) 871 return NULL; 872 873 rp = &d->trees[idx].root; 874 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 875 876 
rb_erase(&eb->rb, rp);
	eb->root = NULL;
	d->trees[idx].count--;
	return eb;
}

/* Alternating 0x55 / 0xAA test pattern, selected by the parity of @i */
static unsigned int mtdswap_test_patt(unsigned int i)
{
	return i % 2 ? 0x55555555 : 0xAAAAAAAA;
}

/*
 * Destructively test erase block @eb: two rounds of writing a pattern to
 * every write unit (data and OOB), reading it back, comparing and erasing.
 *
 * Returns 1 and clears EBLOCK_READERR when the block passes. On any
 * failure the block is handed to mtdswap_handle_badblock() and 0 is
 * returned.
 */
static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d,
					struct swap_eb *eb)
{
	struct mtd_info *mtd = d->mtd;
	unsigned int test, i, j, patt, mtd_pages;
	loff_t base, pos;
	unsigned int *p1 = (unsigned int *)d->page_buf;
	unsigned char *p2 = (unsigned char *)d->oob_buf;
	struct mtd_oob_ops ops;
	int ret;

	ops.mode = MTD_OPS_AUTO_OOB;
	ops.len = mtd->writesize;
	ops.ooblen = mtd->oobavail;
	ops.ooboffs = 0;
	ops.datbuf = d->page_buf;
	ops.oobbuf = d->oob_buf;
	base = mtdswap_eb_offset(d, eb);
	/* Number of MTD write units in one erase block */
	mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize;

	for (test = 0; test < 2; test++) {
		pos = base;
		for (i = 0; i < mtd_pages; i++) {
			patt = mtdswap_test_patt(test + i);
			/* memset() uses the low byte; all pattern bytes are equal */
			memset(d->page_buf, patt, mtd->writesize);
			memset(d->oob_buf, patt, mtd->oobavail);
			ret = mtd_write_oob(mtd, pos, &ops);
			if (ret)
				goto error;

			pos += mtd->writesize;
		}

		pos = base;
		for (i = 0; i < mtd_pages; i++) {
			ret = mtd_read_oob(mtd, pos, &ops);
			if (ret)
				goto error;

			patt = mtdswap_test_patt(test + i);
			for (j = 0; j < mtd->writesize/sizeof(int); j++)
				if (p1[j] != patt)
					goto error;

			for (j = 0; j < mtd->oobavail; j++)
				if (p2[j] != (unsigned char)patt)
					goto error;

			pos += mtd->writesize;
		}

		/*
		 * NOTE(review): mtdswap_erase_block() already calls
		 * mtdswap_handle_badblock() when it gives up, so this path
		 * reaches it a second time via the error label - confirm
		 * whether the double spare_eblks decrement is intended.
		 */
		ret = mtdswap_erase_block(d, eb);
		if (ret)
			goto error;
	}

	eb->flags &= ~EBLOCK_READERR;
	return 1;

error:
	mtdswap_handle_badblock(d, eb);
	return 0;
}

/*
 * Run one garbage collection pass: pick a block, move its live pages
 * away, erase it and return it to the CLEAN tree.
 *
 * Returns 1 when there is nothing to collect or no room to do so, and 0
 * after a pass was attempted, whether or not the block ended up clean.
 */
static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background)
{
	struct swap_eb *eb;
	int ret;

	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
		return 1;

	eb = mtdswap_pick_gc_eblk(d, background);
	if (!eb)
		return 1;

	ret = mtdswap_gc_eblock(d, eb);
	if (ret == -ENOSPC)
		return 1;

	/* A block that failed a write earlier is retired, not reused */
	if (eb->flags & EBLOCK_FAILED) {
		mtdswap_handle_badblock(d, eb);
		return 0;
	}

	eb->flags &= ~EBLOCK_BITFLIP;
	ret = mtdswap_erase_block(d, eb);
	/* Blocks with read errors must also pass the pattern test */
	if ((eb->flags & EBLOCK_READERR) &&
		(ret || !mtdswap_eblk_passes(d, eb)))
		return 0;

	if (ret == 0)
		ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN);

	if (ret == 0)
		mtdswap_rb_add(d, eb, MTDSWAP_CLEAN);
	else if (ret != -EIO && !mtd_is_eccerr(ret))
		mtdswap_rb_add(d, eb, MTDSWAP_DIRTY);

	return 0;
}

/*
 * Background hook of the block translation layer: keep collecting until
 * mtdswap_gc() reports nothing to do or the layer asks us to stop.
 */
static void mtdswap_background(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	int ret;

	while (1) {
		ret = mtdswap_gc(d, 1);
		if (ret || mtd_blktrans_cease_background(dev))
			return;
	}
}

/* Free the tables and buffers allocated by mtdswap_init() */
static void mtdswap_cleanup(struct mtdswap_dev *d)
{
	vfree(d->eb_data);
	vfree(d->revmap);
	vfree(d->page_data);
	kfree(d->oob_buf);
	kfree(d->page_buf);
}

/* Flush hook: sync the underlying MTD device. Always returns 0. */
static int mtdswap_flush(struct mtd_blktrans_dev *dev)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);

	mtd_sync(d->mtd);
	return 0;
}

/* Count the erase blocks in the first @size bytes of @mtd marked bad */
static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size)
{
	loff_t offset;
	unsigned int badcnt;

	badcnt = 0;

	if (mtd_can_have_bb(mtd))
		for (offset = 0; offset < size; offset += mtd->erasesize)
			if (mtd_block_isbad(mtd, offset))
				badcnt++;

	return badcnt;
}

/*
 * Write hook: store one page of data for logical @page. Any previous
 * mapping of the page is dropped first.
 */
static int mtdswap_writesect(struct mtd_blktrans_dev *dev,
			unsigned long page, char *buf)
{
	struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev);
	unsigned int newblock, mapped;
	struct swap_eb *eb;
	int ret;

	d->sect_write_count++;

	if (d->spare_eblks < MIN_SPARE_EBLOCKS)
		return -ENOSPC;

	if
(header) { 1047 /* Ignore writes to the header page */ 1048 if (unlikely(page == 0)) 1049 return 0; 1050 1051 page--; 1052 } 1053 1054 mapped = d->page_data[page]; 1055 if (mapped <= BLOCK_MAX) { 1056 eb = d->eb_data + (mapped / d->pages_per_eblk); 1057 eb->active_count--; 1058 mtdswap_store_eb(d, eb); 1059 d->page_data[page] = BLOCK_UNDEF; 1060 d->revmap[mapped] = PAGE_UNDEF; 1061 } 1062 1063 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1064 d->mtd_write_count++; 1065 1066 if (ret < 0) 1067 return ret; 1068 1069 eb = d->eb_data + (newblock / d->pages_per_eblk); 1070 d->page_data[page] = newblock; 1071 1072 return 0; 1073 } 1074 1075 /* Provide a dummy swap header for the kernel */ 1076 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1077 { 1078 union swap_header *hd = (union swap_header *)(buf); 1079 1080 memset(buf, 0, PAGE_SIZE - 10); 1081 1082 hd->info.version = 1; 1083 hd->info.last_page = d->mbd_dev->size - 1; 1084 hd->info.nr_badpages = 0; 1085 1086 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1087 1088 return 0; 1089 } 1090 1091 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1092 unsigned long page, char *buf) 1093 { 1094 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1095 struct mtd_info *mtd = d->mtd; 1096 unsigned int realblock, retries; 1097 loff_t readpos; 1098 struct swap_eb *eb; 1099 size_t retlen; 1100 int ret; 1101 1102 d->sect_read_count++; 1103 1104 if (header) { 1105 if (unlikely(page == 0)) 1106 return mtdswap_auto_header(d, buf); 1107 1108 page--; 1109 } 1110 1111 realblock = d->page_data[page]; 1112 if (realblock > BLOCK_MAX) { 1113 memset(buf, 0x0, PAGE_SIZE); 1114 if (realblock == BLOCK_UNDEF) 1115 return 0; 1116 else 1117 return -EIO; 1118 } 1119 1120 eb = d->eb_data + (realblock / d->pages_per_eblk); 1121 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1122 1123 readpos = (loff_t)realblock << PAGE_SHIFT; 1124 retries = 0; 1125 1126 retry: 1127 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, 
buf); 1128 1129 d->mtd_read_count++; 1130 if (mtd_is_bitflip(ret)) { 1131 eb->flags |= EBLOCK_BITFLIP; 1132 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1133 ret = 0; 1134 } 1135 1136 if (ret < 0) { 1137 dev_err(d->dev, "Read error %d\n", ret); 1138 eb->flags |= EBLOCK_READERR; 1139 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1140 retries++; 1141 if (retries < MTDSWAP_IO_RETRIES) 1142 goto retry; 1143 1144 return ret; 1145 } 1146 1147 if (retlen != PAGE_SIZE) { 1148 dev_err(d->dev, "Short read %zd\n", retlen); 1149 return -EIO; 1150 } 1151 1152 return 0; 1153 } 1154 1155 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1156 unsigned nr_pages) 1157 { 1158 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1159 unsigned long page; 1160 struct swap_eb *eb; 1161 unsigned int mapped; 1162 1163 d->discard_count++; 1164 1165 for (page = first; page < first + nr_pages; page++) { 1166 mapped = d->page_data[page]; 1167 if (mapped <= BLOCK_MAX) { 1168 eb = d->eb_data + (mapped / d->pages_per_eblk); 1169 eb->active_count--; 1170 mtdswap_store_eb(d, eb); 1171 d->page_data[page] = BLOCK_UNDEF; 1172 d->revmap[mapped] = PAGE_UNDEF; 1173 d->discard_page_count++; 1174 } else if (mapped == BLOCK_ERROR) { 1175 d->page_data[page] = BLOCK_UNDEF; 1176 d->discard_page_count++; 1177 } 1178 } 1179 1180 return 0; 1181 } 1182 1183 static int mtdswap_show(struct seq_file *s, void *data) 1184 { 1185 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1186 unsigned long sum; 1187 unsigned int count[MTDSWAP_TREE_CNT]; 1188 unsigned int min[MTDSWAP_TREE_CNT]; 1189 unsigned int max[MTDSWAP_TREE_CNT]; 1190 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1191 uint64_t use_size; 1192 static const char * const name[] = { 1193 "clean", "used", "low", "high", "dirty", "bitflip", "failing" 1194 }; 1195 1196 mutex_lock(&d->mbd_dev->lock); 1197 1198 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1199 struct rb_root *root = &d->trees[i].root; 1200 1201 if 
(root->rb_node) {
			count[i] = d->trees[i].count;
			min[i] = MTDSWAP_ECNT_MIN(root);
			max[i] = MTDSWAP_ECNT_MAX(root);
		} else
			/* min[] / max[] stay unset; they are skipped below */
			count[i] = 0;
	}

	if (d->curr_write) {
		cw = 1;
		cwp = d->curr_write_pos;
		cwecount = d->curr_write->erase_count;
	}

	sum = 0;
	for (i = 0; i < d->eblks; i++)
		sum += d->eb_data[i].erase_count;

	use_size = (uint64_t)d->eblks * d->mtd->erasesize;
	bb_cnt = mtdswap_badblocks(d->mtd, use_size);

	mapped = 0;
	pages = d->mbd_dev->size;
	for (i = 0; i < pages; i++)
		if (d->page_data[i] != BLOCK_UNDEF)
			mapped++;

	mutex_unlock(&d->mbd_dev->lock);

	/* Everything below prints from the snapshot taken above */
	for (i = 0; i < MTDSWAP_TREE_CNT; i++) {
		if (!count[i])
			continue;

		if (min[i] != max[i])
			seq_printf(s, "%s:\t%5d erase blocks, erased min %d, "
				"max %d times\n",
				name[i], count[i], min[i], max[i]);
		else
			seq_printf(s, "%s:\t%5d erase blocks, all erased %d "
				"times\n", name[i], count[i], min[i]);
	}

	if (bb_cnt)
		seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt);

	if (cw)
		seq_printf(s, "current erase block: %u pages used, %u free, "
			"erased %u times\n",
			cwp, d->pages_per_eblk - cwp, cwecount);

	seq_printf(s, "total erasures: %lu\n", sum);

	seq_puts(s, "\n");

	seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count);
	seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count);
	seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count);
	seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count);
	seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count);
	seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count);

	seq_puts(s, "\n");
	seq_printf(s, "total pages: %u\n", pages);
	seq_printf(s, "pages mapped: %u\n", mapped);

	return 0;
}

/* debugfs open: bind mtdswap_show() to the device stored in the inode */
static int mtdswap_open(struct inode *inode, struct file *file)
{
	return single_open(file, mtdswap_show, inode->i_private);
}

static const struct file_operations mtdswap_fops = {
	.open		= mtdswap_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Create the "mtdswap_stats" debugfs file under the MTD device's debugfs
 * directory. Returns 0 on success or when debugfs is not configured, and
 * -1 when the directory is missing or the file cannot be created.
 */
static int mtdswap_add_debugfs(struct mtdswap_dev *d)
{
	struct dentry *root = d->mtd->dbg.dfs_dir;
	struct dentry *dent;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (IS_ERR_OR_NULL(root))
		return -1;

	dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d,
				&mtdswap_fops);
	if (!dent) {
		dev_err(d->dev, "debugfs_create_file failed\n");
		return -1;
	}

	return 0;
}

/*
 * Allocate and initialise the mapping tables, per erase block state and
 * I/O buffers for @eblocks erase blocks, then scan the flash.
 *
 * @spare_cnt is the number of erase blocks held back as spares.
 * Returns 0 on success or -ENOMEM, in which case everything allocated so
 * far has been freed again.
 */
static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks,
			unsigned int spare_cnt)
{
	struct mtd_info *mtd = d->mbd_dev->mtd;
	unsigned int i, eblk_bytes, pages, blocks;
	int ret = -ENOMEM;

	d->mtd = mtd;
	d->eblks = eblocks;
	d->spare_eblks = spare_cnt;
	d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT;

	pages = d->mbd_dev->size;
	blocks = eblocks * d->pages_per_eblk;

	for (i = 0; i < MTDSWAP_TREE_CNT; i++)
		d->trees[i].root = RB_ROOT;

	/* One entry per logical page */
	d->page_data = vmalloc(array_size(pages, sizeof(int)));
	if (!d->page_data)
		goto page_data_fail;

	/* One entry per page-sized flash block */
	d->revmap = vmalloc(array_size(blocks, sizeof(int)));
	if (!d->revmap)
		goto revmap_fail;

	eblk_bytes = sizeof(struct swap_eb)*d->eblks;
	d->eb_data = vzalloc(eblk_bytes);
	if (!d->eb_data)
		goto eb_data_fail;

	/* Start with nothing mapped in either direction */
	for (i = 0; i < pages; i++)
		d->page_data[i] = BLOCK_UNDEF;

	for (i = 0; i < blocks; i++)
		d->revmap[i] = PAGE_UNDEF;

	d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!d->page_buf)
		goto page_buf_fail;

	/* Room for the OOB areas of two pages, as read by the marker scan */
	d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL);
	if (!d->oob_buf)
		goto oob_buf_fail;

	mtdswap_scan_eblks(d);

	return 0;

oob_buf_fail:
	kfree(d->page_buf);
page_buf_fail:
	vfree(d->eb_data);
eb_data_fail:
	vfree(d->revmap);
revmap_fail:
	vfree(d->page_data);
page_data_fail:
	printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret);
	return ret;
}

static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
{
	struct mtdswap_dev *d;
	struct mtd_blktrans_dev *mbd_dev;
	char *parts;
	char *this_opt;
	unsigned long part;
	unsigned int eblocks, eavailable, bad_blocks, spare_cnt;
	uint64_t swap_size, use_size, size_limit;
	int ret;

	parts = &partitions[0];
	if (!*parts)
		return;

	while ((this_opt = strsep(&parts, ",")) != NULL) {
		if (kstrtoul(this_opt, 0, &part) < 0)
			return;

		if (mtd->index == part)
			break;
	}

	if (mtd->index != part)
		return;

	if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) {
		printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE "
			"%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE);
		return;
	}

	if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) {
		printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size"
			" %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize);
		return;
	}

	if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) {
		printk(KERN_ERR "%s: Not enough free bytes in OOB, "
			"%d available, %zu needed.\n",
			MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE);
		return;
	}

	if (spare_eblocks > 100)
		spare_eblocks = 100;

	use_size = mtd->size;
	size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE;

	if (mtd->size > size_limit) {
		printk(KERN_WARNING "%s: Device too large. 
Limiting size to " 1417 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1418 use_size = size_limit; 1419 } 1420 1421 eblocks = mtd_div_by_eb(use_size, mtd); 1422 use_size = (uint64_t)eblocks * mtd->erasesize; 1423 bad_blocks = mtdswap_badblocks(mtd, use_size); 1424 eavailable = eblocks - bad_blocks; 1425 1426 if (eavailable < MIN_ERASE_BLOCKS) { 1427 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1428 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1429 MIN_ERASE_BLOCKS); 1430 return; 1431 } 1432 1433 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1434 1435 if (spare_cnt < MIN_SPARE_EBLOCKS) 1436 spare_cnt = MIN_SPARE_EBLOCKS; 1437 1438 if (spare_cnt > eavailable - 1) 1439 spare_cnt = eavailable - 1; 1440 1441 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1442 (header ? PAGE_SIZE : 0); 1443 1444 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1445 "%u spare, %u bad blocks\n", 1446 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1447 1448 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1449 if (!d) 1450 return; 1451 1452 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1453 if (!mbd_dev) { 1454 kfree(d); 1455 return; 1456 } 1457 1458 d->mbd_dev = mbd_dev; 1459 mbd_dev->priv = d; 1460 1461 mbd_dev->mtd = mtd; 1462 mbd_dev->devnum = mtd->index; 1463 mbd_dev->size = swap_size >> PAGE_SHIFT; 1464 mbd_dev->tr = tr; 1465 1466 if (!(mtd->flags & MTD_WRITEABLE)) 1467 mbd_dev->readonly = 1; 1468 1469 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1470 goto init_failed; 1471 1472 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1473 goto cleanup; 1474 1475 d->dev = disk_to_dev(mbd_dev->disk); 1476 1477 ret = mtdswap_add_debugfs(d); 1478 if (ret < 0) 1479 goto debugfs_failed; 1480 1481 return; 1482 1483 debugfs_failed: 1484 del_mtd_blktrans_dev(mbd_dev); 1485 1486 cleanup: 1487 mtdswap_cleanup(d); 1488 1489 init_failed: 1490 kfree(mbd_dev); 1491 kfree(d); 1492 } 1493 1494 static void 
mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1495 { 1496 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1497 1498 del_mtd_blktrans_dev(dev); 1499 mtdswap_cleanup(d); 1500 kfree(d); 1501 } 1502 1503 static struct mtd_blktrans_ops mtdswap_ops = { 1504 .name = "mtdswap", 1505 .major = 0, 1506 .part_bits = 0, 1507 .blksize = PAGE_SIZE, 1508 .flush = mtdswap_flush, 1509 .readsect = mtdswap_readsect, 1510 .writesect = mtdswap_writesect, 1511 .discard = mtdswap_discard, 1512 .background = mtdswap_background, 1513 .add_mtd = mtdswap_add_mtd, 1514 .remove_dev = mtdswap_remove_dev, 1515 .owner = THIS_MODULE, 1516 }; 1517 1518 static int __init mtdswap_modinit(void) 1519 { 1520 return register_mtd_blktrans(&mtdswap_ops); 1521 } 1522 1523 static void __exit mtdswap_modexit(void) 1524 { 1525 deregister_mtd_blktrans(&mtdswap_ops); 1526 } 1527 1528 module_init(mtdswap_modinit); 1529 module_exit(mtdswap_modexit); 1530 1531 1532 MODULE_LICENSE("GPL"); 1533 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1534 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1535 "swap space"); 1536