1 /* 2 * Swap block device support for MTDs 3 * Turns an MTD device into a swap device with block wear leveling 4 * 5 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 6 * 7 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 8 * 9 * Based on Richard Purdie's earlier implementation in 2007. Background 10 * support and lock-less operation written by Adrian Hunter. 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * version 2 as published by the Free Software Foundation. 15 * 16 * This program is distributed in the hope that it will be useful, but 17 * WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 24 * 02110-1301 USA 25 */ 26 27 #include <linux/kernel.h> 28 #include <linux/module.h> 29 #include <linux/mtd/mtd.h> 30 #include <linux/mtd/blktrans.h> 31 #include <linux/rbtree.h> 32 #include <linux/sched.h> 33 #include <linux/slab.h> 34 #include <linux/vmalloc.h> 35 #include <linux/genhd.h> 36 #include <linux/swap.h> 37 #include <linux/debugfs.h> 38 #include <linux/seq_file.h> 39 #include <linux/device.h> 40 #include <linux/math64.h> 41 42 #define MTDSWAP_PREFIX "mtdswap" 43 44 /* 45 * The number of free eraseblocks when GC should stop 46 */ 47 #define CLEAN_BLOCK_THRESHOLD 20 48 49 /* 50 * Number of free eraseblocks below which GC can also collect low frag 51 * blocks. 52 */ 53 #define LOW_FRAG_GC_THRESHOLD 5 54 55 /* 56 * Wear level cost amortization. We want to do wear leveling on the background 57 * without disturbing gc too much. This is made by defining max GC frequency. 58 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 59 * on the biggest wear difference rather than the biggest dirtiness. 60 * 61 * The lower freq2 should be chosen so that it makes sure the maximum erase 62 * difference will decrease even if a malicious application is deliberately 63 * trying to make erase differences large. 64 */ 65 #define MAX_ERASE_DIFF 4000 66 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 67 #define COLLECT_NONDIRTY_FREQ1 6 68 #define COLLECT_NONDIRTY_FREQ2 4 69 70 #define PAGE_UNDEF UINT_MAX 71 #define BLOCK_UNDEF UINT_MAX 72 #define BLOCK_ERROR (UINT_MAX - 1) 73 #define BLOCK_MAX (UINT_MAX - 2) 74 75 #define EBLOCK_BAD (1 << 0) 76 #define EBLOCK_NOMAGIC (1 << 1) 77 #define EBLOCK_BITFLIP (1 << 2) 78 #define EBLOCK_FAILED (1 << 3) 79 #define EBLOCK_READERR (1 << 4) 80 #define EBLOCK_IDX_SHIFT 5 81 82 struct swap_eb { 83 struct rb_node rb; 84 struct rb_root *root; 85 86 unsigned int flags; 87 unsigned int active_count; 88 unsigned int erase_count; 89 unsigned int pad; /* speeds up pointer decrement */ 90 }; 91 92 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 93 rb)->erase_count) 94 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 95 rb)->erase_count) 96 97 struct mtdswap_tree { 98 struct rb_root root; 99 unsigned int count; 100 }; 101 102 enum { 103 MTDSWAP_CLEAN, 104 MTDSWAP_USED, 105 MTDSWAP_LOWFRAG, 106 MTDSWAP_HIFRAG, 107 MTDSWAP_DIRTY, 108 MTDSWAP_BITFLIP, 109 MTDSWAP_FAILING, 110 MTDSWAP_TREE_CNT, 111 }; 112 113 struct mtdswap_dev { 114 struct mtd_blktrans_dev *mbd_dev; 115 struct mtd_info *mtd; 116 struct device *dev; 117 118 unsigned int *page_data; 119 unsigned int *revmap; 120 121 unsigned int eblks; 122 unsigned int spare_eblks; 123 unsigned int pages_per_eblk; 124 unsigned int max_erase_count; 125 struct swap_eb *eb_data; 126 127 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 128 129 unsigned long long sect_read_count; 130 unsigned long long sect_write_count; 131 unsigned long long mtd_write_count; 132 unsigned long long mtd_read_count; 133 unsigned long long discard_count; 134 unsigned long long discard_page_count; 135 136 unsigned int curr_write_pos; 137 struct swap_eb *curr_write; 138 139 char *page_buf; 140 char *oob_buf; 141 }; 142 143 struct mtdswap_oobdata { 144 __le16 magic; 145 __le32 count; 146 } __packed; 147 148 #define MTDSWAP_MAGIC_CLEAN 0x2095 149 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 150 #define MTDSWAP_TYPE_CLEAN 0 151 #define MTDSWAP_TYPE_DIRTY 1 152 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 153 154 #define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 155 #define MTDSWAP_IO_RETRIES 3 156 157 enum { 158 MTDSWAP_SCANNED_CLEAN, 159 MTDSWAP_SCANNED_DIRTY, 160 MTDSWAP_SCANNED_BITFLIP, 161 MTDSWAP_SCANNED_BAD, 162 }; 163 164 /* 165 * In the worst case mtdswap_writesect() has allocated the last clean 166 * page from the current block and is then pre-empted by the GC 167 * thread. The thread can consume a full erase block when moving a 168 * block. 169 */ 170 #define MIN_SPARE_EBLOCKS 2 171 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 172 173 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 174 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 175 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 176 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 177 178 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 179 180 static char partitions[128] = ""; 181 module_param_string(partitions, partitions, sizeof(partitions), 0444); 182 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 183 "partitions=\"1,3,5\""); 184 185 static unsigned int spare_eblocks = 10; 186 module_param(spare_eblocks, uint, 0444); 187 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 188 "garbage collection (default 10%)"); 189 190 static bool header; /* false */ 191 module_param(header, bool, 0444); 192 MODULE_PARM_DESC(header, 193 "Include builtin swap header (default 0, without header)"); 194 195 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 196 197 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 198 { 199 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 200 } 201 202 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 203 { 204 unsigned int oldidx; 205 struct mtdswap_tree *tp; 206 207 if (eb->root) { 208 tp = container_of(eb->root, struct mtdswap_tree, root); 209 oldidx = tp - &d->trees[0]; 210 211 d->trees[oldidx].count--; 212 rb_erase(&eb->rb, eb->root); 213 } 214 } 215 216 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 217 { 218 struct rb_node **p, *parent = NULL; 219 struct swap_eb *cur; 220 221 p = &root->rb_node; 222 while (*p) { 223 parent = *p; 224 cur = rb_entry(parent, struct swap_eb, rb); 225 if (eb->erase_count > cur->erase_count) 226 p = &(*p)->rb_right; 227 else 228 p = &(*p)->rb_left; 229 } 230 231 rb_link_node(&eb->rb, parent, p); 232 rb_insert_color(&eb->rb, root); 233 } 234 235 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 236 { 237 struct rb_root *root; 238 239 if (eb->root == &d->trees[idx].root) 240 return; 241 242 mtdswap_eb_detach(d, eb); 243 root = &d->trees[idx].root; 244 __mtdswap_rb_add(root, eb); 245 eb->root = root; 246 d->trees[idx].count++; 247 } 248 249 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 250 { 251 struct rb_node *p; 252 unsigned int i; 253 254 p = rb_first(root); 255 i = 0; 256 while (i < idx && p) { 257 p = rb_next(p); 258 i++; 259 } 260 261 return p; 262 } 263 264 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 265 { 266 int ret; 267 loff_t offset; 268 269 d->spare_eblks--; 270 eb->flags |= EBLOCK_BAD; 271 mtdswap_eb_detach(d, eb); 272 eb->root = NULL; 273 274 /* badblocks not supported */ 275 if (!mtd_can_have_bb(d->mtd)) 276 return 1; 277 278 offset = mtdswap_eb_offset(d, eb); 279 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 280 ret = mtd_block_markbad(d->mtd, offset); 281 282 if (ret) { 283 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 284 "error %d\n", offset, ret); 285 return ret; 286 } 287 288 return 1; 289 290 } 291 292 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 293 { 294 unsigned int marked = eb->flags & EBLOCK_FAILED; 295 struct swap_eb *curr_write = d->curr_write; 296 297 eb->flags |= EBLOCK_FAILED; 298 if (curr_write == eb) { 299 d->curr_write = NULL; 300 301 if (!marked && d->curr_write_pos != 0) { 302 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 303 return 0; 304 } 305 } 306 307 return mtdswap_handle_badblock(d, eb); 308 } 309 310 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 311 struct mtd_oob_ops *ops) 312 { 313 int ret = mtd_read_oob(d->mtd, from, ops); 314 315 if (mtd_is_bitflip(ret)) 316 return ret; 317 318 if (ret) { 319 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 320 ret, from); 321 return ret; 322 } 323 324 if (ops->oobretlen < ops->ooblen) { 325 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 326 "%zd) for block at %08llx\n", 327 ops->oobretlen, ops->ooblen, from); 328 return -EIO; 329 } 330 331 return 0; 332 } 333 334 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 335 { 336 struct mtdswap_oobdata *data, *data2; 337 int ret; 338 loff_t offset; 339 struct mtd_oob_ops ops; 340 341 offset = mtdswap_eb_offset(d, eb); 342 343 /* Check first if the block is bad. */ 344 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 345 return MTDSWAP_SCANNED_BAD; 346 347 ops.ooblen = 2 * d->mtd->oobavail; 348 ops.oobbuf = d->oob_buf; 349 ops.ooboffs = 0; 350 ops.datbuf = NULL; 351 ops.mode = MTD_OPS_AUTO_OOB; 352 353 ret = mtdswap_read_oob(d, offset, &ops); 354 355 if (ret && !mtd_is_bitflip(ret)) 356 return ret; 357 358 data = (struct mtdswap_oobdata *)d->oob_buf; 359 data2 = (struct mtdswap_oobdata *) 360 (d->oob_buf + d->mtd->oobavail); 361 362 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 363 eb->erase_count = le32_to_cpu(data->count); 364 if (mtd_is_bitflip(ret)) 365 ret = MTDSWAP_SCANNED_BITFLIP; 366 else { 367 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 368 ret = MTDSWAP_SCANNED_DIRTY; 369 else 370 ret = MTDSWAP_SCANNED_CLEAN; 371 } 372 } else { 373 eb->flags |= EBLOCK_NOMAGIC; 374 ret = MTDSWAP_SCANNED_DIRTY; 375 } 376 377 return ret; 378 } 379 380 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 381 u16 marker) 382 { 383 struct mtdswap_oobdata n; 384 int ret; 385 loff_t offset; 386 struct mtd_oob_ops ops; 387 388 ops.ooboffs = 0; 389 ops.oobbuf = (uint8_t *)&n; 390 ops.mode = MTD_OPS_AUTO_OOB; 391 ops.datbuf = NULL; 392 393 if (marker == MTDSWAP_TYPE_CLEAN) { 394 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 395 n.count = cpu_to_le32(eb->erase_count); 396 ops.ooblen = MTDSWAP_OOBSIZE; 397 offset = mtdswap_eb_offset(d, eb); 398 } else { 399 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 400 ops.ooblen = sizeof(n.magic); 401 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 402 } 403 404 ret = mtd_write_oob(d->mtd, offset, &ops); 405 406 if (ret) { 407 dev_warn(d->dev, "Write OOB failed for block at %08llx " 408 "error %d\n", offset, ret); 409 if (ret == -EIO || mtd_is_eccerr(ret)) 410 mtdswap_handle_write_error(d, eb); 411 return ret; 412 } 413 414 if (ops.oobretlen != ops.ooblen) { 415 dev_warn(d->dev, "Short OOB write for block at %08llx: " 416 "%zd not %zd\n", 417 offset, ops.oobretlen, ops.ooblen); 418 return ret; 419 } 420 421 return 0; 422 } 423 424 /* 425 * Are there any erase blocks without MAGIC_CLEAN header, presumably 426 * because power was cut off after erase but before header write? We 427 * need to guestimate the erase count. 428 */ 429 static void mtdswap_check_counts(struct mtdswap_dev *d) 430 { 431 struct rb_root hist_root = RB_ROOT; 432 struct rb_node *medrb; 433 struct swap_eb *eb; 434 unsigned int i, cnt, median; 435 436 cnt = 0; 437 for (i = 0; i < d->eblks; i++) { 438 eb = d->eb_data + i; 439 440 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 441 continue; 442 443 __mtdswap_rb_add(&hist_root, eb); 444 cnt++; 445 } 446 447 if (cnt == 0) 448 return; 449 450 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 451 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 452 453 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 454 455 for (i = 0; i < d->eblks; i++) { 456 eb = d->eb_data + i; 457 458 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 459 eb->erase_count = median; 460 461 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 462 continue; 463 464 rb_erase(&eb->rb, &hist_root); 465 } 466 } 467 468 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 469 { 470 int status; 471 unsigned int i, idx; 472 struct swap_eb *eb; 473 474 for (i = 0; i < d->eblks; i++) { 475 eb = d->eb_data + i; 476 477 status = mtdswap_read_markers(d, eb); 478 if (status < 0) 479 eb->flags |= EBLOCK_READERR; 480 else if (status == MTDSWAP_SCANNED_BAD) { 481 eb->flags |= EBLOCK_BAD; 482 continue; 483 } 484 485 switch (status) { 486 case MTDSWAP_SCANNED_CLEAN: 487 idx = MTDSWAP_CLEAN; 488 break; 489 case MTDSWAP_SCANNED_DIRTY: 490 case MTDSWAP_SCANNED_BITFLIP: 491 idx = MTDSWAP_DIRTY; 492 break; 493 default: 494 idx = MTDSWAP_FAILING; 495 } 496 497 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 498 } 499 500 mtdswap_check_counts(d); 501 502 for (i = 0; i < d->eblks; i++) { 503 eb = d->eb_data + i; 504 505 if (eb->flags & EBLOCK_BAD) 506 continue; 507 508 idx = eb->flags >> EBLOCK_IDX_SHIFT; 509 mtdswap_rb_add(d, eb, idx); 510 } 511 } 512 513 /* 514 * Place eblk into a tree corresponding to its number of active blocks 515 * it contains. 516 */ 517 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 518 { 519 unsigned int weight = eb->active_count; 520 unsigned int maxweight = d->pages_per_eblk; 521 522 if (eb == d->curr_write) 523 return; 524 525 if (eb->flags & EBLOCK_BITFLIP) 526 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 527 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 528 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 529 if (weight == maxweight) 530 mtdswap_rb_add(d, eb, MTDSWAP_USED); 531 else if (weight == 0) 532 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 533 else if (weight > (maxweight/2)) 534 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 535 else 536 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 537 } 538 539 540 static void mtdswap_erase_callback(struct erase_info *done) 541 { 542 wait_queue_head_t *wait_q = (wait_queue_head_t *)done->priv; 543 wake_up(wait_q); 544 } 545 546 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 547 { 548 struct mtd_info *mtd = d->mtd; 549 struct erase_info erase; 550 wait_queue_head_t wq; 551 unsigned int retries = 0; 552 int ret; 553 554 eb->erase_count++; 555 if (eb->erase_count > d->max_erase_count) 556 d->max_erase_count = eb->erase_count; 557 558 retry: 559 init_waitqueue_head(&wq); 560 memset(&erase, 0, sizeof(struct erase_info)); 561 562 erase.mtd = mtd; 563 erase.callback = mtdswap_erase_callback; 564 erase.addr = mtdswap_eb_offset(d, eb); 565 erase.len = mtd->erasesize; 566 erase.priv = (u_long)&wq; 567 568 ret = mtd_erase(mtd, &erase); 569 if (ret) { 570 if (retries++ < MTDSWAP_ERASE_RETRIES) { 571 dev_warn(d->dev, 572 "erase of erase block %#llx on %s failed", 573 erase.addr, mtd->name); 574 yield(); 575 goto retry; 576 } 577 578 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 579 erase.addr, mtd->name); 580 581 mtdswap_handle_badblock(d, eb); 582 return -EIO; 583 } 584 585 ret = wait_event_interruptible(wq, erase.state == MTD_ERASE_DONE || 586 erase.state == MTD_ERASE_FAILED); 587 if (ret) { 588 dev_err(d->dev, "Interrupted erase block %#llx erasure on %s\n", 589 erase.addr, mtd->name); 590 return -EINTR; 591 } 592 593 if (erase.state == MTD_ERASE_FAILED) { 594 if (retries++ < MTDSWAP_ERASE_RETRIES) { 595 dev_warn(d->dev, 596 "erase of erase block %#llx on %s failed", 597 erase.addr, mtd->name); 598 yield(); 599 goto retry; 600 } 601 602 mtdswap_handle_badblock(d, eb); 603 return -EIO; 604 } 605 606 return 0; 607 } 608 609 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 610 unsigned int *block) 611 { 612 int ret; 613 struct swap_eb *old_eb = d->curr_write; 614 struct rb_root *clean_root; 615 struct swap_eb *eb; 616 617 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 618 do { 619 if (TREE_EMPTY(d, CLEAN)) 620 return -ENOSPC; 621 622 clean_root = TREE_ROOT(d, CLEAN); 623 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 624 rb_erase(&eb->rb, clean_root); 625 eb->root = NULL; 626 TREE_COUNT(d, CLEAN)--; 627 628 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 629 } while (ret == -EIO || mtd_is_eccerr(ret)); 630 631 if (ret) 632 return ret; 633 634 d->curr_write_pos = 0; 635 d->curr_write = eb; 636 if (old_eb) 637 mtdswap_store_eb(d, old_eb); 638 } 639 640 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 641 d->curr_write_pos; 642 643 d->curr_write->active_count++; 644 d->revmap[*block] = page; 645 d->curr_write_pos++; 646 647 return 0; 648 } 649 650 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 651 { 652 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 653 d->pages_per_eblk - d->curr_write_pos; 654 } 655 656 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 657 { 658 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 659 } 660 661 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 662 unsigned int page, unsigned int *bp, int gc_context) 663 { 664 struct mtd_info *mtd = d->mtd; 665 struct swap_eb *eb; 666 size_t retlen; 667 loff_t writepos; 668 int ret; 669 670 retry: 671 if (!gc_context) 672 while (!mtdswap_enough_free_pages(d)) 673 if (mtdswap_gc(d, 0) > 0) 674 return -ENOSPC; 675 676 ret = mtdswap_map_free_block(d, page, bp); 677 eb = d->eb_data + (*bp / d->pages_per_eblk); 678 679 if (ret == -EIO || mtd_is_eccerr(ret)) { 680 d->curr_write = NULL; 681 eb->active_count--; 682 d->revmap[*bp] = PAGE_UNDEF; 683 goto retry; 684 } 685 686 if (ret < 0) 687 return ret; 688 689 writepos = (loff_t)*bp << PAGE_SHIFT; 690 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 691 if (ret == -EIO || mtd_is_eccerr(ret)) { 692 d->curr_write_pos--; 693 eb->active_count--; 694 d->revmap[*bp] = PAGE_UNDEF; 695 mtdswap_handle_write_error(d, eb); 696 goto retry; 697 } 698 699 if (ret < 0) { 700 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 701 ret, retlen); 702 goto err; 703 } 704 705 if (retlen != PAGE_SIZE) { 706 dev_err(d->dev, "Short write to MTD device: %zd written", 707 retlen); 708 ret = -EIO; 709 goto err; 710 } 711 712 return ret; 713 714 err: 715 d->curr_write_pos--; 716 eb->active_count--; 717 d->revmap[*bp] = PAGE_UNDEF; 718 719 return ret; 720 } 721 722 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 723 unsigned int *newblock) 724 { 725 struct mtd_info *mtd = d->mtd; 726 struct swap_eb *eb, *oldeb; 727 int ret; 728 size_t retlen; 729 unsigned int page, retries; 730 loff_t readpos; 731 732 page = d->revmap[oldblock]; 733 readpos = (loff_t) oldblock << PAGE_SHIFT; 734 retries = 0; 735 736 retry: 737 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 738 739 if (ret < 0 && !mtd_is_bitflip(ret)) { 740 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 741 oldeb->flags |= EBLOCK_READERR; 742 743 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 744 oldblock); 745 retries++; 746 if (retries < MTDSWAP_IO_RETRIES) 747 goto retry; 748 749 goto read_error; 750 } 751 752 if (retlen != PAGE_SIZE) { 753 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 754 oldblock); 755 ret = -EIO; 756 goto read_error; 757 } 758 759 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 760 if (ret < 0) { 761 d->page_data[page] = BLOCK_ERROR; 762 dev_err(d->dev, "Write error: %d\n", ret); 763 return ret; 764 } 765 766 eb = d->eb_data + *newblock / d->pages_per_eblk; 767 d->page_data[page] = *newblock; 768 d->revmap[oldblock] = PAGE_UNDEF; 769 eb = d->eb_data + oldblock / d->pages_per_eblk; 770 eb->active_count--; 771 772 return 0; 773 774 read_error: 775 d->page_data[page] = BLOCK_ERROR; 776 d->revmap[oldblock] = PAGE_UNDEF; 777 return ret; 778 } 779 780 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 781 { 782 unsigned int i, block, eblk_base, newblock; 783 int ret, errcode; 784 785 errcode = 0; 786 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 787 788 for (i = 0; i < d->pages_per_eblk; i++) { 789 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 790 return -ENOSPC; 791 792 block = eblk_base + i; 793 if (d->revmap[block] == PAGE_UNDEF) 794 continue; 795 796 ret = mtdswap_move_block(d, block, &newblock); 797 if (ret < 0 && !errcode) 798 errcode = ret; 799 } 800 801 return errcode; 802 } 803 804 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 805 { 806 int idx, stopat; 807 808 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD) 809 stopat = MTDSWAP_LOWFRAG; 810 else 811 stopat = MTDSWAP_HIFRAG; 812 813 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 814 if (d->trees[idx].root.rb_node != NULL) 815 return idx; 816 817 return -1; 818 } 819 820 static int mtdswap_wlfreq(unsigned int maxdiff) 821 { 822 unsigned int h, x, y, dist, base; 823 824 /* 825 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 826 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 827 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 828 */ 829 830 dist = maxdiff - MAX_ERASE_DIFF; 831 if (dist > COLLECT_NONDIRTY_BASE) 832 dist = COLLECT_NONDIRTY_BASE; 833 834 /* 835 * Modelling the slop as right angular triangle with base 836 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 837 * equal to the ratio h/base. 838 */ 839 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 840 base = COLLECT_NONDIRTY_BASE; 841 842 x = dist - base; 843 y = (x * h + base / 2) / base; 844 845 return COLLECT_NONDIRTY_FREQ2 + y; 846 } 847 848 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 849 { 850 static unsigned int pick_cnt; 851 unsigned int i, idx = -1, wear, max; 852 struct rb_root *root; 853 854 max = 0; 855 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 856 root = &d->trees[i].root; 857 if (root->rb_node == NULL) 858 continue; 859 860 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 861 if (wear > max) { 862 max = wear; 863 idx = i; 864 } 865 } 866 867 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 868 pick_cnt = 0; 869 return idx; 870 } 871 872 pick_cnt++; 873 return -1; 874 } 875 876 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 877 unsigned int background) 878 { 879 int idx; 880 881 if (TREE_NONEMPTY(d, FAILING) && 882 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 883 return MTDSWAP_FAILING; 884 885 idx = mtdswap_choose_wl_tree(d); 886 if (idx >= MTDSWAP_CLEAN) 887 return idx; 888 889 return __mtdswap_choose_gc_tree(d); 890 } 891 892 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 893 unsigned int background) 894 { 895 struct rb_root *rp = NULL; 896 struct swap_eb *eb = NULL; 897 int idx; 898 899 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 900 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 901 return NULL; 902 903 idx = mtdswap_choose_gc_tree(d, background); 904 if (idx < 0) 905 return NULL; 906 907 rp = &d->trees[idx].root; 908 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 909 910 rb_erase(&eb->rb, rp); 911 eb->root = NULL; 912 d->trees[idx].count--; 913 return eb; 914 } 915 916 static unsigned int mtdswap_test_patt(unsigned int i) 917 { 918 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 919 } 920 921 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 922 struct swap_eb *eb) 923 { 924 struct mtd_info *mtd = d->mtd; 925 unsigned int test, i, j, patt, mtd_pages; 926 loff_t base, pos; 927 unsigned int *p1 = (unsigned int *)d->page_buf; 928 unsigned char *p2 = (unsigned char *)d->oob_buf; 929 struct mtd_oob_ops ops; 930 int ret; 931 932 ops.mode = MTD_OPS_AUTO_OOB; 933 ops.len = mtd->writesize; 934 ops.ooblen = mtd->oobavail; 935 ops.ooboffs = 0; 936 ops.datbuf = d->page_buf; 937 ops.oobbuf = d->oob_buf; 938 base = mtdswap_eb_offset(d, eb); 939 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 940 941 for (test = 0; test < 2; test++) { 942 pos = base; 943 for (i = 0; i < mtd_pages; i++) { 944 patt = mtdswap_test_patt(test + i); 945 memset(d->page_buf, patt, mtd->writesize); 946 memset(d->oob_buf, patt, mtd->oobavail); 947 ret = mtd_write_oob(mtd, pos, &ops); 948 if (ret) 949 goto error; 950 951 pos += mtd->writesize; 952 } 953 954 pos = base; 955 for (i = 0; i < mtd_pages; i++) { 956 ret = mtd_read_oob(mtd, pos, &ops); 957 if (ret) 958 goto error; 959 960 patt = mtdswap_test_patt(test + i); 961 for (j = 0; j < mtd->writesize/sizeof(int); j++) 962 if (p1[j] != patt) 963 goto error; 964 965 for (j = 0; j < mtd->oobavail; j++) 966 if (p2[j] != (unsigned char)patt) 967 goto error; 968 969 pos += mtd->writesize; 970 } 971 972 ret = mtdswap_erase_block(d, eb); 973 if (ret) 974 goto error; 975 } 976 977 eb->flags &= ~EBLOCK_READERR; 978 return 1; 979 980 error: 981 mtdswap_handle_badblock(d, eb); 982 return 0; 983 } 984 985 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 986 { 987 struct swap_eb *eb; 988 int ret; 989 990 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 991 return 1; 992 993 eb = mtdswap_pick_gc_eblk(d, background); 994 if (!eb) 995 return 1; 996 997 ret = mtdswap_gc_eblock(d, eb); 998 if (ret == -ENOSPC) 999 return 1; 1000 1001 if (eb->flags & EBLOCK_FAILED) { 1002 mtdswap_handle_badblock(d, eb); 1003 return 0; 1004 } 1005 1006 eb->flags &= ~EBLOCK_BITFLIP; 1007 ret = mtdswap_erase_block(d, eb); 1008 if ((eb->flags & EBLOCK_READERR) && 1009 (ret || !mtdswap_eblk_passes(d, eb))) 1010 return 0; 1011 1012 if (ret == 0) 1013 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 1014 1015 if (ret == 0) 1016 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 1017 else if (ret != -EIO && !mtd_is_eccerr(ret)) 1018 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 1019 1020 return 0; 1021 } 1022 1023 static void mtdswap_background(struct mtd_blktrans_dev *dev) 1024 { 1025 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1026 int ret; 1027 1028 while (1) { 1029 ret = mtdswap_gc(d, 1); 1030 if (ret || mtd_blktrans_cease_background(dev)) 1031 return; 1032 } 1033 } 1034 1035 static void mtdswap_cleanup(struct mtdswap_dev *d) 1036 { 1037 vfree(d->eb_data); 1038 vfree(d->revmap); 1039 vfree(d->page_data); 1040 kfree(d->oob_buf); 1041 kfree(d->page_buf); 1042 } 1043 1044 static int mtdswap_flush(struct mtd_blktrans_dev *dev) 1045 { 1046 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1047 1048 mtd_sync(d->mtd); 1049 return 0; 1050 } 1051 1052 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1053 { 1054 loff_t offset; 1055 unsigned int badcnt; 1056 1057 badcnt = 0; 1058 1059 if (mtd_can_have_bb(mtd)) 1060 for (offset = 0; offset < size; offset += mtd->erasesize) 1061 if (mtd_block_isbad(mtd, offset)) 1062 badcnt++; 1063 1064 return badcnt; 1065 } 1066 1067 static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1068 unsigned long page, char *buf) 1069 { 1070 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1071 unsigned int newblock, mapped; 1072 struct swap_eb *eb; 1073 int ret; 1074 1075 d->sect_write_count++; 1076 1077 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1078 return -ENOSPC; 1079 1080 if (header) { 1081 /* Ignore writes to the header page */ 1082 if (unlikely(page == 0)) 1083 return 0; 1084 1085 page--; 1086 } 1087 1088 mapped = d->page_data[page]; 1089 if (mapped <= BLOCK_MAX) { 1090 eb = d->eb_data + (mapped / d->pages_per_eblk); 1091 eb->active_count--; 1092 mtdswap_store_eb(d, eb); 1093 d->page_data[page] = BLOCK_UNDEF; 1094 d->revmap[mapped] = PAGE_UNDEF; 1095 } 1096 1097 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1098 d->mtd_write_count++; 1099 1100 if (ret < 0) 1101 return ret; 1102 1103 eb = d->eb_data + (newblock / d->pages_per_eblk); 1104 d->page_data[page] = newblock; 1105 1106 return 0; 1107 } 1108 1109 /* Provide a dummy swap header for the kernel */ 1110 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1111 { 1112 union swap_header *hd = (union swap_header *)(buf); 1113 1114 memset(buf, 0, PAGE_SIZE - 10); 1115 1116 hd->info.version = 1; 1117 hd->info.last_page = d->mbd_dev->size - 1; 1118 hd->info.nr_badpages = 0; 1119 1120 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1121 1122 return 0; 1123 } 1124 1125 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1126 unsigned long page, char *buf) 1127 { 1128 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1129 struct mtd_info *mtd = d->mtd; 1130 unsigned int realblock, retries; 1131 loff_t readpos; 1132 struct swap_eb *eb; 1133 size_t retlen; 1134 int ret; 1135 1136 d->sect_read_count++; 1137 1138 if (header) { 1139 if (unlikely(page == 0)) 1140 return mtdswap_auto_header(d, buf); 1141 1142 page--; 1143 } 1144 1145 realblock = d->page_data[page]; 1146 if (realblock > BLOCK_MAX) { 1147 memset(buf, 0x0, PAGE_SIZE); 1148 if (realblock == BLOCK_UNDEF) 1149 return 0; 1150 else 1151 return -EIO; 1152 } 1153 1154 eb = d->eb_data + (realblock / d->pages_per_eblk); 1155 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1156 1157 readpos = (loff_t)realblock << PAGE_SHIFT; 1158 retries = 0; 1159 1160 retry: 1161 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1162 1163 d->mtd_read_count++; 1164 if (mtd_is_bitflip(ret)) { 1165 eb->flags |= EBLOCK_BITFLIP; 1166 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1167 ret = 0; 1168 } 1169 1170 if (ret < 0) { 1171 dev_err(d->dev, "Read error %d\n", ret); 1172 eb->flags |= EBLOCK_READERR; 1173 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1174 retries++; 1175 if (retries < MTDSWAP_IO_RETRIES) 1176 goto retry; 1177 1178 return ret; 1179 } 1180 1181 if (retlen != PAGE_SIZE) { 1182 dev_err(d->dev, "Short read %zd\n", retlen); 1183 return -EIO; 1184 } 1185 1186 return 0; 1187 } 1188 1189 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1190 unsigned nr_pages) 1191 { 1192 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1193 unsigned long page; 1194 struct swap_eb *eb; 1195 unsigned int mapped; 1196 1197 d->discard_count++; 1198 1199 for (page = first; page < first + nr_pages; page++) { 1200 mapped = d->page_data[page]; 1201 if (mapped <= BLOCK_MAX) { 1202 eb = d->eb_data + (mapped / d->pages_per_eblk); 1203 eb->active_count--; 1204 mtdswap_store_eb(d, eb); 1205 d->page_data[page] = BLOCK_UNDEF; 1206 d->revmap[mapped] = PAGE_UNDEF; 1207 d->discard_page_count++; 1208 } else if (mapped == BLOCK_ERROR) { 1209 d->page_data[page] = BLOCK_UNDEF; 1210 d->discard_page_count++; 1211 } 1212 } 1213 1214 return 0; 1215 } 1216 1217 static int mtdswap_show(struct seq_file *s, void *data) 1218 { 1219 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1220 unsigned long sum; 1221 unsigned int count[MTDSWAP_TREE_CNT]; 1222 unsigned int min[MTDSWAP_TREE_CNT]; 1223 unsigned int max[MTDSWAP_TREE_CNT]; 1224 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1225 uint64_t use_size; 1226 static const char * const name[] = { 1227 "clean", "used", "low", "high", "dirty", "bitflip", "failing" 1228 }; 1229 1230 mutex_lock(&d->mbd_dev->lock); 1231 1232 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1233 struct rb_root *root = &d->trees[i].root; 1234 1235 if (root->rb_node) { 1236 count[i] = d->trees[i].count; 1237 min[i] = MTDSWAP_ECNT_MIN(root); 1238 max[i] = MTDSWAP_ECNT_MAX(root); 1239 } else 1240 count[i] = 0; 1241 } 1242 1243 if (d->curr_write) { 1244 cw = 1; 1245 cwp = d->curr_write_pos; 1246 cwecount = d->curr_write->erase_count; 1247 } 1248 1249 sum = 0; 1250 for (i = 0; i < d->eblks; i++) 1251 sum += d->eb_data[i].erase_count; 1252 1253 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1254 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1255 1256 mapped = 0; 1257 pages = d->mbd_dev->size; 1258 for (i = 0; i < pages; i++) 1259 if (d->page_data[i] != BLOCK_UNDEF) 1260 mapped++; 1261 1262 mutex_unlock(&d->mbd_dev->lock); 1263 1264 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1265 if (!count[i]) 1266 continue; 1267 1268 if (min[i] != max[i]) 1269 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1270 "max %d times\n", 1271 name[i], count[i], min[i], max[i]); 1272 else 1273 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1274 "times\n", name[i], count[i], min[i]); 1275 } 1276 1277 if (bb_cnt) 1278 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1279 1280 if (cw) 1281 seq_printf(s, "current erase block: %u pages used, %u free, " 1282 "erased %u times\n", 1283 cwp, d->pages_per_eblk - cwp, cwecount); 1284 1285 seq_printf(s, "total erasures: %lu\n", sum); 1286 1287 seq_puts(s, "\n"); 1288 1289 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1290 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1291 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1292 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1293 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1294 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1295 1296 seq_puts(s, "\n"); 1297 seq_printf(s, "total pages: %u\n", pages); 1298 seq_printf(s, "pages mapped: %u\n", mapped); 1299 1300 return 0; 1301 } 1302 1303 static int mtdswap_open(struct inode *inode, struct file *file) 1304 { 1305 return single_open(file, mtdswap_show, inode->i_private); 1306 } 1307 1308 static const struct file_operations mtdswap_fops = { 1309 .open = mtdswap_open, 1310 .read = seq_read, 1311 .llseek = seq_lseek, 1312 .release = single_release, 1313 }; 1314 1315 static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1316 { 1317 struct dentry *root = d->mtd->dbg.dfs_dir; 1318 struct dentry *dent; 1319 1320 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 1321 return 0; 1322 1323 if (IS_ERR_OR_NULL(root)) 1324 return -1; 1325 1326 dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, 1327 &mtdswap_fops); 1328 if (!dent) { 1329 dev_err(d->dev, "debugfs_create_file failed\n"); 1330 return -1; 1331 } 1332 1333 return 0; 1334 } 1335 1336 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1337 unsigned int spare_cnt) 1338 { 1339 struct mtd_info *mtd = d->mbd_dev->mtd; 1340 unsigned int i, eblk_bytes, pages, blocks; 1341 int ret = -ENOMEM; 1342 1343 d->mtd = mtd; 1344 d->eblks = eblocks; 1345 d->spare_eblks = spare_cnt; 1346 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1347 1348 pages = d->mbd_dev->size; 1349 blocks = eblocks * d->pages_per_eblk; 1350 1351 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1352 d->trees[i].root = RB_ROOT; 1353 1354 d->page_data = vmalloc(sizeof(int)*pages); 1355 if (!d->page_data) 1356 goto page_data_fail; 1357 1358 d->revmap = vmalloc(sizeof(int)*blocks); 1359 if (!d->revmap) 1360 goto revmap_fail; 1361 1362 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1363 d->eb_data = vzalloc(eblk_bytes); 1364 if (!d->eb_data) 1365 goto eb_data_fail; 1366 1367 for (i = 0; i < pages; i++) 1368 d->page_data[i] = BLOCK_UNDEF; 1369 1370 for (i = 0; i < blocks; i++) 1371 d->revmap[i] = PAGE_UNDEF; 1372 1373 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1374 if (!d->page_buf) 1375 goto page_buf_fail; 1376 1377 d->oob_buf = kmalloc(2 * mtd->oobavail, GFP_KERNEL); 1378 if (!d->oob_buf) 1379 goto oob_buf_fail; 1380 1381 mtdswap_scan_eblks(d); 1382 1383 return 0; 1384 1385 oob_buf_fail: 1386 kfree(d->page_buf); 1387 page_buf_fail: 1388 vfree(d->eb_data); 1389 eb_data_fail: 1390 vfree(d->revmap); 1391 revmap_fail: 1392 vfree(d->page_data); 1393 page_data_fail: 1394 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1395 return ret; 1396 } 1397 1398 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1399 { 1400 struct mtdswap_dev *d; 1401 struct mtd_blktrans_dev *mbd_dev; 1402 char *parts; 1403 char *this_opt; 1404 unsigned long part; 1405 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1406 uint64_t swap_size, use_size, size_limit; 1407 int ret; 1408 1409 parts = &partitions[0]; 1410 if (!*parts) 1411 return; 1412 1413 while ((this_opt = strsep(&parts, ",")) != NULL) { 1414 if (kstrtoul(this_opt, 0, &part) < 0) 1415 return; 1416 1417 if (mtd->index == part) 1418 break; 1419 } 1420 1421 if (mtd->index != part) 1422 return; 1423 1424 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1425 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1426 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1427 return; 1428 } 1429 1430 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1431 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1432 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1433 return; 1434 } 1435 1436 if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { 1437 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1438 "%d available, %zu needed.\n", 1439 MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); 1440 return; 1441 } 1442 1443 if (spare_eblocks > 100) 1444 spare_eblocks = 100; 1445 1446 use_size = mtd->size; 1447 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1448 1449 if (mtd->size > size_limit) { 1450 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1451 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1452 use_size = size_limit; 1453 } 1454 1455 eblocks = mtd_div_by_eb(use_size, mtd); 1456 use_size = (uint64_t)eblocks * mtd->erasesize; 1457 bad_blocks = mtdswap_badblocks(mtd, use_size); 1458 eavailable = eblocks - bad_blocks; 1459 1460 if (eavailable < MIN_ERASE_BLOCKS) { 1461 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1462 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1463 MIN_ERASE_BLOCKS); 1464 return; 1465 } 1466 1467 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1468 1469 if (spare_cnt < MIN_SPARE_EBLOCKS) 1470 spare_cnt = MIN_SPARE_EBLOCKS; 1471 1472 if (spare_cnt > eavailable - 1) 1473 spare_cnt = eavailable - 1; 1474 1475 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1476 (header ? PAGE_SIZE : 0); 1477 1478 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1479 "%u spare, %u bad blocks\n", 1480 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1481 1482 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1483 if (!d) 1484 return; 1485 1486 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1487 if (!mbd_dev) { 1488 kfree(d); 1489 return; 1490 } 1491 1492 d->mbd_dev = mbd_dev; 1493 mbd_dev->priv = d; 1494 1495 mbd_dev->mtd = mtd; 1496 mbd_dev->devnum = mtd->index; 1497 mbd_dev->size = swap_size >> PAGE_SHIFT; 1498 mbd_dev->tr = tr; 1499 1500 if (!(mtd->flags & MTD_WRITEABLE)) 1501 mbd_dev->readonly = 1; 1502 1503 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1504 goto init_failed; 1505 1506 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1507 goto cleanup; 1508 1509 d->dev = disk_to_dev(mbd_dev->disk); 1510 1511 ret = mtdswap_add_debugfs(d); 1512 if (ret < 0) 1513 goto debugfs_failed; 1514 1515 return; 1516 1517 debugfs_failed: 1518 del_mtd_blktrans_dev(mbd_dev); 1519 1520 cleanup: 1521 mtdswap_cleanup(d); 1522 1523 init_failed: 1524 kfree(mbd_dev); 1525 kfree(d); 1526 } 1527 1528 static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1529 { 1530 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1531 1532 del_mtd_blktrans_dev(dev); 1533 mtdswap_cleanup(d); 1534 kfree(d); 1535 } 1536 1537 static struct mtd_blktrans_ops mtdswap_ops = { 1538 .name = "mtdswap", 1539 .major = 0, 1540 .part_bits = 0, 1541 .blksize = PAGE_SIZE, 1542 .flush = mtdswap_flush, 1543 .readsect = mtdswap_readsect, 1544 .writesect = mtdswap_writesect, 1545 .discard = mtdswap_discard, 1546 .background = mtdswap_background, 1547 .add_mtd = mtdswap_add_mtd, 1548 .remove_dev = mtdswap_remove_dev, 1549 .owner = THIS_MODULE, 1550 }; 1551 1552 static int __init mtdswap_modinit(void) 1553 { 1554 return register_mtd_blktrans(&mtdswap_ops); 1555 } 1556 1557 static void __exit mtdswap_modexit(void) 1558 { 1559 deregister_mtd_blktrans(&mtdswap_ops); 1560 } 1561 1562 module_init(mtdswap_modinit); 1563 module_exit(mtdswap_modexit); 1564 1565 1566 MODULE_LICENSE("GPL"); 1567 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1568 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1569 "swap space"); 1570