1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Swap block device support for MTDs 4 * Turns an MTD device into a swap device with block wear leveling 5 * 6 * Copyright © 2007,2011 Nokia Corporation. All rights reserved. 7 * 8 * Authors: Jarkko Lavinen <jarkko.lavinen@nokia.com> 9 * 10 * Based on Richard Purdie's earlier implementation in 2007. Background 11 * support and lock-less operation written by Adrian Hunter. 12 */ 13 14 #include <linux/kernel.h> 15 #include <linux/module.h> 16 #include <linux/mtd/mtd.h> 17 #include <linux/mtd/blktrans.h> 18 #include <linux/rbtree.h> 19 #include <linux/sched.h> 20 #include <linux/slab.h> 21 #include <linux/vmalloc.h> 22 #include <linux/genhd.h> 23 #include <linux/swap.h> 24 #include <linux/debugfs.h> 25 #include <linux/seq_file.h> 26 #include <linux/device.h> 27 #include <linux/math64.h> 28 29 #define MTDSWAP_PREFIX "mtdswap" 30 31 /* 32 * The number of free eraseblocks when GC should stop 33 */ 34 #define CLEAN_BLOCK_THRESHOLD 20 35 36 /* 37 * Number of free eraseblocks below which GC can also collect low frag 38 * blocks. 39 */ 40 #define LOW_FRAG_GC_THRESHOLD 5 41 42 /* 43 * Wear level cost amortization. We want to do wear leveling on the background 44 * without disturbing gc too much. This is made by defining max GC frequency. 45 * Frequency value 6 means 1/6 of the GC passes will pick an erase block based 46 * on the biggest wear difference rather than the biggest dirtiness. 47 * 48 * The lower freq2 should be chosen so that it makes sure the maximum erase 49 * difference will decrease even if a malicious application is deliberately 50 * trying to make erase differences large. 51 */ 52 #define MAX_ERASE_DIFF 4000 53 #define COLLECT_NONDIRTY_BASE MAX_ERASE_DIFF 54 #define COLLECT_NONDIRTY_FREQ1 6 55 #define COLLECT_NONDIRTY_FREQ2 4 56 57 #define PAGE_UNDEF UINT_MAX 58 #define BLOCK_UNDEF UINT_MAX 59 #define BLOCK_ERROR (UINT_MAX - 1) 60 #define BLOCK_MAX (UINT_MAX - 2) 61 62 #define EBLOCK_BAD (1 << 0) 63 #define EBLOCK_NOMAGIC (1 << 1) 64 #define EBLOCK_BITFLIP (1 << 2) 65 #define EBLOCK_FAILED (1 << 3) 66 #define EBLOCK_READERR (1 << 4) 67 #define EBLOCK_IDX_SHIFT 5 68 69 struct swap_eb { 70 struct rb_node rb; 71 struct rb_root *root; 72 73 unsigned int flags; 74 unsigned int active_count; 75 unsigned int erase_count; 76 unsigned int pad; /* speeds up pointer decrement */ 77 }; 78 79 #define MTDSWAP_ECNT_MIN(rbroot) (rb_entry(rb_first(rbroot), struct swap_eb, \ 80 rb)->erase_count) 81 #define MTDSWAP_ECNT_MAX(rbroot) (rb_entry(rb_last(rbroot), struct swap_eb, \ 82 rb)->erase_count) 83 84 struct mtdswap_tree { 85 struct rb_root root; 86 unsigned int count; 87 }; 88 89 enum { 90 MTDSWAP_CLEAN, 91 MTDSWAP_USED, 92 MTDSWAP_LOWFRAG, 93 MTDSWAP_HIFRAG, 94 MTDSWAP_DIRTY, 95 MTDSWAP_BITFLIP, 96 MTDSWAP_FAILING, 97 MTDSWAP_TREE_CNT, 98 }; 99 100 struct mtdswap_dev { 101 struct mtd_blktrans_dev *mbd_dev; 102 struct mtd_info *mtd; 103 struct device *dev; 104 105 unsigned int *page_data; 106 unsigned int *revmap; 107 108 unsigned int eblks; 109 unsigned int spare_eblks; 110 unsigned int pages_per_eblk; 111 unsigned int max_erase_count; 112 struct swap_eb *eb_data; 113 114 struct mtdswap_tree trees[MTDSWAP_TREE_CNT]; 115 116 unsigned long long sect_read_count; 117 unsigned long long sect_write_count; 118 unsigned long long mtd_write_count; 119 unsigned long long mtd_read_count; 120 unsigned long long discard_count; 121 unsigned long long discard_page_count; 122 123 unsigned int curr_write_pos; 124 struct swap_eb *curr_write; 125 126 char *page_buf; 127 char *oob_buf; 128 }; 129 130 struct mtdswap_oobdata { 131 __le16 magic; 132 __le32 count; 133 } __packed; 134 135 #define MTDSWAP_MAGIC_CLEAN 0x2095 136 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) 137 #define MTDSWAP_TYPE_CLEAN 0 138 #define MTDSWAP_TYPE_DIRTY 1 139 #define MTDSWAP_OOBSIZE sizeof(struct mtdswap_oobdata) 140 141 #define MTDSWAP_ERASE_RETRIES 3 /* Before marking erase block bad */ 142 #define MTDSWAP_IO_RETRIES 3 143 144 enum { 145 MTDSWAP_SCANNED_CLEAN, 146 MTDSWAP_SCANNED_DIRTY, 147 MTDSWAP_SCANNED_BITFLIP, 148 MTDSWAP_SCANNED_BAD, 149 }; 150 151 /* 152 * In the worst case mtdswap_writesect() has allocated the last clean 153 * page from the current block and is then pre-empted by the GC 154 * thread. The thread can consume a full erase block when moving a 155 * block. 156 */ 157 #define MIN_SPARE_EBLOCKS 2 158 #define MIN_ERASE_BLOCKS (MIN_SPARE_EBLOCKS + 1) 159 160 #define TREE_ROOT(d, name) (&d->trees[MTDSWAP_ ## name].root) 161 #define TREE_EMPTY(d, name) (TREE_ROOT(d, name)->rb_node == NULL) 162 #define TREE_NONEMPTY(d, name) (!TREE_EMPTY(d, name)) 163 #define TREE_COUNT(d, name) (d->trees[MTDSWAP_ ## name].count) 164 165 #define MTDSWAP_MBD_TO_MTDSWAP(dev) ((struct mtdswap_dev *)dev->priv) 166 167 static char partitions[128] = ""; 168 module_param_string(partitions, partitions, sizeof(partitions), 0444); 169 MODULE_PARM_DESC(partitions, "MTD partition numbers to use as swap " 170 "partitions=\"1,3,5\""); 171 172 static unsigned int spare_eblocks = 10; 173 module_param(spare_eblocks, uint, 0444); 174 MODULE_PARM_DESC(spare_eblocks, "Percentage of spare erase blocks for " 175 "garbage collection (default 10%)"); 176 177 static bool header; /* false */ 178 module_param(header, bool, 0444); 179 MODULE_PARM_DESC(header, 180 "Include builtin swap header (default 0, without header)"); 181 182 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background); 183 184 static loff_t mtdswap_eb_offset(struct mtdswap_dev *d, struct swap_eb *eb) 185 { 186 return (loff_t)(eb - d->eb_data) * d->mtd->erasesize; 187 } 188 189 static void mtdswap_eb_detach(struct mtdswap_dev *d, struct swap_eb *eb) 190 { 191 unsigned int oldidx; 192 struct mtdswap_tree *tp; 193 194 if (eb->root) { 195 tp = container_of(eb->root, struct mtdswap_tree, root); 196 oldidx = tp - &d->trees[0]; 197 198 d->trees[oldidx].count--; 199 rb_erase(&eb->rb, eb->root); 200 } 201 } 202 203 static void __mtdswap_rb_add(struct rb_root *root, struct swap_eb *eb) 204 { 205 struct rb_node **p, *parent = NULL; 206 struct swap_eb *cur; 207 208 p = &root->rb_node; 209 while (*p) { 210 parent = *p; 211 cur = rb_entry(parent, struct swap_eb, rb); 212 if (eb->erase_count > cur->erase_count) 213 p = &(*p)->rb_right; 214 else 215 p = &(*p)->rb_left; 216 } 217 218 rb_link_node(&eb->rb, parent, p); 219 rb_insert_color(&eb->rb, root); 220 } 221 222 static void mtdswap_rb_add(struct mtdswap_dev *d, struct swap_eb *eb, int idx) 223 { 224 struct rb_root *root; 225 226 if (eb->root == &d->trees[idx].root) 227 return; 228 229 mtdswap_eb_detach(d, eb); 230 root = &d->trees[idx].root; 231 __mtdswap_rb_add(root, eb); 232 eb->root = root; 233 d->trees[idx].count++; 234 } 235 236 static struct rb_node *mtdswap_rb_index(struct rb_root *root, unsigned int idx) 237 { 238 struct rb_node *p; 239 unsigned int i; 240 241 p = rb_first(root); 242 i = 0; 243 while (i < idx && p) { 244 p = rb_next(p); 245 i++; 246 } 247 248 return p; 249 } 250 251 static int mtdswap_handle_badblock(struct mtdswap_dev *d, struct swap_eb *eb) 252 { 253 int ret; 254 loff_t offset; 255 256 d->spare_eblks--; 257 eb->flags |= EBLOCK_BAD; 258 mtdswap_eb_detach(d, eb); 259 eb->root = NULL; 260 261 /* badblocks not supported */ 262 if (!mtd_can_have_bb(d->mtd)) 263 return 1; 264 265 offset = mtdswap_eb_offset(d, eb); 266 dev_warn(d->dev, "Marking bad block at %08llx\n", offset); 267 ret = mtd_block_markbad(d->mtd, offset); 268 269 if (ret) { 270 dev_warn(d->dev, "Mark block bad failed for block at %08llx " 271 "error %d\n", offset, ret); 272 return ret; 273 } 274 275 return 1; 276 277 } 278 279 static int mtdswap_handle_write_error(struct mtdswap_dev *d, struct swap_eb *eb) 280 { 281 unsigned int marked = eb->flags & EBLOCK_FAILED; 282 struct swap_eb *curr_write = d->curr_write; 283 284 eb->flags |= EBLOCK_FAILED; 285 if (curr_write == eb) { 286 d->curr_write = NULL; 287 288 if (!marked && d->curr_write_pos != 0) { 289 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 290 return 0; 291 } 292 } 293 294 return mtdswap_handle_badblock(d, eb); 295 } 296 297 static int mtdswap_read_oob(struct mtdswap_dev *d, loff_t from, 298 struct mtd_oob_ops *ops) 299 { 300 int ret = mtd_read_oob(d->mtd, from, ops); 301 302 if (mtd_is_bitflip(ret)) 303 return ret; 304 305 if (ret) { 306 dev_warn(d->dev, "Read OOB failed %d for block at %08llx\n", 307 ret, from); 308 return ret; 309 } 310 311 if (ops->oobretlen < ops->ooblen) { 312 dev_warn(d->dev, "Read OOB return short read (%zd bytes not " 313 "%zd) for block at %08llx\n", 314 ops->oobretlen, ops->ooblen, from); 315 return -EIO; 316 } 317 318 return 0; 319 } 320 321 static int mtdswap_read_markers(struct mtdswap_dev *d, struct swap_eb *eb) 322 { 323 struct mtdswap_oobdata *data, *data2; 324 int ret; 325 loff_t offset; 326 struct mtd_oob_ops ops; 327 328 offset = mtdswap_eb_offset(d, eb); 329 330 /* Check first if the block is bad. */ 331 if (mtd_can_have_bb(d->mtd) && mtd_block_isbad(d->mtd, offset)) 332 return MTDSWAP_SCANNED_BAD; 333 334 ops.ooblen = 2 * d->mtd->oobavail; 335 ops.oobbuf = d->oob_buf; 336 ops.ooboffs = 0; 337 ops.datbuf = NULL; 338 ops.mode = MTD_OPS_AUTO_OOB; 339 340 ret = mtdswap_read_oob(d, offset, &ops); 341 342 if (ret && !mtd_is_bitflip(ret)) 343 return ret; 344 345 data = (struct mtdswap_oobdata *)d->oob_buf; 346 data2 = (struct mtdswap_oobdata *) 347 (d->oob_buf + d->mtd->oobavail); 348 349 if (le16_to_cpu(data->magic) == MTDSWAP_MAGIC_CLEAN) { 350 eb->erase_count = le32_to_cpu(data->count); 351 if (mtd_is_bitflip(ret)) 352 ret = MTDSWAP_SCANNED_BITFLIP; 353 else { 354 if (le16_to_cpu(data2->magic) == MTDSWAP_MAGIC_DIRTY) 355 ret = MTDSWAP_SCANNED_DIRTY; 356 else 357 ret = MTDSWAP_SCANNED_CLEAN; 358 } 359 } else { 360 eb->flags |= EBLOCK_NOMAGIC; 361 ret = MTDSWAP_SCANNED_DIRTY; 362 } 363 364 return ret; 365 } 366 367 static int mtdswap_write_marker(struct mtdswap_dev *d, struct swap_eb *eb, 368 u16 marker) 369 { 370 struct mtdswap_oobdata n; 371 int ret; 372 loff_t offset; 373 struct mtd_oob_ops ops; 374 375 ops.ooboffs = 0; 376 ops.oobbuf = (uint8_t *)&n; 377 ops.mode = MTD_OPS_AUTO_OOB; 378 ops.datbuf = NULL; 379 380 if (marker == MTDSWAP_TYPE_CLEAN) { 381 n.magic = cpu_to_le16(MTDSWAP_MAGIC_CLEAN); 382 n.count = cpu_to_le32(eb->erase_count); 383 ops.ooblen = MTDSWAP_OOBSIZE; 384 offset = mtdswap_eb_offset(d, eb); 385 } else { 386 n.magic = cpu_to_le16(MTDSWAP_MAGIC_DIRTY); 387 ops.ooblen = sizeof(n.magic); 388 offset = mtdswap_eb_offset(d, eb) + d->mtd->writesize; 389 } 390 391 ret = mtd_write_oob(d->mtd, offset, &ops); 392 393 if (ret) { 394 dev_warn(d->dev, "Write OOB failed for block at %08llx " 395 "error %d\n", offset, ret); 396 if (ret == -EIO || mtd_is_eccerr(ret)) 397 mtdswap_handle_write_error(d, eb); 398 return ret; 399 } 400 401 if (ops.oobretlen != ops.ooblen) { 402 dev_warn(d->dev, "Short OOB write for block at %08llx: " 403 "%zd not %zd\n", 404 offset, ops.oobretlen, ops.ooblen); 405 return ret; 406 } 407 408 return 0; 409 } 410 411 /* 412 * Are there any erase blocks without MAGIC_CLEAN header, presumably 413 * because power was cut off after erase but before header write? We 414 * need to guestimate the erase count. 415 */ 416 static void mtdswap_check_counts(struct mtdswap_dev *d) 417 { 418 struct rb_root hist_root = RB_ROOT; 419 struct rb_node *medrb; 420 struct swap_eb *eb; 421 unsigned int i, cnt, median; 422 423 cnt = 0; 424 for (i = 0; i < d->eblks; i++) { 425 eb = d->eb_data + i; 426 427 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 428 continue; 429 430 __mtdswap_rb_add(&hist_root, eb); 431 cnt++; 432 } 433 434 if (cnt == 0) 435 return; 436 437 medrb = mtdswap_rb_index(&hist_root, cnt / 2); 438 median = rb_entry(medrb, struct swap_eb, rb)->erase_count; 439 440 d->max_erase_count = MTDSWAP_ECNT_MAX(&hist_root); 441 442 for (i = 0; i < d->eblks; i++) { 443 eb = d->eb_data + i; 444 445 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_READERR)) 446 eb->erase_count = median; 447 448 if (eb->flags & (EBLOCK_NOMAGIC | EBLOCK_BAD | EBLOCK_READERR)) 449 continue; 450 451 rb_erase(&eb->rb, &hist_root); 452 } 453 } 454 455 static void mtdswap_scan_eblks(struct mtdswap_dev *d) 456 { 457 int status; 458 unsigned int i, idx; 459 struct swap_eb *eb; 460 461 for (i = 0; i < d->eblks; i++) { 462 eb = d->eb_data + i; 463 464 status = mtdswap_read_markers(d, eb); 465 if (status < 0) 466 eb->flags |= EBLOCK_READERR; 467 else if (status == MTDSWAP_SCANNED_BAD) { 468 eb->flags |= EBLOCK_BAD; 469 continue; 470 } 471 472 switch (status) { 473 case MTDSWAP_SCANNED_CLEAN: 474 idx = MTDSWAP_CLEAN; 475 break; 476 case MTDSWAP_SCANNED_DIRTY: 477 case MTDSWAP_SCANNED_BITFLIP: 478 idx = MTDSWAP_DIRTY; 479 break; 480 default: 481 idx = MTDSWAP_FAILING; 482 } 483 484 eb->flags |= (idx << EBLOCK_IDX_SHIFT); 485 } 486 487 mtdswap_check_counts(d); 488 489 for (i = 0; i < d->eblks; i++) { 490 eb = d->eb_data + i; 491 492 if (eb->flags & EBLOCK_BAD) 493 continue; 494 495 idx = eb->flags >> EBLOCK_IDX_SHIFT; 496 mtdswap_rb_add(d, eb, idx); 497 } 498 } 499 500 /* 501 * Place eblk into a tree corresponding to its number of active blocks 502 * it contains. 503 */ 504 static void mtdswap_store_eb(struct mtdswap_dev *d, struct swap_eb *eb) 505 { 506 unsigned int weight = eb->active_count; 507 unsigned int maxweight = d->pages_per_eblk; 508 509 if (eb == d->curr_write) 510 return; 511 512 if (eb->flags & EBLOCK_BITFLIP) 513 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 514 else if (eb->flags & (EBLOCK_READERR | EBLOCK_FAILED)) 515 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 516 if (weight == maxweight) 517 mtdswap_rb_add(d, eb, MTDSWAP_USED); 518 else if (weight == 0) 519 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 520 else if (weight > (maxweight/2)) 521 mtdswap_rb_add(d, eb, MTDSWAP_LOWFRAG); 522 else 523 mtdswap_rb_add(d, eb, MTDSWAP_HIFRAG); 524 } 525 526 static int mtdswap_erase_block(struct mtdswap_dev *d, struct swap_eb *eb) 527 { 528 struct mtd_info *mtd = d->mtd; 529 struct erase_info erase; 530 unsigned int retries = 0; 531 int ret; 532 533 eb->erase_count++; 534 if (eb->erase_count > d->max_erase_count) 535 d->max_erase_count = eb->erase_count; 536 537 retry: 538 memset(&erase, 0, sizeof(struct erase_info)); 539 erase.addr = mtdswap_eb_offset(d, eb); 540 erase.len = mtd->erasesize; 541 542 ret = mtd_erase(mtd, &erase); 543 if (ret) { 544 if (retries++ < MTDSWAP_ERASE_RETRIES) { 545 dev_warn(d->dev, 546 "erase of erase block %#llx on %s failed", 547 erase.addr, mtd->name); 548 yield(); 549 goto retry; 550 } 551 552 dev_err(d->dev, "Cannot erase erase block %#llx on %s\n", 553 erase.addr, mtd->name); 554 555 mtdswap_handle_badblock(d, eb); 556 return -EIO; 557 } 558 559 return 0; 560 } 561 562 static int mtdswap_map_free_block(struct mtdswap_dev *d, unsigned int page, 563 unsigned int *block) 564 { 565 int ret; 566 struct swap_eb *old_eb = d->curr_write; 567 struct rb_root *clean_root; 568 struct swap_eb *eb; 569 570 if (old_eb == NULL || d->curr_write_pos >= d->pages_per_eblk) { 571 do { 572 if (TREE_EMPTY(d, CLEAN)) 573 return -ENOSPC; 574 575 clean_root = TREE_ROOT(d, CLEAN); 576 eb = rb_entry(rb_first(clean_root), struct swap_eb, rb); 577 rb_erase(&eb->rb, clean_root); 578 eb->root = NULL; 579 TREE_COUNT(d, CLEAN)--; 580 581 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_DIRTY); 582 } while (ret == -EIO || mtd_is_eccerr(ret)); 583 584 if (ret) 585 return ret; 586 587 d->curr_write_pos = 0; 588 d->curr_write = eb; 589 if (old_eb) 590 mtdswap_store_eb(d, old_eb); 591 } 592 593 *block = (d->curr_write - d->eb_data) * d->pages_per_eblk + 594 d->curr_write_pos; 595 596 d->curr_write->active_count++; 597 d->revmap[*block] = page; 598 d->curr_write_pos++; 599 600 return 0; 601 } 602 603 static unsigned int mtdswap_free_page_cnt(struct mtdswap_dev *d) 604 { 605 return TREE_COUNT(d, CLEAN) * d->pages_per_eblk + 606 d->pages_per_eblk - d->curr_write_pos; 607 } 608 609 static unsigned int mtdswap_enough_free_pages(struct mtdswap_dev *d) 610 { 611 return mtdswap_free_page_cnt(d) > d->pages_per_eblk; 612 } 613 614 static int mtdswap_write_block(struct mtdswap_dev *d, char *buf, 615 unsigned int page, unsigned int *bp, int gc_context) 616 { 617 struct mtd_info *mtd = d->mtd; 618 struct swap_eb *eb; 619 size_t retlen; 620 loff_t writepos; 621 int ret; 622 623 retry: 624 if (!gc_context) 625 while (!mtdswap_enough_free_pages(d)) 626 if (mtdswap_gc(d, 0) > 0) 627 return -ENOSPC; 628 629 ret = mtdswap_map_free_block(d, page, bp); 630 eb = d->eb_data + (*bp / d->pages_per_eblk); 631 632 if (ret == -EIO || mtd_is_eccerr(ret)) { 633 d->curr_write = NULL; 634 eb->active_count--; 635 d->revmap[*bp] = PAGE_UNDEF; 636 goto retry; 637 } 638 639 if (ret < 0) 640 return ret; 641 642 writepos = (loff_t)*bp << PAGE_SHIFT; 643 ret = mtd_write(mtd, writepos, PAGE_SIZE, &retlen, buf); 644 if (ret == -EIO || mtd_is_eccerr(ret)) { 645 d->curr_write_pos--; 646 eb->active_count--; 647 d->revmap[*bp] = PAGE_UNDEF; 648 mtdswap_handle_write_error(d, eb); 649 goto retry; 650 } 651 652 if (ret < 0) { 653 dev_err(d->dev, "Write to MTD device failed: %d (%zd written)", 654 ret, retlen); 655 goto err; 656 } 657 658 if (retlen != PAGE_SIZE) { 659 dev_err(d->dev, "Short write to MTD device: %zd written", 660 retlen); 661 ret = -EIO; 662 goto err; 663 } 664 665 return ret; 666 667 err: 668 d->curr_write_pos--; 669 eb->active_count--; 670 d->revmap[*bp] = PAGE_UNDEF; 671 672 return ret; 673 } 674 675 static int mtdswap_move_block(struct mtdswap_dev *d, unsigned int oldblock, 676 unsigned int *newblock) 677 { 678 struct mtd_info *mtd = d->mtd; 679 struct swap_eb *eb, *oldeb; 680 int ret; 681 size_t retlen; 682 unsigned int page, retries; 683 loff_t readpos; 684 685 page = d->revmap[oldblock]; 686 readpos = (loff_t) oldblock << PAGE_SHIFT; 687 retries = 0; 688 689 retry: 690 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, d->page_buf); 691 692 if (ret < 0 && !mtd_is_bitflip(ret)) { 693 oldeb = d->eb_data + oldblock / d->pages_per_eblk; 694 oldeb->flags |= EBLOCK_READERR; 695 696 dev_err(d->dev, "Read Error: %d (block %u)\n", ret, 697 oldblock); 698 retries++; 699 if (retries < MTDSWAP_IO_RETRIES) 700 goto retry; 701 702 goto read_error; 703 } 704 705 if (retlen != PAGE_SIZE) { 706 dev_err(d->dev, "Short read: %zd (block %u)\n", retlen, 707 oldblock); 708 ret = -EIO; 709 goto read_error; 710 } 711 712 ret = mtdswap_write_block(d, d->page_buf, page, newblock, 1); 713 if (ret < 0) { 714 d->page_data[page] = BLOCK_ERROR; 715 dev_err(d->dev, "Write error: %d\n", ret); 716 return ret; 717 } 718 719 d->page_data[page] = *newblock; 720 d->revmap[oldblock] = PAGE_UNDEF; 721 eb = d->eb_data + oldblock / d->pages_per_eblk; 722 eb->active_count--; 723 724 return 0; 725 726 read_error: 727 d->page_data[page] = BLOCK_ERROR; 728 d->revmap[oldblock] = PAGE_UNDEF; 729 return ret; 730 } 731 732 static int mtdswap_gc_eblock(struct mtdswap_dev *d, struct swap_eb *eb) 733 { 734 unsigned int i, block, eblk_base, newblock; 735 int ret, errcode; 736 737 errcode = 0; 738 eblk_base = (eb - d->eb_data) * d->pages_per_eblk; 739 740 for (i = 0; i < d->pages_per_eblk; i++) { 741 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 742 return -ENOSPC; 743 744 block = eblk_base + i; 745 if (d->revmap[block] == PAGE_UNDEF) 746 continue; 747 748 ret = mtdswap_move_block(d, block, &newblock); 749 if (ret < 0 && !errcode) 750 errcode = ret; 751 } 752 753 return errcode; 754 } 755 756 static int __mtdswap_choose_gc_tree(struct mtdswap_dev *d) 757 { 758 int idx, stopat; 759 760 if (TREE_COUNT(d, CLEAN) < LOW_FRAG_GC_THRESHOLD) 761 stopat = MTDSWAP_LOWFRAG; 762 else 763 stopat = MTDSWAP_HIFRAG; 764 765 for (idx = MTDSWAP_BITFLIP; idx >= stopat; idx--) 766 if (d->trees[idx].root.rb_node != NULL) 767 return idx; 768 769 return -1; 770 } 771 772 static int mtdswap_wlfreq(unsigned int maxdiff) 773 { 774 unsigned int h, x, y, dist, base; 775 776 /* 777 * Calculate linear ramp down from f1 to f2 when maxdiff goes from 778 * MAX_ERASE_DIFF to MAX_ERASE_DIFF + COLLECT_NONDIRTY_BASE. Similar 779 * to triangle with height f1 - f1 and width COLLECT_NONDIRTY_BASE. 780 */ 781 782 dist = maxdiff - MAX_ERASE_DIFF; 783 if (dist > COLLECT_NONDIRTY_BASE) 784 dist = COLLECT_NONDIRTY_BASE; 785 786 /* 787 * Modelling the slop as right angular triangle with base 788 * COLLECT_NONDIRTY_BASE and height freq1 - freq2. The ratio y/x is 789 * equal to the ratio h/base. 790 */ 791 h = COLLECT_NONDIRTY_FREQ1 - COLLECT_NONDIRTY_FREQ2; 792 base = COLLECT_NONDIRTY_BASE; 793 794 x = dist - base; 795 y = (x * h + base / 2) / base; 796 797 return COLLECT_NONDIRTY_FREQ2 + y; 798 } 799 800 static int mtdswap_choose_wl_tree(struct mtdswap_dev *d) 801 { 802 static unsigned int pick_cnt; 803 unsigned int i, idx = -1, wear, max; 804 struct rb_root *root; 805 806 max = 0; 807 for (i = 0; i <= MTDSWAP_DIRTY; i++) { 808 root = &d->trees[i].root; 809 if (root->rb_node == NULL) 810 continue; 811 812 wear = d->max_erase_count - MTDSWAP_ECNT_MIN(root); 813 if (wear > max) { 814 max = wear; 815 idx = i; 816 } 817 } 818 819 if (max > MAX_ERASE_DIFF && pick_cnt >= mtdswap_wlfreq(max) - 1) { 820 pick_cnt = 0; 821 return idx; 822 } 823 824 pick_cnt++; 825 return -1; 826 } 827 828 static int mtdswap_choose_gc_tree(struct mtdswap_dev *d, 829 unsigned int background) 830 { 831 int idx; 832 833 if (TREE_NONEMPTY(d, FAILING) && 834 (background || (TREE_EMPTY(d, CLEAN) && TREE_EMPTY(d, DIRTY)))) 835 return MTDSWAP_FAILING; 836 837 idx = mtdswap_choose_wl_tree(d); 838 if (idx >= MTDSWAP_CLEAN) 839 return idx; 840 841 return __mtdswap_choose_gc_tree(d); 842 } 843 844 static struct swap_eb *mtdswap_pick_gc_eblk(struct mtdswap_dev *d, 845 unsigned int background) 846 { 847 struct rb_root *rp = NULL; 848 struct swap_eb *eb = NULL; 849 int idx; 850 851 if (background && TREE_COUNT(d, CLEAN) > CLEAN_BLOCK_THRESHOLD && 852 TREE_EMPTY(d, DIRTY) && TREE_EMPTY(d, FAILING)) 853 return NULL; 854 855 idx = mtdswap_choose_gc_tree(d, background); 856 if (idx < 0) 857 return NULL; 858 859 rp = &d->trees[idx].root; 860 eb = rb_entry(rb_first(rp), struct swap_eb, rb); 861 862 rb_erase(&eb->rb, rp); 863 eb->root = NULL; 864 d->trees[idx].count--; 865 return eb; 866 } 867 868 static unsigned int mtdswap_test_patt(unsigned int i) 869 { 870 return i % 2 ? 0x55555555 : 0xAAAAAAAA; 871 } 872 873 static unsigned int mtdswap_eblk_passes(struct mtdswap_dev *d, 874 struct swap_eb *eb) 875 { 876 struct mtd_info *mtd = d->mtd; 877 unsigned int test, i, j, patt, mtd_pages; 878 loff_t base, pos; 879 unsigned int *p1 = (unsigned int *)d->page_buf; 880 unsigned char *p2 = (unsigned char *)d->oob_buf; 881 struct mtd_oob_ops ops; 882 int ret; 883 884 ops.mode = MTD_OPS_AUTO_OOB; 885 ops.len = mtd->writesize; 886 ops.ooblen = mtd->oobavail; 887 ops.ooboffs = 0; 888 ops.datbuf = d->page_buf; 889 ops.oobbuf = d->oob_buf; 890 base = mtdswap_eb_offset(d, eb); 891 mtd_pages = d->pages_per_eblk * PAGE_SIZE / mtd->writesize; 892 893 for (test = 0; test < 2; test++) { 894 pos = base; 895 for (i = 0; i < mtd_pages; i++) { 896 patt = mtdswap_test_patt(test + i); 897 memset(d->page_buf, patt, mtd->writesize); 898 memset(d->oob_buf, patt, mtd->oobavail); 899 ret = mtd_write_oob(mtd, pos, &ops); 900 if (ret) 901 goto error; 902 903 pos += mtd->writesize; 904 } 905 906 pos = base; 907 for (i = 0; i < mtd_pages; i++) { 908 ret = mtd_read_oob(mtd, pos, &ops); 909 if (ret) 910 goto error; 911 912 patt = mtdswap_test_patt(test + i); 913 for (j = 0; j < mtd->writesize/sizeof(int); j++) 914 if (p1[j] != patt) 915 goto error; 916 917 for (j = 0; j < mtd->oobavail; j++) 918 if (p2[j] != (unsigned char)patt) 919 goto error; 920 921 pos += mtd->writesize; 922 } 923 924 ret = mtdswap_erase_block(d, eb); 925 if (ret) 926 goto error; 927 } 928 929 eb->flags &= ~EBLOCK_READERR; 930 return 1; 931 932 error: 933 mtdswap_handle_badblock(d, eb); 934 return 0; 935 } 936 937 static int mtdswap_gc(struct mtdswap_dev *d, unsigned int background) 938 { 939 struct swap_eb *eb; 940 int ret; 941 942 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 943 return 1; 944 945 eb = mtdswap_pick_gc_eblk(d, background); 946 if (!eb) 947 return 1; 948 949 ret = mtdswap_gc_eblock(d, eb); 950 if (ret == -ENOSPC) 951 return 1; 952 953 if (eb->flags & EBLOCK_FAILED) { 954 mtdswap_handle_badblock(d, eb); 955 return 0; 956 } 957 958 eb->flags &= ~EBLOCK_BITFLIP; 959 ret = mtdswap_erase_block(d, eb); 960 if ((eb->flags & EBLOCK_READERR) && 961 (ret || !mtdswap_eblk_passes(d, eb))) 962 return 0; 963 964 if (ret == 0) 965 ret = mtdswap_write_marker(d, eb, MTDSWAP_TYPE_CLEAN); 966 967 if (ret == 0) 968 mtdswap_rb_add(d, eb, MTDSWAP_CLEAN); 969 else if (ret != -EIO && !mtd_is_eccerr(ret)) 970 mtdswap_rb_add(d, eb, MTDSWAP_DIRTY); 971 972 return 0; 973 } 974 975 static void mtdswap_background(struct mtd_blktrans_dev *dev) 976 { 977 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 978 int ret; 979 980 while (1) { 981 ret = mtdswap_gc(d, 1); 982 if (ret || mtd_blktrans_cease_background(dev)) 983 return; 984 } 985 } 986 987 static void mtdswap_cleanup(struct mtdswap_dev *d) 988 { 989 vfree(d->eb_data); 990 vfree(d->revmap); 991 vfree(d->page_data); 992 kfree(d->oob_buf); 993 kfree(d->page_buf); 994 } 995 996 static int mtdswap_flush(struct mtd_blktrans_dev *dev) 997 { 998 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 999 1000 mtd_sync(d->mtd); 1001 return 0; 1002 } 1003 1004 static unsigned int mtdswap_badblocks(struct mtd_info *mtd, uint64_t size) 1005 { 1006 loff_t offset; 1007 unsigned int badcnt; 1008 1009 badcnt = 0; 1010 1011 if (mtd_can_have_bb(mtd)) 1012 for (offset = 0; offset < size; offset += mtd->erasesize) 1013 if (mtd_block_isbad(mtd, offset)) 1014 badcnt++; 1015 1016 return badcnt; 1017 } 1018 1019 static int mtdswap_writesect(struct mtd_blktrans_dev *dev, 1020 unsigned long page, char *buf) 1021 { 1022 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1023 unsigned int newblock, mapped; 1024 struct swap_eb *eb; 1025 int ret; 1026 1027 d->sect_write_count++; 1028 1029 if (d->spare_eblks < MIN_SPARE_EBLOCKS) 1030 return -ENOSPC; 1031 1032 if (header) { 1033 /* Ignore writes to the header page */ 1034 if (unlikely(page == 0)) 1035 return 0; 1036 1037 page--; 1038 } 1039 1040 mapped = d->page_data[page]; 1041 if (mapped <= BLOCK_MAX) { 1042 eb = d->eb_data + (mapped / d->pages_per_eblk); 1043 eb->active_count--; 1044 mtdswap_store_eb(d, eb); 1045 d->page_data[page] = BLOCK_UNDEF; 1046 d->revmap[mapped] = PAGE_UNDEF; 1047 } 1048 1049 ret = mtdswap_write_block(d, buf, page, &newblock, 0); 1050 d->mtd_write_count++; 1051 1052 if (ret < 0) 1053 return ret; 1054 1055 d->page_data[page] = newblock; 1056 1057 return 0; 1058 } 1059 1060 /* Provide a dummy swap header for the kernel */ 1061 static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf) 1062 { 1063 union swap_header *hd = (union swap_header *)(buf); 1064 1065 memset(buf, 0, PAGE_SIZE - 10); 1066 1067 hd->info.version = 1; 1068 hd->info.last_page = d->mbd_dev->size - 1; 1069 hd->info.nr_badpages = 0; 1070 1071 memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10); 1072 1073 return 0; 1074 } 1075 1076 static int mtdswap_readsect(struct mtd_blktrans_dev *dev, 1077 unsigned long page, char *buf) 1078 { 1079 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1080 struct mtd_info *mtd = d->mtd; 1081 unsigned int realblock, retries; 1082 loff_t readpos; 1083 struct swap_eb *eb; 1084 size_t retlen; 1085 int ret; 1086 1087 d->sect_read_count++; 1088 1089 if (header) { 1090 if (unlikely(page == 0)) 1091 return mtdswap_auto_header(d, buf); 1092 1093 page--; 1094 } 1095 1096 realblock = d->page_data[page]; 1097 if (realblock > BLOCK_MAX) { 1098 memset(buf, 0x0, PAGE_SIZE); 1099 if (realblock == BLOCK_UNDEF) 1100 return 0; 1101 else 1102 return -EIO; 1103 } 1104 1105 eb = d->eb_data + (realblock / d->pages_per_eblk); 1106 BUG_ON(d->revmap[realblock] == PAGE_UNDEF); 1107 1108 readpos = (loff_t)realblock << PAGE_SHIFT; 1109 retries = 0; 1110 1111 retry: 1112 ret = mtd_read(mtd, readpos, PAGE_SIZE, &retlen, buf); 1113 1114 d->mtd_read_count++; 1115 if (mtd_is_bitflip(ret)) { 1116 eb->flags |= EBLOCK_BITFLIP; 1117 mtdswap_rb_add(d, eb, MTDSWAP_BITFLIP); 1118 ret = 0; 1119 } 1120 1121 if (ret < 0) { 1122 dev_err(d->dev, "Read error %d\n", ret); 1123 eb->flags |= EBLOCK_READERR; 1124 mtdswap_rb_add(d, eb, MTDSWAP_FAILING); 1125 retries++; 1126 if (retries < MTDSWAP_IO_RETRIES) 1127 goto retry; 1128 1129 return ret; 1130 } 1131 1132 if (retlen != PAGE_SIZE) { 1133 dev_err(d->dev, "Short read %zd\n", retlen); 1134 return -EIO; 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int mtdswap_discard(struct mtd_blktrans_dev *dev, unsigned long first, 1141 unsigned nr_pages) 1142 { 1143 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1144 unsigned long page; 1145 struct swap_eb *eb; 1146 unsigned int mapped; 1147 1148 d->discard_count++; 1149 1150 for (page = first; page < first + nr_pages; page++) { 1151 mapped = d->page_data[page]; 1152 if (mapped <= BLOCK_MAX) { 1153 eb = d->eb_data + (mapped / d->pages_per_eblk); 1154 eb->active_count--; 1155 mtdswap_store_eb(d, eb); 1156 d->page_data[page] = BLOCK_UNDEF; 1157 d->revmap[mapped] = PAGE_UNDEF; 1158 d->discard_page_count++; 1159 } else if (mapped == BLOCK_ERROR) { 1160 d->page_data[page] = BLOCK_UNDEF; 1161 d->discard_page_count++; 1162 } 1163 } 1164 1165 return 0; 1166 } 1167 1168 static int mtdswap_show(struct seq_file *s, void *data) 1169 { 1170 struct mtdswap_dev *d = (struct mtdswap_dev *) s->private; 1171 unsigned long sum; 1172 unsigned int count[MTDSWAP_TREE_CNT]; 1173 unsigned int min[MTDSWAP_TREE_CNT]; 1174 unsigned int max[MTDSWAP_TREE_CNT]; 1175 unsigned int i, cw = 0, cwp = 0, cwecount = 0, bb_cnt, mapped, pages; 1176 uint64_t use_size; 1177 static const char * const name[] = { 1178 "clean", "used", "low", "high", "dirty", "bitflip", "failing" 1179 }; 1180 1181 mutex_lock(&d->mbd_dev->lock); 1182 1183 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1184 struct rb_root *root = &d->trees[i].root; 1185 1186 if (root->rb_node) { 1187 count[i] = d->trees[i].count; 1188 min[i] = MTDSWAP_ECNT_MIN(root); 1189 max[i] = MTDSWAP_ECNT_MAX(root); 1190 } else 1191 count[i] = 0; 1192 } 1193 1194 if (d->curr_write) { 1195 cw = 1; 1196 cwp = d->curr_write_pos; 1197 cwecount = d->curr_write->erase_count; 1198 } 1199 1200 sum = 0; 1201 for (i = 0; i < d->eblks; i++) 1202 sum += d->eb_data[i].erase_count; 1203 1204 use_size = (uint64_t)d->eblks * d->mtd->erasesize; 1205 bb_cnt = mtdswap_badblocks(d->mtd, use_size); 1206 1207 mapped = 0; 1208 pages = d->mbd_dev->size; 1209 for (i = 0; i < pages; i++) 1210 if (d->page_data[i] != BLOCK_UNDEF) 1211 mapped++; 1212 1213 mutex_unlock(&d->mbd_dev->lock); 1214 1215 for (i = 0; i < MTDSWAP_TREE_CNT; i++) { 1216 if (!count[i]) 1217 continue; 1218 1219 if (min[i] != max[i]) 1220 seq_printf(s, "%s:\t%5d erase blocks, erased min %d, " 1221 "max %d times\n", 1222 name[i], count[i], min[i], max[i]); 1223 else 1224 seq_printf(s, "%s:\t%5d erase blocks, all erased %d " 1225 "times\n", name[i], count[i], min[i]); 1226 } 1227 1228 if (bb_cnt) 1229 seq_printf(s, "bad:\t%5u erase blocks\n", bb_cnt); 1230 1231 if (cw) 1232 seq_printf(s, "current erase block: %u pages used, %u free, " 1233 "erased %u times\n", 1234 cwp, d->pages_per_eblk - cwp, cwecount); 1235 1236 seq_printf(s, "total erasures: %lu\n", sum); 1237 1238 seq_puts(s, "\n"); 1239 1240 seq_printf(s, "mtdswap_readsect count: %llu\n", d->sect_read_count); 1241 seq_printf(s, "mtdswap_writesect count: %llu\n", d->sect_write_count); 1242 seq_printf(s, "mtdswap_discard count: %llu\n", d->discard_count); 1243 seq_printf(s, "mtd read count: %llu\n", d->mtd_read_count); 1244 seq_printf(s, "mtd write count: %llu\n", d->mtd_write_count); 1245 seq_printf(s, "discarded pages count: %llu\n", d->discard_page_count); 1246 1247 seq_puts(s, "\n"); 1248 seq_printf(s, "total pages: %u\n", pages); 1249 seq_printf(s, "pages mapped: %u\n", mapped); 1250 1251 return 0; 1252 } 1253 DEFINE_SHOW_ATTRIBUTE(mtdswap); 1254 1255 static int mtdswap_add_debugfs(struct mtdswap_dev *d) 1256 { 1257 struct dentry *root = d->mtd->dbg.dfs_dir; 1258 1259 if (!IS_ENABLED(CONFIG_DEBUG_FS)) 1260 return 0; 1261 1262 if (IS_ERR_OR_NULL(root)) 1263 return -1; 1264 1265 debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops); 1266 1267 return 0; 1268 } 1269 1270 static int mtdswap_init(struct mtdswap_dev *d, unsigned int eblocks, 1271 unsigned int spare_cnt) 1272 { 1273 struct mtd_info *mtd = d->mbd_dev->mtd; 1274 unsigned int i, eblk_bytes, pages, blocks; 1275 int ret = -ENOMEM; 1276 1277 d->mtd = mtd; 1278 d->eblks = eblocks; 1279 d->spare_eblks = spare_cnt; 1280 d->pages_per_eblk = mtd->erasesize >> PAGE_SHIFT; 1281 1282 pages = d->mbd_dev->size; 1283 blocks = eblocks * d->pages_per_eblk; 1284 1285 for (i = 0; i < MTDSWAP_TREE_CNT; i++) 1286 d->trees[i].root = RB_ROOT; 1287 1288 d->page_data = vmalloc(array_size(pages, sizeof(int))); 1289 if (!d->page_data) 1290 goto page_data_fail; 1291 1292 d->revmap = vmalloc(array_size(blocks, sizeof(int))); 1293 if (!d->revmap) 1294 goto revmap_fail; 1295 1296 eblk_bytes = sizeof(struct swap_eb)*d->eblks; 1297 d->eb_data = vzalloc(eblk_bytes); 1298 if (!d->eb_data) 1299 goto eb_data_fail; 1300 1301 for (i = 0; i < pages; i++) 1302 d->page_data[i] = BLOCK_UNDEF; 1303 1304 for (i = 0; i < blocks; i++) 1305 d->revmap[i] = PAGE_UNDEF; 1306 1307 d->page_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 1308 if (!d->page_buf) 1309 goto page_buf_fail; 1310 1311 d->oob_buf = kmalloc_array(2, mtd->oobavail, GFP_KERNEL); 1312 if (!d->oob_buf) 1313 goto oob_buf_fail; 1314 1315 mtdswap_scan_eblks(d); 1316 1317 return 0; 1318 1319 oob_buf_fail: 1320 kfree(d->page_buf); 1321 page_buf_fail: 1322 vfree(d->eb_data); 1323 eb_data_fail: 1324 vfree(d->revmap); 1325 revmap_fail: 1326 vfree(d->page_data); 1327 page_data_fail: 1328 printk(KERN_ERR "%s: init failed (%d)\n", MTDSWAP_PREFIX, ret); 1329 return ret; 1330 } 1331 1332 static void mtdswap_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd) 1333 { 1334 struct mtdswap_dev *d; 1335 struct mtd_blktrans_dev *mbd_dev; 1336 char *parts; 1337 char *this_opt; 1338 unsigned long part; 1339 unsigned int eblocks, eavailable, bad_blocks, spare_cnt; 1340 uint64_t swap_size, use_size, size_limit; 1341 int ret; 1342 1343 parts = &partitions[0]; 1344 if (!*parts) 1345 return; 1346 1347 while ((this_opt = strsep(&parts, ",")) != NULL) { 1348 if (kstrtoul(this_opt, 0, &part) < 0) 1349 return; 1350 1351 if (mtd->index == part) 1352 break; 1353 } 1354 1355 if (mtd->index != part) 1356 return; 1357 1358 if (mtd->erasesize < PAGE_SIZE || mtd->erasesize % PAGE_SIZE) { 1359 printk(KERN_ERR "%s: Erase size %u not multiple of PAGE_SIZE " 1360 "%lu\n", MTDSWAP_PREFIX, mtd->erasesize, PAGE_SIZE); 1361 return; 1362 } 1363 1364 if (PAGE_SIZE % mtd->writesize || mtd->writesize > PAGE_SIZE) { 1365 printk(KERN_ERR "%s: PAGE_SIZE %lu not multiple of write size" 1366 " %u\n", MTDSWAP_PREFIX, PAGE_SIZE, mtd->writesize); 1367 return; 1368 } 1369 1370 if (!mtd->oobsize || mtd->oobavail < MTDSWAP_OOBSIZE) { 1371 printk(KERN_ERR "%s: Not enough free bytes in OOB, " 1372 "%d available, %zu needed.\n", 1373 MTDSWAP_PREFIX, mtd->oobavail, MTDSWAP_OOBSIZE); 1374 return; 1375 } 1376 1377 if (spare_eblocks > 100) 1378 spare_eblocks = 100; 1379 1380 use_size = mtd->size; 1381 size_limit = (uint64_t) BLOCK_MAX * PAGE_SIZE; 1382 1383 if (mtd->size > size_limit) { 1384 printk(KERN_WARNING "%s: Device too large. Limiting size to " 1385 "%llu bytes\n", MTDSWAP_PREFIX, size_limit); 1386 use_size = size_limit; 1387 } 1388 1389 eblocks = mtd_div_by_eb(use_size, mtd); 1390 use_size = (uint64_t)eblocks * mtd->erasesize; 1391 bad_blocks = mtdswap_badblocks(mtd, use_size); 1392 eavailable = eblocks - bad_blocks; 1393 1394 if (eavailable < MIN_ERASE_BLOCKS) { 1395 printk(KERN_ERR "%s: Not enough erase blocks. %u available, " 1396 "%d needed\n", MTDSWAP_PREFIX, eavailable, 1397 MIN_ERASE_BLOCKS); 1398 return; 1399 } 1400 1401 spare_cnt = div_u64((uint64_t)eavailable * spare_eblocks, 100); 1402 1403 if (spare_cnt < MIN_SPARE_EBLOCKS) 1404 spare_cnt = MIN_SPARE_EBLOCKS; 1405 1406 if (spare_cnt > eavailable - 1) 1407 spare_cnt = eavailable - 1; 1408 1409 swap_size = (uint64_t)(eavailable - spare_cnt) * mtd->erasesize + 1410 (header ? PAGE_SIZE : 0); 1411 1412 printk(KERN_INFO "%s: Enabling MTD swap on device %lu, size %llu KB, " 1413 "%u spare, %u bad blocks\n", 1414 MTDSWAP_PREFIX, part, swap_size / 1024, spare_cnt, bad_blocks); 1415 1416 d = kzalloc(sizeof(struct mtdswap_dev), GFP_KERNEL); 1417 if (!d) 1418 return; 1419 1420 mbd_dev = kzalloc(sizeof(struct mtd_blktrans_dev), GFP_KERNEL); 1421 if (!mbd_dev) { 1422 kfree(d); 1423 return; 1424 } 1425 1426 d->mbd_dev = mbd_dev; 1427 mbd_dev->priv = d; 1428 1429 mbd_dev->mtd = mtd; 1430 mbd_dev->devnum = mtd->index; 1431 mbd_dev->size = swap_size >> PAGE_SHIFT; 1432 mbd_dev->tr = tr; 1433 1434 if (!(mtd->flags & MTD_WRITEABLE)) 1435 mbd_dev->readonly = 1; 1436 1437 if (mtdswap_init(d, eblocks, spare_cnt) < 0) 1438 goto init_failed; 1439 1440 if (add_mtd_blktrans_dev(mbd_dev) < 0) 1441 goto cleanup; 1442 1443 d->dev = disk_to_dev(mbd_dev->disk); 1444 1445 ret = mtdswap_add_debugfs(d); 1446 if (ret < 0) 1447 goto debugfs_failed; 1448 1449 return; 1450 1451 debugfs_failed: 1452 del_mtd_blktrans_dev(mbd_dev); 1453 1454 cleanup: 1455 mtdswap_cleanup(d); 1456 1457 init_failed: 1458 kfree(mbd_dev); 1459 kfree(d); 1460 } 1461 1462 static void mtdswap_remove_dev(struct mtd_blktrans_dev *dev) 1463 { 1464 struct mtdswap_dev *d = MTDSWAP_MBD_TO_MTDSWAP(dev); 1465 1466 del_mtd_blktrans_dev(dev); 1467 mtdswap_cleanup(d); 1468 kfree(d); 1469 } 1470 1471 static struct mtd_blktrans_ops mtdswap_ops = { 1472 .name = "mtdswap", 1473 .major = 0, 1474 .part_bits = 0, 1475 .blksize = PAGE_SIZE, 1476 .flush = mtdswap_flush, 1477 .readsect = mtdswap_readsect, 1478 .writesect = mtdswap_writesect, 1479 .discard = mtdswap_discard, 1480 .background = mtdswap_background, 1481 .add_mtd = mtdswap_add_mtd, 1482 .remove_dev = mtdswap_remove_dev, 1483 .owner = THIS_MODULE, 1484 }; 1485 1486 module_mtd_blktrans(mtdswap_ops); 1487 1488 MODULE_LICENSE("GPL"); 1489 MODULE_AUTHOR("Jarkko Lavinen <jarkko.lavinen@nokia.com>"); 1490 MODULE_DESCRIPTION("Block device access to an MTD suitable for using as " 1491 "swap space"); 1492