1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/genhd.h> 26 #include <linux/highmem.h> 27 #include <linux/slab.h> 28 #include <linux/backing-dev.h> 29 #include <linux/string.h> 30 #include <linux/vmalloc.h> 31 #include <linux/err.h> 32 #include <linux/idr.h> 33 #include <linux/sysfs.h> 34 #include <linux/debugfs.h> 35 #include <linux/cpuhotplug.h> 36 #include <linux/part_stat.h> 37 38 #include "zram_drv.h" 39 40 static DEFINE_IDR(zram_index_idr); 41 /* idr index must be protected */ 42 static DEFINE_MUTEX(zram_index_mutex); 43 44 static int zram_major; 45 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 46 47 /* Module params (documentation at end) */ 48 static unsigned int num_devices = 1; 49 /* 50 * Pages that compress to sizes equals or greater than this are stored 51 * uncompressed in memory. 52 */ 53 static size_t huge_class_size; 54 55 static const struct block_device_operations zram_devops; 56 static const struct block_device_operations zram_wb_devops; 57 58 static void zram_free_page(struct zram *zram, size_t index); 59 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 60 u32 index, int offset, struct bio *bio); 61 62 63 static int zram_slot_trylock(struct zram *zram, u32 index) 64 { 65 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); 66 } 67 68 static void zram_slot_lock(struct zram *zram, u32 index) 69 { 70 bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); 71 } 72 73 static void zram_slot_unlock(struct zram *zram, u32 index) 74 { 75 bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); 76 } 77 78 static inline bool init_done(struct zram *zram) 79 { 80 return zram->disksize; 81 } 82 83 static inline struct zram *dev_to_zram(struct device *dev) 84 { 85 return (struct zram *)dev_to_disk(dev)->private_data; 86 } 87 88 static unsigned long zram_get_handle(struct zram *zram, u32 index) 89 { 90 return zram->table[index].handle; 91 } 92 93 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 94 { 95 zram->table[index].handle = handle; 96 } 97 98 /* flag operations require table entry bit_spin_lock() being held */ 99 static bool zram_test_flag(struct zram *zram, u32 index, 100 enum zram_pageflags flag) 101 { 102 return zram->table[index].flags & BIT(flag); 103 } 104 105 static void zram_set_flag(struct zram *zram, u32 index, 106 enum zram_pageflags flag) 107 { 108 zram->table[index].flags |= BIT(flag); 109 } 110 111 static void zram_clear_flag(struct zram *zram, u32 index, 112 enum zram_pageflags flag) 113 { 114 zram->table[index].flags &= ~BIT(flag); 115 } 116 117 static inline void zram_set_element(struct zram *zram, u32 index, 118 unsigned long element) 119 { 120 zram->table[index].element = element; 121 } 122 123 static unsigned long zram_get_element(struct zram *zram, u32 index) 124 { 125 return zram->table[index].element; 126 } 127 128 static size_t zram_get_obj_size(struct zram *zram, u32 index) 129 { 130 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 131 } 132 133 static void zram_set_obj_size(struct zram *zram, 134 u32 index, size_t size) 135 { 136 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; 137 138 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; 139 } 140 141 static inline bool zram_allocated(struct zram *zram, u32 index) 142 { 143 return zram_get_obj_size(zram, index) || 144 zram_test_flag(zram, index, ZRAM_SAME) || 145 zram_test_flag(zram, index, ZRAM_WB); 146 } 147 148 #if PAGE_SIZE != 4096 149 static inline bool is_partial_io(struct bio_vec *bvec) 150 { 151 return bvec->bv_len != PAGE_SIZE; 152 } 153 #else 154 static inline bool is_partial_io(struct bio_vec *bvec) 155 { 156 return false; 157 } 158 #endif 159 160 /* 161 * Check if request is within bounds and aligned on zram logical blocks. 162 */ 163 static inline bool valid_io_request(struct zram *zram, 164 sector_t start, unsigned int size) 165 { 166 u64 end, bound; 167 168 /* unaligned request */ 169 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 170 return false; 171 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 172 return false; 173 174 end = start + (size >> SECTOR_SHIFT); 175 bound = zram->disksize >> SECTOR_SHIFT; 176 /* out of range range */ 177 if (unlikely(start >= bound || end > bound || start > end)) 178 return false; 179 180 /* I/O request is valid */ 181 return true; 182 } 183 184 static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 185 { 186 *index += (*offset + bvec->bv_len) / PAGE_SIZE; 187 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 188 } 189 190 static inline void update_used_max(struct zram *zram, 191 const unsigned long pages) 192 { 193 unsigned long old_max, cur_max; 194 195 old_max = atomic_long_read(&zram->stats.max_used_pages); 196 197 do { 198 cur_max = old_max; 199 if (pages > cur_max) 200 old_max = atomic_long_cmpxchg( 201 &zram->stats.max_used_pages, cur_max, pages); 202 } while (old_max != cur_max); 203 } 204 205 static inline void zram_fill_page(void *ptr, unsigned long len, 206 unsigned long value) 207 { 208 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 209 memset_l(ptr, value, len / sizeof(unsigned long)); 210 } 211 212 static bool page_same_filled(void *ptr, unsigned long *element) 213 { 214 unsigned long *page; 215 unsigned long val; 216 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 217 218 page = (unsigned long *)ptr; 219 val = page[0]; 220 221 if (val != page[last_pos]) 222 return false; 223 224 for (pos = 1; pos < last_pos; pos++) { 225 if (val != page[pos]) 226 return false; 227 } 228 229 *element = val; 230 231 return true; 232 } 233 234 static ssize_t initstate_show(struct device *dev, 235 struct device_attribute *attr, char *buf) 236 { 237 u32 val; 238 struct zram *zram = dev_to_zram(dev); 239 240 down_read(&zram->init_lock); 241 val = init_done(zram); 242 up_read(&zram->init_lock); 243 244 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 245 } 246 247 static ssize_t disksize_show(struct device *dev, 248 struct device_attribute *attr, char *buf) 249 { 250 struct zram *zram = dev_to_zram(dev); 251 252 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 253 } 254 255 static ssize_t mem_limit_store(struct device *dev, 256 struct device_attribute *attr, const char *buf, size_t len) 257 { 258 u64 limit; 259 char *tmp; 260 struct zram *zram = dev_to_zram(dev); 261 262 limit = memparse(buf, &tmp); 263 if (buf == tmp) /* no chars parsed, invalid input */ 264 return -EINVAL; 265 266 down_write(&zram->init_lock); 267 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 268 up_write(&zram->init_lock); 269 270 return len; 271 } 272 273 static ssize_t mem_used_max_store(struct device *dev, 274 struct device_attribute *attr, const char *buf, size_t len) 275 { 276 int err; 277 unsigned long val; 278 struct zram *zram = dev_to_zram(dev); 279 280 err = kstrtoul(buf, 10, &val); 281 if (err || val != 0) 282 return -EINVAL; 283 284 down_read(&zram->init_lock); 285 if (init_done(zram)) { 286 atomic_long_set(&zram->stats.max_used_pages, 287 zs_get_total_pages(zram->mem_pool)); 288 } 289 up_read(&zram->init_lock); 290 291 return len; 292 } 293 294 /* 295 * Mark all pages which are older than or equal to cutoff as IDLE. 296 * Callers should hold the zram init lock in read mode 297 */ 298 static void mark_idle(struct zram *zram, ktime_t cutoff) 299 { 300 int is_idle = 1; 301 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 302 int index; 303 304 for (index = 0; index < nr_pages; index++) { 305 /* 306 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. 307 * See the comment in writeback_store. 308 */ 309 zram_slot_lock(zram, index); 310 if (zram_allocated(zram, index) && 311 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { 312 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 313 is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time); 314 #endif 315 if (is_idle) 316 zram_set_flag(zram, index, ZRAM_IDLE); 317 } 318 zram_slot_unlock(zram, index); 319 } 320 } 321 322 static ssize_t idle_store(struct device *dev, 323 struct device_attribute *attr, const char *buf, size_t len) 324 { 325 struct zram *zram = dev_to_zram(dev); 326 ktime_t cutoff_time = 0; 327 ssize_t rv = -EINVAL; 328 329 if (!sysfs_streq(buf, "all")) { 330 /* 331 * If it did not parse as 'all' try to treat it as an integer when 332 * we have memory tracking enabled. 333 */ 334 u64 age_sec; 335 336 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) 337 cutoff_time = ktime_sub(ktime_get_boottime(), 338 ns_to_ktime(age_sec * NSEC_PER_SEC)); 339 else 340 goto out; 341 } 342 343 down_read(&zram->init_lock); 344 if (!init_done(zram)) 345 goto out_unlock; 346 347 /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */ 348 mark_idle(zram, cutoff_time); 349 rv = len; 350 351 out_unlock: 352 up_read(&zram->init_lock); 353 out: 354 return rv; 355 } 356 357 #ifdef CONFIG_ZRAM_WRITEBACK 358 static ssize_t writeback_limit_enable_store(struct device *dev, 359 struct device_attribute *attr, const char *buf, size_t len) 360 { 361 struct zram *zram = dev_to_zram(dev); 362 u64 val; 363 ssize_t ret = -EINVAL; 364 365 if (kstrtoull(buf, 10, &val)) 366 return ret; 367 368 down_read(&zram->init_lock); 369 spin_lock(&zram->wb_limit_lock); 370 zram->wb_limit_enable = val; 371 spin_unlock(&zram->wb_limit_lock); 372 up_read(&zram->init_lock); 373 ret = len; 374 375 return ret; 376 } 377 378 static ssize_t writeback_limit_enable_show(struct device *dev, 379 struct device_attribute *attr, char *buf) 380 { 381 bool val; 382 struct zram *zram = dev_to_zram(dev); 383 384 down_read(&zram->init_lock); 385 spin_lock(&zram->wb_limit_lock); 386 val = zram->wb_limit_enable; 387 spin_unlock(&zram->wb_limit_lock); 388 up_read(&zram->init_lock); 389 390 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 391 } 392 393 static ssize_t writeback_limit_store(struct device *dev, 394 struct device_attribute *attr, const char *buf, size_t len) 395 { 396 struct zram *zram = dev_to_zram(dev); 397 u64 val; 398 ssize_t ret = -EINVAL; 399 400 if (kstrtoull(buf, 10, &val)) 401 return ret; 402 403 down_read(&zram->init_lock); 404 spin_lock(&zram->wb_limit_lock); 405 zram->bd_wb_limit = val; 406 spin_unlock(&zram->wb_limit_lock); 407 up_read(&zram->init_lock); 408 ret = len; 409 410 return ret; 411 } 412 413 static ssize_t writeback_limit_show(struct device *dev, 414 struct device_attribute *attr, char *buf) 415 { 416 u64 val; 417 struct zram *zram = dev_to_zram(dev); 418 419 down_read(&zram->init_lock); 420 spin_lock(&zram->wb_limit_lock); 421 val = zram->bd_wb_limit; 422 spin_unlock(&zram->wb_limit_lock); 423 up_read(&zram->init_lock); 424 425 return scnprintf(buf, PAGE_SIZE, "%llu\n", val); 426 } 427 428 static void reset_bdev(struct zram *zram) 429 { 430 struct block_device *bdev; 431 432 if (!zram->backing_dev) 433 return; 434 435 bdev = zram->bdev; 436 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 437 /* hope filp_close flush all of IO */ 438 filp_close(zram->backing_dev, NULL); 439 zram->backing_dev = NULL; 440 zram->bdev = NULL; 441 zram->disk->fops = &zram_devops; 442 kvfree(zram->bitmap); 443 zram->bitmap = NULL; 444 } 445 446 static ssize_t backing_dev_show(struct device *dev, 447 struct device_attribute *attr, char *buf) 448 { 449 struct file *file; 450 struct zram *zram = dev_to_zram(dev); 451 char *p; 452 ssize_t ret; 453 454 down_read(&zram->init_lock); 455 file = zram->backing_dev; 456 if (!file) { 457 memcpy(buf, "none\n", 5); 458 up_read(&zram->init_lock); 459 return 5; 460 } 461 462 p = file_path(file, buf, PAGE_SIZE - 1); 463 if (IS_ERR(p)) { 464 ret = PTR_ERR(p); 465 goto out; 466 } 467 468 ret = strlen(p); 469 memmove(buf, p, ret); 470 buf[ret++] = '\n'; 471 out: 472 up_read(&zram->init_lock); 473 return ret; 474 } 475 476 static ssize_t backing_dev_store(struct device *dev, 477 struct device_attribute *attr, const char *buf, size_t len) 478 { 479 char *file_name; 480 size_t sz; 481 struct file *backing_dev = NULL; 482 struct inode *inode; 483 struct address_space *mapping; 484 unsigned int bitmap_sz; 485 unsigned long nr_pages, *bitmap = NULL; 486 struct block_device *bdev = NULL; 487 int err; 488 struct zram *zram = dev_to_zram(dev); 489 490 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 491 if (!file_name) 492 return -ENOMEM; 493 494 down_write(&zram->init_lock); 495 if (init_done(zram)) { 496 pr_info("Can't setup backing device for initialized device\n"); 497 err = -EBUSY; 498 goto out; 499 } 500 501 strlcpy(file_name, buf, PATH_MAX); 502 /* ignore trailing newline */ 503 sz = strlen(file_name); 504 if (sz > 0 && file_name[sz - 1] == '\n') 505 file_name[sz - 1] = 0x00; 506 507 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); 508 if (IS_ERR(backing_dev)) { 509 err = PTR_ERR(backing_dev); 510 backing_dev = NULL; 511 goto out; 512 } 513 514 mapping = backing_dev->f_mapping; 515 inode = mapping->host; 516 517 /* Support only block device in this moment */ 518 if (!S_ISBLK(inode->i_mode)) { 519 err = -ENOTBLK; 520 goto out; 521 } 522 523 bdev = blkdev_get_by_dev(inode->i_rdev, 524 FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); 525 if (IS_ERR(bdev)) { 526 err = PTR_ERR(bdev); 527 bdev = NULL; 528 goto out; 529 } 530 531 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 532 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 533 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 534 if (!bitmap) { 535 err = -ENOMEM; 536 goto out; 537 } 538 539 reset_bdev(zram); 540 541 zram->bdev = bdev; 542 zram->backing_dev = backing_dev; 543 zram->bitmap = bitmap; 544 zram->nr_pages = nr_pages; 545 /* 546 * With writeback feature, zram does asynchronous IO so it's no longer 547 * synchronous device so let's remove synchronous io flag. Othewise, 548 * upper layer(e.g., swap) could wait IO completion rather than 549 * (submit and return), which will cause system sluggish. 550 * Furthermore, when the IO function returns(e.g., swap_readpage), 551 * upper layer expects IO was done so it could deallocate the page 552 * freely but in fact, IO is going on so finally could cause 553 * use-after-free when the IO is really done. 554 */ 555 zram->disk->fops = &zram_wb_devops; 556 up_write(&zram->init_lock); 557 558 pr_info("setup backing device %s\n", file_name); 559 kfree(file_name); 560 561 return len; 562 out: 563 kvfree(bitmap); 564 565 if (bdev) 566 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 567 568 if (backing_dev) 569 filp_close(backing_dev, NULL); 570 571 up_write(&zram->init_lock); 572 573 kfree(file_name); 574 575 return err; 576 } 577 578 static unsigned long alloc_block_bdev(struct zram *zram) 579 { 580 unsigned long blk_idx = 1; 581 retry: 582 /* skip 0 bit to confuse zram.handle = 0 */ 583 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); 584 if (blk_idx == zram->nr_pages) 585 return 0; 586 587 if (test_and_set_bit(blk_idx, zram->bitmap)) 588 goto retry; 589 590 atomic64_inc(&zram->stats.bd_count); 591 return blk_idx; 592 } 593 594 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) 595 { 596 int was_set; 597 598 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 599 WARN_ON_ONCE(!was_set); 600 atomic64_dec(&zram->stats.bd_count); 601 } 602 603 static void zram_page_end_io(struct bio *bio) 604 { 605 struct page *page = bio_first_page_all(bio); 606 607 page_endio(page, op_is_write(bio_op(bio)), 608 blk_status_to_errno(bio->bi_status)); 609 bio_put(bio); 610 } 611 612 /* 613 * Returns 1 if the submission is successful. 614 */ 615 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, 616 unsigned long entry, struct bio *parent) 617 { 618 struct bio *bio; 619 620 bio = bio_alloc(GFP_NOIO, 1); 621 if (!bio) 622 return -ENOMEM; 623 624 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 625 bio_set_dev(bio, zram->bdev); 626 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { 627 bio_put(bio); 628 return -EIO; 629 } 630 631 if (!parent) { 632 bio->bi_opf = REQ_OP_READ; 633 bio->bi_end_io = zram_page_end_io; 634 } else { 635 bio->bi_opf = parent->bi_opf; 636 bio_chain(bio, parent); 637 } 638 639 submit_bio(bio); 640 return 1; 641 } 642 643 #define PAGE_WB_SIG "page_index=" 644 645 #define PAGE_WRITEBACK 0 646 #define HUGE_WRITEBACK 1 647 #define IDLE_WRITEBACK 2 648 649 650 static ssize_t writeback_store(struct device *dev, 651 struct device_attribute *attr, const char *buf, size_t len) 652 { 653 struct zram *zram = dev_to_zram(dev); 654 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 655 unsigned long index = 0; 656 struct bio bio; 657 struct bio_vec bio_vec; 658 struct page *page; 659 ssize_t ret = len; 660 int mode, err; 661 unsigned long blk_idx = 0; 662 663 if (sysfs_streq(buf, "idle")) 664 mode = IDLE_WRITEBACK; 665 else if (sysfs_streq(buf, "huge")) 666 mode = HUGE_WRITEBACK; 667 else { 668 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) 669 return -EINVAL; 670 671 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || 672 index >= nr_pages) 673 return -EINVAL; 674 675 nr_pages = 1; 676 mode = PAGE_WRITEBACK; 677 } 678 679 down_read(&zram->init_lock); 680 if (!init_done(zram)) { 681 ret = -EINVAL; 682 goto release_init_lock; 683 } 684 685 if (!zram->backing_dev) { 686 ret = -ENODEV; 687 goto release_init_lock; 688 } 689 690 page = alloc_page(GFP_KERNEL); 691 if (!page) { 692 ret = -ENOMEM; 693 goto release_init_lock; 694 } 695 696 for (; nr_pages != 0; index++, nr_pages--) { 697 struct bio_vec bvec; 698 699 bvec.bv_page = page; 700 bvec.bv_len = PAGE_SIZE; 701 bvec.bv_offset = 0; 702 703 spin_lock(&zram->wb_limit_lock); 704 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 705 spin_unlock(&zram->wb_limit_lock); 706 ret = -EIO; 707 break; 708 } 709 spin_unlock(&zram->wb_limit_lock); 710 711 if (!blk_idx) { 712 blk_idx = alloc_block_bdev(zram); 713 if (!blk_idx) { 714 ret = -ENOSPC; 715 break; 716 } 717 } 718 719 zram_slot_lock(zram, index); 720 if (!zram_allocated(zram, index)) 721 goto next; 722 723 if (zram_test_flag(zram, index, ZRAM_WB) || 724 zram_test_flag(zram, index, ZRAM_SAME) || 725 zram_test_flag(zram, index, ZRAM_UNDER_WB)) 726 goto next; 727 728 if (mode == IDLE_WRITEBACK && 729 !zram_test_flag(zram, index, ZRAM_IDLE)) 730 goto next; 731 if (mode == HUGE_WRITEBACK && 732 !zram_test_flag(zram, index, ZRAM_HUGE)) 733 goto next; 734 /* 735 * Clearing ZRAM_UNDER_WB is duty of caller. 736 * IOW, zram_free_page never clear it. 737 */ 738 zram_set_flag(zram, index, ZRAM_UNDER_WB); 739 /* Need for hugepage writeback racing */ 740 zram_set_flag(zram, index, ZRAM_IDLE); 741 zram_slot_unlock(zram, index); 742 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { 743 zram_slot_lock(zram, index); 744 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 745 zram_clear_flag(zram, index, ZRAM_IDLE); 746 zram_slot_unlock(zram, index); 747 continue; 748 } 749 750 bio_init(&bio, &bio_vec, 1); 751 bio_set_dev(&bio, zram->bdev); 752 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 753 bio.bi_opf = REQ_OP_WRITE | REQ_SYNC; 754 755 bio_add_page(&bio, bvec.bv_page, bvec.bv_len, 756 bvec.bv_offset); 757 /* 758 * XXX: A single page IO would be inefficient for write 759 * but it would be not bad as starter. 760 */ 761 err = submit_bio_wait(&bio); 762 if (err) { 763 zram_slot_lock(zram, index); 764 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 765 zram_clear_flag(zram, index, ZRAM_IDLE); 766 zram_slot_unlock(zram, index); 767 /* 768 * Return last IO error unless every IO were 769 * not suceeded. 770 */ 771 ret = err; 772 continue; 773 } 774 775 atomic64_inc(&zram->stats.bd_writes); 776 /* 777 * We released zram_slot_lock so need to check if the slot was 778 * changed. If there is freeing for the slot, we can catch it 779 * easily by zram_allocated. 780 * A subtle case is the slot is freed/reallocated/marked as 781 * ZRAM_IDLE again. To close the race, idle_store doesn't 782 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. 783 * Thus, we could close the race by checking ZRAM_IDLE bit. 784 */ 785 zram_slot_lock(zram, index); 786 if (!zram_allocated(zram, index) || 787 !zram_test_flag(zram, index, ZRAM_IDLE)) { 788 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 789 zram_clear_flag(zram, index, ZRAM_IDLE); 790 goto next; 791 } 792 793 zram_free_page(zram, index); 794 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 795 zram_set_flag(zram, index, ZRAM_WB); 796 zram_set_element(zram, index, blk_idx); 797 blk_idx = 0; 798 atomic64_inc(&zram->stats.pages_stored); 799 spin_lock(&zram->wb_limit_lock); 800 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 801 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 802 spin_unlock(&zram->wb_limit_lock); 803 next: 804 zram_slot_unlock(zram, index); 805 } 806 807 if (blk_idx) 808 free_block_bdev(zram, blk_idx); 809 __free_page(page); 810 release_init_lock: 811 up_read(&zram->init_lock); 812 813 return ret; 814 } 815 816 struct zram_work { 817 struct work_struct work; 818 struct zram *zram; 819 unsigned long entry; 820 struct bio *bio; 821 struct bio_vec bvec; 822 }; 823 824 #if PAGE_SIZE != 4096 825 static void zram_sync_read(struct work_struct *work) 826 { 827 struct zram_work *zw = container_of(work, struct zram_work, work); 828 struct zram *zram = zw->zram; 829 unsigned long entry = zw->entry; 830 struct bio *bio = zw->bio; 831 832 read_from_bdev_async(zram, &zw->bvec, entry, bio); 833 } 834 835 /* 836 * Block layer want one ->submit_bio to be active at a time, so if we use 837 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 838 * use a worker thread context. 839 */ 840 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 841 unsigned long entry, struct bio *bio) 842 { 843 struct zram_work work; 844 845 work.bvec = *bvec; 846 work.zram = zram; 847 work.entry = entry; 848 work.bio = bio; 849 850 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 851 queue_work(system_unbound_wq, &work.work); 852 flush_work(&work.work); 853 destroy_work_on_stack(&work.work); 854 855 return 1; 856 } 857 #else 858 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 859 unsigned long entry, struct bio *bio) 860 { 861 WARN_ON(1); 862 return -EIO; 863 } 864 #endif 865 866 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 867 unsigned long entry, struct bio *parent, bool sync) 868 { 869 atomic64_inc(&zram->stats.bd_reads); 870 if (sync) 871 return read_from_bdev_sync(zram, bvec, entry, parent); 872 else 873 return read_from_bdev_async(zram, bvec, entry, parent); 874 } 875 #else 876 static inline void reset_bdev(struct zram *zram) {}; 877 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 878 unsigned long entry, struct bio *parent, bool sync) 879 { 880 return -EIO; 881 } 882 883 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; 884 #endif 885 886 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 887 888 static struct dentry *zram_debugfs_root; 889 890 static void zram_debugfs_create(void) 891 { 892 zram_debugfs_root = debugfs_create_dir("zram", NULL); 893 } 894 895 static void zram_debugfs_destroy(void) 896 { 897 debugfs_remove_recursive(zram_debugfs_root); 898 } 899 900 static void zram_accessed(struct zram *zram, u32 index) 901 { 902 zram_clear_flag(zram, index, ZRAM_IDLE); 903 zram->table[index].ac_time = ktime_get_boottime(); 904 } 905 906 static ssize_t read_block_state(struct file *file, char __user *buf, 907 size_t count, loff_t *ppos) 908 { 909 char *kbuf; 910 ssize_t index, written = 0; 911 struct zram *zram = file->private_data; 912 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 913 struct timespec64 ts; 914 915 kbuf = kvmalloc(count, GFP_KERNEL); 916 if (!kbuf) 917 return -ENOMEM; 918 919 down_read(&zram->init_lock); 920 if (!init_done(zram)) { 921 up_read(&zram->init_lock); 922 kvfree(kbuf); 923 return -EINVAL; 924 } 925 926 for (index = *ppos; index < nr_pages; index++) { 927 int copied; 928 929 zram_slot_lock(zram, index); 930 if (!zram_allocated(zram, index)) 931 goto next; 932 933 ts = ktime_to_timespec64(zram->table[index].ac_time); 934 copied = snprintf(kbuf + written, count, 935 "%12zd %12lld.%06lu %c%c%c%c\n", 936 index, (s64)ts.tv_sec, 937 ts.tv_nsec / NSEC_PER_USEC, 938 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', 939 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', 940 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 941 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); 942 943 if (count <= copied) { 944 zram_slot_unlock(zram, index); 945 break; 946 } 947 written += copied; 948 count -= copied; 949 next: 950 zram_slot_unlock(zram, index); 951 *ppos += 1; 952 } 953 954 up_read(&zram->init_lock); 955 if (copy_to_user(buf, kbuf, written)) 956 written = -EFAULT; 957 kvfree(kbuf); 958 959 return written; 960 } 961 962 static const struct file_operations proc_zram_block_state_op = { 963 .open = simple_open, 964 .read = read_block_state, 965 .llseek = default_llseek, 966 }; 967 968 static void zram_debugfs_register(struct zram *zram) 969 { 970 if (!zram_debugfs_root) 971 return; 972 973 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 974 zram_debugfs_root); 975 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 976 zram, &proc_zram_block_state_op); 977 } 978 979 static void zram_debugfs_unregister(struct zram *zram) 980 { 981 debugfs_remove_recursive(zram->debugfs_dir); 982 } 983 #else 984 static void zram_debugfs_create(void) {}; 985 static void zram_debugfs_destroy(void) {}; 986 static void zram_accessed(struct zram *zram, u32 index) 987 { 988 zram_clear_flag(zram, index, ZRAM_IDLE); 989 }; 990 static void zram_debugfs_register(struct zram *zram) {}; 991 static void zram_debugfs_unregister(struct zram *zram) {}; 992 #endif 993 994 /* 995 * We switched to per-cpu streams and this attr is not needed anymore. 996 * However, we will keep it around for some time, because: 997 * a) we may revert per-cpu streams in the future 998 * b) it's visible to user space and we need to follow our 2 years 999 * retirement rule; but we already have a number of 'soon to be 1000 * altered' attrs, so max_comp_streams need to wait for the next 1001 * layoff cycle. 1002 */ 1003 static ssize_t max_comp_streams_show(struct device *dev, 1004 struct device_attribute *attr, char *buf) 1005 { 1006 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); 1007 } 1008 1009 static ssize_t max_comp_streams_store(struct device *dev, 1010 struct device_attribute *attr, const char *buf, size_t len) 1011 { 1012 return len; 1013 } 1014 1015 static ssize_t comp_algorithm_show(struct device *dev, 1016 struct device_attribute *attr, char *buf) 1017 { 1018 size_t sz; 1019 struct zram *zram = dev_to_zram(dev); 1020 1021 down_read(&zram->init_lock); 1022 sz = zcomp_available_show(zram->compressor, buf); 1023 up_read(&zram->init_lock); 1024 1025 return sz; 1026 } 1027 1028 static ssize_t comp_algorithm_store(struct device *dev, 1029 struct device_attribute *attr, const char *buf, size_t len) 1030 { 1031 struct zram *zram = dev_to_zram(dev); 1032 char compressor[ARRAY_SIZE(zram->compressor)]; 1033 size_t sz; 1034 1035 strlcpy(compressor, buf, sizeof(compressor)); 1036 /* ignore trailing newline */ 1037 sz = strlen(compressor); 1038 if (sz > 0 && compressor[sz - 1] == '\n') 1039 compressor[sz - 1] = 0x00; 1040 1041 if (!zcomp_available_algorithm(compressor)) 1042 return -EINVAL; 1043 1044 down_write(&zram->init_lock); 1045 if (init_done(zram)) { 1046 up_write(&zram->init_lock); 1047 pr_info("Can't change algorithm for initialized device\n"); 1048 return -EBUSY; 1049 } 1050 1051 strcpy(zram->compressor, compressor); 1052 up_write(&zram->init_lock); 1053 return len; 1054 } 1055 1056 static ssize_t compact_store(struct device *dev, 1057 struct device_attribute *attr, const char *buf, size_t len) 1058 { 1059 struct zram *zram = dev_to_zram(dev); 1060 1061 down_read(&zram->init_lock); 1062 if (!init_done(zram)) { 1063 up_read(&zram->init_lock); 1064 return -EINVAL; 1065 } 1066 1067 zs_compact(zram->mem_pool); 1068 up_read(&zram->init_lock); 1069 1070 return len; 1071 } 1072 1073 static ssize_t io_stat_show(struct device *dev, 1074 struct device_attribute *attr, char *buf) 1075 { 1076 struct zram *zram = dev_to_zram(dev); 1077 ssize_t ret; 1078 1079 down_read(&zram->init_lock); 1080 ret = scnprintf(buf, PAGE_SIZE, 1081 "%8llu %8llu %8llu %8llu\n", 1082 (u64)atomic64_read(&zram->stats.failed_reads), 1083 (u64)atomic64_read(&zram->stats.failed_writes), 1084 (u64)atomic64_read(&zram->stats.invalid_io), 1085 (u64)atomic64_read(&zram->stats.notify_free)); 1086 up_read(&zram->init_lock); 1087 1088 return ret; 1089 } 1090 1091 static ssize_t mm_stat_show(struct device *dev, 1092 struct device_attribute *attr, char *buf) 1093 { 1094 struct zram *zram = dev_to_zram(dev); 1095 struct zs_pool_stats pool_stats; 1096 u64 orig_size, mem_used = 0; 1097 long max_used; 1098 ssize_t ret; 1099 1100 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1101 1102 down_read(&zram->init_lock); 1103 if (init_done(zram)) { 1104 mem_used = zs_get_total_pages(zram->mem_pool); 1105 zs_pool_stats(zram->mem_pool, &pool_stats); 1106 } 1107 1108 orig_size = atomic64_read(&zram->stats.pages_stored); 1109 max_used = atomic_long_read(&zram->stats.max_used_pages); 1110 1111 ret = scnprintf(buf, PAGE_SIZE, 1112 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1113 orig_size << PAGE_SHIFT, 1114 (u64)atomic64_read(&zram->stats.compr_data_size), 1115 mem_used << PAGE_SHIFT, 1116 zram->limit_pages << PAGE_SHIFT, 1117 max_used << PAGE_SHIFT, 1118 (u64)atomic64_read(&zram->stats.same_pages), 1119 atomic_long_read(&pool_stats.pages_compacted), 1120 (u64)atomic64_read(&zram->stats.huge_pages), 1121 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1122 up_read(&zram->init_lock); 1123 1124 return ret; 1125 } 1126 1127 #ifdef CONFIG_ZRAM_WRITEBACK 1128 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 1129 static ssize_t bd_stat_show(struct device *dev, 1130 struct device_attribute *attr, char *buf) 1131 { 1132 struct zram *zram = dev_to_zram(dev); 1133 ssize_t ret; 1134 1135 down_read(&zram->init_lock); 1136 ret = scnprintf(buf, PAGE_SIZE, 1137 "%8llu %8llu %8llu\n", 1138 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 1139 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 1140 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 1141 up_read(&zram->init_lock); 1142 1143 return ret; 1144 } 1145 #endif 1146 1147 static ssize_t debug_stat_show(struct device *dev, 1148 struct device_attribute *attr, char *buf) 1149 { 1150 int version = 1; 1151 struct zram *zram = dev_to_zram(dev); 1152 ssize_t ret; 1153 1154 down_read(&zram->init_lock); 1155 ret = scnprintf(buf, PAGE_SIZE, 1156 "version: %d\n%8llu %8llu\n", 1157 version, 1158 (u64)atomic64_read(&zram->stats.writestall), 1159 (u64)atomic64_read(&zram->stats.miss_free)); 1160 up_read(&zram->init_lock); 1161 1162 return ret; 1163 } 1164 1165 static DEVICE_ATTR_RO(io_stat); 1166 static DEVICE_ATTR_RO(mm_stat); 1167 #ifdef CONFIG_ZRAM_WRITEBACK 1168 static DEVICE_ATTR_RO(bd_stat); 1169 #endif 1170 static DEVICE_ATTR_RO(debug_stat); 1171 1172 static void zram_meta_free(struct zram *zram, u64 disksize) 1173 { 1174 size_t num_pages = disksize >> PAGE_SHIFT; 1175 size_t index; 1176 1177 /* Free all pages that are still in this zram device */ 1178 for (index = 0; index < num_pages; index++) 1179 zram_free_page(zram, index); 1180 1181 zs_destroy_pool(zram->mem_pool); 1182 vfree(zram->table); 1183 } 1184 1185 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1186 { 1187 size_t num_pages; 1188 1189 num_pages = disksize >> PAGE_SHIFT; 1190 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1191 if (!zram->table) 1192 return false; 1193 1194 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1195 if (!zram->mem_pool) { 1196 vfree(zram->table); 1197 return false; 1198 } 1199 1200 if (!huge_class_size) 1201 huge_class_size = zs_huge_class_size(zram->mem_pool); 1202 return true; 1203 } 1204 1205 /* 1206 * To protect concurrent access to the same index entry, 1207 * caller should hold this table index entry's bit_spinlock to 1208 * indicate this index entry is accessing. 1209 */ 1210 static void zram_free_page(struct zram *zram, size_t index) 1211 { 1212 unsigned long handle; 1213 1214 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1215 zram->table[index].ac_time = 0; 1216 #endif 1217 if (zram_test_flag(zram, index, ZRAM_IDLE)) 1218 zram_clear_flag(zram, index, ZRAM_IDLE); 1219 1220 if (zram_test_flag(zram, index, ZRAM_HUGE)) { 1221 zram_clear_flag(zram, index, ZRAM_HUGE); 1222 atomic64_dec(&zram->stats.huge_pages); 1223 } 1224 1225 if (zram_test_flag(zram, index, ZRAM_WB)) { 1226 zram_clear_flag(zram, index, ZRAM_WB); 1227 free_block_bdev(zram, zram_get_element(zram, index)); 1228 goto out; 1229 } 1230 1231 /* 1232 * No memory is allocated for same element filled pages. 1233 * Simply clear same page flag. 1234 */ 1235 if (zram_test_flag(zram, index, ZRAM_SAME)) { 1236 zram_clear_flag(zram, index, ZRAM_SAME); 1237 atomic64_dec(&zram->stats.same_pages); 1238 goto out; 1239 } 1240 1241 handle = zram_get_handle(zram, index); 1242 if (!handle) 1243 return; 1244 1245 zs_free(zram->mem_pool, handle); 1246 1247 atomic64_sub(zram_get_obj_size(zram, index), 1248 &zram->stats.compr_data_size); 1249 out: 1250 atomic64_dec(&zram->stats.pages_stored); 1251 zram_set_handle(zram, index, 0); 1252 zram_set_obj_size(zram, index, 0); 1253 WARN_ON_ONCE(zram->table[index].flags & 1254 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); 1255 } 1256 1257 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, 1258 struct bio *bio, bool partial_io) 1259 { 1260 struct zcomp_strm *zstrm; 1261 unsigned long handle; 1262 unsigned int size; 1263 void *src, *dst; 1264 int ret; 1265 1266 zram_slot_lock(zram, index); 1267 if (zram_test_flag(zram, index, ZRAM_WB)) { 1268 struct bio_vec bvec; 1269 1270 zram_slot_unlock(zram, index); 1271 1272 bvec.bv_page = page; 1273 bvec.bv_len = PAGE_SIZE; 1274 bvec.bv_offset = 0; 1275 return read_from_bdev(zram, &bvec, 1276 zram_get_element(zram, index), 1277 bio, partial_io); 1278 } 1279 1280 handle = zram_get_handle(zram, index); 1281 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { 1282 unsigned long value; 1283 void *mem; 1284 1285 value = handle ? zram_get_element(zram, index) : 0; 1286 mem = kmap_atomic(page); 1287 zram_fill_page(mem, PAGE_SIZE, value); 1288 kunmap_atomic(mem); 1289 zram_slot_unlock(zram, index); 1290 return 0; 1291 } 1292 1293 size = zram_get_obj_size(zram, index); 1294 1295 if (size != PAGE_SIZE) 1296 zstrm = zcomp_stream_get(zram->comp); 1297 1298 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1299 if (size == PAGE_SIZE) { 1300 dst = kmap_atomic(page); 1301 memcpy(dst, src, PAGE_SIZE); 1302 kunmap_atomic(dst); 1303 ret = 0; 1304 } else { 1305 dst = kmap_atomic(page); 1306 ret = zcomp_decompress(zstrm, src, size, dst); 1307 kunmap_atomic(dst); 1308 zcomp_stream_put(zram->comp); 1309 } 1310 zs_unmap_object(zram->mem_pool, handle); 1311 zram_slot_unlock(zram, index); 1312 1313 /* Should NEVER happen. Return bio error if it does. */ 1314 if (WARN_ON(ret)) 1315 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1316 1317 return ret; 1318 } 1319 1320 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1321 u32 index, int offset, struct bio *bio) 1322 { 1323 int ret; 1324 struct page *page; 1325 1326 page = bvec->bv_page; 1327 if (is_partial_io(bvec)) { 1328 /* Use a temporary buffer to decompress the page */ 1329 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1330 if (!page) 1331 return -ENOMEM; 1332 } 1333 1334 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); 1335 if (unlikely(ret)) 1336 goto out; 1337 1338 if (is_partial_io(bvec)) { 1339 void *dst = kmap_atomic(bvec->bv_page); 1340 void *src = kmap_atomic(page); 1341 1342 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len); 1343 kunmap_atomic(src); 1344 kunmap_atomic(dst); 1345 } 1346 out: 1347 if (is_partial_io(bvec)) 1348 __free_page(page); 1349 1350 return ret; 1351 } 1352 1353 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1354 u32 index, struct bio *bio) 1355 { 1356 int ret = 0; 1357 unsigned long alloced_pages; 1358 unsigned long handle = 0; 1359 unsigned int comp_len = 0; 1360 void *src, *dst, *mem; 1361 struct zcomp_strm *zstrm; 1362 struct page *page = bvec->bv_page; 1363 unsigned long element = 0; 1364 enum zram_pageflags flags = 0; 1365 1366 mem = kmap_atomic(page); 1367 if (page_same_filled(mem, &element)) { 1368 kunmap_atomic(mem); 1369 /* Free memory associated with this sector now. */ 1370 flags = ZRAM_SAME; 1371 atomic64_inc(&zram->stats.same_pages); 1372 goto out; 1373 } 1374 kunmap_atomic(mem); 1375 1376 compress_again: 1377 zstrm = zcomp_stream_get(zram->comp); 1378 src = kmap_atomic(page); 1379 ret = zcomp_compress(zstrm, src, &comp_len); 1380 kunmap_atomic(src); 1381 1382 if (unlikely(ret)) { 1383 zcomp_stream_put(zram->comp); 1384 pr_err("Compression failed! err=%d\n", ret); 1385 zs_free(zram->mem_pool, handle); 1386 return ret; 1387 } 1388 1389 if (comp_len >= huge_class_size) 1390 comp_len = PAGE_SIZE; 1391 /* 1392 * handle allocation has 2 paths: 1393 * a) fast path is executed with preemption disabled (for 1394 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 1395 * since we can't sleep; 1396 * b) slow path enables preemption and attempts to allocate 1397 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 1398 * put per-cpu compression stream and, thus, to re-do 1399 * the compression once handle is allocated. 1400 * 1401 * if we have a 'non-null' handle here then we are coming 1402 * from the slow path and handle has already been allocated. 1403 */ 1404 if (!handle) 1405 handle = zs_malloc(zram->mem_pool, comp_len, 1406 __GFP_KSWAPD_RECLAIM | 1407 __GFP_NOWARN | 1408 __GFP_HIGHMEM | 1409 __GFP_MOVABLE); 1410 if (!handle) { 1411 zcomp_stream_put(zram->comp); 1412 atomic64_inc(&zram->stats.writestall); 1413 handle = zs_malloc(zram->mem_pool, comp_len, 1414 GFP_NOIO | __GFP_HIGHMEM | 1415 __GFP_MOVABLE); 1416 if (handle) 1417 goto compress_again; 1418 return -ENOMEM; 1419 } 1420 1421 alloced_pages = zs_get_total_pages(zram->mem_pool); 1422 update_used_max(zram, alloced_pages); 1423 1424 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 1425 zcomp_stream_put(zram->comp); 1426 zs_free(zram->mem_pool, handle); 1427 return -ENOMEM; 1428 } 1429 1430 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1431 1432 src = zstrm->buffer; 1433 if (comp_len == PAGE_SIZE) 1434 src = kmap_atomic(page); 1435 memcpy(dst, src, comp_len); 1436 if (comp_len == PAGE_SIZE) 1437 kunmap_atomic(src); 1438 1439 zcomp_stream_put(zram->comp); 1440 zs_unmap_object(zram->mem_pool, handle); 1441 atomic64_add(comp_len, &zram->stats.compr_data_size); 1442 out: 1443 /* 1444 * Free memory associated with this sector 1445 * before overwriting unused sectors. 1446 */ 1447 zram_slot_lock(zram, index); 1448 zram_free_page(zram, index); 1449 1450 if (comp_len == PAGE_SIZE) { 1451 zram_set_flag(zram, index, ZRAM_HUGE); 1452 atomic64_inc(&zram->stats.huge_pages); 1453 atomic64_inc(&zram->stats.huge_pages_since); 1454 } 1455 1456 if (flags) { 1457 zram_set_flag(zram, index, flags); 1458 zram_set_element(zram, index, element); 1459 } else { 1460 zram_set_handle(zram, index, handle); 1461 zram_set_obj_size(zram, index, comp_len); 1462 } 1463 zram_slot_unlock(zram, index); 1464 1465 /* Update stats */ 1466 atomic64_inc(&zram->stats.pages_stored); 1467 return ret; 1468 } 1469 1470 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1471 u32 index, int offset, struct bio *bio) 1472 { 1473 int ret; 1474 struct page *page = NULL; 1475 void *src; 1476 struct bio_vec vec; 1477 1478 vec = *bvec; 1479 if (is_partial_io(bvec)) { 1480 void *dst; 1481 /* 1482 * This is a partial IO. We need to read the full page 1483 * before to write the changes. 1484 */ 1485 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1486 if (!page) 1487 return -ENOMEM; 1488 1489 ret = __zram_bvec_read(zram, page, index, bio, true); 1490 if (ret) 1491 goto out; 1492 1493 src = kmap_atomic(bvec->bv_page); 1494 dst = kmap_atomic(page); 1495 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len); 1496 kunmap_atomic(dst); 1497 kunmap_atomic(src); 1498 1499 vec.bv_page = page; 1500 vec.bv_len = PAGE_SIZE; 1501 vec.bv_offset = 0; 1502 } 1503 1504 ret = __zram_bvec_write(zram, &vec, index, bio); 1505 out: 1506 if (is_partial_io(bvec)) 1507 __free_page(page); 1508 return ret; 1509 } 1510 1511 /* 1512 * zram_bio_discard - handler on discard request 1513 * @index: physical block index in PAGE_SIZE units 1514 * @offset: byte offset within physical block 1515 */ 1516 static void zram_bio_discard(struct zram *zram, u32 index, 1517 int offset, struct bio *bio) 1518 { 1519 size_t n = bio->bi_iter.bi_size; 1520 1521 /* 1522 * zram manages data in physical block size units. Because logical block 1523 * size isn't identical with physical block size on some arch, we 1524 * could get a discard request pointing to a specific offset within a 1525 * certain physical block. Although we can handle this request by 1526 * reading that physiclal block and decompressing and partially zeroing 1527 * and re-compressing and then re-storing it, this isn't reasonable 1528 * because our intent with a discard request is to save memory. So 1529 * skipping this logical block is appropriate here. 1530 */ 1531 if (offset) { 1532 if (n <= (PAGE_SIZE - offset)) 1533 return; 1534 1535 n -= (PAGE_SIZE - offset); 1536 index++; 1537 } 1538 1539 while (n >= PAGE_SIZE) { 1540 zram_slot_lock(zram, index); 1541 zram_free_page(zram, index); 1542 zram_slot_unlock(zram, index); 1543 atomic64_inc(&zram->stats.notify_free); 1544 index++; 1545 n -= PAGE_SIZE; 1546 } 1547 } 1548 1549 /* 1550 * Returns errno if it has some problem. Otherwise return 0 or 1. 1551 * Returns 0 if IO request was done synchronously 1552 * Returns 1 if IO request was successfully submitted. 1553 */ 1554 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 1555 int offset, unsigned int op, struct bio *bio) 1556 { 1557 int ret; 1558 1559 if (!op_is_write(op)) { 1560 atomic64_inc(&zram->stats.num_reads); 1561 ret = zram_bvec_read(zram, bvec, index, offset, bio); 1562 flush_dcache_page(bvec->bv_page); 1563 } else { 1564 atomic64_inc(&zram->stats.num_writes); 1565 ret = zram_bvec_write(zram, bvec, index, offset, bio); 1566 } 1567 1568 zram_slot_lock(zram, index); 1569 zram_accessed(zram, index); 1570 zram_slot_unlock(zram, index); 1571 1572 if (unlikely(ret < 0)) { 1573 if (!op_is_write(op)) 1574 atomic64_inc(&zram->stats.failed_reads); 1575 else 1576 atomic64_inc(&zram->stats.failed_writes); 1577 } 1578 1579 return ret; 1580 } 1581 1582 static void __zram_make_request(struct zram *zram, struct bio *bio) 1583 { 1584 int offset; 1585 u32 index; 1586 struct bio_vec bvec; 1587 struct bvec_iter iter; 1588 unsigned long start_time; 1589 1590 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1591 offset = (bio->bi_iter.bi_sector & 1592 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1593 1594 switch (bio_op(bio)) { 1595 case REQ_OP_DISCARD: 1596 case REQ_OP_WRITE_ZEROES: 1597 zram_bio_discard(zram, index, offset, bio); 1598 bio_endio(bio); 1599 return; 1600 default: 1601 break; 1602 } 1603 1604 start_time = bio_start_io_acct(bio); 1605 bio_for_each_segment(bvec, bio, iter) { 1606 struct bio_vec bv = bvec; 1607 unsigned int unwritten = bvec.bv_len; 1608 1609 do { 1610 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, 1611 unwritten); 1612 if (zram_bvec_rw(zram, &bv, index, offset, 1613 bio_op(bio), bio) < 0) { 1614 bio->bi_status = BLK_STS_IOERR; 1615 break; 1616 } 1617 1618 bv.bv_offset += bv.bv_len; 1619 unwritten -= bv.bv_len; 1620 1621 update_position(&index, &offset, &bv); 1622 } while (unwritten); 1623 } 1624 bio_end_io_acct(bio, start_time); 1625 bio_endio(bio); 1626 } 1627 1628 /* 1629 * Handler function for all zram I/O requests. 1630 */ 1631 static void zram_submit_bio(struct bio *bio) 1632 { 1633 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 1634 1635 if (!valid_io_request(zram, bio->bi_iter.bi_sector, 1636 bio->bi_iter.bi_size)) { 1637 atomic64_inc(&zram->stats.invalid_io); 1638 bio_io_error(bio); 1639 return; 1640 } 1641 1642 __zram_make_request(zram, bio); 1643 } 1644 1645 static void zram_slot_free_notify(struct block_device *bdev, 1646 unsigned long index) 1647 { 1648 struct zram *zram; 1649 1650 zram = bdev->bd_disk->private_data; 1651 1652 atomic64_inc(&zram->stats.notify_free); 1653 if (!zram_slot_trylock(zram, index)) { 1654 atomic64_inc(&zram->stats.miss_free); 1655 return; 1656 } 1657 1658 zram_free_page(zram, index); 1659 zram_slot_unlock(zram, index); 1660 } 1661 1662 static int zram_rw_page(struct block_device *bdev, sector_t sector, 1663 struct page *page, unsigned int op) 1664 { 1665 int offset, ret; 1666 u32 index; 1667 struct zram *zram; 1668 struct bio_vec bv; 1669 unsigned long start_time; 1670 1671 if (PageTransHuge(page)) 1672 return -ENOTSUPP; 1673 zram = bdev->bd_disk->private_data; 1674 1675 if (!valid_io_request(zram, sector, PAGE_SIZE)) { 1676 atomic64_inc(&zram->stats.invalid_io); 1677 ret = -EINVAL; 1678 goto out; 1679 } 1680 1681 index = sector >> SECTORS_PER_PAGE_SHIFT; 1682 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1683 1684 bv.bv_page = page; 1685 bv.bv_len = PAGE_SIZE; 1686 bv.bv_offset = 0; 1687 1688 start_time = disk_start_io_acct(bdev->bd_disk, SECTORS_PER_PAGE, op); 1689 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); 1690 disk_end_io_acct(bdev->bd_disk, op, start_time); 1691 out: 1692 /* 1693 * If I/O fails, just return error(ie, non-zero) without 1694 * calling page_endio. 1695 * It causes resubmit the I/O with bio request by upper functions 1696 * of rw_page(e.g., swap_readpage, __swap_writepage) and 1697 * bio->bi_end_io does things to handle the error 1698 * (e.g., SetPageError, set_page_dirty and extra works). 1699 */ 1700 if (unlikely(ret < 0)) 1701 return ret; 1702 1703 switch (ret) { 1704 case 0: 1705 page_endio(page, op_is_write(op), 0); 1706 break; 1707 case 1: 1708 ret = 0; 1709 break; 1710 default: 1711 WARN_ON(1); 1712 } 1713 return ret; 1714 } 1715 1716 static void zram_reset_device(struct zram *zram) 1717 { 1718 struct zcomp *comp; 1719 u64 disksize; 1720 1721 down_write(&zram->init_lock); 1722 1723 zram->limit_pages = 0; 1724 1725 if (!init_done(zram)) { 1726 up_write(&zram->init_lock); 1727 return; 1728 } 1729 1730 comp = zram->comp; 1731 disksize = zram->disksize; 1732 zram->disksize = 0; 1733 1734 set_capacity_and_notify(zram->disk, 0); 1735 part_stat_set_all(zram->disk->part0, 0); 1736 1737 /* I/O operation under all of CPU are done so let's free */ 1738 zram_meta_free(zram, disksize); 1739 memset(&zram->stats, 0, sizeof(zram->stats)); 1740 zcomp_destroy(comp); 1741 reset_bdev(zram); 1742 1743 up_write(&zram->init_lock); 1744 } 1745 1746 static ssize_t disksize_store(struct device *dev, 1747 struct device_attribute *attr, const char *buf, size_t len) 1748 { 1749 u64 disksize; 1750 struct zcomp *comp; 1751 struct zram *zram = dev_to_zram(dev); 1752 int err; 1753 1754 disksize = memparse(buf, NULL); 1755 if (!disksize) 1756 return -EINVAL; 1757 1758 down_write(&zram->init_lock); 1759 if (init_done(zram)) { 1760 pr_info("Cannot change disksize for initialized device\n"); 1761 err = -EBUSY; 1762 goto out_unlock; 1763 } 1764 1765 disksize = PAGE_ALIGN(disksize); 1766 if (!zram_meta_alloc(zram, disksize)) { 1767 err = -ENOMEM; 1768 goto out_unlock; 1769 } 1770 1771 comp = zcomp_create(zram->compressor); 1772 if (IS_ERR(comp)) { 1773 pr_err("Cannot initialise %s compressing backend\n", 1774 zram->compressor); 1775 err = PTR_ERR(comp); 1776 goto out_free_meta; 1777 } 1778 1779 zram->comp = comp; 1780 zram->disksize = disksize; 1781 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 1782 up_write(&zram->init_lock); 1783 1784 return len; 1785 1786 out_free_meta: 1787 zram_meta_free(zram, disksize); 1788 out_unlock: 1789 up_write(&zram->init_lock); 1790 return err; 1791 } 1792 1793 static ssize_t reset_store(struct device *dev, 1794 struct device_attribute *attr, const char *buf, size_t len) 1795 { 1796 int ret; 1797 unsigned short do_reset; 1798 struct zram *zram; 1799 struct block_device *bdev; 1800 1801 ret = kstrtou16(buf, 10, &do_reset); 1802 if (ret) 1803 return ret; 1804 1805 if (!do_reset) 1806 return -EINVAL; 1807 1808 zram = dev_to_zram(dev); 1809 bdev = zram->disk->part0; 1810 1811 mutex_lock(&bdev->bd_disk->open_mutex); 1812 /* Do not reset an active device or claimed device */ 1813 if (bdev->bd_openers || zram->claim) { 1814 mutex_unlock(&bdev->bd_disk->open_mutex); 1815 return -EBUSY; 1816 } 1817 1818 /* From now on, anyone can't open /dev/zram[0-9] */ 1819 zram->claim = true; 1820 mutex_unlock(&bdev->bd_disk->open_mutex); 1821 1822 /* Make sure all the pending I/O are finished */ 1823 sync_blockdev(bdev); 1824 zram_reset_device(zram); 1825 1826 mutex_lock(&bdev->bd_disk->open_mutex); 1827 zram->claim = false; 1828 mutex_unlock(&bdev->bd_disk->open_mutex); 1829 1830 return len; 1831 } 1832 1833 static int zram_open(struct block_device *bdev, fmode_t mode) 1834 { 1835 int ret = 0; 1836 struct zram *zram; 1837 1838 WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); 1839 1840 zram = bdev->bd_disk->private_data; 1841 /* zram was claimed to reset so open request fails */ 1842 if (zram->claim) 1843 ret = -EBUSY; 1844 1845 return ret; 1846 } 1847 1848 static const struct block_device_operations zram_devops = { 1849 .open = zram_open, 1850 .submit_bio = zram_submit_bio, 1851 .swap_slot_free_notify = zram_slot_free_notify, 1852 .rw_page = zram_rw_page, 1853 .owner = THIS_MODULE 1854 }; 1855 1856 #ifdef CONFIG_ZRAM_WRITEBACK 1857 static const struct block_device_operations zram_wb_devops = { 1858 .open = zram_open, 1859 .submit_bio = zram_submit_bio, 1860 .swap_slot_free_notify = zram_slot_free_notify, 1861 .owner = THIS_MODULE 1862 }; 1863 #endif 1864 1865 static DEVICE_ATTR_WO(compact); 1866 static DEVICE_ATTR_RW(disksize); 1867 static DEVICE_ATTR_RO(initstate); 1868 static DEVICE_ATTR_WO(reset); 1869 static DEVICE_ATTR_WO(mem_limit); 1870 static DEVICE_ATTR_WO(mem_used_max); 1871 static DEVICE_ATTR_WO(idle); 1872 static DEVICE_ATTR_RW(max_comp_streams); 1873 static DEVICE_ATTR_RW(comp_algorithm); 1874 #ifdef CONFIG_ZRAM_WRITEBACK 1875 static DEVICE_ATTR_RW(backing_dev); 1876 static DEVICE_ATTR_WO(writeback); 1877 static DEVICE_ATTR_RW(writeback_limit); 1878 static DEVICE_ATTR_RW(writeback_limit_enable); 1879 #endif 1880 1881 static struct attribute *zram_disk_attrs[] = { 1882 &dev_attr_disksize.attr, 1883 &dev_attr_initstate.attr, 1884 &dev_attr_reset.attr, 1885 &dev_attr_compact.attr, 1886 &dev_attr_mem_limit.attr, 1887 &dev_attr_mem_used_max.attr, 1888 &dev_attr_idle.attr, 1889 &dev_attr_max_comp_streams.attr, 1890 &dev_attr_comp_algorithm.attr, 1891 #ifdef CONFIG_ZRAM_WRITEBACK 1892 &dev_attr_backing_dev.attr, 1893 &dev_attr_writeback.attr, 1894 &dev_attr_writeback_limit.attr, 1895 &dev_attr_writeback_limit_enable.attr, 1896 #endif 1897 &dev_attr_io_stat.attr, 1898 &dev_attr_mm_stat.attr, 1899 #ifdef CONFIG_ZRAM_WRITEBACK 1900 &dev_attr_bd_stat.attr, 1901 #endif 1902 &dev_attr_debug_stat.attr, 1903 NULL, 1904 }; 1905 1906 static const struct attribute_group zram_disk_attr_group = { 1907 .attrs = zram_disk_attrs, 1908 }; 1909 1910 static const struct attribute_group *zram_disk_attr_groups[] = { 1911 &zram_disk_attr_group, 1912 NULL, 1913 }; 1914 1915 /* 1916 * Allocate and initialize new zram device. the function returns 1917 * '>= 0' device_id upon success, and negative value otherwise. 1918 */ 1919 static int zram_add(void) 1920 { 1921 struct zram *zram; 1922 int ret, device_id; 1923 1924 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1925 if (!zram) 1926 return -ENOMEM; 1927 1928 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1929 if (ret < 0) 1930 goto out_free_dev; 1931 device_id = ret; 1932 1933 init_rwsem(&zram->init_lock); 1934 #ifdef CONFIG_ZRAM_WRITEBACK 1935 spin_lock_init(&zram->wb_limit_lock); 1936 #endif 1937 1938 /* gendisk structure */ 1939 zram->disk = blk_alloc_disk(NUMA_NO_NODE); 1940 if (!zram->disk) { 1941 pr_err("Error allocating disk structure for device %d\n", 1942 device_id); 1943 ret = -ENOMEM; 1944 goto out_free_idr; 1945 } 1946 1947 zram->disk->major = zram_major; 1948 zram->disk->first_minor = device_id; 1949 zram->disk->minors = 1; 1950 zram->disk->fops = &zram_devops; 1951 zram->disk->private_data = zram; 1952 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1953 1954 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1955 set_capacity(zram->disk, 0); 1956 /* zram devices sort of resembles non-rotational disks */ 1957 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); 1958 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1959 1960 /* 1961 * To ensure that we always get PAGE_SIZE aligned 1962 * and n*PAGE_SIZED sized I/O requests. 1963 */ 1964 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1965 blk_queue_logical_block_size(zram->disk->queue, 1966 ZRAM_LOGICAL_BLOCK_SIZE); 1967 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1968 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1969 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1970 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1971 blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue); 1972 1973 /* 1974 * zram_bio_discard() will clear all logical blocks if logical block 1975 * size is identical with physical block size(PAGE_SIZE). But if it is 1976 * different, we will skip discarding some parts of logical blocks in 1977 * the part of the request range which isn't aligned to physical block 1978 * size. So we can't ensure that all discarded logical blocks are 1979 * zeroed. 1980 */ 1981 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1982 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); 1983 1984 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); 1985 ret = device_add_disk(NULL, zram->disk, zram_disk_attr_groups); 1986 if (ret) 1987 goto out_cleanup_disk; 1988 1989 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1990 1991 zram_debugfs_register(zram); 1992 pr_info("Added device: %s\n", zram->disk->disk_name); 1993 return device_id; 1994 1995 out_cleanup_disk: 1996 blk_cleanup_disk(zram->disk); 1997 out_free_idr: 1998 idr_remove(&zram_index_idr, device_id); 1999 out_free_dev: 2000 kfree(zram); 2001 return ret; 2002 } 2003 2004 static int zram_remove(struct zram *zram) 2005 { 2006 struct block_device *bdev = zram->disk->part0; 2007 bool claimed; 2008 2009 mutex_lock(&bdev->bd_disk->open_mutex); 2010 if (bdev->bd_openers) { 2011 mutex_unlock(&bdev->bd_disk->open_mutex); 2012 return -EBUSY; 2013 } 2014 2015 claimed = zram->claim; 2016 if (!claimed) 2017 zram->claim = true; 2018 mutex_unlock(&bdev->bd_disk->open_mutex); 2019 2020 zram_debugfs_unregister(zram); 2021 2022 if (claimed) { 2023 /* 2024 * If we were claimed by reset_store(), del_gendisk() will 2025 * wait until reset_store() is done, so nothing need to do. 2026 */ 2027 ; 2028 } else { 2029 /* Make sure all the pending I/O are finished */ 2030 sync_blockdev(bdev); 2031 zram_reset_device(zram); 2032 } 2033 2034 pr_info("Removed device: %s\n", zram->disk->disk_name); 2035 2036 del_gendisk(zram->disk); 2037 2038 /* del_gendisk drains pending reset_store */ 2039 WARN_ON_ONCE(claimed && zram->claim); 2040 2041 /* 2042 * disksize_store() may be called in between zram_reset_device() 2043 * and del_gendisk(), so run the last reset to avoid leaking 2044 * anything allocated with disksize_store() 2045 */ 2046 zram_reset_device(zram); 2047 2048 blk_cleanup_disk(zram->disk); 2049 kfree(zram); 2050 return 0; 2051 } 2052 2053 /* zram-control sysfs attributes */ 2054 2055 /* 2056 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 2057 * sense that reading from this file does alter the state of your system -- it 2058 * creates a new un-initialized zram device and returns back this device's 2059 * device_id (or an error code if it fails to create a new device). 2060 */ 2061 static ssize_t hot_add_show(struct class *class, 2062 struct class_attribute *attr, 2063 char *buf) 2064 { 2065 int ret; 2066 2067 mutex_lock(&zram_index_mutex); 2068 ret = zram_add(); 2069 mutex_unlock(&zram_index_mutex); 2070 2071 if (ret < 0) 2072 return ret; 2073 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 2074 } 2075 static struct class_attribute class_attr_hot_add = 2076 __ATTR(hot_add, 0400, hot_add_show, NULL); 2077 2078 static ssize_t hot_remove_store(struct class *class, 2079 struct class_attribute *attr, 2080 const char *buf, 2081 size_t count) 2082 { 2083 struct zram *zram; 2084 int ret, dev_id; 2085 2086 /* dev_id is gendisk->first_minor, which is `int' */ 2087 ret = kstrtoint(buf, 10, &dev_id); 2088 if (ret) 2089 return ret; 2090 if (dev_id < 0) 2091 return -EINVAL; 2092 2093 mutex_lock(&zram_index_mutex); 2094 2095 zram = idr_find(&zram_index_idr, dev_id); 2096 if (zram) { 2097 ret = zram_remove(zram); 2098 if (!ret) 2099 idr_remove(&zram_index_idr, dev_id); 2100 } else { 2101 ret = -ENODEV; 2102 } 2103 2104 mutex_unlock(&zram_index_mutex); 2105 return ret ? ret : count; 2106 } 2107 static CLASS_ATTR_WO(hot_remove); 2108 2109 static struct attribute *zram_control_class_attrs[] = { 2110 &class_attr_hot_add.attr, 2111 &class_attr_hot_remove.attr, 2112 NULL, 2113 }; 2114 ATTRIBUTE_GROUPS(zram_control_class); 2115 2116 static struct class zram_control_class = { 2117 .name = "zram-control", 2118 .owner = THIS_MODULE, 2119 .class_groups = zram_control_class_groups, 2120 }; 2121 2122 static int zram_remove_cb(int id, void *ptr, void *data) 2123 { 2124 WARN_ON_ONCE(zram_remove(ptr)); 2125 return 0; 2126 } 2127 2128 static void destroy_devices(void) 2129 { 2130 class_unregister(&zram_control_class); 2131 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 2132 zram_debugfs_destroy(); 2133 idr_destroy(&zram_index_idr); 2134 unregister_blkdev(zram_major, "zram"); 2135 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2136 } 2137 2138 static int __init zram_init(void) 2139 { 2140 int ret; 2141 2142 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 2143 zcomp_cpu_up_prepare, zcomp_cpu_dead); 2144 if (ret < 0) 2145 return ret; 2146 2147 ret = class_register(&zram_control_class); 2148 if (ret) { 2149 pr_err("Unable to register zram-control class\n"); 2150 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2151 return ret; 2152 } 2153 2154 zram_debugfs_create(); 2155 zram_major = register_blkdev(0, "zram"); 2156 if (zram_major <= 0) { 2157 pr_err("Unable to get major number\n"); 2158 class_unregister(&zram_control_class); 2159 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2160 return -EBUSY; 2161 } 2162 2163 while (num_devices != 0) { 2164 mutex_lock(&zram_index_mutex); 2165 ret = zram_add(); 2166 mutex_unlock(&zram_index_mutex); 2167 if (ret < 0) 2168 goto out_error; 2169 num_devices--; 2170 } 2171 2172 return 0; 2173 2174 out_error: 2175 destroy_devices(); 2176 return ret; 2177 } 2178 2179 static void __exit zram_exit(void) 2180 { 2181 destroy_devices(); 2182 } 2183 2184 module_init(zram_init); 2185 module_exit(zram_exit); 2186 2187 module_param(num_devices, uint, 0); 2188 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 2189 2190 MODULE_LICENSE("Dual BSD/GPL"); 2191 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 2192 MODULE_DESCRIPTION("Compressed RAM Block Device"); 2193