/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/debugfs.h>
#include <linux/cpuhotplug.h>
#include <linux/part_stat.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = CONFIG_ZRAM_DEF_COMP;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
/*
 * Pages that compress to sizes equal to or greater than this are stored
 * uncompressed in memory.
 */
static size_t huge_class_size;

static const struct block_device_operations zram_devops;
#ifdef CONFIG_ZRAM_WRITEBACK
static const struct block_device_operations zram_wb_devops;
#endif

static void zram_free_page(struct zram *zram, size_t index);
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio);


static int zram_slot_trylock(struct zram *zram, u32 index)
{
	return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags);
}

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static bool zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].flags &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

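/*
 * Each table entry packs the compressed object size into the low
 * ZRAM_FLAG_SHIFT bits of ->flags; the zram_pageflags live in the bits
 * above them.
 */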
static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT;

	zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline bool zram_allocated(struct zram *zram, u32 index)
{
	return zram_get_obj_size(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME) ||
			zram_test_flag(zram, index, ZRAM_WB);
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return false;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

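/*
 * Set an upper bound on the zsmalloc pool size. The value is parsed with
 * memparse(), so size suffixes work (e.g. "echo 512M > mem_limit");
 * writing 0 removes the limit.
 */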
static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * Mark all pages which are older than or equal to cutoff as IDLE.
 * Callers should hold the zram init lock in read mode.
 */
static void mark_idle(struct zram *zram, ktime_t cutoff)
{
	int is_idle = 1;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	int index;

	for (index = 0; index < nr_pages; index++) {
		/*
		 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
		 * See the comment in writeback_store.
		 */
		zram_slot_lock(zram, index);
		if (zram_allocated(zram, index) &&
				!zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
			is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time);
#endif
			if (is_idle)
				zram_set_flag(zram, index, ZRAM_IDLE);
		}
		zram_slot_unlock(zram, index);
	}
}

static ssize_t idle_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	ktime_t cutoff_time = 0;
	ssize_t rv = -EINVAL;

	if (!sysfs_streq(buf, "all")) {
		/*
		 * If it did not parse as 'all' try to treat it as an integer
		 * when we have memory tracking enabled.
		 */
		u64 age_sec;

		if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec))
			cutoff_time = ktime_sub(ktime_get_boottime(),
					ns_to_ktime(age_sec * NSEC_PER_SEC));
		else
			goto out;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram))
		goto out_unlock;

	/*
	 * A cutoff_time of 0 marks everything as idle, this is the
	 * "all" behavior.
	 */
	mark_idle(zram, cutoff_time);
	rv = len;

out_unlock:
	up_read(&zram->init_lock);
out:
	return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static ssize_t writeback_limit_enable_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->wb_limit_enable = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_enable_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	bool val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->wb_limit_enable;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t writeback_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	u64 val;
	ssize_t ret = -EINVAL;

	if (kstrtoull(buf, 10, &val))
		return ret;

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	zram->bd_wb_limit = val;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);
	ret = len;

	return ret;
}

static ssize_t writeback_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	spin_lock(&zram->wb_limit_lock);
	val = zram->bd_wb_limit;
	spin_unlock(&zram->wb_limit_lock);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram->backing_dev)
		return;

	bdev = zram->bdev;
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->bdev = NULL;
	zram->disk->fops = &zram_devops;
	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct file *file;
	struct zram *zram = dev_to_zram(dev);
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	file = zram->backing_dev;
	if (!file) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

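/*
 * Attach a backing block device for writeback. The path written via sysfs
 * is opened exclusively, and a bitmap with one bit per backing-device page
 * tracks which blocks are in use. This must happen before disksize is set.
 */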
static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	size_t sz;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strscpy(file_name, buf, PATH_MAX);
	/* ignore trailing newline */
	sz = strlen(file_name);
	if (sz > 0 && file_name[sz - 1] == '\n')
		file_name[sz - 1] = 0x00;

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = blkdev_get_by_dev(inode->i_rdev,
			FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (IS_ERR(bdev)) {
		err = PTR_ERR(bdev);
		bdev = NULL;
		goto out;
	}

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	reset_bdev(zram);

	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	/*
	 * With the writeback feature, zram does asynchronous IO, so it is no
	 * longer a synchronous device: drop the synchronous IO fops.
	 * Otherwise, an upper layer (e.g., swap) could wait for IO completion
	 * rather than submit-and-return, which would make the system
	 * sluggish. Furthermore, when the IO function returns (e.g.,
	 * swap_readpage), the upper layer expects the IO to be done and may
	 * free the page while the IO is in fact still in flight, eventually
	 * causing a use-after-free once the IO really completes.
	 */
	zram->disk->fops = &zram_wb_devops;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

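/*
 * Claim a free block on the backing device by scanning the bitmap.
 * Returns the block index, or 0 when the backing device is full.
 */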
static unsigned long alloc_block_bdev(struct zram *zram)
{
	unsigned long blk_idx = 1;
retry:
	/* skip bit 0, so a block index is never confused with zram.handle == 0 */
	blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx);
	if (blk_idx == zram->nr_pages)
		return 0;

	if (test_and_set_bit(blk_idx, zram->bitmap))
		goto retry;

	atomic64_inc(&zram->stats.bd_count);
	return blk_idx;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx)
{
	int was_set;

	was_set = test_and_clear_bit(blk_idx, zram->bitmap);
	WARN_ON_ONCE(!was_set);
	atomic64_dec(&zram->stats.bd_count);
}

static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio_first_page_all(bio);

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ,
			GFP_NOIO);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent)
		bio->bi_end_io = zram_page_end_io;
	else
		bio_chain(bio, parent);

	submit_bio(bio);
	return 1;
}

#define PAGE_WB_SIG "page_index="

#define PAGE_WRITEBACK			0
#define HUGE_WRITEBACK			(1<<0)
#define IDLE_WRITEBACK			(1<<1)


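/*
 * Write slots out to the backing device. The sysfs writeback attribute
 * accepts "idle", "huge", "huge_idle", or "page_index=<n>" for a single
 * slot, e.g.:
 *	echo idle > /sys/block/zram0/writeback
 */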
static ssize_t writeback_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	unsigned long index = 0;
	struct bio bio;
	struct bio_vec bio_vec;
	struct page *page;
	ssize_t ret = len;
	int mode, err;
	unsigned long blk_idx = 0;

	if (sysfs_streq(buf, "idle"))
		mode = IDLE_WRITEBACK;
	else if (sysfs_streq(buf, "huge"))
		mode = HUGE_WRITEBACK;
	else if (sysfs_streq(buf, "huge_idle"))
		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
	else {
		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
			return -EINVAL;

		if (kstrtoul(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
				index >= nr_pages)
			return -EINVAL;

		nr_pages = 1;
		mode = PAGE_WRITEBACK;
	}

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		ret = -EINVAL;
		goto release_init_lock;
	}

	if (!zram->backing_dev) {
		ret = -ENODEV;
		goto release_init_lock;
	}

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		ret = -ENOMEM;
		goto release_init_lock;
	}

	for (; nr_pages != 0; index++, nr_pages--) {
		struct bio_vec bvec;

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;

		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && !zram->bd_wb_limit) {
			spin_unlock(&zram->wb_limit_lock);
			ret = -EIO;
			break;
		}
		spin_unlock(&zram->wb_limit_lock);

		if (!blk_idx) {
			blk_idx = alloc_block_bdev(zram);
			if (!blk_idx) {
				ret = -ENOSPC;
				break;
			}
		}

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		if (zram_test_flag(zram, index, ZRAM_WB) ||
				zram_test_flag(zram, index, ZRAM_SAME) ||
				zram_test_flag(zram, index, ZRAM_UNDER_WB))
			goto next;

		if (mode & IDLE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_IDLE))
			goto next;
		if (mode & HUGE_WRITEBACK &&
			  !zram_test_flag(zram, index, ZRAM_HUGE))
			goto next;
		/*
		 * Clearing ZRAM_UNDER_WB is the duty of the caller.
		 * IOW, zram_free_page never clears it.
		 */
		zram_set_flag(zram, index, ZRAM_UNDER_WB);
		/* Needed because of the hugepage writeback race */
		zram_set_flag(zram, index, ZRAM_IDLE);
		zram_slot_unlock(zram, index);
		if (zram_bvec_read(zram, &bvec, index, 0, NULL)) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			continue;
		}

		bio_init(&bio, zram->bdev, &bio_vec, 1,
			 REQ_OP_WRITE | REQ_SYNC);
		bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9);

		bio_add_page(&bio, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset);
		/*
		 * XXX: A single-page IO is inefficient for writes, but it
		 * is not a bad starting point.
		 */
		err = submit_bio_wait(&bio);
		if (err) {
			zram_slot_lock(zram, index);
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			zram_slot_unlock(zram, index);
			/*
			 * Return the last IO error unless every IO
			 * succeeded.
			 */
			ret = err;
			continue;
		}

		atomic64_inc(&zram->stats.bd_writes);
		/*
		 * We released zram_slot_lock, so we need to check whether
		 * the slot was changed. If the slot was freed, we can catch
		 * that easily with zram_allocated.
		 * A subtle case is when the slot is freed/reallocated/marked
		 * as ZRAM_IDLE again. To close that race, idle_store doesn't
		 * mark ZRAM_IDLE once it finds the slot is ZRAM_UNDER_WB.
		 * Thus, we can close the race by checking the ZRAM_IDLE bit.
		 */
		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index) ||
			  !zram_test_flag(zram, index, ZRAM_IDLE)) {
			zram_clear_flag(zram, index, ZRAM_UNDER_WB);
			zram_clear_flag(zram, index, ZRAM_IDLE);
			goto next;
		}

		zram_free_page(zram, index);
		zram_clear_flag(zram, index, ZRAM_UNDER_WB);
		zram_set_flag(zram, index, ZRAM_WB);
		zram_set_element(zram, index, blk_idx);
		blk_idx = 0;
		atomic64_inc(&zram->stats.pages_stored);
		spin_lock(&zram->wb_limit_lock);
		if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
			zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
		spin_unlock(&zram->wb_limit_lock);
next:
		zram_slot_unlock(zram, index);
	}

	if (blk_idx)
		free_block_bdev(zram, blk_idx);
	__free_page(page);
release_init_lock:
	up_read(&zram->init_lock);

	return ret;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
	struct bio_vec bvec;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &zw->bvec, entry, bio);
}

/*
 * The block layer wants one ->submit_bio to be active at a time, so if we
 * use chained IO with the parent IO in the same context, it's a deadlock.
 * To avoid that, use a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.bvec = *bvec;
	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	atomic64_inc(&zram->stats.bd_reads);
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}
#else
static inline void reset_bdev(struct zram *zram) {};
static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}

static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {};
#endif

#ifdef CONFIG_ZRAM_MEMORY_TRACKING

static struct dentry *zram_debugfs_root;

static void zram_debugfs_create(void)
{
	zram_debugfs_root = debugfs_create_dir("zram", NULL);
}

static void zram_debugfs_destroy(void)
{
	debugfs_remove_recursive(zram_debugfs_root);
}

static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
	zram->table[index].ac_time = ktime_get_boottime();
}

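/*
 * Dump per-slot state via debugfs: one line per allocated slot with the
 * index, the last access time, and the s/w/h/i flags (same-filled,
 * written-back, huge, idle).
 */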
static ssize_t read_block_state(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	char *kbuf;
	ssize_t index, written = 0;
	struct zram *zram = file->private_data;
	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
	struct timespec64 ts;

	kbuf = kvmalloc(count, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		kvfree(kbuf);
		return -EINVAL;
	}

	for (index = *ppos; index < nr_pages; index++) {
		int copied;

		zram_slot_lock(zram, index);
		if (!zram_allocated(zram, index))
			goto next;

		ts = ktime_to_timespec64(zram->table[index].ac_time);
		copied = snprintf(kbuf + written, count,
			"%12zd %12lld.%06lu %c%c%c%c\n",
			index, (s64)ts.tv_sec,
			ts.tv_nsec / NSEC_PER_USEC,
			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');

		if (count <= copied) {
			zram_slot_unlock(zram, index);
			break;
		}
		written += copied;
		count -= copied;
next:
		zram_slot_unlock(zram, index);
		*ppos += 1;
	}

	up_read(&zram->init_lock);
	if (copy_to_user(buf, kbuf, written))
		written = -EFAULT;
	kvfree(kbuf);

	return written;
}

static const struct file_operations proc_zram_block_state_op = {
	.open = simple_open,
	.read = read_block_state,
	.llseek = default_llseek,
};

static void zram_debugfs_register(struct zram *zram)
{
	if (!zram_debugfs_root)
		return;

	zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
						zram_debugfs_root);
	debugfs_create_file("block_state", 0400, zram->debugfs_dir,
				zram, &proc_zram_block_state_op);
}

static void zram_debugfs_unregister(struct zram *zram)
{
	debugfs_remove_recursive(zram->debugfs_dir);
}
#else
static void zram_debugfs_create(void) {};
static void zram_debugfs_destroy(void) {};
static void zram_accessed(struct zram *zram, u32 index)
{
	zram_clear_flag(zram, index, ZRAM_IDLE);
};
static void zram_debugfs_register(struct zram *zram) {};
static void zram_debugfs_unregister(struct zram *zram) {};
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strscpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}

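/*
 * Trigger zsmalloc pool compaction. Any write to the sysfs "compact"
 * attribute works; the written value is ignored, e.g.:
 *	echo 1 > /sys/block/zram0/compact
 */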
static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			atomic_long_read(&pool_stats.pages_compacted),
			(u64)atomic64_read(&zram->stats.huge_pages),
			(u64)atomic64_read(&zram->stats.huge_pages_since));
	up_read(&zram->init_lock);

	return ret;
}

#ifdef CONFIG_ZRAM_WRITEBACK
#define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12)))
static ssize_t bd_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
		"%8llu %8llu %8llu\n",
			FOUR_K((u64)atomic64_read(&zram->stats.bd_count)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)),
			FOUR_K((u64)atomic64_read(&zram->stats.bd_writes)));
	up_read(&zram->init_lock);

	return ret;
}
#endif

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu %8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall),
			(u64)atomic64_read(&zram->stats.miss_free));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RO(bd_stat);
#endif
static DEVICE_ATTR_RO(debug_stat);

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

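/*
 * Allocate the per-slot metadata table (one entry per PAGE_SIZE of the
 * disksize) and the backing zsmalloc pool.
 */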
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	if (!huge_class_size)
		huge_class_size = zs_huge_class_size(zram->mem_pool);
	return true;
}

/*
 * To protect against concurrent access to the same index entry, the caller
 * should hold this table entry's bit_spinlock, indicating that the entry
 * is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

#ifdef CONFIG_ZRAM_MEMORY_TRACKING
	zram->table[index].ac_time = 0;
#endif
	if (zram_test_flag(zram, index, ZRAM_IDLE))
		zram_clear_flag(zram, index, ZRAM_IDLE);

	if (zram_test_flag(zram, index, ZRAM_HUGE)) {
		zram_clear_flag(zram, index, ZRAM_HUGE);
		atomic64_dec(&zram->stats.huge_pages);
	}

	if (zram_test_flag(zram, index, ZRAM_WB)) {
		zram_clear_flag(zram, index, ZRAM_WB);
		free_block_bdev(zram, zram_get_element(zram, index));
		goto out;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		atomic64_dec(&zram->stats.same_pages);
		goto out;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
out:
	atomic64_dec(&zram->stats.pages_stored);
	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
	WARN_ON_ONCE(zram->table[index].flags &
		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
}

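/*
 * Read the data at @index into @page: from the backing device for
 * written-back slots, by refilling the pattern for same-filled slots,
 * or by decompressing (or copying, for huge pages) out of zsmalloc.
 */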
static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	struct zcomp_strm *zstrm;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;
	int ret;

	zram_slot_lock(zram, index);
	if (zram_test_flag(zram, index, ZRAM_WB)) {
		struct bio_vec bvec;

		zram_slot_unlock(zram, index);

		bvec.bv_page = page;
		bvec.bv_len = PAGE_SIZE;
		bvec.bv_offset = 0;
		return read_from_bdev(zram, &bvec,
				zram_get_element(zram, index),
				bio, partial_io);
	}

	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	if (size != PAGE_SIZE)
		zstrm = zcomp_stream_get(zram->comp);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (WARN_ON(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *src = kmap_atomic(page);

		memcpy_to_bvec(bvec, src + offset);
		kunmap_atomic(src);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = -ENOMEM;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (comp_len >= huge_class_size)
		comp_len = PAGE_SIZE;
	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *    per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *    since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *    the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *    put per-cpu compression stream and, thus, to re-do
	 *    the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (IS_ERR((void *)handle))
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (IS_ERR((void *)handle)) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (IS_ERR((void *)handle))
			return PTR_ERR((void *)handle);

		if (comp_len != PAGE_SIZE)
			goto compress_again;
		/*
		 * If the page is not compressible, you need to acquire the
		 * lock and execute the code below. The zcomp_stream_get()
		 * call is needed to disable the cpu hotplug and grab the
		 * zstrm buffer back, so that the dereference of the zstrm
		 * variable below is valid.
		 */
		zstrm = zcomp_stream_get(zram->comp);
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (comp_len == PAGE_SIZE) {
		zram_set_flag(zram, index, ZRAM_HUGE);
		atomic64_inc(&zram->stats.huge_pages);
		atomic64_inc(&zram->stats.huge_pages_since);
	}

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		dst = kmap_atomic(page);
		memcpy_from_bvec(dst + offset, bvec);
		kunmap_atomic(dst);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because the
	 * logical block size isn't identical to the physical block size on
	 * some architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block, decompressing,
	 * partially zeroing, re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

/*
 * Returns a negative errno on error; otherwise returns 0 or 1:
 * 0 if the IO request completed synchronously,
 * 1 if the IO request was successfully submitted (asynchronously).
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, enum req_op op, struct bio *bio)
{
	int ret;

	if (!op_is_write(op)) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	zram_slot_lock(zram, index);
	zram_accessed(zram, index);
	zram_slot_unlock(zram, index);

	if (unlikely(ret < 0)) {
		if (!op_is_write(op))
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;
	unsigned long start_time;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	start_time = bio_start_io_acct(bio);
	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					 bio_op(bio), bio) < 0) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}
	bio_end_io_acct(bio, start_time);
	bio_endio(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_submit_bio(struct bio *bio)
{
	struct zram *zram = bio->bi_bdev->bd_disk->private_data;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		bio_io_error(bio);
		return;
	}

	__zram_make_request(zram, bio);
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	atomic64_inc(&zram->stats.notify_free);
	if (!zram_slot_trylock(zram, index)) {
		atomic64_inc(&zram->stats.miss_free);
		return;
	}

	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, enum req_op op)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;
	unsigned long start_time;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	start_time = bdev_start_io_acct(bdev->bd_disk->part0,
			SECTORS_PER_PAGE, op, jiffies);
	ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL);
	bdev_end_io_acct(bdev->bd_disk->part0, op, start_time);
out:
	/*
	 * If the I/O fails, just return the error (i.e., non-zero) without
	 * calling page_endio. That causes the callers of rw_page (e.g.,
	 * swap_readpage, __swap_writepage) to resubmit the I/O as a bio
	 * request, and bio->bi_end_io then handles the error (e.g.,
	 * SetPageError, set_page_dirty and extra work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, op_is_write(op), 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

static void zram_reset_device(struct zram *zram)
{
	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	set_capacity_and_notify(zram->disk, 0);
	part_stat_set_all(zram->disk->part0, 0);

	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, zram->disksize);
	zram->disksize = 0;
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(zram->comp);
	zram->comp = NULL;
	reset_bdev(zram);

	up_write(&zram->init_lock);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct gendisk *disk;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	disk = zram->disk;

	mutex_lock(&disk->open_mutex);
	/* Do not reset an active device or claimed device */
	if (disk_openers(disk) || zram->claim) {
		mutex_unlock(&disk->open_mutex);
		return -EBUSY;
	}

	/* From now on, nobody can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&disk->open_mutex);

	/* Make sure all the pending I/O is finished */
	sync_blockdev(disk->part0);
	zram_reset_device(zram);

	mutex_lock(&disk->open_mutex);
	zram->claim = false;
	mutex_unlock(&disk->open_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

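/*
 * With a backing device attached, reads may complete asynchronously, so
 * the writeback variant of the fops omits the synchronous ->rw_page path
 * (see the comment in backing_dev_store).
 */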
#ifdef CONFIG_ZRAM_WRITEBACK
static const struct block_device_operations zram_wb_devops = {
	.open = zram_open,
	.submit_bio = zram_submit_bio,
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};
#endif

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_WO(idle);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
static DEVICE_ATTR_WO(writeback);
static DEVICE_ATTR_RW(writeback_limit);
static DEVICE_ATTR_RW(writeback_limit_enable);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_idle.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
	&dev_attr_writeback.attr,
	&dev_attr_writeback_limit.attr,
	&dev_attr_writeback_limit_enable.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_bd_stat.attr,
#endif
	&dev_attr_debug_stat.attr,
	NULL,
};

ATTRIBUTE_GROUPS(zram_disk);

/*
 * Allocate and initialize a new zram device. The function returns a
 * device_id >= 0 upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);
#ifdef CONFIG_ZRAM_WRITEBACK
	spin_lock_init(&zram->wb_limit_lock);
#endif

	/* gendisk structure */
	zram->disk = blk_alloc_disk(NUMA_NO_NODE);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->minors = 1;
	zram->disk->flags |= GENHD_FL_NO_PART;
	zram->disk->fops = &zram_devops;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);

	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical to the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to the physical block size. So we can't ensure that all
	 * discarded logical blocks are zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
	ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
	if (ret)
		goto out_cleanup_disk;

	strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	zram_debugfs_register(zram);
	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_cleanup_disk:
	put_disk(zram->disk);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	bool claimed;

	mutex_lock(&zram->disk->open_mutex);
	if (disk_openers(zram->disk)) {
		mutex_unlock(&zram->disk->open_mutex);
		return -EBUSY;
	}

	claimed = zram->claim;
	if (!claimed)
		zram->claim = true;
	mutex_unlock(&zram->disk->open_mutex);

	zram_debugfs_unregister(zram);

	if (claimed) {
		/*
		 * If we were claimed by reset_store(), del_gendisk() will
		 * wait until reset_store() is done, so there is nothing
		 * to do.
		 */
		;
	} else {
		/* Make sure all the pending I/O is finished */
		sync_blockdev(zram->disk->part0);
		zram_reset_device(zram);
	}

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	del_gendisk(zram->disk);

	/* del_gendisk drains pending reset_store */
	WARN_ON_ONCE(claimed && zram->claim);

	/*
	 * disksize_store() may be called in between zram_reset_device()
	 * and del_gendisk(), so run the last reset to avoid leaking
	 * anything allocated with disksize_store()
	 */
	zram_reset_device(zram);

	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns back
 * this device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static struct class_attribute class_attr_hot_add =
	__ATTR(hot_add, 0400, hot_add_show, NULL);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	WARN_ON_ONCE(zram_remove(ptr));
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	zram_debugfs_destroy();
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	BUILD_BUG_ON(__NR_ZRAM_PAGEFLAGS > BITS_PER_LONG);

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_debugfs_create();
	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");