1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/highmem.h> 26 #include <linux/slab.h> 27 #include <linux/backing-dev.h> 28 #include <linux/string.h> 29 #include <linux/vmalloc.h> 30 #include <linux/err.h> 31 #include <linux/idr.h> 32 #include <linux/sysfs.h> 33 #include <linux/debugfs.h> 34 #include <linux/cpuhotplug.h> 35 #include <linux/part_stat.h> 36 37 #include "zram_drv.h" 38 39 static DEFINE_IDR(zram_index_idr); 40 /* idr index must be protected */ 41 static DEFINE_MUTEX(zram_index_mutex); 42 43 static int zram_major; 44 static const char *default_compressor = CONFIG_ZRAM_DEF_COMP; 45 46 /* Module params (documentation at end) */ 47 static unsigned int num_devices = 1; 48 /* 49 * Pages that compress to sizes equals or greater than this are stored 50 * uncompressed in memory. 51 */ 52 static size_t huge_class_size; 53 54 static const struct block_device_operations zram_devops; 55 #ifdef CONFIG_ZRAM_WRITEBACK 56 static const struct block_device_operations zram_wb_devops; 57 #endif 58 59 static void zram_free_page(struct zram *zram, size_t index); 60 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 61 u32 index, int offset, struct bio *bio); 62 63 64 static int zram_slot_trylock(struct zram *zram, u32 index) 65 { 66 return bit_spin_trylock(ZRAM_LOCK, &zram->table[index].flags); 67 } 68 69 static void zram_slot_lock(struct zram *zram, u32 index) 70 { 71 bit_spin_lock(ZRAM_LOCK, &zram->table[index].flags); 72 } 73 74 static void zram_slot_unlock(struct zram *zram, u32 index) 75 { 76 bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); 77 } 78 79 static inline bool init_done(struct zram *zram) 80 { 81 return zram->disksize; 82 } 83 84 static inline struct zram *dev_to_zram(struct device *dev) 85 { 86 return (struct zram *)dev_to_disk(dev)->private_data; 87 } 88 89 static unsigned long zram_get_handle(struct zram *zram, u32 index) 90 { 91 return zram->table[index].handle; 92 } 93 94 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle) 95 { 96 zram->table[index].handle = handle; 97 } 98 99 /* flag operations require table entry bit_spin_lock() being held */ 100 static bool zram_test_flag(struct zram *zram, u32 index, 101 enum zram_pageflags flag) 102 { 103 return zram->table[index].flags & BIT(flag); 104 } 105 106 static void zram_set_flag(struct zram *zram, u32 index, 107 enum zram_pageflags flag) 108 { 109 zram->table[index].flags |= BIT(flag); 110 } 111 112 static void zram_clear_flag(struct zram *zram, u32 index, 113 enum zram_pageflags flag) 114 { 115 zram->table[index].flags &= ~BIT(flag); 116 } 117 118 static inline void zram_set_element(struct zram *zram, u32 index, 119 unsigned long element) 120 { 121 zram->table[index].element = element; 122 } 123 124 static unsigned long zram_get_element(struct zram *zram, u32 index) 125 { 126 return zram->table[index].element; 127 } 128 129 static size_t zram_get_obj_size(struct zram *zram, u32 index) 130 { 131 return zram->table[index].flags & (BIT(ZRAM_FLAG_SHIFT) - 1); 132 } 133 134 static void zram_set_obj_size(struct zram *zram, 135 u32 index, size_t size) 136 { 137 unsigned long flags = zram->table[index].flags >> ZRAM_FLAG_SHIFT; 138 139 zram->table[index].flags = (flags << ZRAM_FLAG_SHIFT) | size; 140 } 141 142 static inline bool zram_allocated(struct zram *zram, u32 index) 143 { 144 return zram_get_obj_size(zram, index) || 145 zram_test_flag(zram, index, ZRAM_SAME) || 146 zram_test_flag(zram, index, ZRAM_WB); 147 } 148 149 #if PAGE_SIZE != 4096 150 static inline bool is_partial_io(struct bio_vec *bvec) 151 { 152 return bvec->bv_len != PAGE_SIZE; 153 } 154 #else 155 static inline bool is_partial_io(struct bio_vec *bvec) 156 { 157 return false; 158 } 159 #endif 160 161 /* 162 * Check if request is within bounds and aligned on zram logical blocks. 163 */ 164 static inline bool valid_io_request(struct zram *zram, 165 sector_t start, unsigned int size) 166 { 167 u64 end, bound; 168 169 /* unaligned request */ 170 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 171 return false; 172 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 173 return false; 174 175 end = start + (size >> SECTOR_SHIFT); 176 bound = zram->disksize >> SECTOR_SHIFT; 177 /* out of range range */ 178 if (unlikely(start >= bound || end > bound || start > end)) 179 return false; 180 181 /* I/O request is valid */ 182 return true; 183 } 184 185 static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 186 { 187 *index += (*offset + bvec->bv_len) / PAGE_SIZE; 188 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 189 } 190 191 static inline void update_used_max(struct zram *zram, 192 const unsigned long pages) 193 { 194 unsigned long old_max, cur_max; 195 196 old_max = atomic_long_read(&zram->stats.max_used_pages); 197 198 do { 199 cur_max = old_max; 200 if (pages > cur_max) 201 old_max = atomic_long_cmpxchg( 202 &zram->stats.max_used_pages, cur_max, pages); 203 } while (old_max != cur_max); 204 } 205 206 static inline void zram_fill_page(void *ptr, unsigned long len, 207 unsigned long value) 208 { 209 WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); 210 memset_l(ptr, value, len / sizeof(unsigned long)); 211 } 212 213 static bool page_same_filled(void *ptr, unsigned long *element) 214 { 215 unsigned long *page; 216 unsigned long val; 217 unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1; 218 219 page = (unsigned long *)ptr; 220 val = page[0]; 221 222 if (val != page[last_pos]) 223 return false; 224 225 for (pos = 1; pos < last_pos; pos++) { 226 if (val != page[pos]) 227 return false; 228 } 229 230 *element = val; 231 232 return true; 233 } 234 235 static ssize_t initstate_show(struct device *dev, 236 struct device_attribute *attr, char *buf) 237 { 238 u32 val; 239 struct zram *zram = dev_to_zram(dev); 240 241 down_read(&zram->init_lock); 242 val = init_done(zram); 243 up_read(&zram->init_lock); 244 245 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 246 } 247 248 static ssize_t disksize_show(struct device *dev, 249 struct device_attribute *attr, char *buf) 250 { 251 struct zram *zram = dev_to_zram(dev); 252 253 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 254 } 255 256 static ssize_t mem_limit_store(struct device *dev, 257 struct device_attribute *attr, const char *buf, size_t len) 258 { 259 u64 limit; 260 char *tmp; 261 struct zram *zram = dev_to_zram(dev); 262 263 limit = memparse(buf, &tmp); 264 if (buf == tmp) /* no chars parsed, invalid input */ 265 return -EINVAL; 266 267 down_write(&zram->init_lock); 268 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 269 up_write(&zram->init_lock); 270 271 return len; 272 } 273 274 static ssize_t mem_used_max_store(struct device *dev, 275 struct device_attribute *attr, const char *buf, size_t len) 276 { 277 int err; 278 unsigned long val; 279 struct zram *zram = dev_to_zram(dev); 280 281 err = kstrtoul(buf, 10, &val); 282 if (err || val != 0) 283 return -EINVAL; 284 285 down_read(&zram->init_lock); 286 if (init_done(zram)) { 287 atomic_long_set(&zram->stats.max_used_pages, 288 zs_get_total_pages(zram->mem_pool)); 289 } 290 up_read(&zram->init_lock); 291 292 return len; 293 } 294 295 /* 296 * Mark all pages which are older than or equal to cutoff as IDLE. 297 * Callers should hold the zram init lock in read mode 298 */ 299 static void mark_idle(struct zram *zram, ktime_t cutoff) 300 { 301 int is_idle = 1; 302 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 303 int index; 304 305 for (index = 0; index < nr_pages; index++) { 306 /* 307 * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race. 308 * See the comment in writeback_store. 309 */ 310 zram_slot_lock(zram, index); 311 if (zram_allocated(zram, index) && 312 !zram_test_flag(zram, index, ZRAM_UNDER_WB)) { 313 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 314 is_idle = !cutoff || ktime_after(cutoff, zram->table[index].ac_time); 315 #endif 316 if (is_idle) 317 zram_set_flag(zram, index, ZRAM_IDLE); 318 } 319 zram_slot_unlock(zram, index); 320 } 321 } 322 323 static ssize_t idle_store(struct device *dev, 324 struct device_attribute *attr, const char *buf, size_t len) 325 { 326 struct zram *zram = dev_to_zram(dev); 327 ktime_t cutoff_time = 0; 328 ssize_t rv = -EINVAL; 329 330 if (!sysfs_streq(buf, "all")) { 331 /* 332 * If it did not parse as 'all' try to treat it as an integer when 333 * we have memory tracking enabled. 334 */ 335 u64 age_sec; 336 337 if (IS_ENABLED(CONFIG_ZRAM_MEMORY_TRACKING) && !kstrtoull(buf, 0, &age_sec)) 338 cutoff_time = ktime_sub(ktime_get_boottime(), 339 ns_to_ktime(age_sec * NSEC_PER_SEC)); 340 else 341 goto out; 342 } 343 344 down_read(&zram->init_lock); 345 if (!init_done(zram)) 346 goto out_unlock; 347 348 /* A cutoff_time of 0 marks everything as idle, this is the "all" behavior */ 349 mark_idle(zram, cutoff_time); 350 rv = len; 351 352 out_unlock: 353 up_read(&zram->init_lock); 354 out: 355 return rv; 356 } 357 358 #ifdef CONFIG_ZRAM_WRITEBACK 359 static ssize_t writeback_limit_enable_store(struct device *dev, 360 struct device_attribute *attr, const char *buf, size_t len) 361 { 362 struct zram *zram = dev_to_zram(dev); 363 u64 val; 364 ssize_t ret = -EINVAL; 365 366 if (kstrtoull(buf, 10, &val)) 367 return ret; 368 369 down_read(&zram->init_lock); 370 spin_lock(&zram->wb_limit_lock); 371 zram->wb_limit_enable = val; 372 spin_unlock(&zram->wb_limit_lock); 373 up_read(&zram->init_lock); 374 ret = len; 375 376 return ret; 377 } 378 379 static ssize_t writeback_limit_enable_show(struct device *dev, 380 struct device_attribute *attr, char *buf) 381 { 382 bool val; 383 struct zram *zram = dev_to_zram(dev); 384 385 down_read(&zram->init_lock); 386 spin_lock(&zram->wb_limit_lock); 387 val = zram->wb_limit_enable; 388 spin_unlock(&zram->wb_limit_lock); 389 up_read(&zram->init_lock); 390 391 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 392 } 393 394 static ssize_t writeback_limit_store(struct device *dev, 395 struct device_attribute *attr, const char *buf, size_t len) 396 { 397 struct zram *zram = dev_to_zram(dev); 398 u64 val; 399 ssize_t ret = -EINVAL; 400 401 if (kstrtoull(buf, 10, &val)) 402 return ret; 403 404 down_read(&zram->init_lock); 405 spin_lock(&zram->wb_limit_lock); 406 zram->bd_wb_limit = val; 407 spin_unlock(&zram->wb_limit_lock); 408 up_read(&zram->init_lock); 409 ret = len; 410 411 return ret; 412 } 413 414 static ssize_t writeback_limit_show(struct device *dev, 415 struct device_attribute *attr, char *buf) 416 { 417 u64 val; 418 struct zram *zram = dev_to_zram(dev); 419 420 down_read(&zram->init_lock); 421 spin_lock(&zram->wb_limit_lock); 422 val = zram->bd_wb_limit; 423 spin_unlock(&zram->wb_limit_lock); 424 up_read(&zram->init_lock); 425 426 return scnprintf(buf, PAGE_SIZE, "%llu\n", val); 427 } 428 429 static void reset_bdev(struct zram *zram) 430 { 431 struct block_device *bdev; 432 433 if (!zram->backing_dev) 434 return; 435 436 bdev = zram->bdev; 437 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 438 /* hope filp_close flush all of IO */ 439 filp_close(zram->backing_dev, NULL); 440 zram->backing_dev = NULL; 441 zram->bdev = NULL; 442 zram->disk->fops = &zram_devops; 443 kvfree(zram->bitmap); 444 zram->bitmap = NULL; 445 } 446 447 static ssize_t backing_dev_show(struct device *dev, 448 struct device_attribute *attr, char *buf) 449 { 450 struct file *file; 451 struct zram *zram = dev_to_zram(dev); 452 char *p; 453 ssize_t ret; 454 455 down_read(&zram->init_lock); 456 file = zram->backing_dev; 457 if (!file) { 458 memcpy(buf, "none\n", 5); 459 up_read(&zram->init_lock); 460 return 5; 461 } 462 463 p = file_path(file, buf, PAGE_SIZE - 1); 464 if (IS_ERR(p)) { 465 ret = PTR_ERR(p); 466 goto out; 467 } 468 469 ret = strlen(p); 470 memmove(buf, p, ret); 471 buf[ret++] = '\n'; 472 out: 473 up_read(&zram->init_lock); 474 return ret; 475 } 476 477 static ssize_t backing_dev_store(struct device *dev, 478 struct device_attribute *attr, const char *buf, size_t len) 479 { 480 char *file_name; 481 size_t sz; 482 struct file *backing_dev = NULL; 483 struct inode *inode; 484 struct address_space *mapping; 485 unsigned int bitmap_sz; 486 unsigned long nr_pages, *bitmap = NULL; 487 struct block_device *bdev = NULL; 488 int err; 489 struct zram *zram = dev_to_zram(dev); 490 491 file_name = kmalloc(PATH_MAX, GFP_KERNEL); 492 if (!file_name) 493 return -ENOMEM; 494 495 down_write(&zram->init_lock); 496 if (init_done(zram)) { 497 pr_info("Can't setup backing device for initialized device\n"); 498 err = -EBUSY; 499 goto out; 500 } 501 502 strlcpy(file_name, buf, PATH_MAX); 503 /* ignore trailing newline */ 504 sz = strlen(file_name); 505 if (sz > 0 && file_name[sz - 1] == '\n') 506 file_name[sz - 1] = 0x00; 507 508 backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); 509 if (IS_ERR(backing_dev)) { 510 err = PTR_ERR(backing_dev); 511 backing_dev = NULL; 512 goto out; 513 } 514 515 mapping = backing_dev->f_mapping; 516 inode = mapping->host; 517 518 /* Support only block device in this moment */ 519 if (!S_ISBLK(inode->i_mode)) { 520 err = -ENOTBLK; 521 goto out; 522 } 523 524 bdev = blkdev_get_by_dev(inode->i_rdev, 525 FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); 526 if (IS_ERR(bdev)) { 527 err = PTR_ERR(bdev); 528 bdev = NULL; 529 goto out; 530 } 531 532 nr_pages = i_size_read(inode) >> PAGE_SHIFT; 533 bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); 534 bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); 535 if (!bitmap) { 536 err = -ENOMEM; 537 goto out; 538 } 539 540 reset_bdev(zram); 541 542 zram->bdev = bdev; 543 zram->backing_dev = backing_dev; 544 zram->bitmap = bitmap; 545 zram->nr_pages = nr_pages; 546 /* 547 * With writeback feature, zram does asynchronous IO so it's no longer 548 * synchronous device so let's remove synchronous io flag. Othewise, 549 * upper layer(e.g., swap) could wait IO completion rather than 550 * (submit and return), which will cause system sluggish. 551 * Furthermore, when the IO function returns(e.g., swap_readpage), 552 * upper layer expects IO was done so it could deallocate the page 553 * freely but in fact, IO is going on so finally could cause 554 * use-after-free when the IO is really done. 555 */ 556 zram->disk->fops = &zram_wb_devops; 557 up_write(&zram->init_lock); 558 559 pr_info("setup backing device %s\n", file_name); 560 kfree(file_name); 561 562 return len; 563 out: 564 kvfree(bitmap); 565 566 if (bdev) 567 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 568 569 if (backing_dev) 570 filp_close(backing_dev, NULL); 571 572 up_write(&zram->init_lock); 573 574 kfree(file_name); 575 576 return err; 577 } 578 579 static unsigned long alloc_block_bdev(struct zram *zram) 580 { 581 unsigned long blk_idx = 1; 582 retry: 583 /* skip 0 bit to confuse zram.handle = 0 */ 584 blk_idx = find_next_zero_bit(zram->bitmap, zram->nr_pages, blk_idx); 585 if (blk_idx == zram->nr_pages) 586 return 0; 587 588 if (test_and_set_bit(blk_idx, zram->bitmap)) 589 goto retry; 590 591 atomic64_inc(&zram->stats.bd_count); 592 return blk_idx; 593 } 594 595 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) 596 { 597 int was_set; 598 599 was_set = test_and_clear_bit(blk_idx, zram->bitmap); 600 WARN_ON_ONCE(!was_set); 601 atomic64_dec(&zram->stats.bd_count); 602 } 603 604 static void zram_page_end_io(struct bio *bio) 605 { 606 struct page *page = bio_first_page_all(bio); 607 608 page_endio(page, op_is_write(bio_op(bio)), 609 blk_status_to_errno(bio->bi_status)); 610 bio_put(bio); 611 } 612 613 /* 614 * Returns 1 if the submission is successful. 615 */ 616 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, 617 unsigned long entry, struct bio *parent) 618 { 619 struct bio *bio; 620 621 bio = bio_alloc(zram->bdev, 1, parent ? parent->bi_opf : REQ_OP_READ, 622 GFP_NOIO); 623 if (!bio) 624 return -ENOMEM; 625 626 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 627 if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { 628 bio_put(bio); 629 return -EIO; 630 } 631 632 if (!parent) 633 bio->bi_end_io = zram_page_end_io; 634 else 635 bio_chain(bio, parent); 636 637 submit_bio(bio); 638 return 1; 639 } 640 641 #define PAGE_WB_SIG "page_index=" 642 643 #define PAGE_WRITEBACK 0 644 #define HUGE_WRITEBACK (1<<0) 645 #define IDLE_WRITEBACK (1<<1) 646 647 648 static ssize_t writeback_store(struct device *dev, 649 struct device_attribute *attr, const char *buf, size_t len) 650 { 651 struct zram *zram = dev_to_zram(dev); 652 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 653 unsigned long index = 0; 654 struct bio bio; 655 struct bio_vec bio_vec; 656 struct page *page; 657 ssize_t ret = len; 658 int mode, err; 659 unsigned long blk_idx = 0; 660 661 if (sysfs_streq(buf, "idle")) 662 mode = IDLE_WRITEBACK; 663 else if (sysfs_streq(buf, "huge")) 664 mode = HUGE_WRITEBACK; 665 else if (sysfs_streq(buf, "huge_idle")) 666 mode = IDLE_WRITEBACK | HUGE_WRITEBACK; 667 else { 668 if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) 669 return -EINVAL; 670 671 if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || 672 index >= nr_pages) 673 return -EINVAL; 674 675 nr_pages = 1; 676 mode = PAGE_WRITEBACK; 677 } 678 679 down_read(&zram->init_lock); 680 if (!init_done(zram)) { 681 ret = -EINVAL; 682 goto release_init_lock; 683 } 684 685 if (!zram->backing_dev) { 686 ret = -ENODEV; 687 goto release_init_lock; 688 } 689 690 page = alloc_page(GFP_KERNEL); 691 if (!page) { 692 ret = -ENOMEM; 693 goto release_init_lock; 694 } 695 696 for (; nr_pages != 0; index++, nr_pages--) { 697 struct bio_vec bvec; 698 699 bvec.bv_page = page; 700 bvec.bv_len = PAGE_SIZE; 701 bvec.bv_offset = 0; 702 703 spin_lock(&zram->wb_limit_lock); 704 if (zram->wb_limit_enable && !zram->bd_wb_limit) { 705 spin_unlock(&zram->wb_limit_lock); 706 ret = -EIO; 707 break; 708 } 709 spin_unlock(&zram->wb_limit_lock); 710 711 if (!blk_idx) { 712 blk_idx = alloc_block_bdev(zram); 713 if (!blk_idx) { 714 ret = -ENOSPC; 715 break; 716 } 717 } 718 719 zram_slot_lock(zram, index); 720 if (!zram_allocated(zram, index)) 721 goto next; 722 723 if (zram_test_flag(zram, index, ZRAM_WB) || 724 zram_test_flag(zram, index, ZRAM_SAME) || 725 zram_test_flag(zram, index, ZRAM_UNDER_WB)) 726 goto next; 727 728 if (mode & IDLE_WRITEBACK && 729 !zram_test_flag(zram, index, ZRAM_IDLE)) 730 goto next; 731 if (mode & HUGE_WRITEBACK && 732 !zram_test_flag(zram, index, ZRAM_HUGE)) 733 goto next; 734 /* 735 * Clearing ZRAM_UNDER_WB is duty of caller. 736 * IOW, zram_free_page never clear it. 737 */ 738 zram_set_flag(zram, index, ZRAM_UNDER_WB); 739 /* Need for hugepage writeback racing */ 740 zram_set_flag(zram, index, ZRAM_IDLE); 741 zram_slot_unlock(zram, index); 742 if (zram_bvec_read(zram, &bvec, index, 0, NULL)) { 743 zram_slot_lock(zram, index); 744 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 745 zram_clear_flag(zram, index, ZRAM_IDLE); 746 zram_slot_unlock(zram, index); 747 continue; 748 } 749 750 bio_init(&bio, zram->bdev, &bio_vec, 1, 751 REQ_OP_WRITE | REQ_SYNC); 752 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 753 754 bio_add_page(&bio, bvec.bv_page, bvec.bv_len, 755 bvec.bv_offset); 756 /* 757 * XXX: A single page IO would be inefficient for write 758 * but it would be not bad as starter. 759 */ 760 err = submit_bio_wait(&bio); 761 if (err) { 762 zram_slot_lock(zram, index); 763 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 764 zram_clear_flag(zram, index, ZRAM_IDLE); 765 zram_slot_unlock(zram, index); 766 /* 767 * Return last IO error unless every IO were 768 * not suceeded. 769 */ 770 ret = err; 771 continue; 772 } 773 774 atomic64_inc(&zram->stats.bd_writes); 775 /* 776 * We released zram_slot_lock so need to check if the slot was 777 * changed. If there is freeing for the slot, we can catch it 778 * easily by zram_allocated. 779 * A subtle case is the slot is freed/reallocated/marked as 780 * ZRAM_IDLE again. To close the race, idle_store doesn't 781 * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB. 782 * Thus, we could close the race by checking ZRAM_IDLE bit. 783 */ 784 zram_slot_lock(zram, index); 785 if (!zram_allocated(zram, index) || 786 !zram_test_flag(zram, index, ZRAM_IDLE)) { 787 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 788 zram_clear_flag(zram, index, ZRAM_IDLE); 789 goto next; 790 } 791 792 zram_free_page(zram, index); 793 zram_clear_flag(zram, index, ZRAM_UNDER_WB); 794 zram_set_flag(zram, index, ZRAM_WB); 795 zram_set_element(zram, index, blk_idx); 796 blk_idx = 0; 797 atomic64_inc(&zram->stats.pages_stored); 798 spin_lock(&zram->wb_limit_lock); 799 if (zram->wb_limit_enable && zram->bd_wb_limit > 0) 800 zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); 801 spin_unlock(&zram->wb_limit_lock); 802 next: 803 zram_slot_unlock(zram, index); 804 } 805 806 if (blk_idx) 807 free_block_bdev(zram, blk_idx); 808 __free_page(page); 809 release_init_lock: 810 up_read(&zram->init_lock); 811 812 return ret; 813 } 814 815 struct zram_work { 816 struct work_struct work; 817 struct zram *zram; 818 unsigned long entry; 819 struct bio *bio; 820 struct bio_vec bvec; 821 }; 822 823 #if PAGE_SIZE != 4096 824 static void zram_sync_read(struct work_struct *work) 825 { 826 struct zram_work *zw = container_of(work, struct zram_work, work); 827 struct zram *zram = zw->zram; 828 unsigned long entry = zw->entry; 829 struct bio *bio = zw->bio; 830 831 read_from_bdev_async(zram, &zw->bvec, entry, bio); 832 } 833 834 /* 835 * Block layer want one ->submit_bio to be active at a time, so if we use 836 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 837 * use a worker thread context. 838 */ 839 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 840 unsigned long entry, struct bio *bio) 841 { 842 struct zram_work work; 843 844 work.bvec = *bvec; 845 work.zram = zram; 846 work.entry = entry; 847 work.bio = bio; 848 849 INIT_WORK_ONSTACK(&work.work, zram_sync_read); 850 queue_work(system_unbound_wq, &work.work); 851 flush_work(&work.work); 852 destroy_work_on_stack(&work.work); 853 854 return 1; 855 } 856 #else 857 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, 858 unsigned long entry, struct bio *bio) 859 { 860 WARN_ON(1); 861 return -EIO; 862 } 863 #endif 864 865 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 866 unsigned long entry, struct bio *parent, bool sync) 867 { 868 atomic64_inc(&zram->stats.bd_reads); 869 if (sync) 870 return read_from_bdev_sync(zram, bvec, entry, parent); 871 else 872 return read_from_bdev_async(zram, bvec, entry, parent); 873 } 874 #else 875 static inline void reset_bdev(struct zram *zram) {}; 876 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, 877 unsigned long entry, struct bio *parent, bool sync) 878 { 879 return -EIO; 880 } 881 882 static void free_block_bdev(struct zram *zram, unsigned long blk_idx) {}; 883 #endif 884 885 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 886 887 static struct dentry *zram_debugfs_root; 888 889 static void zram_debugfs_create(void) 890 { 891 zram_debugfs_root = debugfs_create_dir("zram", NULL); 892 } 893 894 static void zram_debugfs_destroy(void) 895 { 896 debugfs_remove_recursive(zram_debugfs_root); 897 } 898 899 static void zram_accessed(struct zram *zram, u32 index) 900 { 901 zram_clear_flag(zram, index, ZRAM_IDLE); 902 zram->table[index].ac_time = ktime_get_boottime(); 903 } 904 905 static ssize_t read_block_state(struct file *file, char __user *buf, 906 size_t count, loff_t *ppos) 907 { 908 char *kbuf; 909 ssize_t index, written = 0; 910 struct zram *zram = file->private_data; 911 unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; 912 struct timespec64 ts; 913 914 kbuf = kvmalloc(count, GFP_KERNEL); 915 if (!kbuf) 916 return -ENOMEM; 917 918 down_read(&zram->init_lock); 919 if (!init_done(zram)) { 920 up_read(&zram->init_lock); 921 kvfree(kbuf); 922 return -EINVAL; 923 } 924 925 for (index = *ppos; index < nr_pages; index++) { 926 int copied; 927 928 zram_slot_lock(zram, index); 929 if (!zram_allocated(zram, index)) 930 goto next; 931 932 ts = ktime_to_timespec64(zram->table[index].ac_time); 933 copied = snprintf(kbuf + written, count, 934 "%12zd %12lld.%06lu %c%c%c%c\n", 935 index, (s64)ts.tv_sec, 936 ts.tv_nsec / NSEC_PER_USEC, 937 zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.', 938 zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.', 939 zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', 940 zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); 941 942 if (count <= copied) { 943 zram_slot_unlock(zram, index); 944 break; 945 } 946 written += copied; 947 count -= copied; 948 next: 949 zram_slot_unlock(zram, index); 950 *ppos += 1; 951 } 952 953 up_read(&zram->init_lock); 954 if (copy_to_user(buf, kbuf, written)) 955 written = -EFAULT; 956 kvfree(kbuf); 957 958 return written; 959 } 960 961 static const struct file_operations proc_zram_block_state_op = { 962 .open = simple_open, 963 .read = read_block_state, 964 .llseek = default_llseek, 965 }; 966 967 static void zram_debugfs_register(struct zram *zram) 968 { 969 if (!zram_debugfs_root) 970 return; 971 972 zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name, 973 zram_debugfs_root); 974 debugfs_create_file("block_state", 0400, zram->debugfs_dir, 975 zram, &proc_zram_block_state_op); 976 } 977 978 static void zram_debugfs_unregister(struct zram *zram) 979 { 980 debugfs_remove_recursive(zram->debugfs_dir); 981 } 982 #else 983 static void zram_debugfs_create(void) {}; 984 static void zram_debugfs_destroy(void) {}; 985 static void zram_accessed(struct zram *zram, u32 index) 986 { 987 zram_clear_flag(zram, index, ZRAM_IDLE); 988 }; 989 static void zram_debugfs_register(struct zram *zram) {}; 990 static void zram_debugfs_unregister(struct zram *zram) {}; 991 #endif 992 993 /* 994 * We switched to per-cpu streams and this attr is not needed anymore. 995 * However, we will keep it around for some time, because: 996 * a) we may revert per-cpu streams in the future 997 * b) it's visible to user space and we need to follow our 2 years 998 * retirement rule; but we already have a number of 'soon to be 999 * altered' attrs, so max_comp_streams need to wait for the next 1000 * layoff cycle. 1001 */ 1002 static ssize_t max_comp_streams_show(struct device *dev, 1003 struct device_attribute *attr, char *buf) 1004 { 1005 return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus()); 1006 } 1007 1008 static ssize_t max_comp_streams_store(struct device *dev, 1009 struct device_attribute *attr, const char *buf, size_t len) 1010 { 1011 return len; 1012 } 1013 1014 static ssize_t comp_algorithm_show(struct device *dev, 1015 struct device_attribute *attr, char *buf) 1016 { 1017 size_t sz; 1018 struct zram *zram = dev_to_zram(dev); 1019 1020 down_read(&zram->init_lock); 1021 sz = zcomp_available_show(zram->compressor, buf); 1022 up_read(&zram->init_lock); 1023 1024 return sz; 1025 } 1026 1027 static ssize_t comp_algorithm_store(struct device *dev, 1028 struct device_attribute *attr, const char *buf, size_t len) 1029 { 1030 struct zram *zram = dev_to_zram(dev); 1031 char compressor[ARRAY_SIZE(zram->compressor)]; 1032 size_t sz; 1033 1034 strlcpy(compressor, buf, sizeof(compressor)); 1035 /* ignore trailing newline */ 1036 sz = strlen(compressor); 1037 if (sz > 0 && compressor[sz - 1] == '\n') 1038 compressor[sz - 1] = 0x00; 1039 1040 if (!zcomp_available_algorithm(compressor)) 1041 return -EINVAL; 1042 1043 down_write(&zram->init_lock); 1044 if (init_done(zram)) { 1045 up_write(&zram->init_lock); 1046 pr_info("Can't change algorithm for initialized device\n"); 1047 return -EBUSY; 1048 } 1049 1050 strcpy(zram->compressor, compressor); 1051 up_write(&zram->init_lock); 1052 return len; 1053 } 1054 1055 static ssize_t compact_store(struct device *dev, 1056 struct device_attribute *attr, const char *buf, size_t len) 1057 { 1058 struct zram *zram = dev_to_zram(dev); 1059 1060 down_read(&zram->init_lock); 1061 if (!init_done(zram)) { 1062 up_read(&zram->init_lock); 1063 return -EINVAL; 1064 } 1065 1066 zs_compact(zram->mem_pool); 1067 up_read(&zram->init_lock); 1068 1069 return len; 1070 } 1071 1072 static ssize_t io_stat_show(struct device *dev, 1073 struct device_attribute *attr, char *buf) 1074 { 1075 struct zram *zram = dev_to_zram(dev); 1076 ssize_t ret; 1077 1078 down_read(&zram->init_lock); 1079 ret = scnprintf(buf, PAGE_SIZE, 1080 "%8llu %8llu %8llu %8llu\n", 1081 (u64)atomic64_read(&zram->stats.failed_reads), 1082 (u64)atomic64_read(&zram->stats.failed_writes), 1083 (u64)atomic64_read(&zram->stats.invalid_io), 1084 (u64)atomic64_read(&zram->stats.notify_free)); 1085 up_read(&zram->init_lock); 1086 1087 return ret; 1088 } 1089 1090 static ssize_t mm_stat_show(struct device *dev, 1091 struct device_attribute *attr, char *buf) 1092 { 1093 struct zram *zram = dev_to_zram(dev); 1094 struct zs_pool_stats pool_stats; 1095 u64 orig_size, mem_used = 0; 1096 long max_used; 1097 ssize_t ret; 1098 1099 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 1100 1101 down_read(&zram->init_lock); 1102 if (init_done(zram)) { 1103 mem_used = zs_get_total_pages(zram->mem_pool); 1104 zs_pool_stats(zram->mem_pool, &pool_stats); 1105 } 1106 1107 orig_size = atomic64_read(&zram->stats.pages_stored); 1108 max_used = atomic_long_read(&zram->stats.max_used_pages); 1109 1110 ret = scnprintf(buf, PAGE_SIZE, 1111 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu %8llu\n", 1112 orig_size << PAGE_SHIFT, 1113 (u64)atomic64_read(&zram->stats.compr_data_size), 1114 mem_used << PAGE_SHIFT, 1115 zram->limit_pages << PAGE_SHIFT, 1116 max_used << PAGE_SHIFT, 1117 (u64)atomic64_read(&zram->stats.same_pages), 1118 atomic_long_read(&pool_stats.pages_compacted), 1119 (u64)atomic64_read(&zram->stats.huge_pages), 1120 (u64)atomic64_read(&zram->stats.huge_pages_since)); 1121 up_read(&zram->init_lock); 1122 1123 return ret; 1124 } 1125 1126 #ifdef CONFIG_ZRAM_WRITEBACK 1127 #define FOUR_K(x) ((x) * (1 << (PAGE_SHIFT - 12))) 1128 static ssize_t bd_stat_show(struct device *dev, 1129 struct device_attribute *attr, char *buf) 1130 { 1131 struct zram *zram = dev_to_zram(dev); 1132 ssize_t ret; 1133 1134 down_read(&zram->init_lock); 1135 ret = scnprintf(buf, PAGE_SIZE, 1136 "%8llu %8llu %8llu\n", 1137 FOUR_K((u64)atomic64_read(&zram->stats.bd_count)), 1138 FOUR_K((u64)atomic64_read(&zram->stats.bd_reads)), 1139 FOUR_K((u64)atomic64_read(&zram->stats.bd_writes))); 1140 up_read(&zram->init_lock); 1141 1142 return ret; 1143 } 1144 #endif 1145 1146 static ssize_t debug_stat_show(struct device *dev, 1147 struct device_attribute *attr, char *buf) 1148 { 1149 int version = 1; 1150 struct zram *zram = dev_to_zram(dev); 1151 ssize_t ret; 1152 1153 down_read(&zram->init_lock); 1154 ret = scnprintf(buf, PAGE_SIZE, 1155 "version: %d\n%8llu %8llu\n", 1156 version, 1157 (u64)atomic64_read(&zram->stats.writestall), 1158 (u64)atomic64_read(&zram->stats.miss_free)); 1159 up_read(&zram->init_lock); 1160 1161 return ret; 1162 } 1163 1164 static DEVICE_ATTR_RO(io_stat); 1165 static DEVICE_ATTR_RO(mm_stat); 1166 #ifdef CONFIG_ZRAM_WRITEBACK 1167 static DEVICE_ATTR_RO(bd_stat); 1168 #endif 1169 static DEVICE_ATTR_RO(debug_stat); 1170 1171 static void zram_meta_free(struct zram *zram, u64 disksize) 1172 { 1173 size_t num_pages = disksize >> PAGE_SHIFT; 1174 size_t index; 1175 1176 /* Free all pages that are still in this zram device */ 1177 for (index = 0; index < num_pages; index++) 1178 zram_free_page(zram, index); 1179 1180 zs_destroy_pool(zram->mem_pool); 1181 vfree(zram->table); 1182 } 1183 1184 static bool zram_meta_alloc(struct zram *zram, u64 disksize) 1185 { 1186 size_t num_pages; 1187 1188 num_pages = disksize >> PAGE_SHIFT; 1189 zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table))); 1190 if (!zram->table) 1191 return false; 1192 1193 zram->mem_pool = zs_create_pool(zram->disk->disk_name); 1194 if (!zram->mem_pool) { 1195 vfree(zram->table); 1196 return false; 1197 } 1198 1199 if (!huge_class_size) 1200 huge_class_size = zs_huge_class_size(zram->mem_pool); 1201 return true; 1202 } 1203 1204 /* 1205 * To protect concurrent access to the same index entry, 1206 * caller should hold this table index entry's bit_spinlock to 1207 * indicate this index entry is accessing. 1208 */ 1209 static void zram_free_page(struct zram *zram, size_t index) 1210 { 1211 unsigned long handle; 1212 1213 #ifdef CONFIG_ZRAM_MEMORY_TRACKING 1214 zram->table[index].ac_time = 0; 1215 #endif 1216 if (zram_test_flag(zram, index, ZRAM_IDLE)) 1217 zram_clear_flag(zram, index, ZRAM_IDLE); 1218 1219 if (zram_test_flag(zram, index, ZRAM_HUGE)) { 1220 zram_clear_flag(zram, index, ZRAM_HUGE); 1221 atomic64_dec(&zram->stats.huge_pages); 1222 } 1223 1224 if (zram_test_flag(zram, index, ZRAM_WB)) { 1225 zram_clear_flag(zram, index, ZRAM_WB); 1226 free_block_bdev(zram, zram_get_element(zram, index)); 1227 goto out; 1228 } 1229 1230 /* 1231 * No memory is allocated for same element filled pages. 1232 * Simply clear same page flag. 1233 */ 1234 if (zram_test_flag(zram, index, ZRAM_SAME)) { 1235 zram_clear_flag(zram, index, ZRAM_SAME); 1236 atomic64_dec(&zram->stats.same_pages); 1237 goto out; 1238 } 1239 1240 handle = zram_get_handle(zram, index); 1241 if (!handle) 1242 return; 1243 1244 zs_free(zram->mem_pool, handle); 1245 1246 atomic64_sub(zram_get_obj_size(zram, index), 1247 &zram->stats.compr_data_size); 1248 out: 1249 atomic64_dec(&zram->stats.pages_stored); 1250 zram_set_handle(zram, index, 0); 1251 zram_set_obj_size(zram, index, 0); 1252 WARN_ON_ONCE(zram->table[index].flags & 1253 ~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB)); 1254 } 1255 1256 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, 1257 struct bio *bio, bool partial_io) 1258 { 1259 struct zcomp_strm *zstrm; 1260 unsigned long handle; 1261 unsigned int size; 1262 void *src, *dst; 1263 int ret; 1264 1265 zram_slot_lock(zram, index); 1266 if (zram_test_flag(zram, index, ZRAM_WB)) { 1267 struct bio_vec bvec; 1268 1269 zram_slot_unlock(zram, index); 1270 1271 bvec.bv_page = page; 1272 bvec.bv_len = PAGE_SIZE; 1273 bvec.bv_offset = 0; 1274 return read_from_bdev(zram, &bvec, 1275 zram_get_element(zram, index), 1276 bio, partial_io); 1277 } 1278 1279 handle = zram_get_handle(zram, index); 1280 if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) { 1281 unsigned long value; 1282 void *mem; 1283 1284 value = handle ? zram_get_element(zram, index) : 0; 1285 mem = kmap_atomic(page); 1286 zram_fill_page(mem, PAGE_SIZE, value); 1287 kunmap_atomic(mem); 1288 zram_slot_unlock(zram, index); 1289 return 0; 1290 } 1291 1292 size = zram_get_obj_size(zram, index); 1293 1294 if (size != PAGE_SIZE) 1295 zstrm = zcomp_stream_get(zram->comp); 1296 1297 src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); 1298 if (size == PAGE_SIZE) { 1299 dst = kmap_atomic(page); 1300 memcpy(dst, src, PAGE_SIZE); 1301 kunmap_atomic(dst); 1302 ret = 0; 1303 } else { 1304 dst = kmap_atomic(page); 1305 ret = zcomp_decompress(zstrm, src, size, dst); 1306 kunmap_atomic(dst); 1307 zcomp_stream_put(zram->comp); 1308 } 1309 zs_unmap_object(zram->mem_pool, handle); 1310 zram_slot_unlock(zram, index); 1311 1312 /* Should NEVER happen. Return bio error if it does. */ 1313 if (WARN_ON(ret)) 1314 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 1315 1316 return ret; 1317 } 1318 1319 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 1320 u32 index, int offset, struct bio *bio) 1321 { 1322 int ret; 1323 struct page *page; 1324 1325 page = bvec->bv_page; 1326 if (is_partial_io(bvec)) { 1327 /* Use a temporary buffer to decompress the page */ 1328 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1329 if (!page) 1330 return -ENOMEM; 1331 } 1332 1333 ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); 1334 if (unlikely(ret)) 1335 goto out; 1336 1337 if (is_partial_io(bvec)) { 1338 void *src = kmap_atomic(page); 1339 1340 memcpy_to_bvec(bvec, src + offset); 1341 kunmap_atomic(src); 1342 } 1343 out: 1344 if (is_partial_io(bvec)) 1345 __free_page(page); 1346 1347 return ret; 1348 } 1349 1350 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1351 u32 index, struct bio *bio) 1352 { 1353 int ret = 0; 1354 unsigned long alloced_pages; 1355 unsigned long handle = -ENOMEM; 1356 unsigned int comp_len = 0; 1357 void *src, *dst, *mem; 1358 struct zcomp_strm *zstrm; 1359 struct page *page = bvec->bv_page; 1360 unsigned long element = 0; 1361 enum zram_pageflags flags = 0; 1362 1363 mem = kmap_atomic(page); 1364 if (page_same_filled(mem, &element)) { 1365 kunmap_atomic(mem); 1366 /* Free memory associated with this sector now. */ 1367 flags = ZRAM_SAME; 1368 atomic64_inc(&zram->stats.same_pages); 1369 goto out; 1370 } 1371 kunmap_atomic(mem); 1372 1373 compress_again: 1374 zstrm = zcomp_stream_get(zram->comp); 1375 src = kmap_atomic(page); 1376 ret = zcomp_compress(zstrm, src, &comp_len); 1377 kunmap_atomic(src); 1378 1379 if (unlikely(ret)) { 1380 zcomp_stream_put(zram->comp); 1381 pr_err("Compression failed! err=%d\n", ret); 1382 zs_free(zram->mem_pool, handle); 1383 return ret; 1384 } 1385 1386 if (comp_len >= huge_class_size) 1387 comp_len = PAGE_SIZE; 1388 /* 1389 * handle allocation has 2 paths: 1390 * a) fast path is executed with preemption disabled (for 1391 * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear, 1392 * since we can't sleep; 1393 * b) slow path enables preemption and attempts to allocate 1394 * the page with __GFP_DIRECT_RECLAIM bit set. we have to 1395 * put per-cpu compression stream and, thus, to re-do 1396 * the compression once handle is allocated. 1397 * 1398 * if we have a 'non-null' handle here then we are coming 1399 * from the slow path and handle has already been allocated. 1400 */ 1401 if (IS_ERR((void *)handle)) 1402 handle = zs_malloc(zram->mem_pool, comp_len, 1403 __GFP_KSWAPD_RECLAIM | 1404 __GFP_NOWARN | 1405 __GFP_HIGHMEM | 1406 __GFP_MOVABLE); 1407 if (IS_ERR((void *)handle)) { 1408 zcomp_stream_put(zram->comp); 1409 atomic64_inc(&zram->stats.writestall); 1410 handle = zs_malloc(zram->mem_pool, comp_len, 1411 GFP_NOIO | __GFP_HIGHMEM | 1412 __GFP_MOVABLE); 1413 if (!IS_ERR((void *)handle)) 1414 goto compress_again; 1415 return PTR_ERR((void *)handle); 1416 } 1417 1418 alloced_pages = zs_get_total_pages(zram->mem_pool); 1419 update_used_max(zram, alloced_pages); 1420 1421 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 1422 zcomp_stream_put(zram->comp); 1423 zs_free(zram->mem_pool, handle); 1424 return -ENOMEM; 1425 } 1426 1427 dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); 1428 1429 src = zstrm->buffer; 1430 if (comp_len == PAGE_SIZE) 1431 src = kmap_atomic(page); 1432 memcpy(dst, src, comp_len); 1433 if (comp_len == PAGE_SIZE) 1434 kunmap_atomic(src); 1435 1436 zcomp_stream_put(zram->comp); 1437 zs_unmap_object(zram->mem_pool, handle); 1438 atomic64_add(comp_len, &zram->stats.compr_data_size); 1439 out: 1440 /* 1441 * Free memory associated with this sector 1442 * before overwriting unused sectors. 1443 */ 1444 zram_slot_lock(zram, index); 1445 zram_free_page(zram, index); 1446 1447 if (comp_len == PAGE_SIZE) { 1448 zram_set_flag(zram, index, ZRAM_HUGE); 1449 atomic64_inc(&zram->stats.huge_pages); 1450 atomic64_inc(&zram->stats.huge_pages_since); 1451 } 1452 1453 if (flags) { 1454 zram_set_flag(zram, index, flags); 1455 zram_set_element(zram, index, element); 1456 } else { 1457 zram_set_handle(zram, index, handle); 1458 zram_set_obj_size(zram, index, comp_len); 1459 } 1460 zram_slot_unlock(zram, index); 1461 1462 /* Update stats */ 1463 atomic64_inc(&zram->stats.pages_stored); 1464 return ret; 1465 } 1466 1467 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, 1468 u32 index, int offset, struct bio *bio) 1469 { 1470 int ret; 1471 struct page *page = NULL; 1472 struct bio_vec vec; 1473 1474 vec = *bvec; 1475 if (is_partial_io(bvec)) { 1476 void *dst; 1477 /* 1478 * This is a partial IO. We need to read the full page 1479 * before to write the changes. 1480 */ 1481 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM); 1482 if (!page) 1483 return -ENOMEM; 1484 1485 ret = __zram_bvec_read(zram, page, index, bio, true); 1486 if (ret) 1487 goto out; 1488 1489 dst = kmap_atomic(page); 1490 memcpy_from_bvec(dst + offset, bvec); 1491 kunmap_atomic(dst); 1492 1493 vec.bv_page = page; 1494 vec.bv_len = PAGE_SIZE; 1495 vec.bv_offset = 0; 1496 } 1497 1498 ret = __zram_bvec_write(zram, &vec, index, bio); 1499 out: 1500 if (is_partial_io(bvec)) 1501 __free_page(page); 1502 return ret; 1503 } 1504 1505 /* 1506 * zram_bio_discard - handler on discard request 1507 * @index: physical block index in PAGE_SIZE units 1508 * @offset: byte offset within physical block 1509 */ 1510 static void zram_bio_discard(struct zram *zram, u32 index, 1511 int offset, struct bio *bio) 1512 { 1513 size_t n = bio->bi_iter.bi_size; 1514 1515 /* 1516 * zram manages data in physical block size units. Because logical block 1517 * size isn't identical with physical block size on some arch, we 1518 * could get a discard request pointing to a specific offset within a 1519 * certain physical block. Although we can handle this request by 1520 * reading that physiclal block and decompressing and partially zeroing 1521 * and re-compressing and then re-storing it, this isn't reasonable 1522 * because our intent with a discard request is to save memory. So 1523 * skipping this logical block is appropriate here. 1524 */ 1525 if (offset) { 1526 if (n <= (PAGE_SIZE - offset)) 1527 return; 1528 1529 n -= (PAGE_SIZE - offset); 1530 index++; 1531 } 1532 1533 while (n >= PAGE_SIZE) { 1534 zram_slot_lock(zram, index); 1535 zram_free_page(zram, index); 1536 zram_slot_unlock(zram, index); 1537 atomic64_inc(&zram->stats.notify_free); 1538 index++; 1539 n -= PAGE_SIZE; 1540 } 1541 } 1542 1543 /* 1544 * Returns errno if it has some problem. Otherwise return 0 or 1. 1545 * Returns 0 if IO request was done synchronously 1546 * Returns 1 if IO request was successfully submitted. 1547 */ 1548 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 1549 int offset, enum req_op op, struct bio *bio) 1550 { 1551 int ret; 1552 1553 if (!op_is_write(op)) { 1554 atomic64_inc(&zram->stats.num_reads); 1555 ret = zram_bvec_read(zram, bvec, index, offset, bio); 1556 flush_dcache_page(bvec->bv_page); 1557 } else { 1558 atomic64_inc(&zram->stats.num_writes); 1559 ret = zram_bvec_write(zram, bvec, index, offset, bio); 1560 } 1561 1562 zram_slot_lock(zram, index); 1563 zram_accessed(zram, index); 1564 zram_slot_unlock(zram, index); 1565 1566 if (unlikely(ret < 0)) { 1567 if (!op_is_write(op)) 1568 atomic64_inc(&zram->stats.failed_reads); 1569 else 1570 atomic64_inc(&zram->stats.failed_writes); 1571 } 1572 1573 return ret; 1574 } 1575 1576 static void __zram_make_request(struct zram *zram, struct bio *bio) 1577 { 1578 int offset; 1579 u32 index; 1580 struct bio_vec bvec; 1581 struct bvec_iter iter; 1582 unsigned long start_time; 1583 1584 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 1585 offset = (bio->bi_iter.bi_sector & 1586 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1587 1588 switch (bio_op(bio)) { 1589 case REQ_OP_DISCARD: 1590 case REQ_OP_WRITE_ZEROES: 1591 zram_bio_discard(zram, index, offset, bio); 1592 bio_endio(bio); 1593 return; 1594 default: 1595 break; 1596 } 1597 1598 start_time = bio_start_io_acct(bio); 1599 bio_for_each_segment(bvec, bio, iter) { 1600 struct bio_vec bv = bvec; 1601 unsigned int unwritten = bvec.bv_len; 1602 1603 do { 1604 bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, 1605 unwritten); 1606 if (zram_bvec_rw(zram, &bv, index, offset, 1607 bio_op(bio), bio) < 0) { 1608 bio->bi_status = BLK_STS_IOERR; 1609 break; 1610 } 1611 1612 bv.bv_offset += bv.bv_len; 1613 unwritten -= bv.bv_len; 1614 1615 update_position(&index, &offset, &bv); 1616 } while (unwritten); 1617 } 1618 bio_end_io_acct(bio, start_time); 1619 bio_endio(bio); 1620 } 1621 1622 /* 1623 * Handler function for all zram I/O requests. 1624 */ 1625 static void zram_submit_bio(struct bio *bio) 1626 { 1627 struct zram *zram = bio->bi_bdev->bd_disk->private_data; 1628 1629 if (!valid_io_request(zram, bio->bi_iter.bi_sector, 1630 bio->bi_iter.bi_size)) { 1631 atomic64_inc(&zram->stats.invalid_io); 1632 bio_io_error(bio); 1633 return; 1634 } 1635 1636 __zram_make_request(zram, bio); 1637 } 1638 1639 static void zram_slot_free_notify(struct block_device *bdev, 1640 unsigned long index) 1641 { 1642 struct zram *zram; 1643 1644 zram = bdev->bd_disk->private_data; 1645 1646 atomic64_inc(&zram->stats.notify_free); 1647 if (!zram_slot_trylock(zram, index)) { 1648 atomic64_inc(&zram->stats.miss_free); 1649 return; 1650 } 1651 1652 zram_free_page(zram, index); 1653 zram_slot_unlock(zram, index); 1654 } 1655 1656 static int zram_rw_page(struct block_device *bdev, sector_t sector, 1657 struct page *page, enum req_op op) 1658 { 1659 int offset, ret; 1660 u32 index; 1661 struct zram *zram; 1662 struct bio_vec bv; 1663 unsigned long start_time; 1664 1665 if (PageTransHuge(page)) 1666 return -ENOTSUPP; 1667 zram = bdev->bd_disk->private_data; 1668 1669 if (!valid_io_request(zram, sector, PAGE_SIZE)) { 1670 atomic64_inc(&zram->stats.invalid_io); 1671 ret = -EINVAL; 1672 goto out; 1673 } 1674 1675 index = sector >> SECTORS_PER_PAGE_SHIFT; 1676 offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 1677 1678 bv.bv_page = page; 1679 bv.bv_len = PAGE_SIZE; 1680 bv.bv_offset = 0; 1681 1682 start_time = bdev_start_io_acct(bdev->bd_disk->part0, 1683 SECTORS_PER_PAGE, op, jiffies); 1684 ret = zram_bvec_rw(zram, &bv, index, offset, op, NULL); 1685 bdev_end_io_acct(bdev->bd_disk->part0, op, start_time); 1686 out: 1687 /* 1688 * If I/O fails, just return error(ie, non-zero) without 1689 * calling page_endio. 1690 * It causes resubmit the I/O with bio request by upper functions 1691 * of rw_page(e.g., swap_readpage, __swap_writepage) and 1692 * bio->bi_end_io does things to handle the error 1693 * (e.g., SetPageError, set_page_dirty and extra works). 1694 */ 1695 if (unlikely(ret < 0)) 1696 return ret; 1697 1698 switch (ret) { 1699 case 0: 1700 page_endio(page, op_is_write(op), 0); 1701 break; 1702 case 1: 1703 ret = 0; 1704 break; 1705 default: 1706 WARN_ON(1); 1707 } 1708 return ret; 1709 } 1710 1711 static void zram_reset_device(struct zram *zram) 1712 { 1713 struct zcomp *comp; 1714 u64 disksize; 1715 1716 down_write(&zram->init_lock); 1717 1718 zram->limit_pages = 0; 1719 1720 if (!init_done(zram)) { 1721 up_write(&zram->init_lock); 1722 return; 1723 } 1724 1725 comp = zram->comp; 1726 disksize = zram->disksize; 1727 zram->disksize = 0; 1728 1729 set_capacity_and_notify(zram->disk, 0); 1730 part_stat_set_all(zram->disk->part0, 0); 1731 1732 /* I/O operation under all of CPU are done so let's free */ 1733 zram_meta_free(zram, disksize); 1734 memset(&zram->stats, 0, sizeof(zram->stats)); 1735 zcomp_destroy(comp); 1736 reset_bdev(zram); 1737 1738 up_write(&zram->init_lock); 1739 } 1740 1741 static ssize_t disksize_store(struct device *dev, 1742 struct device_attribute *attr, const char *buf, size_t len) 1743 { 1744 u64 disksize; 1745 struct zcomp *comp; 1746 struct zram *zram = dev_to_zram(dev); 1747 int err; 1748 1749 disksize = memparse(buf, NULL); 1750 if (!disksize) 1751 return -EINVAL; 1752 1753 down_write(&zram->init_lock); 1754 if (init_done(zram)) { 1755 pr_info("Cannot change disksize for initialized device\n"); 1756 err = -EBUSY; 1757 goto out_unlock; 1758 } 1759 1760 disksize = PAGE_ALIGN(disksize); 1761 if (!zram_meta_alloc(zram, disksize)) { 1762 err = -ENOMEM; 1763 goto out_unlock; 1764 } 1765 1766 comp = zcomp_create(zram->compressor); 1767 if (IS_ERR(comp)) { 1768 pr_err("Cannot initialise %s compressing backend\n", 1769 zram->compressor); 1770 err = PTR_ERR(comp); 1771 goto out_free_meta; 1772 } 1773 1774 zram->comp = comp; 1775 zram->disksize = disksize; 1776 set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 1777 up_write(&zram->init_lock); 1778 1779 return len; 1780 1781 out_free_meta: 1782 zram_meta_free(zram, disksize); 1783 out_unlock: 1784 up_write(&zram->init_lock); 1785 return err; 1786 } 1787 1788 static ssize_t reset_store(struct device *dev, 1789 struct device_attribute *attr, const char *buf, size_t len) 1790 { 1791 int ret; 1792 unsigned short do_reset; 1793 struct zram *zram; 1794 struct gendisk *disk; 1795 1796 ret = kstrtou16(buf, 10, &do_reset); 1797 if (ret) 1798 return ret; 1799 1800 if (!do_reset) 1801 return -EINVAL; 1802 1803 zram = dev_to_zram(dev); 1804 disk = zram->disk; 1805 1806 mutex_lock(&disk->open_mutex); 1807 /* Do not reset an active device or claimed device */ 1808 if (disk_openers(disk) || zram->claim) { 1809 mutex_unlock(&disk->open_mutex); 1810 return -EBUSY; 1811 } 1812 1813 /* From now on, anyone can't open /dev/zram[0-9] */ 1814 zram->claim = true; 1815 mutex_unlock(&disk->open_mutex); 1816 1817 /* Make sure all the pending I/O are finished */ 1818 sync_blockdev(disk->part0); 1819 zram_reset_device(zram); 1820 1821 mutex_lock(&disk->open_mutex); 1822 zram->claim = false; 1823 mutex_unlock(&disk->open_mutex); 1824 1825 return len; 1826 } 1827 1828 static int zram_open(struct block_device *bdev, fmode_t mode) 1829 { 1830 int ret = 0; 1831 struct zram *zram; 1832 1833 WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex)); 1834 1835 zram = bdev->bd_disk->private_data; 1836 /* zram was claimed to reset so open request fails */ 1837 if (zram->claim) 1838 ret = -EBUSY; 1839 1840 return ret; 1841 } 1842 1843 static const struct block_device_operations zram_devops = { 1844 .open = zram_open, 1845 .submit_bio = zram_submit_bio, 1846 .swap_slot_free_notify = zram_slot_free_notify, 1847 .rw_page = zram_rw_page, 1848 .owner = THIS_MODULE 1849 }; 1850 1851 #ifdef CONFIG_ZRAM_WRITEBACK 1852 static const struct block_device_operations zram_wb_devops = { 1853 .open = zram_open, 1854 .submit_bio = zram_submit_bio, 1855 .swap_slot_free_notify = zram_slot_free_notify, 1856 .owner = THIS_MODULE 1857 }; 1858 #endif 1859 1860 static DEVICE_ATTR_WO(compact); 1861 static DEVICE_ATTR_RW(disksize); 1862 static DEVICE_ATTR_RO(initstate); 1863 static DEVICE_ATTR_WO(reset); 1864 static DEVICE_ATTR_WO(mem_limit); 1865 static DEVICE_ATTR_WO(mem_used_max); 1866 static DEVICE_ATTR_WO(idle); 1867 static DEVICE_ATTR_RW(max_comp_streams); 1868 static DEVICE_ATTR_RW(comp_algorithm); 1869 #ifdef CONFIG_ZRAM_WRITEBACK 1870 static DEVICE_ATTR_RW(backing_dev); 1871 static DEVICE_ATTR_WO(writeback); 1872 static DEVICE_ATTR_RW(writeback_limit); 1873 static DEVICE_ATTR_RW(writeback_limit_enable); 1874 #endif 1875 1876 static struct attribute *zram_disk_attrs[] = { 1877 &dev_attr_disksize.attr, 1878 &dev_attr_initstate.attr, 1879 &dev_attr_reset.attr, 1880 &dev_attr_compact.attr, 1881 &dev_attr_mem_limit.attr, 1882 &dev_attr_mem_used_max.attr, 1883 &dev_attr_idle.attr, 1884 &dev_attr_max_comp_streams.attr, 1885 &dev_attr_comp_algorithm.attr, 1886 #ifdef CONFIG_ZRAM_WRITEBACK 1887 &dev_attr_backing_dev.attr, 1888 &dev_attr_writeback.attr, 1889 &dev_attr_writeback_limit.attr, 1890 &dev_attr_writeback_limit_enable.attr, 1891 #endif 1892 &dev_attr_io_stat.attr, 1893 &dev_attr_mm_stat.attr, 1894 #ifdef CONFIG_ZRAM_WRITEBACK 1895 &dev_attr_bd_stat.attr, 1896 #endif 1897 &dev_attr_debug_stat.attr, 1898 NULL, 1899 }; 1900 1901 ATTRIBUTE_GROUPS(zram_disk); 1902 1903 /* 1904 * Allocate and initialize new zram device. the function returns 1905 * '>= 0' device_id upon success, and negative value otherwise. 1906 */ 1907 static int zram_add(void) 1908 { 1909 struct zram *zram; 1910 int ret, device_id; 1911 1912 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1913 if (!zram) 1914 return -ENOMEM; 1915 1916 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1917 if (ret < 0) 1918 goto out_free_dev; 1919 device_id = ret; 1920 1921 init_rwsem(&zram->init_lock); 1922 #ifdef CONFIG_ZRAM_WRITEBACK 1923 spin_lock_init(&zram->wb_limit_lock); 1924 #endif 1925 1926 /* gendisk structure */ 1927 zram->disk = blk_alloc_disk(NUMA_NO_NODE); 1928 if (!zram->disk) { 1929 pr_err("Error allocating disk structure for device %d\n", 1930 device_id); 1931 ret = -ENOMEM; 1932 goto out_free_idr; 1933 } 1934 1935 zram->disk->major = zram_major; 1936 zram->disk->first_minor = device_id; 1937 zram->disk->minors = 1; 1938 zram->disk->flags |= GENHD_FL_NO_PART; 1939 zram->disk->fops = &zram_devops; 1940 zram->disk->private_data = zram; 1941 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1942 1943 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1944 set_capacity(zram->disk, 0); 1945 /* zram devices sort of resembles non-rotational disks */ 1946 blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue); 1947 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1948 1949 /* 1950 * To ensure that we always get PAGE_SIZE aligned 1951 * and n*PAGE_SIZED sized I/O requests. 1952 */ 1953 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1954 blk_queue_logical_block_size(zram->disk->queue, 1955 ZRAM_LOGICAL_BLOCK_SIZE); 1956 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1957 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1958 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1959 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1960 1961 /* 1962 * zram_bio_discard() will clear all logical blocks if logical block 1963 * size is identical with physical block size(PAGE_SIZE). But if it is 1964 * different, we will skip discarding some parts of logical blocks in 1965 * the part of the request range which isn't aligned to physical block 1966 * size. So we can't ensure that all discarded logical blocks are 1967 * zeroed. 1968 */ 1969 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1970 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX); 1971 1972 blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue); 1973 ret = device_add_disk(NULL, zram->disk, zram_disk_groups); 1974 if (ret) 1975 goto out_cleanup_disk; 1976 1977 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1978 1979 zram_debugfs_register(zram); 1980 pr_info("Added device: %s\n", zram->disk->disk_name); 1981 return device_id; 1982 1983 out_cleanup_disk: 1984 put_disk(zram->disk); 1985 out_free_idr: 1986 idr_remove(&zram_index_idr, device_id); 1987 out_free_dev: 1988 kfree(zram); 1989 return ret; 1990 } 1991 1992 static int zram_remove(struct zram *zram) 1993 { 1994 bool claimed; 1995 1996 mutex_lock(&zram->disk->open_mutex); 1997 if (disk_openers(zram->disk)) { 1998 mutex_unlock(&zram->disk->open_mutex); 1999 return -EBUSY; 2000 } 2001 2002 claimed = zram->claim; 2003 if (!claimed) 2004 zram->claim = true; 2005 mutex_unlock(&zram->disk->open_mutex); 2006 2007 zram_debugfs_unregister(zram); 2008 2009 if (claimed) { 2010 /* 2011 * If we were claimed by reset_store(), del_gendisk() will 2012 * wait until reset_store() is done, so nothing need to do. 2013 */ 2014 ; 2015 } else { 2016 /* Make sure all the pending I/O are finished */ 2017 sync_blockdev(zram->disk->part0); 2018 zram_reset_device(zram); 2019 } 2020 2021 pr_info("Removed device: %s\n", zram->disk->disk_name); 2022 2023 del_gendisk(zram->disk); 2024 2025 /* del_gendisk drains pending reset_store */ 2026 WARN_ON_ONCE(claimed && zram->claim); 2027 2028 /* 2029 * disksize_store() may be called in between zram_reset_device() 2030 * and del_gendisk(), so run the last reset to avoid leaking 2031 * anything allocated with disksize_store() 2032 */ 2033 zram_reset_device(zram); 2034 2035 put_disk(zram->disk); 2036 kfree(zram); 2037 return 0; 2038 } 2039 2040 /* zram-control sysfs attributes */ 2041 2042 /* 2043 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a 2044 * sense that reading from this file does alter the state of your system -- it 2045 * creates a new un-initialized zram device and returns back this device's 2046 * device_id (or an error code if it fails to create a new device). 2047 */ 2048 static ssize_t hot_add_show(struct class *class, 2049 struct class_attribute *attr, 2050 char *buf) 2051 { 2052 int ret; 2053 2054 mutex_lock(&zram_index_mutex); 2055 ret = zram_add(); 2056 mutex_unlock(&zram_index_mutex); 2057 2058 if (ret < 0) 2059 return ret; 2060 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 2061 } 2062 static struct class_attribute class_attr_hot_add = 2063 __ATTR(hot_add, 0400, hot_add_show, NULL); 2064 2065 static ssize_t hot_remove_store(struct class *class, 2066 struct class_attribute *attr, 2067 const char *buf, 2068 size_t count) 2069 { 2070 struct zram *zram; 2071 int ret, dev_id; 2072 2073 /* dev_id is gendisk->first_minor, which is `int' */ 2074 ret = kstrtoint(buf, 10, &dev_id); 2075 if (ret) 2076 return ret; 2077 if (dev_id < 0) 2078 return -EINVAL; 2079 2080 mutex_lock(&zram_index_mutex); 2081 2082 zram = idr_find(&zram_index_idr, dev_id); 2083 if (zram) { 2084 ret = zram_remove(zram); 2085 if (!ret) 2086 idr_remove(&zram_index_idr, dev_id); 2087 } else { 2088 ret = -ENODEV; 2089 } 2090 2091 mutex_unlock(&zram_index_mutex); 2092 return ret ? ret : count; 2093 } 2094 static CLASS_ATTR_WO(hot_remove); 2095 2096 static struct attribute *zram_control_class_attrs[] = { 2097 &class_attr_hot_add.attr, 2098 &class_attr_hot_remove.attr, 2099 NULL, 2100 }; 2101 ATTRIBUTE_GROUPS(zram_control_class); 2102 2103 static struct class zram_control_class = { 2104 .name = "zram-control", 2105 .owner = THIS_MODULE, 2106 .class_groups = zram_control_class_groups, 2107 }; 2108 2109 static int zram_remove_cb(int id, void *ptr, void *data) 2110 { 2111 WARN_ON_ONCE(zram_remove(ptr)); 2112 return 0; 2113 } 2114 2115 static void destroy_devices(void) 2116 { 2117 class_unregister(&zram_control_class); 2118 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 2119 zram_debugfs_destroy(); 2120 idr_destroy(&zram_index_idr); 2121 unregister_blkdev(zram_major, "zram"); 2122 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2123 } 2124 2125 static int __init zram_init(void) 2126 { 2127 int ret; 2128 2129 ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", 2130 zcomp_cpu_up_prepare, zcomp_cpu_dead); 2131 if (ret < 0) 2132 return ret; 2133 2134 ret = class_register(&zram_control_class); 2135 if (ret) { 2136 pr_err("Unable to register zram-control class\n"); 2137 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2138 return ret; 2139 } 2140 2141 zram_debugfs_create(); 2142 zram_major = register_blkdev(0, "zram"); 2143 if (zram_major <= 0) { 2144 pr_err("Unable to get major number\n"); 2145 class_unregister(&zram_control_class); 2146 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); 2147 return -EBUSY; 2148 } 2149 2150 while (num_devices != 0) { 2151 mutex_lock(&zram_index_mutex); 2152 ret = zram_add(); 2153 mutex_unlock(&zram_index_mutex); 2154 if (ret < 0) 2155 goto out_error; 2156 num_devices--; 2157 } 2158 2159 return 0; 2160 2161 out_error: 2162 destroy_devices(); 2163 return ret; 2164 } 2165 2166 static void __exit zram_exit(void) 2167 { 2168 destroy_devices(); 2169 } 2170 2171 module_init(zram_init); 2172 module_exit(zram_exit); 2173 2174 module_param(num_devices, uint, 0); 2175 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 2176 2177 MODULE_LICENSE("Dual BSD/GPL"); 2178 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 2179 MODULE_DESCRIPTION("Compressed RAM Block Device"); 2180