/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static void zram_free_page(struct zram *zram, size_t index);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static void zram_revalidate_disk(struct zram *zram)
{
	revalidate_disk(zram->disk);
	/* revalidate_disk() resets BDI_CAP_STABLE_WRITES, so set it again */
	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_STABLE_WRITES;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static bool zram_wb_enabled(struct zram *zram)
{
	return zram->backing_dev;
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram_wb_enabled(zram))
		return;

	bdev = zram->bdev;
	if (zram->old_block_size)
		set_blocksize(bdev, zram->old_block_size);
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the IO */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->old_block_size = 0;
	zram->bdev = NULL;

	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct file *file = zram->backing_dev;
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	if (!zram_wb_enabled(zram)) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz, old_block_size = 0;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, len);

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Only block devices are supported at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = bdgrab(I_BDEV(inode));
	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (err < 0)
		goto out;

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	old_block_size = block_size(bdev);
	err = set_blocksize(bdev, PAGE_SIZE);
	if (err)
		goto out;

	reset_bdev(zram);
	spin_lock_init(&zram->bitmap_lock);

	zram->old_block_size = old_block_size;
	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	if (bitmap)
		kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long get_entry_bdev(struct zram *zram)
{
	unsigned long entry;

	spin_lock(&zram->bitmap_lock);
	/* skip bit 0 to avoid confusion with a zram handle of 0 */
	entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
	if (entry == zram->nr_pages) {
		spin_unlock(&zram->bitmap_lock);
		return 0;
	}

	set_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);

	return entry;
}

static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
	int was_set;

	spin_lock(&zram->bitmap_lock);
	was_set = test_and_clear_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);
	WARN_ON_ONCE(!was_set);
}

void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct bio_vec bvec;
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &bvec, entry, bio);
}

/*
 * The block layer wants one ->make_request_fn to be active at a time,
 * so if we use chained IO with the parent IO in the same context,
 * it's a deadlock. To avoid it, do the IO from a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}

static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	struct bio *bio;
	unsigned long entry;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	entry = get_entry_bdev(zram);
	if (!entry) {
		bio_put(bio);
		return -ENOSPC;
	}

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
					bvec->bv_offset)) {
		bio_put(bio);
		put_entry_bdev(zram, entry);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	*pentry = entry;

	return 0;
}

static void zram_wb_clear(struct zram *zram, u32 index)
{
	unsigned long entry;

	zram_clear_flag(zram, index, ZRAM_WB);
	entry = zram_get_element(zram, index);
	zram_set_element(zram, index, 0);
	put_entry_bdev(zram, entry);
}

#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {}
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	return -EIO;
}

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif


/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2-year
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(num_pages * sizeof(*zram->table));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock to indicate that
 * the index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
		zram_wb_clear(zram, index);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	/*
	 * No memory is allocated for same-element-filled pages.
	 * Simply clear the same-page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_wb_enabled(zram)) {
		zram_slot_lock(zram, index);
		if (zram_test_flag(zram, index, ZRAM_WB)) {
			struct bio_vec bvec;

			zram_slot_unlock(zram, index);

			bvec.bv_page = page;
			bvec.bv_len = PAGE_SIZE;
			bvec.bv_offset = 0;
			return read_from_bdev(zram, &bvec,
					zram_get_element(zram, index),
					bio, partial_io);
		}
		zram_slot_unlock(zram, index);
	}

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
		unsigned long value;
		void *mem;

		value = handle ? zram_get_element(zram, index) : 0;
		mem = kmap_atomic(page);
		zram_fill_page(mem, PAGE_SIZE, value);
		kunmap_atomic(mem);
		zram_slot_unlock(zram, index);
		return 0;
	}

	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = 0;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;
	bool allow_wb = true;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (unlikely(comp_len > max_zpage_size)) {
		if (zram_wb_enabled(zram) && allow_wb) {
			zcomp_stream_put(zram->comp);
			ret = write_to_bdev(zram, bvec, index, bio, &element);
			if (!ret) {
				flags = ZRAM_WB;
				ret = 1;
				goto out;
			}
			allow_wb = false;
			goto compress_again;
		}
		comp_len = PAGE_SIZE;
	}

	/*
	 * The handle allocation has two paths:
	 * a) the fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has the __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) the slow path enables preemption and attempts to allocate
	 *  the page with the __GFP_DIRECT_RECLAIM bit set. We have to
	 *  put the per-cpu compression stream and, thus, re-do
	 *  the compression once the handle is allocated.
	 *
	 * If we have a 'non-null' handle here then we are coming
	 * from the slow path and the handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

/*
 * zram_bio_discard - handler for discard requests
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because the
	 * logical block size isn't identical to the physical block size
	 * on some architectures, we could get a discard request pointing
	 * to a specific offset within a certain physical block.
	 * Although we could handle such a request by reading that physical
	 * block, decompressing, partially zeroing, re-compressing and then
	 * re-storing it, this isn't reasonable because our intent with a
	 * discard request is to save memory.  So skipping this logical
	 * block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

/*
 * Returns an errno if it has some problem. Otherwise returns 0 or 1.
 * Returns 0 if the IO request was completed synchronously.
 * Returns 1 if the IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, bool is_write, struct bio *bio)
{
	unsigned long start_time = jiffies;
	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
	struct request_queue *q = zram->disk->queue;
	int ret;

	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!is_write) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);

	if (unlikely(ret < 0)) {
		if (!is_write)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					op_is_write(bio_op(bio)), bio) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
			struct page *page, bool is_write)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
	/*
	 * If I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio.
	 * This causes the upper callers of rw_page (e.g. swap_readpage,
	 * __swap_writepage) to resubmit the I/O as a bio request, and
	 * bio->bi_end_io then handles the error
	 * (e.g. SetPageError, set_page_dirty and other work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, is_write, 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_revalidate_disk(zram);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	zram_revalidate_disk(zram);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed for reset, so the open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity is set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE-aligned
	 * and n*PAGE_SIZE-sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical to the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to the physical block size.  So we can't ensure that all
	 * discarded logical blocks are zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	blk_cleanup_queue(zram->disk->queue);
	del_gendisk(zram->disk);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
 * the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns that
 * device's device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
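
/*
 * Illustrative usage sketch (not part of the driver): the devices created
 * by this module are configured entirely through the sysfs attributes
 * defined above. A minimal swap setup might look like the following shell
 * session; this assumes the module is loadable and that the "lzo" backend
 * (the default_compressor above) is available, and the exact set of
 * algorithms accepted by comp_algorithm depends on the kernel config:
 *
 *	modprobe zram num_devices=1
 *	echo lzo > /sys/block/zram0/comp_algorithm
 *	echo 1G > /sys/block/zram0/disksize
 *	mkswap /dev/zram0
 *	swapon /dev/zram0
 *
 * Writing 1 to /sys/block/zram0/reset tears the device back down, and
 * /sys/class/zram-control/hot_add and hot_remove manage additional
 * devices, as implemented above.
 */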