/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static void zram_free_page(struct zram *zram, size_t index);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static void zram_revalidate_disk(struct zram *zram)
{
	revalidate_disk(zram->disk);
	/* revalidate_disk resets BDI_CAP_STABLE_WRITES, so set it again */
	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_STABLE_WRITES;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static inline void zram_fill_page(void *ptr, unsigned long len,
					unsigned long value)
{
	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
	memset_l(ptr, value, len / sizeof(unsigned long));
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

#ifdef CONFIG_ZRAM_WRITEBACK
static bool zram_wb_enabled(struct zram *zram)
{
	return zram->backing_dev;
}

static void reset_bdev(struct zram *zram)
{
	struct block_device *bdev;

	if (!zram_wb_enabled(zram))
		return;

	bdev = zram->bdev;
	if (zram->old_block_size)
		set_blocksize(bdev, zram->old_block_size);
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
	/* hope filp_close flushes all of the I/O */
	filp_close(zram->backing_dev, NULL);
	zram->backing_dev = NULL;
	zram->old_block_size = 0;
	zram->bdev = NULL;

	kvfree(zram->bitmap);
	zram->bitmap = NULL;
}

static ssize_t backing_dev_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct file *file = zram->backing_dev;
	char *p;
	ssize_t ret;

	down_read(&zram->init_lock);
	if (!zram_wb_enabled(zram)) {
		memcpy(buf, "none\n", 5);
		up_read(&zram->init_lock);
		return 5;
	}

	p = file_path(file, buf, PAGE_SIZE - 1);
	if (IS_ERR(p)) {
		ret = PTR_ERR(p);
		goto out;
	}

	ret = strlen(p);
	memmove(buf, p, ret);
	buf[ret++] = '\n';
out:
	up_read(&zram->init_lock);
	return ret;
}

static ssize_t backing_dev_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	char *file_name;
	struct file *backing_dev = NULL;
	struct inode *inode;
	struct address_space *mapping;
	unsigned int bitmap_sz, old_block_size = 0;
	unsigned long nr_pages, *bitmap = NULL;
	struct block_device *bdev = NULL;
	int err;
	struct zram *zram = dev_to_zram(dev);

	file_name = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!file_name)
		return -ENOMEM;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Can't setup backing device for initialized device\n");
		err = -EBUSY;
		goto out;
	}

	strlcpy(file_name, buf, len);

	backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
	if (IS_ERR(backing_dev)) {
		err = PTR_ERR(backing_dev);
		backing_dev = NULL;
		goto out;
	}

	mapping = backing_dev->f_mapping;
	inode = mapping->host;

	/* Support only block devices at the moment */
	if (!S_ISBLK(inode->i_mode)) {
		err = -ENOTBLK;
		goto out;
	}

	bdev = bdgrab(I_BDEV(inode));
	err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
	if (err < 0)
		goto out;

	nr_pages = i_size_read(inode) >> PAGE_SHIFT;
	bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
	bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
	if (!bitmap) {
		err = -ENOMEM;
		goto out;
	}

	old_block_size = block_size(bdev);
	err = set_blocksize(bdev, PAGE_SIZE);
	if (err)
		goto out;

	reset_bdev(zram);
	spin_lock_init(&zram->bitmap_lock);

	zram->old_block_size = old_block_size;
	zram->bdev = bdev;
	zram->backing_dev = backing_dev;
	zram->bitmap = bitmap;
	zram->nr_pages = nr_pages;
	up_write(&zram->init_lock);

	pr_info("setup backing device %s\n", file_name);
	kfree(file_name);

	return len;
out:
	if (bitmap)
		kvfree(bitmap);

	if (bdev)
		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);

	if (backing_dev)
		filp_close(backing_dev, NULL);

	up_write(&zram->init_lock);

	kfree(file_name);

	return err;
}

static unsigned long get_entry_bdev(struct zram *zram)
{
	unsigned long entry;

	spin_lock(&zram->bitmap_lock);
	/* skip bit 0 so a valid entry is never confused with a zero handle */
	entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
	if (entry == zram->nr_pages) {
		spin_unlock(&zram->bitmap_lock);
		return 0;
	}

	set_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);

	return entry;
}

static void put_entry_bdev(struct zram *zram, unsigned long entry)
{
	int was_set;

	spin_lock(&zram->bitmap_lock);
	was_set = test_and_clear_bit(entry, zram->bitmap);
	spin_unlock(&zram->bitmap_lock);
	WARN_ON_ONCE(!was_set);
}

static void zram_page_end_io(struct bio *bio)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

	page_endio(page, op_is_write(bio_op(bio)),
			blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

/*
 * Returns 1 if the submission is successful.
 */
static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent)
{
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
		bio_put(bio);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_READ;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	return 1;
}

struct zram_work {
	struct work_struct work;
	struct zram *zram;
	unsigned long entry;
	struct bio *bio;
};

#if PAGE_SIZE != 4096
static void zram_sync_read(struct work_struct *work)
{
	struct bio_vec bvec;
	struct zram_work *zw = container_of(work, struct zram_work, work);
	struct zram *zram = zw->zram;
	unsigned long entry = zw->entry;
	struct bio *bio = zw->bio;

	read_from_bdev_async(zram, &bvec, entry, bio);
}

/*
 * The block layer wants one ->make_request_fn to be active at a time,
 * so if we chain the IO to the parent IO in the same context, it is
 * a deadlock. To avoid that, do the read from a worker thread context.
 */
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	struct zram_work work;

	work.zram = zram;
	work.entry = entry;
	work.bio = bio;

	INIT_WORK_ONSTACK(&work.work, zram_sync_read);
	queue_work(system_unbound_wq, &work.work);
	flush_work(&work.work);
	destroy_work_on_stack(&work.work);

	return 1;
}
#else
static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
				unsigned long entry, struct bio *bio)
{
	WARN_ON(1);
	return -EIO;
}
#endif

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	if (sync)
		return read_from_bdev_sync(zram, bvec, entry, parent);
	else
		return read_from_bdev_async(zram, bvec, entry, parent);
}

static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	struct bio *bio;
	unsigned long entry;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;

	entry = get_entry_bdev(zram);
	if (!entry) {
		bio_put(bio);
		return -ENOSPC;
	}

	bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
	bio_set_dev(bio, zram->bdev);
	if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
					bvec->bv_offset)) {
		bio_put(bio);
		put_entry_bdev(zram, entry);
		return -EIO;
	}

	if (!parent) {
		bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
		bio->bi_end_io = zram_page_end_io;
	} else {
		bio->bi_opf = parent->bi_opf;
		bio_chain(bio, parent);
	}

	submit_bio(bio);
	*pentry = entry;

	return 0;
}

static void zram_wb_clear(struct zram *zram, u32 index)
{
	unsigned long entry;

	zram_clear_flag(zram, index, ZRAM_WB);
	entry = zram_get_element(zram, index);
	zram_set_element(zram, index, 0);
	put_entry_bdev(zram, entry);
}

#else
static bool zram_wb_enabled(struct zram *zram) { return false; }
static inline void reset_bdev(struct zram *zram) {};
static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
					u32 index, struct bio *parent,
					unsigned long *pentry)
{
	return -EIO;
}

static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
			unsigned long entry, struct bio *parent, bool sync)
{
	return -EIO;
}
static void zram_wb_clear(struct zram *zram, u32 index) {}
#endif

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams needs to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}

static bool zram_same_page_read(struct zram *zram, u32 index,
				struct page *page,
				unsigned int offset, unsigned int len)
{
	zram_slot_lock(zram, index);
	if (unlikely(!zram_get_handle(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME))) {
		void *mem;

		zram_slot_unlock(zram, index);
		mem = kmap_atomic(page);
		zram_fill_page(mem + offset, len,
				zram_get_element(zram, index));
		kunmap_atomic(mem);
		return true;
	}
	zram_slot_unlock(zram, index);

	return false;
}

static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(num_pages * sizeof(*zram->table));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table index entry's bit_spinlock to indicate that
 * the index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle;

	if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
		zram_wb_clear(zram, index);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	handle = zram_get_handle(zram, index);
	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
				struct bio *bio, bool partial_io)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_wb_enabled(zram)) {
		zram_slot_lock(zram, index);
		if (zram_test_flag(zram, index, ZRAM_WB)) {
			struct bio_vec bvec;

			zram_slot_unlock(zram, index);

			bvec.bv_page = page;
			bvec.bv_len = PAGE_SIZE;
			bvec.bv_offset = 0;
			return read_from_bdev(zram, &bvec,
					zram_get_element(zram, index),
					bio, partial_io);
		}
		zram_slot_unlock(zram, index);
	}

	if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
		return 0;

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, struct bio *bio)
{
	int ret = 0;
	unsigned long alloced_pages;
	unsigned long handle = 0;
	unsigned int comp_len = 0;
	void *src, *dst, *mem;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;
	unsigned long element = 0;
	enum zram_pageflags flags = 0;
	bool allow_wb = true;

	mem = kmap_atomic(page);
	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		flags = ZRAM_SAME;
		atomic64_inc(&zram->stats.same_pages);
		goto out;
	}
	kunmap_atomic(mem);

compress_again:
	zstrm = zcomp_stream_get(zram->comp);
	src = kmap_atomic(page);
	ret = zcomp_compress(zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		zcomp_stream_put(zram->comp);
		pr_err("Compression failed! err=%d\n", ret);
		zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (unlikely(comp_len > max_zpage_size)) {
		if (zram_wb_enabled(zram) && allow_wb) {
			zcomp_stream_put(zram->comp);
			ret = write_to_bdev(zram, bvec, index, bio, &element);
			if (!ret) {
				flags = ZRAM_WB;
				ret = 1;
				goto out;
			}
			allow_wb = false;
			goto compress_again;
		}
		comp_len = PAGE_SIZE;
	}

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *    per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *    since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *    the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *    put per-cpu compression stream and, thus, to re-do
	 *    the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zcomp_stream_put(zram->comp);
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);
	atomic64_add(comp_len, &zram->stats.compr_data_size);
out:
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);

	if (flags) {
		zram_set_flag(zram, index, flags);
		zram_set_element(zram, index, element);
	} else {
		zram_set_handle(zram, index, handle);
		zram_set_obj_size(zram, index, comp_len);
	}
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_inc(&zram->stats.pages_stored);
	return ret;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = __zram_bvec_read(zram, page, index, bio, true);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index, bio);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical
	 * block size isn't identical to the physical block size on some
	 * architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we can
	 * handle this request by reading that physical block, decompressing,
	 * partially zeroing, re-compressing and then re-storing it, this
	 * isn't reasonable because our intent with a discard request is to
	 * save memory. So skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

/*
 * Returns a negative errno if there is a problem. Otherwise returns 0 or 1.
 * Returns 0 if the IO request was completed synchronously.
 * Returns 1 if the IO request was successfully submitted.
 */
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, bool is_write, struct bio *bio)
{
	unsigned long start_time = jiffies;
	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
	struct request_queue *q = zram->disk->queue;
	int ret;

	generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!is_write) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset, bio);
	}

	generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);

	if (unlikely(ret < 0)) {
		if (!is_write)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
			(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					op_is_write(bio_op(bio)), bio) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
			struct page *page, bool is_write)
{
	int offset, ret;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		ret = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
out:
	/*
	 * If I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio.
	 * The upper layers of rw_page (e.g. swap_readpage, __swap_writepage)
	 * will then resubmit the I/O as a bio request, and bio->bi_end_io
	 * handles the error (e.g. SetPageError, set_page_dirty and extra
	 * work).
	 */
	if (unlikely(ret < 0))
		return ret;

	switch (ret) {
	case 0:
		page_endio(page, is_write, 0);
		break;
	case 1:
		ret = 0;
		break;
	default:
		WARN_ON(1);
	}
	return ret;
}

static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O operations on all CPUs are done, so it is safe to free */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
	reset_bdev(zram);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_revalidate_disk(zram);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	zram_revalidate_disk(zram);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed for reset, so fail the open request */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);
#ifdef CONFIG_ZRAM_WRITEBACK
static DEVICE_ATTR_RW(backing_dev);
#endif

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
#ifdef CONFIG_ZRAM_WRITEBACK
	&dev_attr_backing_dev.attr,
#endif
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE-aligned
	 * and n*PAGE_SIZE-sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical to the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to the physical block size. So we can't ensure that all
	 * discarded logical blocks are zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the devices. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all the pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	blk_cleanup_queue(zram->disk->queue);
	del_gendisk(zram->disk);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute,
 * in the sense that reading from this file does alter the state of your
 * system -- it creates a new un-initialized zram device and returns this
 * device's device_id (or an error code if it fails to create a new
 * device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
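
/*
 * Example usage (illustrative sketch only, not part of the driver; the
 * device name, compressor and size below are arbitrary). The sysfs
 * attributes defined above are typically driven from a shell, e.g. to
 * use zram0 as a compressed swap device:
 *
 *	modprobe zram num_devices=1
 *	echo lzo > /sys/block/zram0/comp_algorithm
 *	echo 512M > /sys/block/zram0/disksize
 *	mkswap /dev/zram0
 *	swapon /dev/zram0
 *
 * With CONFIG_ZRAM_WRITEBACK, a backing block device can be configured
 * via the backing_dev attribute before disksize is written, so that
 * incompressible pages can be written out to it instead of being stored
 * uncompressed in memory.
 */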