/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>

#include "zram_drv.h"

/* Globals */
static int zram_major;
static struct zram *zram_devices;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static inline void deprecated_attr_warn(const char *name)
{
	pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
			task_pid_nr(current),
			current->comm,
			name,
			"See zram documentation.");
}

#define ZRAM_ATTR_RO(name)						\
static ssize_t name##_show(struct device *d,				\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
									\
	deprecated_attr_warn(__stringify(name));			\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static DEVICE_ATTR_RO(name);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	unsigned long nr_migrated;
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta;

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	meta = zram->meta;
	nr_migrated = zs_compact(meta->mem_pool);
	atomic64_add(nr_migrated, &zram->stats.num_migrated);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("orig_data_size");
	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

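	/*
	 * Deprecated attribute: the same total-usage figure is also exposed
	 * through the consolidated mm_stat attribute further down.
	 */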
	deprecated_attr_warn("mem_used_total");
	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		val = zs_get_total_pages(meta->mem_pool);
	}
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = zram->max_comp_streams;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t mem_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_limit");
	down_read(&zram->init_lock);
	val = zram->limit_pages;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	deprecated_attr_warn("mem_used_max");
	down_read(&zram->init_lock);
	if (init_done(zram))
		val = atomic_long_read(&zram->stats.max_used_pages);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(meta->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int num;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = kstrtoint(buf, 0, &num);
	if (ret < 0)
		return ret;
	if (num < 1)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		if (!zcomp_set_max_streams(zram->comp, num)) {
			pr_info("Cannot change max compression streams\n");
			ret = -EINVAL;
			goto out;
		}
	}

	zram->max_comp_streams = num;
	ret = len;
out:
	up_write(&zram->init_lock);
	return ret;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}
	strlcpy(zram->compressor, buf, sizeof(zram->compressor));
	up_write(&zram->init_lock);
	return len;
}

/* flag operations need the table entry's bit_spinlock (ZRAM_ACCESS) held */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value &= ~BIT(flag);
}

static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta,
					u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline int is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline int valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return 0;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return 0;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return 0;

	/* I/O request is valid */
	return 1;
}

static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++) {
		unsigned long handle = meta->table[index].handle;

		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
{
	size_t num_pages;
	char pool_name[8];
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);

	if (!meta)
		return NULL;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto out_error;
	}

	snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
	meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto out_error;
	}

	return meta;

out_error:
	vfree(meta->table);
	kfree(meta);
	return NULL;
}

static inline bool zram_meta_get(struct zram *zram)
{
	if (atomic_inc_not_zero(&zram->refcount))
		return true;
	return false;
}

static inline void zram_meta_put(struct zram *zram)
{
	atomic_dec(&zram->refcount);
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static int page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return 0;
	}

	return 1;
}

static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

/*
 * To protect concurrent access to the same index entry,
 * the caller should hold this table index entry's bit_spinlock to
 * indicate that this index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear the zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(meta, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	zram_set_obj_size(meta, index, 0);
}

static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	size_t size;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	handle = meta->table[index].handle;
	size = zram_get_obj_size(meta, index);

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE)
		copy_page(mem, cmem);
	else
		ret = zcomp_decompress(zram->comp, cmem, size, mem);
	zs_unmap_object(meta->mem_pool, handle);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		return ret;
	}

	return 0;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		handle_zero_page(bvec);
		return 0;
	}
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_info("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	size_t clen;
	unsigned long handle;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm;
	bool locked = false;
	unsigned long alloced_pages;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

	zstrm = zcomp_strm_find(zram->comp);
	locked = true;
	user_mem = kmap_atomic(page);

	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
			bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		if (user_mem)
			kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}

	ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}
	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	handle = zs_malloc(meta->mem_pool, clen);
	if (!handle) {
		pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}

	alloced_pages = zs_get_total_pages(meta->mem_pool);
	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(meta->mem_pool, handle);
		ret = -ENOMEM;
		goto out;
	}

	update_used_max(zram, alloced_pages);

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zcomp_strm_release(zram->comp, zstrm);
	locked = false;
	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	zram_set_obj_size(meta, index, clen);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (locked)
		zcomp_strm_release(zram->comp, zstrm);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, int rw)
{
	unsigned long start_time = jiffies;
	int ret;

	generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (rw == READ)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	struct zram_meta *meta = zram->meta;

	/*
	 * zram manages data in physical block size units. Because the
	 * logical block size isn't identical to the physical block size on
	 * some architectures, we could get a discard request pointing to a
	 * specific offset within a certain physical block. Although we could
	 * handle this request by reading that physical block, decompressing
	 * it, partially zeroing it, and then re-compressing and re-storing
	 * it, this isn't reasonable because our intent with a discard
	 * request is to save memory. So skipping such a partial logical
	 * block is appropriate here.
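	 *
	 * For example, with 64K pages and zram's 4K logical blocks, a
	 * discard covering bytes [4K, 132K) frees only the fully covered
	 * page at [64K, 128K); the partially covered head [4K, 64K) and
	 * tail [128K, 132K) are simply skipped.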
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

static void zram_reset_device(struct zram *zram)
{
	struct zram_meta *meta;
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	meta = zram->meta;
	comp = zram->comp;
	disksize = zram->disksize;
	/*
	 * The refcount will go down to 0 eventually and the r/w handler
	 * cannot handle further I/O, so it will bail out by checking
	 * zram_meta_get.
	 */
	zram_meta_put(zram);
	/*
	 * We want to free zram_meta in process context to avoid
	 * deadlock between reclaim path and any other locks.
	 */
	wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);

	/* Reset stats */
	memset(&zram->stats, 0, sizeof(zram->stats));
	zram->disksize = 0;
	zram->max_comp_streams = 1;
	set_capacity(zram->disk, 0);

	up_write(&zram->init_lock);
	/* All I/O operations on all CPUs are done, so it's safe to free */
	zram_meta_free(meta, disksize);
	zcomp_destroy(comp);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram_meta *meta;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	disksize = PAGE_ALIGN(disksize);
	meta = zram_meta_alloc(zram->disk->first_minor, disksize);
	if (!meta)
		return -ENOMEM;

	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
	if (IS_ERR(comp)) {
		pr_info("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_destroy_comp;
	}

	init_waitqueue_head(&zram->io_done);
	atomic_set(&zram->refcount, 1);
	zram->meta = meta;
	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	/*
	 * Revalidate disk out of the init_lock to avoid lockdep splat.
	 * It's okay because disk's capacity is protected by init_lock
	 * so that revalidate_disk always sees up-to-date capacity.
	 */
	revalidate_disk(zram->disk);

	return len;

out_destroy_comp:
	up_write(&zram->init_lock);
	zcomp_destroy(comp);
out_free_meta:
	zram_meta_free(meta, disksize);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);

	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/*
	 * Do not reset an active device!
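	 * bd_openers is checked below; a device that is still open (e.g.
	 * mounted or in use as swap) is rejected with -EBUSY.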
	 */
	if (bdev->bd_openers) {
		ret = -EBUSY;
		goto out;
	}

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		goto out;

	if (!do_reset) {
		ret = -EINVAL;
		goto out;
	}

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);

	mutex_unlock(&bdev->bd_mutex);
	revalidate_disk(zram->disk);
	bdput(bdev);

	return len;

out:
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}

static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset, rw;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
			(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio, 0);
		return;
	}

	rw = bio_data_dir(bio);
	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
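 *
 * The reference taken via zram_meta_get() keeps zram->meta alive for the
 * duration of the request; zram_reset_device() waits for the refcount to
 * drop to zero before freeing the metadata.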
 */
static void zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (unlikely(!zram_meta_get(zram)))
		goto error;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto put_zram;
	}

	__zram_make_request(zram, bio);
	zram_meta_put(zram);
	return;
put_zram:
	zram_meta_put(zram);
error:
	bio_io_error(bio);
}

static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;
	struct zram_meta *meta;

	zram = bdev->bd_disk->private_data;
	meta = zram->meta;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
	atomic64_inc(&zram->stats.notify_free);
}

static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, int rw)
{
	int offset, err = -EIO;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	zram = bdev->bd_disk->private_data;
	if (unlikely(!zram_meta_get(zram)))
		goto out;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		err = -EINVAL;
		goto put_zram;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	err = zram_bvec_rw(zram, &bv, index, offset, rw);
put_zram:
	zram_meta_put(zram);
out:
	/*
	 * If I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio.
	 * The upper callers of rw_page (e.g. swap_readpage, __swap_writepage)
	 * will then resubmit the I/O as a bio request, and bio->bi_end_io
	 * handles the error (e.g. SetPageError, set_page_dirty and other
	 * cleanup).
	 */
	if (err == 0)
		page_endio(page, rw, 0);
	return err;
}

static const struct block_device_operations zram_devops = {
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_RO(orig_data_size);
static DEVICE_ATTR_RO(mem_used_total);
static DEVICE_ATTR_RW(mem_limit);
static DEVICE_ATTR_RW(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	down_read(&zram->init_lock);
	if (init_done(zram))
		mem_used = zs_get_total_pages(zram->meta->mem_pool);

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.zero_pages),
			(u64)atomic64_read(&zram->stats.num_migrated));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_num_reads.attr,
	&dev_attr_num_writes.attr,
	&dev_attr_failed_reads.attr,
	&dev_attr_failed_writes.attr,
	&dev_attr_compact.attr,
	&dev_attr_invalid_io.attr,
	&dev_attr_notify_free.attr,
	&dev_attr_zero_pages.attr,
	&dev_attr_orig_data_size.attr,
	&dev_attr_compr_data_size.attr,
	&dev_attr_mem_used_total.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	NULL,
};

static struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static int create_device(struct zram *zram, int device_id)
{
	struct request_queue *queue;
	int ret = -ENOMEM;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		goto out;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_warn("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE-aligned
	 * and n*PAGE_SIZE-sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
	/*
	 * zram_bio_discard() will clear all logical blocks if the logical
	 * block size is identical to the physical block size (PAGE_SIZE).
	 * But if it is different, we will skip discarding some parts of
	 * logical blocks in the part of the request range which isn't
	 * aligned to the physical block size. So we can't ensure that all
	 * discarded logical blocks are zeroed.
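	 *
	 * (On the common configuration with 4K pages the two sizes match,
	 * since zram's logical block size is 4K as well, so
	 * discard_zeroes_data can be advertised.)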
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		zram->disk->queue->limits.discard_zeroes_data = 1;
	else
		zram->disk->queue->limits.discard_zeroes_data = 0;
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_warn("Error creating sysfs group\n");
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
	zram->meta = NULL;
	zram->max_comp_streams = 1;
	return 0;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out:
	return ret;
}

static void destroy_devices(unsigned int nr)
{
	struct zram *zram;
	unsigned int i;

	for (i = 0; i < nr; i++) {
		zram = &zram_devices[i];
		/*
		 * Remove sysfs first, so no one will perform a disksize
		 * store while we destroy the devices
		 */
		sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);

		zram_reset_device(zram);

		blk_cleanup_queue(zram->disk->queue);
		del_gendisk(zram->disk);
		put_disk(zram->disk);
	}

	kfree(zram_devices);
	unregister_blkdev(zram_major, "zram");
	pr_info("Destroyed %u device(s)\n", nr);
}

static int __init zram_init(void)
{
	int ret, dev_id;

	if (num_devices > max_num_devices) {
		pr_warn("Invalid value for num_devices: %u\n",
				num_devices);
		return -EINVAL;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_warn("Unable to get major number\n");
		return -EBUSY;
	}

	/* Allocate the device array and initialize each one */
	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
	if (!zram_devices) {
		unregister_blkdev(zram_major, "zram");
		return -ENOMEM;
	}

	for (dev_id = 0; dev_id < num_devices; dev_id++) {
		ret = create_device(&zram_devices[dev_id], dev_id);
		if (ret)
			goto out_error;
	}

	pr_info("Created %u device(s)\n", num_devices);
	return 0;

out_error:
	destroy_devices(dev_id);
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices(num_devices);
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
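
/*
 * Typical usage sketch (illustrative only; the device name and sizes are
 * examples, not part of the driver):
 *
 *	modprobe zram num_devices=1
 *	echo lzo > /sys/block/zram0/comp_algorithm
 *	echo 512M > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 *	...
 *	swapoff /dev/zram0
 *	echo 1 > /sys/block/zram0/reset
 */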