1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/genhd.h> 26 #include <linux/highmem.h> 27 #include <linux/slab.h> 28 #include <linux/string.h> 29 #include <linux/vmalloc.h> 30 #include <linux/err.h> 31 #include <linux/idr.h> 32 #include <linux/sysfs.h> 33 34 #include "zram_drv.h" 35 36 static DEFINE_IDR(zram_index_idr); 37 /* idr index must be protected */ 38 static DEFINE_MUTEX(zram_index_mutex); 39 40 static int zram_major; 41 static const char *default_compressor = "lzo"; 42 43 /* Module params (documentation at end) */ 44 static unsigned int num_devices = 1; 45 46 static inline void deprecated_attr_warn(const char *name) 47 { 48 pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n", 49 task_pid_nr(current), 50 current->comm, 51 name, 52 "See zram documentation."); 53 } 54 55 #define ZRAM_ATTR_RO(name) \ 56 static ssize_t name##_show(struct device *d, \ 57 struct device_attribute *attr, char *b) \ 58 { \ 59 struct zram *zram = dev_to_zram(d); \ 60 \ 61 deprecated_attr_warn(__stringify(name)); \ 62 return scnprintf(b, PAGE_SIZE, "%llu\n", \ 63 (u64)atomic64_read(&zram->stats.name)); \ 64 } \ 65 static DEVICE_ATTR_RO(name); 66 67 static inline bool init_done(struct zram *zram) 68 { 69 return zram->disksize; 70 } 71 72 static inline struct zram *dev_to_zram(struct device *dev) 73 { 74 return (struct zram *)dev_to_disk(dev)->private_data; 75 } 76 77 /* flag operations require table entry bit_spin_lock() being held */ 78 static int zram_test_flag(struct zram_meta *meta, u32 index, 79 enum zram_pageflags flag) 80 { 81 return meta->table[index].value & BIT(flag); 82 } 83 84 static void zram_set_flag(struct zram_meta *meta, u32 index, 85 enum zram_pageflags flag) 86 { 87 meta->table[index].value |= BIT(flag); 88 } 89 90 static void zram_clear_flag(struct zram_meta *meta, u32 index, 91 enum zram_pageflags flag) 92 { 93 meta->table[index].value &= ~BIT(flag); 94 } 95 96 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) 97 { 98 return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); 99 } 100 101 static void zram_set_obj_size(struct zram_meta *meta, 102 u32 index, size_t size) 103 { 104 unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; 105 106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 107 } 108 109 static inline bool is_partial_io(struct bio_vec *bvec) 110 { 111 return bvec->bv_len != PAGE_SIZE; 112 } 113 114 /* 115 * Check if request is within bounds and aligned on zram logical blocks. 116 */ 117 static inline bool valid_io_request(struct zram *zram, 118 sector_t start, unsigned int size) 119 { 120 u64 end, bound; 121 122 /* unaligned request */ 123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 124 return false; 125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 126 return false; 127 128 end = start + (size >> SECTOR_SHIFT); 129 bound = zram->disksize >> SECTOR_SHIFT; 130 /* out of range range */ 131 if (unlikely(start >= bound || end > bound || start > end)) 132 return false; 133 134 /* I/O request is valid */ 135 return true; 136 } 137 138 static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 139 { 140 if (*offset + bvec->bv_len >= PAGE_SIZE) 141 (*index)++; 142 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 143 } 144 145 static inline void update_used_max(struct zram *zram, 146 const unsigned long pages) 147 { 148 unsigned long old_max, cur_max; 149 150 old_max = atomic_long_read(&zram->stats.max_used_pages); 151 152 do { 153 cur_max = old_max; 154 if (pages > cur_max) 155 old_max = atomic_long_cmpxchg( 156 &zram->stats.max_used_pages, cur_max, pages); 157 } while (old_max != cur_max); 158 } 159 160 static bool page_zero_filled(void *ptr) 161 { 162 unsigned int pos; 163 unsigned long *page; 164 165 page = (unsigned long *)ptr; 166 167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { 168 if (page[pos]) 169 return false; 170 } 171 172 return true; 173 } 174 175 static void handle_zero_page(struct bio_vec *bvec) 176 { 177 struct page *page = bvec->bv_page; 178 void *user_mem; 179 180 user_mem = kmap_atomic(page); 181 if (is_partial_io(bvec)) 182 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); 183 else 184 clear_page(user_mem); 185 kunmap_atomic(user_mem); 186 187 flush_dcache_page(page); 188 } 189 190 static ssize_t initstate_show(struct device *dev, 191 struct device_attribute *attr, char *buf) 192 { 193 u32 val; 194 struct zram *zram = dev_to_zram(dev); 195 196 down_read(&zram->init_lock); 197 val = init_done(zram); 198 up_read(&zram->init_lock); 199 200 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 201 } 202 203 static ssize_t disksize_show(struct device *dev, 204 struct device_attribute *attr, char *buf) 205 { 206 struct zram *zram = dev_to_zram(dev); 207 208 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 209 } 210 211 static ssize_t orig_data_size_show(struct device *dev, 212 struct device_attribute *attr, char *buf) 213 { 214 struct zram *zram = dev_to_zram(dev); 215 216 deprecated_attr_warn("orig_data_size"); 217 return scnprintf(buf, PAGE_SIZE, "%llu\n", 218 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); 219 } 220 221 static ssize_t mem_used_total_show(struct device *dev, 222 struct device_attribute *attr, char *buf) 223 { 224 u64 val = 0; 225 struct zram *zram = dev_to_zram(dev); 226 227 deprecated_attr_warn("mem_used_total"); 228 down_read(&zram->init_lock); 229 if (init_done(zram)) { 230 struct zram_meta *meta = zram->meta; 231 val = zs_get_total_pages(meta->mem_pool); 232 } 233 up_read(&zram->init_lock); 234 235 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 236 } 237 238 static ssize_t mem_limit_show(struct device *dev, 239 struct device_attribute *attr, char *buf) 240 { 241 u64 val; 242 struct zram *zram = dev_to_zram(dev); 243 244 deprecated_attr_warn("mem_limit"); 245 down_read(&zram->init_lock); 246 val = zram->limit_pages; 247 up_read(&zram->init_lock); 248 249 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 250 } 251 252 static ssize_t mem_limit_store(struct device *dev, 253 struct device_attribute *attr, const char *buf, size_t len) 254 { 255 u64 limit; 256 char *tmp; 257 struct zram *zram = dev_to_zram(dev); 258 259 limit = memparse(buf, &tmp); 260 if (buf == tmp) /* no chars parsed, invalid input */ 261 return -EINVAL; 262 263 down_write(&zram->init_lock); 264 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 265 up_write(&zram->init_lock); 266 267 return len; 268 } 269 270 static ssize_t mem_used_max_show(struct device *dev, 271 struct device_attribute *attr, char *buf) 272 { 273 u64 val = 0; 274 struct zram *zram = dev_to_zram(dev); 275 276 deprecated_attr_warn("mem_used_max"); 277 down_read(&zram->init_lock); 278 if (init_done(zram)) 279 val = atomic_long_read(&zram->stats.max_used_pages); 280 up_read(&zram->init_lock); 281 282 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 283 } 284 285 static ssize_t mem_used_max_store(struct device *dev, 286 struct device_attribute *attr, const char *buf, size_t len) 287 { 288 int err; 289 unsigned long val; 290 struct zram *zram = dev_to_zram(dev); 291 292 err = kstrtoul(buf, 10, &val); 293 if (err || val != 0) 294 return -EINVAL; 295 296 down_read(&zram->init_lock); 297 if (init_done(zram)) { 298 struct zram_meta *meta = zram->meta; 299 atomic_long_set(&zram->stats.max_used_pages, 300 zs_get_total_pages(meta->mem_pool)); 301 } 302 up_read(&zram->init_lock); 303 304 return len; 305 } 306 307 static ssize_t max_comp_streams_show(struct device *dev, 308 struct device_attribute *attr, char *buf) 309 { 310 int val; 311 struct zram *zram = dev_to_zram(dev); 312 313 down_read(&zram->init_lock); 314 val = zram->max_comp_streams; 315 up_read(&zram->init_lock); 316 317 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 318 } 319 320 static ssize_t max_comp_streams_store(struct device *dev, 321 struct device_attribute *attr, const char *buf, size_t len) 322 { 323 int num; 324 struct zram *zram = dev_to_zram(dev); 325 int ret; 326 327 ret = kstrtoint(buf, 0, &num); 328 if (ret < 0) 329 return ret; 330 if (num < 1) 331 return -EINVAL; 332 333 down_write(&zram->init_lock); 334 if (init_done(zram)) { 335 if (!zcomp_set_max_streams(zram->comp, num)) { 336 pr_info("Cannot change max compression streams\n"); 337 ret = -EINVAL; 338 goto out; 339 } 340 } 341 342 zram->max_comp_streams = num; 343 ret = len; 344 out: 345 up_write(&zram->init_lock); 346 return ret; 347 } 348 349 static ssize_t comp_algorithm_show(struct device *dev, 350 struct device_attribute *attr, char *buf) 351 { 352 size_t sz; 353 struct zram *zram = dev_to_zram(dev); 354 355 down_read(&zram->init_lock); 356 sz = zcomp_available_show(zram->compressor, buf); 357 up_read(&zram->init_lock); 358 359 return sz; 360 } 361 362 static ssize_t comp_algorithm_store(struct device *dev, 363 struct device_attribute *attr, const char *buf, size_t len) 364 { 365 struct zram *zram = dev_to_zram(dev); 366 size_t sz; 367 368 if (!zcomp_available_algorithm(buf)) 369 return -EINVAL; 370 371 down_write(&zram->init_lock); 372 if (init_done(zram)) { 373 up_write(&zram->init_lock); 374 pr_info("Can't change algorithm for initialized device\n"); 375 return -EBUSY; 376 } 377 strlcpy(zram->compressor, buf, sizeof(zram->compressor)); 378 379 /* ignore trailing newline */ 380 sz = strlen(zram->compressor); 381 if (sz > 0 && zram->compressor[sz - 1] == '\n') 382 zram->compressor[sz - 1] = 0x00; 383 384 up_write(&zram->init_lock); 385 return len; 386 } 387 388 static ssize_t compact_store(struct device *dev, 389 struct device_attribute *attr, const char *buf, size_t len) 390 { 391 struct zram *zram = dev_to_zram(dev); 392 struct zram_meta *meta; 393 394 down_read(&zram->init_lock); 395 if (!init_done(zram)) { 396 up_read(&zram->init_lock); 397 return -EINVAL; 398 } 399 400 meta = zram->meta; 401 zs_compact(meta->mem_pool); 402 up_read(&zram->init_lock); 403 404 return len; 405 } 406 407 static ssize_t io_stat_show(struct device *dev, 408 struct device_attribute *attr, char *buf) 409 { 410 struct zram *zram = dev_to_zram(dev); 411 ssize_t ret; 412 413 down_read(&zram->init_lock); 414 ret = scnprintf(buf, PAGE_SIZE, 415 "%8llu %8llu %8llu %8llu\n", 416 (u64)atomic64_read(&zram->stats.failed_reads), 417 (u64)atomic64_read(&zram->stats.failed_writes), 418 (u64)atomic64_read(&zram->stats.invalid_io), 419 (u64)atomic64_read(&zram->stats.notify_free)); 420 up_read(&zram->init_lock); 421 422 return ret; 423 } 424 425 static ssize_t mm_stat_show(struct device *dev, 426 struct device_attribute *attr, char *buf) 427 { 428 struct zram *zram = dev_to_zram(dev); 429 struct zs_pool_stats pool_stats; 430 u64 orig_size, mem_used = 0; 431 long max_used; 432 ssize_t ret; 433 434 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 435 436 down_read(&zram->init_lock); 437 if (init_done(zram)) { 438 mem_used = zs_get_total_pages(zram->meta->mem_pool); 439 zs_pool_stats(zram->meta->mem_pool, &pool_stats); 440 } 441 442 orig_size = atomic64_read(&zram->stats.pages_stored); 443 max_used = atomic_long_read(&zram->stats.max_used_pages); 444 445 ret = scnprintf(buf, PAGE_SIZE, 446 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", 447 orig_size << PAGE_SHIFT, 448 (u64)atomic64_read(&zram->stats.compr_data_size), 449 mem_used << PAGE_SHIFT, 450 zram->limit_pages << PAGE_SHIFT, 451 max_used << PAGE_SHIFT, 452 (u64)atomic64_read(&zram->stats.zero_pages), 453 pool_stats.pages_compacted); 454 up_read(&zram->init_lock); 455 456 return ret; 457 } 458 459 static DEVICE_ATTR_RO(io_stat); 460 static DEVICE_ATTR_RO(mm_stat); 461 ZRAM_ATTR_RO(num_reads); 462 ZRAM_ATTR_RO(num_writes); 463 ZRAM_ATTR_RO(failed_reads); 464 ZRAM_ATTR_RO(failed_writes); 465 ZRAM_ATTR_RO(invalid_io); 466 ZRAM_ATTR_RO(notify_free); 467 ZRAM_ATTR_RO(zero_pages); 468 ZRAM_ATTR_RO(compr_data_size); 469 470 static inline bool zram_meta_get(struct zram *zram) 471 { 472 if (atomic_inc_not_zero(&zram->refcount)) 473 return true; 474 return false; 475 } 476 477 static inline void zram_meta_put(struct zram *zram) 478 { 479 atomic_dec(&zram->refcount); 480 } 481 482 static void zram_meta_free(struct zram_meta *meta, u64 disksize) 483 { 484 size_t num_pages = disksize >> PAGE_SHIFT; 485 size_t index; 486 487 /* Free all pages that are still in this zram device */ 488 for (index = 0; index < num_pages; index++) { 489 unsigned long handle = meta->table[index].handle; 490 491 if (!handle) 492 continue; 493 494 zs_free(meta->mem_pool, handle); 495 } 496 497 zs_destroy_pool(meta->mem_pool); 498 vfree(meta->table); 499 kfree(meta); 500 } 501 502 static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) 503 { 504 size_t num_pages; 505 struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); 506 507 if (!meta) 508 return NULL; 509 510 num_pages = disksize >> PAGE_SHIFT; 511 meta->table = vzalloc(num_pages * sizeof(*meta->table)); 512 if (!meta->table) { 513 pr_err("Error allocating zram address table\n"); 514 goto out_error; 515 } 516 517 meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM); 518 if (!meta->mem_pool) { 519 pr_err("Error creating memory pool\n"); 520 goto out_error; 521 } 522 523 return meta; 524 525 out_error: 526 vfree(meta->table); 527 kfree(meta); 528 return NULL; 529 } 530 531 /* 532 * To protect concurrent access to the same index entry, 533 * caller should hold this table index entry's bit_spinlock to 534 * indicate this index entry is accessing. 535 */ 536 static void zram_free_page(struct zram *zram, size_t index) 537 { 538 struct zram_meta *meta = zram->meta; 539 unsigned long handle = meta->table[index].handle; 540 541 if (unlikely(!handle)) { 542 /* 543 * No memory is allocated for zero filled pages. 544 * Simply clear zero page flag. 545 */ 546 if (zram_test_flag(meta, index, ZRAM_ZERO)) { 547 zram_clear_flag(meta, index, ZRAM_ZERO); 548 atomic64_dec(&zram->stats.zero_pages); 549 } 550 return; 551 } 552 553 zs_free(meta->mem_pool, handle); 554 555 atomic64_sub(zram_get_obj_size(meta, index), 556 &zram->stats.compr_data_size); 557 atomic64_dec(&zram->stats.pages_stored); 558 559 meta->table[index].handle = 0; 560 zram_set_obj_size(meta, index, 0); 561 } 562 563 static int zram_decompress_page(struct zram *zram, char *mem, u32 index) 564 { 565 int ret = 0; 566 unsigned char *cmem; 567 struct zram_meta *meta = zram->meta; 568 unsigned long handle; 569 size_t size; 570 571 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 572 handle = meta->table[index].handle; 573 size = zram_get_obj_size(meta, index); 574 575 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 576 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 577 clear_page(mem); 578 return 0; 579 } 580 581 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); 582 if (size == PAGE_SIZE) 583 copy_page(mem, cmem); 584 else 585 ret = zcomp_decompress(zram->comp, cmem, size, mem); 586 zs_unmap_object(meta->mem_pool, handle); 587 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 588 589 /* Should NEVER happen. Return bio error if it does. */ 590 if (unlikely(ret)) { 591 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 592 return ret; 593 } 594 595 return 0; 596 } 597 598 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 599 u32 index, int offset) 600 { 601 int ret; 602 struct page *page; 603 unsigned char *user_mem, *uncmem = NULL; 604 struct zram_meta *meta = zram->meta; 605 page = bvec->bv_page; 606 607 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 608 if (unlikely(!meta->table[index].handle) || 609 zram_test_flag(meta, index, ZRAM_ZERO)) { 610 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 611 handle_zero_page(bvec); 612 return 0; 613 } 614 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 615 616 if (is_partial_io(bvec)) 617 /* Use a temporary buffer to decompress the page */ 618 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); 619 620 user_mem = kmap_atomic(page); 621 if (!is_partial_io(bvec)) 622 uncmem = user_mem; 623 624 if (!uncmem) { 625 pr_err("Unable to allocate temp memory\n"); 626 ret = -ENOMEM; 627 goto out_cleanup; 628 } 629 630 ret = zram_decompress_page(zram, uncmem, index); 631 /* Should NEVER happen. Return bio error if it does. */ 632 if (unlikely(ret)) 633 goto out_cleanup; 634 635 if (is_partial_io(bvec)) 636 memcpy(user_mem + bvec->bv_offset, uncmem + offset, 637 bvec->bv_len); 638 639 flush_dcache_page(page); 640 ret = 0; 641 out_cleanup: 642 kunmap_atomic(user_mem); 643 if (is_partial_io(bvec)) 644 kfree(uncmem); 645 return ret; 646 } 647 648 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, 649 int offset) 650 { 651 int ret = 0; 652 size_t clen; 653 unsigned long handle; 654 struct page *page; 655 unsigned char *user_mem, *cmem, *src, *uncmem = NULL; 656 struct zram_meta *meta = zram->meta; 657 struct zcomp_strm *zstrm = NULL; 658 unsigned long alloced_pages; 659 660 page = bvec->bv_page; 661 if (is_partial_io(bvec)) { 662 /* 663 * This is a partial IO. We need to read the full page 664 * before to write the changes. 665 */ 666 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); 667 if (!uncmem) { 668 ret = -ENOMEM; 669 goto out; 670 } 671 ret = zram_decompress_page(zram, uncmem, index); 672 if (ret) 673 goto out; 674 } 675 676 zstrm = zcomp_strm_find(zram->comp); 677 user_mem = kmap_atomic(page); 678 679 if (is_partial_io(bvec)) { 680 memcpy(uncmem + offset, user_mem + bvec->bv_offset, 681 bvec->bv_len); 682 kunmap_atomic(user_mem); 683 user_mem = NULL; 684 } else { 685 uncmem = user_mem; 686 } 687 688 if (page_zero_filled(uncmem)) { 689 if (user_mem) 690 kunmap_atomic(user_mem); 691 /* Free memory associated with this sector now. */ 692 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 693 zram_free_page(zram, index); 694 zram_set_flag(meta, index, ZRAM_ZERO); 695 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 696 697 atomic64_inc(&zram->stats.zero_pages); 698 ret = 0; 699 goto out; 700 } 701 702 ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); 703 if (!is_partial_io(bvec)) { 704 kunmap_atomic(user_mem); 705 user_mem = NULL; 706 uncmem = NULL; 707 } 708 709 if (unlikely(ret)) { 710 pr_err("Compression failed! err=%d\n", ret); 711 goto out; 712 } 713 src = zstrm->buffer; 714 if (unlikely(clen > max_zpage_size)) { 715 clen = PAGE_SIZE; 716 if (is_partial_io(bvec)) 717 src = uncmem; 718 } 719 720 handle = zs_malloc(meta->mem_pool, clen); 721 if (!handle) { 722 pr_err("Error allocating memory for compressed page: %u, size=%zu\n", 723 index, clen); 724 ret = -ENOMEM; 725 goto out; 726 } 727 728 alloced_pages = zs_get_total_pages(meta->mem_pool); 729 update_used_max(zram, alloced_pages); 730 731 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 732 zs_free(meta->mem_pool, handle); 733 ret = -ENOMEM; 734 goto out; 735 } 736 737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); 738 739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { 740 src = kmap_atomic(page); 741 copy_page(cmem, src); 742 kunmap_atomic(src); 743 } else { 744 memcpy(cmem, src, clen); 745 } 746 747 zcomp_strm_release(zram->comp, zstrm); 748 zstrm = NULL; 749 zs_unmap_object(meta->mem_pool, handle); 750 751 /* 752 * Free memory associated with this sector 753 * before overwriting unused sectors. 754 */ 755 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 756 zram_free_page(zram, index); 757 758 meta->table[index].handle = handle; 759 zram_set_obj_size(meta, index, clen); 760 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 761 762 /* Update stats */ 763 atomic64_add(clen, &zram->stats.compr_data_size); 764 atomic64_inc(&zram->stats.pages_stored); 765 out: 766 if (zstrm) 767 zcomp_strm_release(zram->comp, zstrm); 768 if (is_partial_io(bvec)) 769 kfree(uncmem); 770 return ret; 771 } 772 773 /* 774 * zram_bio_discard - handler on discard request 775 * @index: physical block index in PAGE_SIZE units 776 * @offset: byte offset within physical block 777 */ 778 static void zram_bio_discard(struct zram *zram, u32 index, 779 int offset, struct bio *bio) 780 { 781 size_t n = bio->bi_iter.bi_size; 782 struct zram_meta *meta = zram->meta; 783 784 /* 785 * zram manages data in physical block size units. Because logical block 786 * size isn't identical with physical block size on some arch, we 787 * could get a discard request pointing to a specific offset within a 788 * certain physical block. Although we can handle this request by 789 * reading that physiclal block and decompressing and partially zeroing 790 * and re-compressing and then re-storing it, this isn't reasonable 791 * because our intent with a discard request is to save memory. So 792 * skipping this logical block is appropriate here. 793 */ 794 if (offset) { 795 if (n <= (PAGE_SIZE - offset)) 796 return; 797 798 n -= (PAGE_SIZE - offset); 799 index++; 800 } 801 802 while (n >= PAGE_SIZE) { 803 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 804 zram_free_page(zram, index); 805 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 806 atomic64_inc(&zram->stats.notify_free); 807 index++; 808 n -= PAGE_SIZE; 809 } 810 } 811 812 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 813 int offset, int rw) 814 { 815 unsigned long start_time = jiffies; 816 int ret; 817 818 generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, 819 &zram->disk->part0); 820 821 if (rw == READ) { 822 atomic64_inc(&zram->stats.num_reads); 823 ret = zram_bvec_read(zram, bvec, index, offset); 824 } else { 825 atomic64_inc(&zram->stats.num_writes); 826 ret = zram_bvec_write(zram, bvec, index, offset); 827 } 828 829 generic_end_io_acct(rw, &zram->disk->part0, start_time); 830 831 if (unlikely(ret)) { 832 if (rw == READ) 833 atomic64_inc(&zram->stats.failed_reads); 834 else 835 atomic64_inc(&zram->stats.failed_writes); 836 } 837 838 return ret; 839 } 840 841 static void __zram_make_request(struct zram *zram, struct bio *bio) 842 { 843 int offset, rw; 844 u32 index; 845 struct bio_vec bvec; 846 struct bvec_iter iter; 847 848 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 849 offset = (bio->bi_iter.bi_sector & 850 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 851 852 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 853 zram_bio_discard(zram, index, offset, bio); 854 bio_endio(bio); 855 return; 856 } 857 858 rw = bio_data_dir(bio); 859 bio_for_each_segment(bvec, bio, iter) { 860 int max_transfer_size = PAGE_SIZE - offset; 861 862 if (bvec.bv_len > max_transfer_size) { 863 /* 864 * zram_bvec_rw() can only make operation on a single 865 * zram page. Split the bio vector. 866 */ 867 struct bio_vec bv; 868 869 bv.bv_page = bvec.bv_page; 870 bv.bv_len = max_transfer_size; 871 bv.bv_offset = bvec.bv_offset; 872 873 if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) 874 goto out; 875 876 bv.bv_len = bvec.bv_len - max_transfer_size; 877 bv.bv_offset += max_transfer_size; 878 if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) 879 goto out; 880 } else 881 if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) 882 goto out; 883 884 update_position(&index, &offset, &bvec); 885 } 886 887 bio_endio(bio); 888 return; 889 890 out: 891 bio_io_error(bio); 892 } 893 894 /* 895 * Handler function for all zram I/O requests. 896 */ 897 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio) 898 { 899 struct zram *zram = queue->queuedata; 900 901 if (unlikely(!zram_meta_get(zram))) 902 goto error; 903 904 blk_queue_split(queue, &bio, queue->bio_split); 905 906 if (!valid_io_request(zram, bio->bi_iter.bi_sector, 907 bio->bi_iter.bi_size)) { 908 atomic64_inc(&zram->stats.invalid_io); 909 goto put_zram; 910 } 911 912 __zram_make_request(zram, bio); 913 zram_meta_put(zram); 914 return BLK_QC_T_NONE; 915 put_zram: 916 zram_meta_put(zram); 917 error: 918 bio_io_error(bio); 919 return BLK_QC_T_NONE; 920 } 921 922 static void zram_slot_free_notify(struct block_device *bdev, 923 unsigned long index) 924 { 925 struct zram *zram; 926 struct zram_meta *meta; 927 928 zram = bdev->bd_disk->private_data; 929 meta = zram->meta; 930 931 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 932 zram_free_page(zram, index); 933 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 934 atomic64_inc(&zram->stats.notify_free); 935 } 936 937 static int zram_rw_page(struct block_device *bdev, sector_t sector, 938 struct page *page, int rw) 939 { 940 int offset, err = -EIO; 941 u32 index; 942 struct zram *zram; 943 struct bio_vec bv; 944 945 zram = bdev->bd_disk->private_data; 946 if (unlikely(!zram_meta_get(zram))) 947 goto out; 948 949 if (!valid_io_request(zram, sector, PAGE_SIZE)) { 950 atomic64_inc(&zram->stats.invalid_io); 951 err = -EINVAL; 952 goto put_zram; 953 } 954 955 index = sector >> SECTORS_PER_PAGE_SHIFT; 956 offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; 957 958 bv.bv_page = page; 959 bv.bv_len = PAGE_SIZE; 960 bv.bv_offset = 0; 961 962 err = zram_bvec_rw(zram, &bv, index, offset, rw); 963 put_zram: 964 zram_meta_put(zram); 965 out: 966 /* 967 * If I/O fails, just return error(ie, non-zero) without 968 * calling page_endio. 969 * It causes resubmit the I/O with bio request by upper functions 970 * of rw_page(e.g., swap_readpage, __swap_writepage) and 971 * bio->bi_end_io does things to handle the error 972 * (e.g., SetPageError, set_page_dirty and extra works). 973 */ 974 if (err == 0) 975 page_endio(page, rw, 0); 976 return err; 977 } 978 979 static void zram_reset_device(struct zram *zram) 980 { 981 struct zram_meta *meta; 982 struct zcomp *comp; 983 u64 disksize; 984 985 down_write(&zram->init_lock); 986 987 zram->limit_pages = 0; 988 989 if (!init_done(zram)) { 990 up_write(&zram->init_lock); 991 return; 992 } 993 994 meta = zram->meta; 995 comp = zram->comp; 996 disksize = zram->disksize; 997 /* 998 * Refcount will go down to 0 eventually and r/w handler 999 * cannot handle further I/O so it will bail out by 1000 * check zram_meta_get. 1001 */ 1002 zram_meta_put(zram); 1003 /* 1004 * We want to free zram_meta in process context to avoid 1005 * deadlock between reclaim path and any other locks. 1006 */ 1007 wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); 1008 1009 /* Reset stats */ 1010 memset(&zram->stats, 0, sizeof(zram->stats)); 1011 zram->disksize = 0; 1012 zram->max_comp_streams = 1; 1013 1014 set_capacity(zram->disk, 0); 1015 part_stat_set_all(&zram->disk->part0, 0); 1016 1017 up_write(&zram->init_lock); 1018 /* I/O operation under all of CPU are done so let's free */ 1019 zram_meta_free(meta, disksize); 1020 zcomp_destroy(comp); 1021 } 1022 1023 static ssize_t disksize_store(struct device *dev, 1024 struct device_attribute *attr, const char *buf, size_t len) 1025 { 1026 u64 disksize; 1027 struct zcomp *comp; 1028 struct zram_meta *meta; 1029 struct zram *zram = dev_to_zram(dev); 1030 int err; 1031 1032 disksize = memparse(buf, NULL); 1033 if (!disksize) 1034 return -EINVAL; 1035 1036 disksize = PAGE_ALIGN(disksize); 1037 meta = zram_meta_alloc(zram->disk->disk_name, disksize); 1038 if (!meta) 1039 return -ENOMEM; 1040 1041 comp = zcomp_create(zram->compressor, zram->max_comp_streams); 1042 if (IS_ERR(comp)) { 1043 pr_err("Cannot initialise %s compressing backend\n", 1044 zram->compressor); 1045 err = PTR_ERR(comp); 1046 goto out_free_meta; 1047 } 1048 1049 down_write(&zram->init_lock); 1050 if (init_done(zram)) { 1051 pr_info("Cannot change disksize for initialized device\n"); 1052 err = -EBUSY; 1053 goto out_destroy_comp; 1054 } 1055 1056 init_waitqueue_head(&zram->io_done); 1057 atomic_set(&zram->refcount, 1); 1058 zram->meta = meta; 1059 zram->comp = comp; 1060 zram->disksize = disksize; 1061 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); 1062 up_write(&zram->init_lock); 1063 1064 /* 1065 * Revalidate disk out of the init_lock to avoid lockdep splat. 1066 * It's okay because disk's capacity is protected by init_lock 1067 * so that revalidate_disk always sees up-to-date capacity. 1068 */ 1069 revalidate_disk(zram->disk); 1070 1071 return len; 1072 1073 out_destroy_comp: 1074 up_write(&zram->init_lock); 1075 zcomp_destroy(comp); 1076 out_free_meta: 1077 zram_meta_free(meta, disksize); 1078 return err; 1079 } 1080 1081 static ssize_t reset_store(struct device *dev, 1082 struct device_attribute *attr, const char *buf, size_t len) 1083 { 1084 int ret; 1085 unsigned short do_reset; 1086 struct zram *zram; 1087 struct block_device *bdev; 1088 1089 ret = kstrtou16(buf, 10, &do_reset); 1090 if (ret) 1091 return ret; 1092 1093 if (!do_reset) 1094 return -EINVAL; 1095 1096 zram = dev_to_zram(dev); 1097 bdev = bdget_disk(zram->disk, 0); 1098 if (!bdev) 1099 return -ENOMEM; 1100 1101 mutex_lock(&bdev->bd_mutex); 1102 /* Do not reset an active device or claimed device */ 1103 if (bdev->bd_openers || zram->claim) { 1104 mutex_unlock(&bdev->bd_mutex); 1105 bdput(bdev); 1106 return -EBUSY; 1107 } 1108 1109 /* From now on, anyone can't open /dev/zram[0-9] */ 1110 zram->claim = true; 1111 mutex_unlock(&bdev->bd_mutex); 1112 1113 /* Make sure all the pending I/O are finished */ 1114 fsync_bdev(bdev); 1115 zram_reset_device(zram); 1116 revalidate_disk(zram->disk); 1117 bdput(bdev); 1118 1119 mutex_lock(&bdev->bd_mutex); 1120 zram->claim = false; 1121 mutex_unlock(&bdev->bd_mutex); 1122 1123 return len; 1124 } 1125 1126 static int zram_open(struct block_device *bdev, fmode_t mode) 1127 { 1128 int ret = 0; 1129 struct zram *zram; 1130 1131 WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); 1132 1133 zram = bdev->bd_disk->private_data; 1134 /* zram was claimed to reset so open request fails */ 1135 if (zram->claim) 1136 ret = -EBUSY; 1137 1138 return ret; 1139 } 1140 1141 static const struct block_device_operations zram_devops = { 1142 .open = zram_open, 1143 .swap_slot_free_notify = zram_slot_free_notify, 1144 .rw_page = zram_rw_page, 1145 .owner = THIS_MODULE 1146 }; 1147 1148 static DEVICE_ATTR_WO(compact); 1149 static DEVICE_ATTR_RW(disksize); 1150 static DEVICE_ATTR_RO(initstate); 1151 static DEVICE_ATTR_WO(reset); 1152 static DEVICE_ATTR_RO(orig_data_size); 1153 static DEVICE_ATTR_RO(mem_used_total); 1154 static DEVICE_ATTR_RW(mem_limit); 1155 static DEVICE_ATTR_RW(mem_used_max); 1156 static DEVICE_ATTR_RW(max_comp_streams); 1157 static DEVICE_ATTR_RW(comp_algorithm); 1158 1159 static struct attribute *zram_disk_attrs[] = { 1160 &dev_attr_disksize.attr, 1161 &dev_attr_initstate.attr, 1162 &dev_attr_reset.attr, 1163 &dev_attr_num_reads.attr, 1164 &dev_attr_num_writes.attr, 1165 &dev_attr_failed_reads.attr, 1166 &dev_attr_failed_writes.attr, 1167 &dev_attr_compact.attr, 1168 &dev_attr_invalid_io.attr, 1169 &dev_attr_notify_free.attr, 1170 &dev_attr_zero_pages.attr, 1171 &dev_attr_orig_data_size.attr, 1172 &dev_attr_compr_data_size.attr, 1173 &dev_attr_mem_used_total.attr, 1174 &dev_attr_mem_limit.attr, 1175 &dev_attr_mem_used_max.attr, 1176 &dev_attr_max_comp_streams.attr, 1177 &dev_attr_comp_algorithm.attr, 1178 &dev_attr_io_stat.attr, 1179 &dev_attr_mm_stat.attr, 1180 NULL, 1181 }; 1182 1183 static struct attribute_group zram_disk_attr_group = { 1184 .attrs = zram_disk_attrs, 1185 }; 1186 1187 /* 1188 * Allocate and initialize new zram device. the function returns 1189 * '>= 0' device_id upon success, and negative value otherwise. 1190 */ 1191 static int zram_add(void) 1192 { 1193 struct zram *zram; 1194 struct request_queue *queue; 1195 int ret, device_id; 1196 1197 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1198 if (!zram) 1199 return -ENOMEM; 1200 1201 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1202 if (ret < 0) 1203 goto out_free_dev; 1204 device_id = ret; 1205 1206 init_rwsem(&zram->init_lock); 1207 1208 queue = blk_alloc_queue(GFP_KERNEL); 1209 if (!queue) { 1210 pr_err("Error allocating disk queue for device %d\n", 1211 device_id); 1212 ret = -ENOMEM; 1213 goto out_free_idr; 1214 } 1215 1216 blk_queue_make_request(queue, zram_make_request); 1217 1218 /* gendisk structure */ 1219 zram->disk = alloc_disk(1); 1220 if (!zram->disk) { 1221 pr_err("Error allocating disk structure for device %d\n", 1222 device_id); 1223 ret = -ENOMEM; 1224 goto out_free_queue; 1225 } 1226 1227 zram->disk->major = zram_major; 1228 zram->disk->first_minor = device_id; 1229 zram->disk->fops = &zram_devops; 1230 zram->disk->queue = queue; 1231 zram->disk->queue->queuedata = zram; 1232 zram->disk->private_data = zram; 1233 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1234 1235 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1236 set_capacity(zram->disk, 0); 1237 /* zram devices sort of resembles non-rotational disks */ 1238 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); 1239 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1240 /* 1241 * To ensure that we always get PAGE_SIZE aligned 1242 * and n*PAGE_SIZED sized I/O requests. 1243 */ 1244 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1245 blk_queue_logical_block_size(zram->disk->queue, 1246 ZRAM_LOGICAL_BLOCK_SIZE); 1247 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1248 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1249 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1250 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1251 /* 1252 * zram_bio_discard() will clear all logical blocks if logical block 1253 * size is identical with physical block size(PAGE_SIZE). But if it is 1254 * different, we will skip discarding some parts of logical blocks in 1255 * the part of the request range which isn't aligned to physical block 1256 * size. So we can't ensure that all discarded logical blocks are 1257 * zeroed. 1258 */ 1259 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1260 zram->disk->queue->limits.discard_zeroes_data = 1; 1261 else 1262 zram->disk->queue->limits.discard_zeroes_data = 0; 1263 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); 1264 1265 add_disk(zram->disk); 1266 1267 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, 1268 &zram_disk_attr_group); 1269 if (ret < 0) { 1270 pr_err("Error creating sysfs group for device %d\n", 1271 device_id); 1272 goto out_free_disk; 1273 } 1274 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1275 zram->meta = NULL; 1276 zram->max_comp_streams = 1; 1277 1278 pr_info("Added device: %s\n", zram->disk->disk_name); 1279 return device_id; 1280 1281 out_free_disk: 1282 del_gendisk(zram->disk); 1283 put_disk(zram->disk); 1284 out_free_queue: 1285 blk_cleanup_queue(queue); 1286 out_free_idr: 1287 idr_remove(&zram_index_idr, device_id); 1288 out_free_dev: 1289 kfree(zram); 1290 return ret; 1291 } 1292 1293 static int zram_remove(struct zram *zram) 1294 { 1295 struct block_device *bdev; 1296 1297 bdev = bdget_disk(zram->disk, 0); 1298 if (!bdev) 1299 return -ENOMEM; 1300 1301 mutex_lock(&bdev->bd_mutex); 1302 if (bdev->bd_openers || zram->claim) { 1303 mutex_unlock(&bdev->bd_mutex); 1304 bdput(bdev); 1305 return -EBUSY; 1306 } 1307 1308 zram->claim = true; 1309 mutex_unlock(&bdev->bd_mutex); 1310 1311 /* 1312 * Remove sysfs first, so no one will perform a disksize 1313 * store while we destroy the devices. This also helps during 1314 * hot_remove -- zram_reset_device() is the last holder of 1315 * ->init_lock, no later/concurrent disksize_store() or any 1316 * other sysfs handlers are possible. 1317 */ 1318 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, 1319 &zram_disk_attr_group); 1320 1321 /* Make sure all the pending I/O are finished */ 1322 fsync_bdev(bdev); 1323 zram_reset_device(zram); 1324 bdput(bdev); 1325 1326 pr_info("Removed device: %s\n", zram->disk->disk_name); 1327 1328 idr_remove(&zram_index_idr, zram->disk->first_minor); 1329 blk_cleanup_queue(zram->disk->queue); 1330 del_gendisk(zram->disk); 1331 put_disk(zram->disk); 1332 kfree(zram); 1333 return 0; 1334 } 1335 1336 /* zram-control sysfs attributes */ 1337 static ssize_t hot_add_show(struct class *class, 1338 struct class_attribute *attr, 1339 char *buf) 1340 { 1341 int ret; 1342 1343 mutex_lock(&zram_index_mutex); 1344 ret = zram_add(); 1345 mutex_unlock(&zram_index_mutex); 1346 1347 if (ret < 0) 1348 return ret; 1349 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 1350 } 1351 1352 static ssize_t hot_remove_store(struct class *class, 1353 struct class_attribute *attr, 1354 const char *buf, 1355 size_t count) 1356 { 1357 struct zram *zram; 1358 int ret, dev_id; 1359 1360 /* dev_id is gendisk->first_minor, which is `int' */ 1361 ret = kstrtoint(buf, 10, &dev_id); 1362 if (ret) 1363 return ret; 1364 if (dev_id < 0) 1365 return -EINVAL; 1366 1367 mutex_lock(&zram_index_mutex); 1368 1369 zram = idr_find(&zram_index_idr, dev_id); 1370 if (zram) 1371 ret = zram_remove(zram); 1372 else 1373 ret = -ENODEV; 1374 1375 mutex_unlock(&zram_index_mutex); 1376 return ret ? ret : count; 1377 } 1378 1379 static struct class_attribute zram_control_class_attrs[] = { 1380 __ATTR_RO(hot_add), 1381 __ATTR_WO(hot_remove), 1382 __ATTR_NULL, 1383 }; 1384 1385 static struct class zram_control_class = { 1386 .name = "zram-control", 1387 .owner = THIS_MODULE, 1388 .class_attrs = zram_control_class_attrs, 1389 }; 1390 1391 static int zram_remove_cb(int id, void *ptr, void *data) 1392 { 1393 zram_remove(ptr); 1394 return 0; 1395 } 1396 1397 static void destroy_devices(void) 1398 { 1399 class_unregister(&zram_control_class); 1400 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 1401 idr_destroy(&zram_index_idr); 1402 unregister_blkdev(zram_major, "zram"); 1403 } 1404 1405 static int __init zram_init(void) 1406 { 1407 int ret; 1408 1409 ret = class_register(&zram_control_class); 1410 if (ret) { 1411 pr_err("Unable to register zram-control class\n"); 1412 return ret; 1413 } 1414 1415 zram_major = register_blkdev(0, "zram"); 1416 if (zram_major <= 0) { 1417 pr_err("Unable to get major number\n"); 1418 class_unregister(&zram_control_class); 1419 return -EBUSY; 1420 } 1421 1422 while (num_devices != 0) { 1423 mutex_lock(&zram_index_mutex); 1424 ret = zram_add(); 1425 mutex_unlock(&zram_index_mutex); 1426 if (ret < 0) 1427 goto out_error; 1428 num_devices--; 1429 } 1430 1431 return 0; 1432 1433 out_error: 1434 destroy_devices(); 1435 return ret; 1436 } 1437 1438 static void __exit zram_exit(void) 1439 { 1440 destroy_devices(); 1441 } 1442 1443 module_init(zram_init); 1444 module_exit(zram_exit); 1445 1446 module_param(num_devices, uint, 0); 1447 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 1448 1449 MODULE_LICENSE("Dual BSD/GPL"); 1450 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 1451 MODULE_DESCRIPTION("Compressed RAM Block Device"); 1452