1 /* 2 * Compressed RAM block device 3 * 4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta 5 * 2012, 2013 Minchan Kim 6 * 7 * This code is released using a dual license strategy: BSD/GPL 8 * You can choose the licence that better fits your requirements. 9 * 10 * Released under the terms of 3-clause BSD License 11 * Released under the terms of GNU General Public License Version 2.0 12 * 13 */ 14 15 #define KMSG_COMPONENT "zram" 16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 17 18 #include <linux/module.h> 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/bitops.h> 22 #include <linux/blkdev.h> 23 #include <linux/buffer_head.h> 24 #include <linux/device.h> 25 #include <linux/genhd.h> 26 #include <linux/highmem.h> 27 #include <linux/slab.h> 28 #include <linux/string.h> 29 #include <linux/vmalloc.h> 30 #include <linux/err.h> 31 #include <linux/idr.h> 32 #include <linux/sysfs.h> 33 34 #include "zram_drv.h" 35 36 static DEFINE_IDR(zram_index_idr); 37 /* idr index must be protected */ 38 static DEFINE_MUTEX(zram_index_mutex); 39 40 static int zram_major; 41 static const char *default_compressor = "lzo"; 42 43 /* Module params (documentation at end) */ 44 static unsigned int num_devices = 1; 45 46 static inline void deprecated_attr_warn(const char *name) 47 { 48 pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n", 49 task_pid_nr(current), 50 current->comm, 51 name, 52 "See zram documentation."); 53 } 54 55 #define ZRAM_ATTR_RO(name) \ 56 static ssize_t name##_show(struct device *d, \ 57 struct device_attribute *attr, char *b) \ 58 { \ 59 struct zram *zram = dev_to_zram(d); \ 60 \ 61 deprecated_attr_warn(__stringify(name)); \ 62 return scnprintf(b, PAGE_SIZE, "%llu\n", \ 63 (u64)atomic64_read(&zram->stats.name)); \ 64 } \ 65 static DEVICE_ATTR_RO(name); 66 67 static inline bool init_done(struct zram *zram) 68 { 69 return zram->disksize; 70 } 71 72 static inline struct zram *dev_to_zram(struct device *dev) 73 { 74 return (struct zram *)dev_to_disk(dev)->private_data; 75 } 76 77 /* flag operations require table entry bit_spin_lock() being held */ 78 static int zram_test_flag(struct zram_meta *meta, u32 index, 79 enum zram_pageflags flag) 80 { 81 return meta->table[index].value & BIT(flag); 82 } 83 84 static void zram_set_flag(struct zram_meta *meta, u32 index, 85 enum zram_pageflags flag) 86 { 87 meta->table[index].value |= BIT(flag); 88 } 89 90 static void zram_clear_flag(struct zram_meta *meta, u32 index, 91 enum zram_pageflags flag) 92 { 93 meta->table[index].value &= ~BIT(flag); 94 } 95 96 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) 97 { 98 return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); 99 } 100 101 static void zram_set_obj_size(struct zram_meta *meta, 102 u32 index, size_t size) 103 { 104 unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; 105 106 meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; 107 } 108 109 static inline int is_partial_io(struct bio_vec *bvec) 110 { 111 return bvec->bv_len != PAGE_SIZE; 112 } 113 114 /* 115 * Check if request is within bounds and aligned on zram logical blocks. 116 */ 117 static inline int valid_io_request(struct zram *zram, 118 sector_t start, unsigned int size) 119 { 120 u64 end, bound; 121 122 /* unaligned request */ 123 if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) 124 return 0; 125 if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) 126 return 0; 127 128 end = start + (size >> SECTOR_SHIFT); 129 bound = zram->disksize >> SECTOR_SHIFT; 130 /* out of range range */ 131 if (unlikely(start >= bound || end > bound || start > end)) 132 return 0; 133 134 /* I/O request is valid */ 135 return 1; 136 } 137 138 static void update_position(u32 *index, int *offset, struct bio_vec *bvec) 139 { 140 if (*offset + bvec->bv_len >= PAGE_SIZE) 141 (*index)++; 142 *offset = (*offset + bvec->bv_len) % PAGE_SIZE; 143 } 144 145 static inline void update_used_max(struct zram *zram, 146 const unsigned long pages) 147 { 148 unsigned long old_max, cur_max; 149 150 old_max = atomic_long_read(&zram->stats.max_used_pages); 151 152 do { 153 cur_max = old_max; 154 if (pages > cur_max) 155 old_max = atomic_long_cmpxchg( 156 &zram->stats.max_used_pages, cur_max, pages); 157 } while (old_max != cur_max); 158 } 159 160 static int page_zero_filled(void *ptr) 161 { 162 unsigned int pos; 163 unsigned long *page; 164 165 page = (unsigned long *)ptr; 166 167 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { 168 if (page[pos]) 169 return 0; 170 } 171 172 return 1; 173 } 174 175 static void handle_zero_page(struct bio_vec *bvec) 176 { 177 struct page *page = bvec->bv_page; 178 void *user_mem; 179 180 user_mem = kmap_atomic(page); 181 if (is_partial_io(bvec)) 182 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); 183 else 184 clear_page(user_mem); 185 kunmap_atomic(user_mem); 186 187 flush_dcache_page(page); 188 } 189 190 static ssize_t initstate_show(struct device *dev, 191 struct device_attribute *attr, char *buf) 192 { 193 u32 val; 194 struct zram *zram = dev_to_zram(dev); 195 196 down_read(&zram->init_lock); 197 val = init_done(zram); 198 up_read(&zram->init_lock); 199 200 return scnprintf(buf, PAGE_SIZE, "%u\n", val); 201 } 202 203 static ssize_t disksize_show(struct device *dev, 204 struct device_attribute *attr, char *buf) 205 { 206 struct zram *zram = dev_to_zram(dev); 207 208 return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); 209 } 210 211 static ssize_t orig_data_size_show(struct device *dev, 212 struct device_attribute *attr, char *buf) 213 { 214 struct zram *zram = dev_to_zram(dev); 215 216 deprecated_attr_warn("orig_data_size"); 217 return scnprintf(buf, PAGE_SIZE, "%llu\n", 218 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); 219 } 220 221 static ssize_t mem_used_total_show(struct device *dev, 222 struct device_attribute *attr, char *buf) 223 { 224 u64 val = 0; 225 struct zram *zram = dev_to_zram(dev); 226 227 deprecated_attr_warn("mem_used_total"); 228 down_read(&zram->init_lock); 229 if (init_done(zram)) { 230 struct zram_meta *meta = zram->meta; 231 val = zs_get_total_pages(meta->mem_pool); 232 } 233 up_read(&zram->init_lock); 234 235 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 236 } 237 238 static ssize_t mem_limit_show(struct device *dev, 239 struct device_attribute *attr, char *buf) 240 { 241 u64 val; 242 struct zram *zram = dev_to_zram(dev); 243 244 deprecated_attr_warn("mem_limit"); 245 down_read(&zram->init_lock); 246 val = zram->limit_pages; 247 up_read(&zram->init_lock); 248 249 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 250 } 251 252 static ssize_t mem_limit_store(struct device *dev, 253 struct device_attribute *attr, const char *buf, size_t len) 254 { 255 u64 limit; 256 char *tmp; 257 struct zram *zram = dev_to_zram(dev); 258 259 limit = memparse(buf, &tmp); 260 if (buf == tmp) /* no chars parsed, invalid input */ 261 return -EINVAL; 262 263 down_write(&zram->init_lock); 264 zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; 265 up_write(&zram->init_lock); 266 267 return len; 268 } 269 270 static ssize_t mem_used_max_show(struct device *dev, 271 struct device_attribute *attr, char *buf) 272 { 273 u64 val = 0; 274 struct zram *zram = dev_to_zram(dev); 275 276 deprecated_attr_warn("mem_used_max"); 277 down_read(&zram->init_lock); 278 if (init_done(zram)) 279 val = atomic_long_read(&zram->stats.max_used_pages); 280 up_read(&zram->init_lock); 281 282 return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); 283 } 284 285 static ssize_t mem_used_max_store(struct device *dev, 286 struct device_attribute *attr, const char *buf, size_t len) 287 { 288 int err; 289 unsigned long val; 290 struct zram *zram = dev_to_zram(dev); 291 292 err = kstrtoul(buf, 10, &val); 293 if (err || val != 0) 294 return -EINVAL; 295 296 down_read(&zram->init_lock); 297 if (init_done(zram)) { 298 struct zram_meta *meta = zram->meta; 299 atomic_long_set(&zram->stats.max_used_pages, 300 zs_get_total_pages(meta->mem_pool)); 301 } 302 up_read(&zram->init_lock); 303 304 return len; 305 } 306 307 static ssize_t max_comp_streams_show(struct device *dev, 308 struct device_attribute *attr, char *buf) 309 { 310 int val; 311 struct zram *zram = dev_to_zram(dev); 312 313 down_read(&zram->init_lock); 314 val = zram->max_comp_streams; 315 up_read(&zram->init_lock); 316 317 return scnprintf(buf, PAGE_SIZE, "%d\n", val); 318 } 319 320 static ssize_t max_comp_streams_store(struct device *dev, 321 struct device_attribute *attr, const char *buf, size_t len) 322 { 323 int num; 324 struct zram *zram = dev_to_zram(dev); 325 int ret; 326 327 ret = kstrtoint(buf, 0, &num); 328 if (ret < 0) 329 return ret; 330 if (num < 1) 331 return -EINVAL; 332 333 down_write(&zram->init_lock); 334 if (init_done(zram)) { 335 if (!zcomp_set_max_streams(zram->comp, num)) { 336 pr_info("Cannot change max compression streams\n"); 337 ret = -EINVAL; 338 goto out; 339 } 340 } 341 342 zram->max_comp_streams = num; 343 ret = len; 344 out: 345 up_write(&zram->init_lock); 346 return ret; 347 } 348 349 static ssize_t comp_algorithm_show(struct device *dev, 350 struct device_attribute *attr, char *buf) 351 { 352 size_t sz; 353 struct zram *zram = dev_to_zram(dev); 354 355 down_read(&zram->init_lock); 356 sz = zcomp_available_show(zram->compressor, buf); 357 up_read(&zram->init_lock); 358 359 return sz; 360 } 361 362 static ssize_t comp_algorithm_store(struct device *dev, 363 struct device_attribute *attr, const char *buf, size_t len) 364 { 365 struct zram *zram = dev_to_zram(dev); 366 size_t sz; 367 368 down_write(&zram->init_lock); 369 if (init_done(zram)) { 370 up_write(&zram->init_lock); 371 pr_info("Can't change algorithm for initialized device\n"); 372 return -EBUSY; 373 } 374 strlcpy(zram->compressor, buf, sizeof(zram->compressor)); 375 376 /* ignore trailing newline */ 377 sz = strlen(zram->compressor); 378 if (sz > 0 && zram->compressor[sz - 1] == '\n') 379 zram->compressor[sz - 1] = 0x00; 380 381 if (!zcomp_available_algorithm(zram->compressor)) 382 len = -EINVAL; 383 384 up_write(&zram->init_lock); 385 return len; 386 } 387 388 static ssize_t compact_store(struct device *dev, 389 struct device_attribute *attr, const char *buf, size_t len) 390 { 391 struct zram *zram = dev_to_zram(dev); 392 struct zram_meta *meta; 393 394 down_read(&zram->init_lock); 395 if (!init_done(zram)) { 396 up_read(&zram->init_lock); 397 return -EINVAL; 398 } 399 400 meta = zram->meta; 401 zs_compact(meta->mem_pool); 402 up_read(&zram->init_lock); 403 404 return len; 405 } 406 407 static ssize_t io_stat_show(struct device *dev, 408 struct device_attribute *attr, char *buf) 409 { 410 struct zram *zram = dev_to_zram(dev); 411 ssize_t ret; 412 413 down_read(&zram->init_lock); 414 ret = scnprintf(buf, PAGE_SIZE, 415 "%8llu %8llu %8llu %8llu\n", 416 (u64)atomic64_read(&zram->stats.failed_reads), 417 (u64)atomic64_read(&zram->stats.failed_writes), 418 (u64)atomic64_read(&zram->stats.invalid_io), 419 (u64)atomic64_read(&zram->stats.notify_free)); 420 up_read(&zram->init_lock); 421 422 return ret; 423 } 424 425 static ssize_t mm_stat_show(struct device *dev, 426 struct device_attribute *attr, char *buf) 427 { 428 struct zram *zram = dev_to_zram(dev); 429 struct zs_pool_stats pool_stats; 430 u64 orig_size, mem_used = 0; 431 long max_used; 432 ssize_t ret; 433 434 memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats)); 435 436 down_read(&zram->init_lock); 437 if (init_done(zram)) { 438 mem_used = zs_get_total_pages(zram->meta->mem_pool); 439 zs_pool_stats(zram->meta->mem_pool, &pool_stats); 440 } 441 442 orig_size = atomic64_read(&zram->stats.pages_stored); 443 max_used = atomic_long_read(&zram->stats.max_used_pages); 444 445 ret = scnprintf(buf, PAGE_SIZE, 446 "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n", 447 orig_size << PAGE_SHIFT, 448 (u64)atomic64_read(&zram->stats.compr_data_size), 449 mem_used << PAGE_SHIFT, 450 zram->limit_pages << PAGE_SHIFT, 451 max_used << PAGE_SHIFT, 452 (u64)atomic64_read(&zram->stats.zero_pages), 453 pool_stats.pages_compacted); 454 up_read(&zram->init_lock); 455 456 return ret; 457 } 458 459 static DEVICE_ATTR_RO(io_stat); 460 static DEVICE_ATTR_RO(mm_stat); 461 ZRAM_ATTR_RO(num_reads); 462 ZRAM_ATTR_RO(num_writes); 463 ZRAM_ATTR_RO(failed_reads); 464 ZRAM_ATTR_RO(failed_writes); 465 ZRAM_ATTR_RO(invalid_io); 466 ZRAM_ATTR_RO(notify_free); 467 ZRAM_ATTR_RO(zero_pages); 468 ZRAM_ATTR_RO(compr_data_size); 469 470 static inline bool zram_meta_get(struct zram *zram) 471 { 472 if (atomic_inc_not_zero(&zram->refcount)) 473 return true; 474 return false; 475 } 476 477 static inline void zram_meta_put(struct zram *zram) 478 { 479 atomic_dec(&zram->refcount); 480 } 481 482 static void zram_meta_free(struct zram_meta *meta, u64 disksize) 483 { 484 size_t num_pages = disksize >> PAGE_SHIFT; 485 size_t index; 486 487 /* Free all pages that are still in this zram device */ 488 for (index = 0; index < num_pages; index++) { 489 unsigned long handle = meta->table[index].handle; 490 491 if (!handle) 492 continue; 493 494 zs_free(meta->mem_pool, handle); 495 } 496 497 zs_destroy_pool(meta->mem_pool); 498 vfree(meta->table); 499 kfree(meta); 500 } 501 502 static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) 503 { 504 size_t num_pages; 505 struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); 506 507 if (!meta) 508 return NULL; 509 510 num_pages = disksize >> PAGE_SHIFT; 511 meta->table = vzalloc(num_pages * sizeof(*meta->table)); 512 if (!meta->table) { 513 pr_err("Error allocating zram address table\n"); 514 goto out_error; 515 } 516 517 meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM); 518 if (!meta->mem_pool) { 519 pr_err("Error creating memory pool\n"); 520 goto out_error; 521 } 522 523 return meta; 524 525 out_error: 526 vfree(meta->table); 527 kfree(meta); 528 return NULL; 529 } 530 531 /* 532 * To protect concurrent access to the same index entry, 533 * caller should hold this table index entry's bit_spinlock to 534 * indicate this index entry is accessing. 535 */ 536 static void zram_free_page(struct zram *zram, size_t index) 537 { 538 struct zram_meta *meta = zram->meta; 539 unsigned long handle = meta->table[index].handle; 540 541 if (unlikely(!handle)) { 542 /* 543 * No memory is allocated for zero filled pages. 544 * Simply clear zero page flag. 545 */ 546 if (zram_test_flag(meta, index, ZRAM_ZERO)) { 547 zram_clear_flag(meta, index, ZRAM_ZERO); 548 atomic64_dec(&zram->stats.zero_pages); 549 } 550 return; 551 } 552 553 zs_free(meta->mem_pool, handle); 554 555 atomic64_sub(zram_get_obj_size(meta, index), 556 &zram->stats.compr_data_size); 557 atomic64_dec(&zram->stats.pages_stored); 558 559 meta->table[index].handle = 0; 560 zram_set_obj_size(meta, index, 0); 561 } 562 563 static int zram_decompress_page(struct zram *zram, char *mem, u32 index) 564 { 565 int ret = 0; 566 unsigned char *cmem; 567 struct zram_meta *meta = zram->meta; 568 unsigned long handle; 569 size_t size; 570 571 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 572 handle = meta->table[index].handle; 573 size = zram_get_obj_size(meta, index); 574 575 if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { 576 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 577 clear_page(mem); 578 return 0; 579 } 580 581 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); 582 if (size == PAGE_SIZE) 583 copy_page(mem, cmem); 584 else 585 ret = zcomp_decompress(zram->comp, cmem, size, mem); 586 zs_unmap_object(meta->mem_pool, handle); 587 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 588 589 /* Should NEVER happen. Return bio error if it does. */ 590 if (unlikely(ret)) { 591 pr_err("Decompression failed! err=%d, page=%u\n", ret, index); 592 return ret; 593 } 594 595 return 0; 596 } 597 598 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, 599 u32 index, int offset) 600 { 601 int ret; 602 struct page *page; 603 unsigned char *user_mem, *uncmem = NULL; 604 struct zram_meta *meta = zram->meta; 605 page = bvec->bv_page; 606 607 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 608 if (unlikely(!meta->table[index].handle) || 609 zram_test_flag(meta, index, ZRAM_ZERO)) { 610 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 611 handle_zero_page(bvec); 612 return 0; 613 } 614 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 615 616 if (is_partial_io(bvec)) 617 /* Use a temporary buffer to decompress the page */ 618 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); 619 620 user_mem = kmap_atomic(page); 621 if (!is_partial_io(bvec)) 622 uncmem = user_mem; 623 624 if (!uncmem) { 625 pr_err("Unable to allocate temp memory\n"); 626 ret = -ENOMEM; 627 goto out_cleanup; 628 } 629 630 ret = zram_decompress_page(zram, uncmem, index); 631 /* Should NEVER happen. Return bio error if it does. */ 632 if (unlikely(ret)) 633 goto out_cleanup; 634 635 if (is_partial_io(bvec)) 636 memcpy(user_mem + bvec->bv_offset, uncmem + offset, 637 bvec->bv_len); 638 639 flush_dcache_page(page); 640 ret = 0; 641 out_cleanup: 642 kunmap_atomic(user_mem); 643 if (is_partial_io(bvec)) 644 kfree(uncmem); 645 return ret; 646 } 647 648 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, 649 int offset) 650 { 651 int ret = 0; 652 size_t clen; 653 unsigned long handle; 654 struct page *page; 655 unsigned char *user_mem, *cmem, *src, *uncmem = NULL; 656 struct zram_meta *meta = zram->meta; 657 struct zcomp_strm *zstrm = NULL; 658 unsigned long alloced_pages; 659 660 page = bvec->bv_page; 661 if (is_partial_io(bvec)) { 662 /* 663 * This is a partial IO. We need to read the full page 664 * before to write the changes. 665 */ 666 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); 667 if (!uncmem) { 668 ret = -ENOMEM; 669 goto out; 670 } 671 ret = zram_decompress_page(zram, uncmem, index); 672 if (ret) 673 goto out; 674 } 675 676 zstrm = zcomp_strm_find(zram->comp); 677 user_mem = kmap_atomic(page); 678 679 if (is_partial_io(bvec)) { 680 memcpy(uncmem + offset, user_mem + bvec->bv_offset, 681 bvec->bv_len); 682 kunmap_atomic(user_mem); 683 user_mem = NULL; 684 } else { 685 uncmem = user_mem; 686 } 687 688 if (page_zero_filled(uncmem)) { 689 if (user_mem) 690 kunmap_atomic(user_mem); 691 /* Free memory associated with this sector now. */ 692 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 693 zram_free_page(zram, index); 694 zram_set_flag(meta, index, ZRAM_ZERO); 695 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 696 697 atomic64_inc(&zram->stats.zero_pages); 698 ret = 0; 699 goto out; 700 } 701 702 ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); 703 if (!is_partial_io(bvec)) { 704 kunmap_atomic(user_mem); 705 user_mem = NULL; 706 uncmem = NULL; 707 } 708 709 if (unlikely(ret)) { 710 pr_err("Compression failed! err=%d\n", ret); 711 goto out; 712 } 713 src = zstrm->buffer; 714 if (unlikely(clen > max_zpage_size)) { 715 clen = PAGE_SIZE; 716 if (is_partial_io(bvec)) 717 src = uncmem; 718 } 719 720 handle = zs_malloc(meta->mem_pool, clen); 721 if (!handle) { 722 pr_err("Error allocating memory for compressed page: %u, size=%zu\n", 723 index, clen); 724 ret = -ENOMEM; 725 goto out; 726 } 727 728 alloced_pages = zs_get_total_pages(meta->mem_pool); 729 if (zram->limit_pages && alloced_pages > zram->limit_pages) { 730 zs_free(meta->mem_pool, handle); 731 ret = -ENOMEM; 732 goto out; 733 } 734 735 update_used_max(zram, alloced_pages); 736 737 cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); 738 739 if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { 740 src = kmap_atomic(page); 741 copy_page(cmem, src); 742 kunmap_atomic(src); 743 } else { 744 memcpy(cmem, src, clen); 745 } 746 747 zcomp_strm_release(zram->comp, zstrm); 748 zstrm = NULL; 749 zs_unmap_object(meta->mem_pool, handle); 750 751 /* 752 * Free memory associated with this sector 753 * before overwriting unused sectors. 754 */ 755 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 756 zram_free_page(zram, index); 757 758 meta->table[index].handle = handle; 759 zram_set_obj_size(meta, index, clen); 760 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 761 762 /* Update stats */ 763 atomic64_add(clen, &zram->stats.compr_data_size); 764 atomic64_inc(&zram->stats.pages_stored); 765 out: 766 if (zstrm) 767 zcomp_strm_release(zram->comp, zstrm); 768 if (is_partial_io(bvec)) 769 kfree(uncmem); 770 return ret; 771 } 772 773 /* 774 * zram_bio_discard - handler on discard request 775 * @index: physical block index in PAGE_SIZE units 776 * @offset: byte offset within physical block 777 */ 778 static void zram_bio_discard(struct zram *zram, u32 index, 779 int offset, struct bio *bio) 780 { 781 size_t n = bio->bi_iter.bi_size; 782 struct zram_meta *meta = zram->meta; 783 784 /* 785 * zram manages data in physical block size units. Because logical block 786 * size isn't identical with physical block size on some arch, we 787 * could get a discard request pointing to a specific offset within a 788 * certain physical block. Although we can handle this request by 789 * reading that physiclal block and decompressing and partially zeroing 790 * and re-compressing and then re-storing it, this isn't reasonable 791 * because our intent with a discard request is to save memory. So 792 * skipping this logical block is appropriate here. 793 */ 794 if (offset) { 795 if (n <= (PAGE_SIZE - offset)) 796 return; 797 798 n -= (PAGE_SIZE - offset); 799 index++; 800 } 801 802 while (n >= PAGE_SIZE) { 803 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 804 zram_free_page(zram, index); 805 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 806 atomic64_inc(&zram->stats.notify_free); 807 index++; 808 n -= PAGE_SIZE; 809 } 810 } 811 812 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, 813 int offset, int rw) 814 { 815 unsigned long start_time = jiffies; 816 int ret; 817 818 generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT, 819 &zram->disk->part0); 820 821 if (rw == READ) { 822 atomic64_inc(&zram->stats.num_reads); 823 ret = zram_bvec_read(zram, bvec, index, offset); 824 } else { 825 atomic64_inc(&zram->stats.num_writes); 826 ret = zram_bvec_write(zram, bvec, index, offset); 827 } 828 829 generic_end_io_acct(rw, &zram->disk->part0, start_time); 830 831 if (unlikely(ret)) { 832 if (rw == READ) 833 atomic64_inc(&zram->stats.failed_reads); 834 else 835 atomic64_inc(&zram->stats.failed_writes); 836 } 837 838 return ret; 839 } 840 841 static void __zram_make_request(struct zram *zram, struct bio *bio) 842 { 843 int offset, rw; 844 u32 index; 845 struct bio_vec bvec; 846 struct bvec_iter iter; 847 848 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT; 849 offset = (bio->bi_iter.bi_sector & 850 (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; 851 852 if (unlikely(bio->bi_rw & REQ_DISCARD)) { 853 zram_bio_discard(zram, index, offset, bio); 854 bio_endio(bio); 855 return; 856 } 857 858 rw = bio_data_dir(bio); 859 bio_for_each_segment(bvec, bio, iter) { 860 int max_transfer_size = PAGE_SIZE - offset; 861 862 if (bvec.bv_len > max_transfer_size) { 863 /* 864 * zram_bvec_rw() can only make operation on a single 865 * zram page. Split the bio vector. 866 */ 867 struct bio_vec bv; 868 869 bv.bv_page = bvec.bv_page; 870 bv.bv_len = max_transfer_size; 871 bv.bv_offset = bvec.bv_offset; 872 873 if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0) 874 goto out; 875 876 bv.bv_len = bvec.bv_len - max_transfer_size; 877 bv.bv_offset += max_transfer_size; 878 if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0) 879 goto out; 880 } else 881 if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0) 882 goto out; 883 884 update_position(&index, &offset, &bvec); 885 } 886 887 bio_endio(bio); 888 return; 889 890 out: 891 bio_io_error(bio); 892 } 893 894 /* 895 * Handler function for all zram I/O requests. 896 */ 897 static void zram_make_request(struct request_queue *queue, struct bio *bio) 898 { 899 struct zram *zram = queue->queuedata; 900 901 if (unlikely(!zram_meta_get(zram))) 902 goto error; 903 904 blk_queue_split(queue, &bio, queue->bio_split); 905 906 if (!valid_io_request(zram, bio->bi_iter.bi_sector, 907 bio->bi_iter.bi_size)) { 908 atomic64_inc(&zram->stats.invalid_io); 909 goto put_zram; 910 } 911 912 __zram_make_request(zram, bio); 913 zram_meta_put(zram); 914 return; 915 put_zram: 916 zram_meta_put(zram); 917 error: 918 bio_io_error(bio); 919 } 920 921 static void zram_slot_free_notify(struct block_device *bdev, 922 unsigned long index) 923 { 924 struct zram *zram; 925 struct zram_meta *meta; 926 927 zram = bdev->bd_disk->private_data; 928 meta = zram->meta; 929 930 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); 931 zram_free_page(zram, index); 932 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); 933 atomic64_inc(&zram->stats.notify_free); 934 } 935 936 static int zram_rw_page(struct block_device *bdev, sector_t sector, 937 struct page *page, int rw) 938 { 939 int offset, err = -EIO; 940 u32 index; 941 struct zram *zram; 942 struct bio_vec bv; 943 944 zram = bdev->bd_disk->private_data; 945 if (unlikely(!zram_meta_get(zram))) 946 goto out; 947 948 if (!valid_io_request(zram, sector, PAGE_SIZE)) { 949 atomic64_inc(&zram->stats.invalid_io); 950 err = -EINVAL; 951 goto put_zram; 952 } 953 954 index = sector >> SECTORS_PER_PAGE_SHIFT; 955 offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT; 956 957 bv.bv_page = page; 958 bv.bv_len = PAGE_SIZE; 959 bv.bv_offset = 0; 960 961 err = zram_bvec_rw(zram, &bv, index, offset, rw); 962 put_zram: 963 zram_meta_put(zram); 964 out: 965 /* 966 * If I/O fails, just return error(ie, non-zero) without 967 * calling page_endio. 968 * It causes resubmit the I/O with bio request by upper functions 969 * of rw_page(e.g., swap_readpage, __swap_writepage) and 970 * bio->bi_end_io does things to handle the error 971 * (e.g., SetPageError, set_page_dirty and extra works). 972 */ 973 if (err == 0) 974 page_endio(page, rw, 0); 975 return err; 976 } 977 978 static void zram_reset_device(struct zram *zram) 979 { 980 struct zram_meta *meta; 981 struct zcomp *comp; 982 u64 disksize; 983 984 down_write(&zram->init_lock); 985 986 zram->limit_pages = 0; 987 988 if (!init_done(zram)) { 989 up_write(&zram->init_lock); 990 return; 991 } 992 993 meta = zram->meta; 994 comp = zram->comp; 995 disksize = zram->disksize; 996 /* 997 * Refcount will go down to 0 eventually and r/w handler 998 * cannot handle further I/O so it will bail out by 999 * check zram_meta_get. 1000 */ 1001 zram_meta_put(zram); 1002 /* 1003 * We want to free zram_meta in process context to avoid 1004 * deadlock between reclaim path and any other locks. 1005 */ 1006 wait_event(zram->io_done, atomic_read(&zram->refcount) == 0); 1007 1008 /* Reset stats */ 1009 memset(&zram->stats, 0, sizeof(zram->stats)); 1010 zram->disksize = 0; 1011 zram->max_comp_streams = 1; 1012 1013 set_capacity(zram->disk, 0); 1014 part_stat_set_all(&zram->disk->part0, 0); 1015 1016 up_write(&zram->init_lock); 1017 /* I/O operation under all of CPU are done so let's free */ 1018 zram_meta_free(meta, disksize); 1019 zcomp_destroy(comp); 1020 } 1021 1022 static ssize_t disksize_store(struct device *dev, 1023 struct device_attribute *attr, const char *buf, size_t len) 1024 { 1025 u64 disksize; 1026 struct zcomp *comp; 1027 struct zram_meta *meta; 1028 struct zram *zram = dev_to_zram(dev); 1029 int err; 1030 1031 disksize = memparse(buf, NULL); 1032 if (!disksize) 1033 return -EINVAL; 1034 1035 disksize = PAGE_ALIGN(disksize); 1036 meta = zram_meta_alloc(zram->disk->disk_name, disksize); 1037 if (!meta) 1038 return -ENOMEM; 1039 1040 comp = zcomp_create(zram->compressor, zram->max_comp_streams); 1041 if (IS_ERR(comp)) { 1042 pr_err("Cannot initialise %s compressing backend\n", 1043 zram->compressor); 1044 err = PTR_ERR(comp); 1045 goto out_free_meta; 1046 } 1047 1048 down_write(&zram->init_lock); 1049 if (init_done(zram)) { 1050 pr_info("Cannot change disksize for initialized device\n"); 1051 err = -EBUSY; 1052 goto out_destroy_comp; 1053 } 1054 1055 init_waitqueue_head(&zram->io_done); 1056 atomic_set(&zram->refcount, 1); 1057 zram->meta = meta; 1058 zram->comp = comp; 1059 zram->disksize = disksize; 1060 set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); 1061 up_write(&zram->init_lock); 1062 1063 /* 1064 * Revalidate disk out of the init_lock to avoid lockdep splat. 1065 * It's okay because disk's capacity is protected by init_lock 1066 * so that revalidate_disk always sees up-to-date capacity. 1067 */ 1068 revalidate_disk(zram->disk); 1069 1070 return len; 1071 1072 out_destroy_comp: 1073 up_write(&zram->init_lock); 1074 zcomp_destroy(comp); 1075 out_free_meta: 1076 zram_meta_free(meta, disksize); 1077 return err; 1078 } 1079 1080 static ssize_t reset_store(struct device *dev, 1081 struct device_attribute *attr, const char *buf, size_t len) 1082 { 1083 int ret; 1084 unsigned short do_reset; 1085 struct zram *zram; 1086 struct block_device *bdev; 1087 1088 ret = kstrtou16(buf, 10, &do_reset); 1089 if (ret) 1090 return ret; 1091 1092 if (!do_reset) 1093 return -EINVAL; 1094 1095 zram = dev_to_zram(dev); 1096 bdev = bdget_disk(zram->disk, 0); 1097 if (!bdev) 1098 return -ENOMEM; 1099 1100 mutex_lock(&bdev->bd_mutex); 1101 /* Do not reset an active device or claimed device */ 1102 if (bdev->bd_openers || zram->claim) { 1103 mutex_unlock(&bdev->bd_mutex); 1104 bdput(bdev); 1105 return -EBUSY; 1106 } 1107 1108 /* From now on, anyone can't open /dev/zram[0-9] */ 1109 zram->claim = true; 1110 mutex_unlock(&bdev->bd_mutex); 1111 1112 /* Make sure all the pending I/O are finished */ 1113 fsync_bdev(bdev); 1114 zram_reset_device(zram); 1115 revalidate_disk(zram->disk); 1116 bdput(bdev); 1117 1118 mutex_lock(&bdev->bd_mutex); 1119 zram->claim = false; 1120 mutex_unlock(&bdev->bd_mutex); 1121 1122 return len; 1123 } 1124 1125 static int zram_open(struct block_device *bdev, fmode_t mode) 1126 { 1127 int ret = 0; 1128 struct zram *zram; 1129 1130 WARN_ON(!mutex_is_locked(&bdev->bd_mutex)); 1131 1132 zram = bdev->bd_disk->private_data; 1133 /* zram was claimed to reset so open request fails */ 1134 if (zram->claim) 1135 ret = -EBUSY; 1136 1137 return ret; 1138 } 1139 1140 static const struct block_device_operations zram_devops = { 1141 .open = zram_open, 1142 .swap_slot_free_notify = zram_slot_free_notify, 1143 .rw_page = zram_rw_page, 1144 .owner = THIS_MODULE 1145 }; 1146 1147 static DEVICE_ATTR_WO(compact); 1148 static DEVICE_ATTR_RW(disksize); 1149 static DEVICE_ATTR_RO(initstate); 1150 static DEVICE_ATTR_WO(reset); 1151 static DEVICE_ATTR_RO(orig_data_size); 1152 static DEVICE_ATTR_RO(mem_used_total); 1153 static DEVICE_ATTR_RW(mem_limit); 1154 static DEVICE_ATTR_RW(mem_used_max); 1155 static DEVICE_ATTR_RW(max_comp_streams); 1156 static DEVICE_ATTR_RW(comp_algorithm); 1157 1158 static struct attribute *zram_disk_attrs[] = { 1159 &dev_attr_disksize.attr, 1160 &dev_attr_initstate.attr, 1161 &dev_attr_reset.attr, 1162 &dev_attr_num_reads.attr, 1163 &dev_attr_num_writes.attr, 1164 &dev_attr_failed_reads.attr, 1165 &dev_attr_failed_writes.attr, 1166 &dev_attr_compact.attr, 1167 &dev_attr_invalid_io.attr, 1168 &dev_attr_notify_free.attr, 1169 &dev_attr_zero_pages.attr, 1170 &dev_attr_orig_data_size.attr, 1171 &dev_attr_compr_data_size.attr, 1172 &dev_attr_mem_used_total.attr, 1173 &dev_attr_mem_limit.attr, 1174 &dev_attr_mem_used_max.attr, 1175 &dev_attr_max_comp_streams.attr, 1176 &dev_attr_comp_algorithm.attr, 1177 &dev_attr_io_stat.attr, 1178 &dev_attr_mm_stat.attr, 1179 NULL, 1180 }; 1181 1182 static struct attribute_group zram_disk_attr_group = { 1183 .attrs = zram_disk_attrs, 1184 }; 1185 1186 /* 1187 * Allocate and initialize new zram device. the function returns 1188 * '>= 0' device_id upon success, and negative value otherwise. 1189 */ 1190 static int zram_add(void) 1191 { 1192 struct zram *zram; 1193 struct request_queue *queue; 1194 int ret, device_id; 1195 1196 zram = kzalloc(sizeof(struct zram), GFP_KERNEL); 1197 if (!zram) 1198 return -ENOMEM; 1199 1200 ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL); 1201 if (ret < 0) 1202 goto out_free_dev; 1203 device_id = ret; 1204 1205 init_rwsem(&zram->init_lock); 1206 1207 queue = blk_alloc_queue(GFP_KERNEL); 1208 if (!queue) { 1209 pr_err("Error allocating disk queue for device %d\n", 1210 device_id); 1211 ret = -ENOMEM; 1212 goto out_free_idr; 1213 } 1214 1215 blk_queue_make_request(queue, zram_make_request); 1216 1217 /* gendisk structure */ 1218 zram->disk = alloc_disk(1); 1219 if (!zram->disk) { 1220 pr_err("Error allocating disk structure for device %d\n", 1221 device_id); 1222 ret = -ENOMEM; 1223 goto out_free_queue; 1224 } 1225 1226 zram->disk->major = zram_major; 1227 zram->disk->first_minor = device_id; 1228 zram->disk->fops = &zram_devops; 1229 zram->disk->queue = queue; 1230 zram->disk->queue->queuedata = zram; 1231 zram->disk->private_data = zram; 1232 snprintf(zram->disk->disk_name, 16, "zram%d", device_id); 1233 1234 /* Actual capacity set using syfs (/sys/block/zram<id>/disksize */ 1235 set_capacity(zram->disk, 0); 1236 /* zram devices sort of resembles non-rotational disks */ 1237 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); 1238 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue); 1239 /* 1240 * To ensure that we always get PAGE_SIZE aligned 1241 * and n*PAGE_SIZED sized I/O requests. 1242 */ 1243 blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); 1244 blk_queue_logical_block_size(zram->disk->queue, 1245 ZRAM_LOGICAL_BLOCK_SIZE); 1246 blk_queue_io_min(zram->disk->queue, PAGE_SIZE); 1247 blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); 1248 zram->disk->queue->limits.discard_granularity = PAGE_SIZE; 1249 blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX); 1250 /* 1251 * zram_bio_discard() will clear all logical blocks if logical block 1252 * size is identical with physical block size(PAGE_SIZE). But if it is 1253 * different, we will skip discarding some parts of logical blocks in 1254 * the part of the request range which isn't aligned to physical block 1255 * size. So we can't ensure that all discarded logical blocks are 1256 * zeroed. 1257 */ 1258 if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) 1259 zram->disk->queue->limits.discard_zeroes_data = 1; 1260 else 1261 zram->disk->queue->limits.discard_zeroes_data = 0; 1262 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); 1263 1264 add_disk(zram->disk); 1265 1266 ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, 1267 &zram_disk_attr_group); 1268 if (ret < 0) { 1269 pr_err("Error creating sysfs group for device %d\n", 1270 device_id); 1271 goto out_free_disk; 1272 } 1273 strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); 1274 zram->meta = NULL; 1275 zram->max_comp_streams = 1; 1276 1277 pr_info("Added device: %s\n", zram->disk->disk_name); 1278 return device_id; 1279 1280 out_free_disk: 1281 del_gendisk(zram->disk); 1282 put_disk(zram->disk); 1283 out_free_queue: 1284 blk_cleanup_queue(queue); 1285 out_free_idr: 1286 idr_remove(&zram_index_idr, device_id); 1287 out_free_dev: 1288 kfree(zram); 1289 return ret; 1290 } 1291 1292 static int zram_remove(struct zram *zram) 1293 { 1294 struct block_device *bdev; 1295 1296 bdev = bdget_disk(zram->disk, 0); 1297 if (!bdev) 1298 return -ENOMEM; 1299 1300 mutex_lock(&bdev->bd_mutex); 1301 if (bdev->bd_openers || zram->claim) { 1302 mutex_unlock(&bdev->bd_mutex); 1303 bdput(bdev); 1304 return -EBUSY; 1305 } 1306 1307 zram->claim = true; 1308 mutex_unlock(&bdev->bd_mutex); 1309 1310 /* 1311 * Remove sysfs first, so no one will perform a disksize 1312 * store while we destroy the devices. This also helps during 1313 * hot_remove -- zram_reset_device() is the last holder of 1314 * ->init_lock, no later/concurrent disksize_store() or any 1315 * other sysfs handlers are possible. 1316 */ 1317 sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, 1318 &zram_disk_attr_group); 1319 1320 /* Make sure all the pending I/O are finished */ 1321 fsync_bdev(bdev); 1322 zram_reset_device(zram); 1323 bdput(bdev); 1324 1325 pr_info("Removed device: %s\n", zram->disk->disk_name); 1326 1327 idr_remove(&zram_index_idr, zram->disk->first_minor); 1328 blk_cleanup_queue(zram->disk->queue); 1329 del_gendisk(zram->disk); 1330 put_disk(zram->disk); 1331 kfree(zram); 1332 return 0; 1333 } 1334 1335 /* zram-control sysfs attributes */ 1336 static ssize_t hot_add_show(struct class *class, 1337 struct class_attribute *attr, 1338 char *buf) 1339 { 1340 int ret; 1341 1342 mutex_lock(&zram_index_mutex); 1343 ret = zram_add(); 1344 mutex_unlock(&zram_index_mutex); 1345 1346 if (ret < 0) 1347 return ret; 1348 return scnprintf(buf, PAGE_SIZE, "%d\n", ret); 1349 } 1350 1351 static ssize_t hot_remove_store(struct class *class, 1352 struct class_attribute *attr, 1353 const char *buf, 1354 size_t count) 1355 { 1356 struct zram *zram; 1357 int ret, dev_id; 1358 1359 /* dev_id is gendisk->first_minor, which is `int' */ 1360 ret = kstrtoint(buf, 10, &dev_id); 1361 if (ret) 1362 return ret; 1363 if (dev_id < 0) 1364 return -EINVAL; 1365 1366 mutex_lock(&zram_index_mutex); 1367 1368 zram = idr_find(&zram_index_idr, dev_id); 1369 if (zram) 1370 ret = zram_remove(zram); 1371 else 1372 ret = -ENODEV; 1373 1374 mutex_unlock(&zram_index_mutex); 1375 return ret ? ret : count; 1376 } 1377 1378 static struct class_attribute zram_control_class_attrs[] = { 1379 __ATTR_RO(hot_add), 1380 __ATTR_WO(hot_remove), 1381 __ATTR_NULL, 1382 }; 1383 1384 static struct class zram_control_class = { 1385 .name = "zram-control", 1386 .owner = THIS_MODULE, 1387 .class_attrs = zram_control_class_attrs, 1388 }; 1389 1390 static int zram_remove_cb(int id, void *ptr, void *data) 1391 { 1392 zram_remove(ptr); 1393 return 0; 1394 } 1395 1396 static void destroy_devices(void) 1397 { 1398 class_unregister(&zram_control_class); 1399 idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); 1400 idr_destroy(&zram_index_idr); 1401 unregister_blkdev(zram_major, "zram"); 1402 } 1403 1404 static int __init zram_init(void) 1405 { 1406 int ret; 1407 1408 ret = class_register(&zram_control_class); 1409 if (ret) { 1410 pr_err("Unable to register zram-control class\n"); 1411 return ret; 1412 } 1413 1414 zram_major = register_blkdev(0, "zram"); 1415 if (zram_major <= 0) { 1416 pr_err("Unable to get major number\n"); 1417 class_unregister(&zram_control_class); 1418 return -EBUSY; 1419 } 1420 1421 while (num_devices != 0) { 1422 mutex_lock(&zram_index_mutex); 1423 ret = zram_add(); 1424 mutex_unlock(&zram_index_mutex); 1425 if (ret < 0) 1426 goto out_error; 1427 num_devices--; 1428 } 1429 1430 return 0; 1431 1432 out_error: 1433 destroy_devices(); 1434 return ret; 1435 } 1436 1437 static void __exit zram_exit(void) 1438 { 1439 destroy_devices(); 1440 } 1441 1442 module_init(zram_init); 1443 module_exit(zram_exit); 1444 1445 module_param(num_devices, uint, 0); 1446 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices"); 1447 1448 MODULE_LICENSE("Dual BSD/GPL"); 1449 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>"); 1450 MODULE_DESCRIPTION("Compressed RAM Block Device"); 1451