/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
#include <linux/zbud.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>

/*********************************
* statistics
**********************************/
/* Number of memory pages used by the compressed pool */
static u64 zswap_pool_pages;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/*********************************
* tunables
**********************************/
/* Enable/disable zswap (disabled by default, fixed at boot for now) */
static bool zswap_enabled __read_mostly;
module_param_named(enabled, zswap_enabled, bool, 0);

/* Compressor to be used by zswap (fixed at boot for now) */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
module_param_named(compressor, zswap_compressor, charp, 0);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
		zswap_max_pool_percent, uint, 0644);
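
/*
 * Example usage of the tunables above (illustrative values).  With zswap
 * built into the kernel, it is enabled and configured from the kernel
 * command line, e.g.:
 *
 *	zswap.enabled=1 zswap.compressor=lzo zswap.max_pool_percent=20
 *
 * enabled and compressor are registered with permission 0 and so are fixed
 * at boot, while max_pool_percent (permission 0644) can also be adjusted
 * at runtime:
 *
 *	echo 30 > /sys/module/zswap/parameters/max_pool_percent
 */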

/*********************************
* compression functions
**********************************/
/* per-cpu compression transforms */
static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;

enum comp_op {
	ZSWAP_COMPOP_COMPRESS,
	ZSWAP_COMPOP_DECOMPRESS
};

static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
	switch (op) {
	case ZSWAP_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZSWAP_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		ret = -EINVAL;
	}

	put_cpu();
	return ret;
}
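
/*
 * A minimal sketch of how zswap_comp_op() is used elsewhere in this file
 * (see zswap_frontswap_store() and zswap_frontswap_load()): the caller
 * passes the source buffer and its length, a destination buffer, and the
 * destination length by reference, which is updated to the number of
 * bytes actually produced:
 *
 *	unsigned int dlen = PAGE_SIZE;
 *	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
 *	if (!ret)
 *		pr_debug("compressed PAGE_SIZE bytes into %u bytes\n", dlen);
 *
 * For compression, dst would be the per-cpu zswap_dstmem buffer
 * (allocated with size PAGE_SIZE * 2 in the CPU notifier below).
 */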

static int __init zswap_comp_init(void)
{
	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
		pr_info("%s compressor not available\n", zswap_compressor);
		/* fall back to default compressor */
		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zswap_compressor, 0, 0))
			/* can't even load the default compressor */
			return -ENODEV;
	}
	pr_info("using %s compressor\n", zswap_compressor);

	/* alloc percpu transforms */
	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zswap_comp_pcpu_tfms)
		return -ENOMEM;
	return 0;
}

static void zswap_comp_exit(void)
{
	/* free percpu transforms */
	if (zswap_comp_pcpu_tfms)
		free_percpu(zswap_comp_pcpu_tfms);
}

/*********************************
* data structures
**********************************/
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * handle - zbud allocation handle that stores the compressed page data
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	unsigned long handle;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
	struct zbud_pool *pool;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return (zswap_entry_cache == NULL);
}

static void zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock */
static int zswap_entry_put(struct zswap_entry *entry)
{
	entry->refcount--;
	return entry->refcount;
}
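
/*
 * Illustrative caller pattern for the reference helpers above (a sketch
 * mirroring zswap_frontswap_load() and zswap_frontswap_invalidate_page()
 * below): the tree lock is held around the put, and whoever drops the
 * last reference frees the entry:
 *
 *	spin_lock(&tree->lock);
 *	refcount = zswap_entry_put(entry);
 *	spin_unlock(&tree->lock);
 *	if (!refcount)
 *		zswap_free_entry(tree, entry);
 */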

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);

static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
{
	struct crypto_comp *tfm;
	u8 *dst;

	switch (action) {
	case CPU_UP_PREPARE:
		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
		if (IS_ERR(tfm)) {
			pr_err("can't allocate compressor transform\n");
			return NOTIFY_BAD;
		}
		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
		dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
		if (!dst) {
			pr_err("can't allocate compressor buffer\n");
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
			return NOTIFY_BAD;
		}
		per_cpu(zswap_dstmem, cpu) = dst;
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		if (tfm) {
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
		}
		dst = per_cpu(zswap_dstmem, cpu);
		kfree(dst);
		per_cpu(zswap_dstmem, cpu) = NULL;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static int zswap_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	unsigned long cpu = (unsigned long)pcpu;
	return __zswap_cpu_notifier(action, cpu);
}

static struct notifier_block zswap_cpu_notifier_block = {
	.notifier_call = zswap_cpu_notifier
};

static int zswap_cpu_init(void)
{
	unsigned long cpu;

	get_online_cpus();
	for_each_online_cpu(cpu)
		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
			goto cleanup;
	register_cpu_notifier(&zswap_cpu_notifier_block);
	put_online_cpus();
	return 0;

cleanup:
	for_each_online_cpu(cpu)
		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
	put_online_cpus();
	return -ENOMEM;
}

/*********************************
* helpers
**********************************/
static bool zswap_is_full(void)
{
	return (totalram_pages * zswap_max_pool_percent / 100 <
		zswap_pool_pages);
}
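
/*
 * A worked example of the check above (illustrative numbers): on a system
 * with 4 GiB of RAM and 4 KiB pages, totalram_pages is roughly 1048576;
 * with the default max_pool_percent of 20, the pool is considered full
 * once zswap_pool_pages exceeds 1048576 * 20 / 100 = 209715 pages, i.e.
 * roughly 819 MiB of pool memory.
 */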

/*
 * Carries out the common pattern of freeing an entry's zbud allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_tree *tree, struct zswap_entry *entry)
{
	zbud_free(tree->pool, entry->handle);
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(tree->pool);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_NOMEM
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage.
 * Returns ZSWAP_SWAPCACHE_EXIST if the page was already in the swap cache;
 * the page is not locked.
 * Returns ZSWAP_SWAPCACHE_NEW if a new page was allocated and needs to be
 * populated; the page is locked.
 * Returns ZSWAP_SWAPCACHE_NOMEM on memory allocation failure.
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_NOMEM;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret, refcount;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zbud_map(pool, handle);
	swpentry = zhdr->swpentry; /* here */
	zbud_unmap(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);
	BUG_ON(pool != tree->pool);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_NOMEM: /* no memory */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST: /* page is unlocked */
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zbud_map(tree->pool, entry->handle) +
			sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zbud_unmap(tree->pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);

	/* drop local reference */
	zswap_entry_put(entry);
	/* drop the initial reference from entry creation */
	refcount = zswap_entry_put(entry);

	/*
	 * There are three possible values for refcount here:
	 * (1) refcount is 1, load is in progress, unlink from rbtree,
	 *     load will free
	 * (2) refcount is 0, (normal case) entry is valid,
	 *     remove from rbtree and free entry
	 * (3) refcount is -1, invalidate happened during writeback,
	 *     free entry
	 */
	if (refcount >= 0) {
		/* no invalidate yet, remove from rbtree */
		rb_erase(&entry->rbnode, &tree->rbroot);
	}
	spin_unlock(&tree->lock);
	if (refcount <= 0) {
		/* free the entry */
		zswap_free_entry(tree, entry);
		return 0;
	}
	return -EAGAIN;

fail:
	spin_lock(&tree->lock);
	zswap_entry_put(entry);
	spin_unlock(&tree->lock);
	return ret;
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	int ret;
	unsigned int dlen = PAGE_SIZE, len;
	unsigned long handle;
	char *buf;
	u8 *src, *dst;
	struct zswap_header *zhdr;

	if (!tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		if (zbud_reclaim_page(tree->pool, 8)) {
			zswap_reject_reclaim_fail++;
			ret = -ENOMEM;
			goto reject;
		}
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	/* compress */
	dst = get_cpu_var(zswap_dstmem);
	src = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
	kunmap_atomic(src);
	if (ret) {
		ret = -EINVAL;
		goto freepage;
	}

	/* store */
	len = dlen + sizeof(struct zswap_header);
	ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
		&handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto freepage;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto freepage;
	}
	zhdr = zbud_map(tree->pool, handle);
	zhdr->swpentry = swp_entry(type, offset);
	buf = (u8 *)(zhdr + 1);
	memcpy(buf, dst, dlen);
	zbud_unmap(tree->pool, handle);
	put_cpu_var(zswap_dstmem);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			rb_erase(&dupentry->rbnode, &tree->rbroot);
			if (!zswap_entry_put(dupentry)) {
				/* free */
				zswap_free_entry(tree, dupentry);
			}
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(tree->pool);

	return 0;

freepage:
	put_cpu_var(zswap_dstmem);
	zswap_entry_cache_free(entry);
reject:
	return ret;
}

/*
 * returns 0 if the page was successfully decompressed
 * returns -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	u8 *src, *dst;
	unsigned int dlen;
	int refcount, ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	/* decompress */
	dlen = PAGE_SIZE;
	src = (u8 *)zbud_map(tree->pool, entry->handle) +
			sizeof(struct zswap_header);
	dst = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
		dst, &dlen);
	kunmap_atomic(dst);
	zbud_unmap(tree->pool, entry->handle);
	BUG_ON(ret);

	spin_lock(&tree->lock);
	refcount = zswap_entry_put(entry);
	if (likely(refcount)) {
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);

	/*
	 * We don't have to unlink from the rbtree because
	 * zswap_writeback_entry() or zswap_frontswap_invalidate_page()
	 * has already done this for us if we are the last reference.
	 */
	/* free */
	zswap_free_entry(tree, entry);

	return 0;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	int refcount;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	rb_erase(&entry->rbnode, &tree->rbroot);

	/* drop the initial reference from entry creation */
	refcount = zswap_entry_put(entry);

	spin_unlock(&tree->lock);

	if (refcount) {
		/* writeback in progress, writeback will free */
		return;
	}

	/* free */
	zswap_free_entry(tree, entry);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) {
		zbud_free(tree->pool, entry->handle);
		zswap_entry_cache_free(entry);
		atomic_dec(&zswap_stored_pages);
	}
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);

	zbud_destroy_pool(tree->pool);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static struct zbud_ops zswap_zbud_ops = {
	.evict = zswap_writeback_entry
};

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
	if (!tree)
		goto err;
	tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
	if (!tree->pool)
		goto freetree;
	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
	return;

freetree:
	kfree(tree);
err:
	pr_err("alloc failed, zswap disabled for swap type %d\n", type);
}

static struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
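/*
 * With CONFIG_DEBUG_FS enabled and debugfs mounted at the usual location,
 * the statistics counters defined at the top of this file are exported
 * read-only under /sys/kernel/debug/zswap/, e.g.:
 *
 *	cat /sys/kernel/debug/zswap/pool_pages
 *	cat /sys/kernel/debug/zswap/stored_pages
 */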
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
	if (!zswap_debugfs_root)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", S_IRUGO,
			zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_pages);
	debugfs_create_atomic_t("stored_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_stored_pages);

	return 0;
}

static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}

static void __exit zswap_debugfs_exit(void) { }
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	if (!zswap_enabled)
		return 0;

	pr_info("loading zswap\n");
	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto error;
	}
	if (zswap_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto compfail;
	}
	if (zswap_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto pcpufail;
	}
	frontswap_register_ops(&zswap_frontswap_ops);
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;
pcpufail:
	zswap_comp_exit();
compfail:
	zswap_entry_cache_destroy();
error:
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("Compressed cache for swap pages");