/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/mempool.h>
#include <linux/zbud.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>

/*********************************
* statistics
**********************************/
/* Number of memory pages used by the compressed pool */
static u64 zswap_pool_pages;
/* The number of compressed pages currently stored in zswap */
static atomic_t zswap_stored_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */
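
/*
 * All of the statistics above and below are exported read-only through
 * debugfs (typically /sys/kernel/debug/zswap/) when CONFIG_DEBUG_FS is
 * enabled; see zswap_debugfs_init() at the bottom of this file.
 */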

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/*********************************
* tunables
**********************************/
/* Enable/disable zswap (disabled by default, fixed at boot for now) */
static bool zswap_enabled __read_mostly;
module_param_named(enabled, zswap_enabled, bool, 0444);

/* Compressor to be used by zswap (fixed at boot for now) */
#define ZSWAP_COMPRESSOR_DEFAULT "lzo"
static char *zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
module_param_named(compressor, zswap_compressor, charp, 0444);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent,
			zswap_max_pool_percent, uint, 0644);

/* The zbud pool is shared by the entire zswap backend */
static struct zbud_pool *zswap_pool;

/*********************************
* compression functions
**********************************/
/* per-cpu compression transforms */
static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms;

enum comp_op {
	ZSWAP_COMPOP_COMPRESS,
	ZSWAP_COMPOP_DECOMPRESS
};

static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret;

	tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu());
	switch (op) {
	case ZSWAP_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZSWAP_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		ret = -EINVAL;
	}

	put_cpu();
	return ret;
}

static int __init zswap_comp_init(void)
{
	if (!crypto_has_comp(zswap_compressor, 0, 0)) {
		pr_info("%s compressor not available\n", zswap_compressor);
		/* fall back to default compressor */
		zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT;
		if (!crypto_has_comp(zswap_compressor, 0, 0))
			/* can't even load the default compressor */
			return -ENODEV;
	}
	pr_info("using %s compressor\n", zswap_compressor);

	/* alloc percpu transforms */
	zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zswap_comp_pcpu_tfms)
		return -ENOMEM;
	return 0;
}

static void zswap_comp_exit(void)
{
	/* free percpu transforms */
	if (zswap_comp_pcpu_tfms)
		free_percpu(zswap_comp_pcpu_tfms);
}

/*********************************
* data structures
**********************************/
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * refcount - the number of outstanding references to the entry.  This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * handle - zbud allocation handle that stores the compressed page data
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	unsigned long handle;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST.
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zbud allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	zbud_free(zswap_pool, entry->handle);
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(zswap_pool);
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/*
 * caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry = NULL;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);

static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu)
{
	struct crypto_comp *tfm;
	u8 *dst;

	switch (action) {
	case CPU_UP_PREPARE:
		tfm = crypto_alloc_comp(zswap_compressor, 0, 0);
		if (IS_ERR(tfm)) {
			pr_err("can't allocate compressor transform\n");
			return NOTIFY_BAD;
		}
		*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm;
		dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
		if (!dst) {
			pr_err("can't allocate compressor buffer\n");
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
			return NOTIFY_BAD;
		}
		per_cpu(zswap_dstmem, cpu) = dst;
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu);
		if (tfm) {
			crypto_free_comp(tfm);
			*per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL;
		}
		dst = per_cpu(zswap_dstmem, cpu);
		kfree(dst);
		per_cpu(zswap_dstmem, cpu) = NULL;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static int zswap_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	unsigned long cpu = (unsigned long)pcpu;
	return __zswap_cpu_notifier(action, cpu);
}

static struct notifier_block zswap_cpu_notifier_block = {
	.notifier_call = zswap_cpu_notifier
};

static int zswap_cpu_init(void)
{
	unsigned long cpu;

	cpu_notifier_register_begin();
	for_each_online_cpu(cpu)
		if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
			goto cleanup;
	__register_cpu_notifier(&zswap_cpu_notifier_block);
	cpu_notifier_register_done();
	return 0;

cleanup:
	for_each_online_cpu(cpu)
		__zswap_cpu_notifier(CPU_UP_CANCELED, cpu);
	cpu_notifier_register_done();
	return -ENOMEM;
}

/*********************************
* helpers
**********************************/
static bool zswap_is_full(void)
{
	return totalram_pages * zswap_max_pool_percent / 100 <
		zswap_pool_pages;
}

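/*
 * When zswap_is_full() reports the pool over its limit, the store path
 * asks zbud to reclaim a page; zbud does so by calling the evict callback
 * registered below (zswap_writeback_entry), which writes the compressed
 * page back to the real swap device.
 */
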
/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	struct page *found_page, *new_page = NULL;
	struct address_space *swapper_space = swap_address_space(entry);
	int err;

	*retpage = NULL;
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(swapper_space, entry.val);
		if (found_page)
			break;

		/*
		 * Get a new page to read into from swap.
		 */
		if (!new_page) {
			new_page = alloc_page(GFP_KERNEL);
			if (!new_page)
				break; /* Out of memory */
		}

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			*retpage = new_page;
			return ZSWAP_SWAPCACHE_NEW;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
	} while (err != -ENOMEM);

	if (new_page)
		page_cache_release(new_page);
	if (!found_page)
		return ZSWAP_SWAPCACHE_FAIL;
	*retpage = found_page;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	/* extract swpentry from data */
	zhdr = zbud_map(pool, handle);
	swpentry = zhdr->swpentry; /* here */
	zbud_unmap(pool, handle);
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		page_cache_release(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		dlen = PAGE_SIZE;
		src = (u8 *)zbud_map(zswap_pool, entry->handle) +
			sizeof(struct zswap_header);
		dst = kmap_atomic(page);
		ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
				entry->length, dst, &dlen);
		kunmap_atomic(dst);
		zbud_unmap(zswap_pool, entry->handle);
		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	page_cache_release(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for the entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because an invalidate happened during writeback;
	 *     search the tree and drop the base reference if the entry is found
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;

	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be happening
	 * concurrently, so it is safe and okay to not free the entry here.
	 * It is also okay to return !0 in that case.
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	return ret;
}

/*********************************
* frontswap hooks
**********************************/
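/*
 * The store path compresses the page into the per-cpu dstmem buffer,
 * copies the result into a new zbud allocation (prefixed by a struct
 * zswap_header holding the swap entry, for use by writeback), and then
 * links the entry into the per-swap-type red-black tree.  A duplicate
 * entry for the same offset, if present, is removed before the new one
 * is inserted.
 */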
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	int ret;
	unsigned int dlen = PAGE_SIZE, len;
	unsigned long handle;
	char *buf;
	u8 *src, *dst;
	struct zswap_header *zhdr;

	if (!tree) {
		ret = -ENODEV;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		if (zbud_reclaim_page(zswap_pool, 8)) {
			zswap_reject_reclaim_fail++;
			ret = -ENOMEM;
			goto reject;
		}
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	/* compress */
	dst = get_cpu_var(zswap_dstmem);
	src = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen);
	kunmap_atomic(src);
	if (ret) {
		ret = -EINVAL;
		goto freepage;
	}

	/* store */
	len = dlen + sizeof(struct zswap_header);
	ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
		&handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto freepage;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto freepage;
	}
	zhdr = zbud_map(zswap_pool, handle);
	zhdr->swpentry = swp_entry(type, offset);
	buf = (u8 *)(zhdr + 1);
	memcpy(buf, dst, dlen);
	zbud_unmap(zswap_pool, handle);
	put_cpu_var(zswap_dstmem);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_pool_pages = zbud_get_pool_size(zswap_pool);

	return 0;

freepage:
	put_cpu_var(zswap_dstmem);
	zswap_entry_cache_free(entry);
reject:
	return ret;
}

/*
 * Returns 0 if the page was successfully decompressed.
 * Returns -1 if the entry was not found or on error.
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	u8 *src, *dst;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	/* decompress */
	dlen = PAGE_SIZE;
	src = (u8 *)zbud_map(zswap_pool, entry->handle) +
			sizeof(struct zswap_header);
	dst = kmap_atomic(page);
	ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
		dst, &dlen);
	kunmap_atomic(dst);
	zbud_unmap(zswap_pool, entry->handle);
	BUG_ON(ret);

	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return 0;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

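/*
 * The invalidate_area hook below runs when a swap area goes away
 * (swapoff).  At that point the tree is assumed to have no remaining
 * concurrent users, so entries are freed directly regardless of their
 * refcounts and the tree itself is torn down.
 */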
/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static struct zbud_ops zswap_zbud_ops = {
	.evict = zswap_writeback_entry
};

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);
	if (!zswap_debugfs_root)
		return -ENOMEM;

	debugfs_create_u64("pool_limit_hit", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", S_IRUGO,
			zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", S_IRUGO,
			zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_pool_pages);
	debugfs_create_atomic_t("stored_pages", S_IRUGO,
			zswap_debugfs_root, &zswap_stored_pages);

	return 0;
}

static void __exit zswap_debugfs_exit(void)
{
	debugfs_remove_recursive(zswap_debugfs_root);
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}

static void __exit zswap_debugfs_exit(void) { }
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	if (!zswap_enabled)
		return 0;

	pr_info("loading zswap\n");

	zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
	if (!zswap_pool) {
		pr_err("zbud pool creation failed\n");
		goto error;
	}

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cachefail;
	}
	if (zswap_comp_init()) {
		pr_err("compressor initialization failed\n");
		goto compfail;
	}
	if (zswap_cpu_init()) {
		pr_err("per-cpu initialization failed\n");
		goto pcpufail;
	}

	frontswap_register_ops(&zswap_frontswap_ops);
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;
pcpufail:
	zswap_comp_exit();
compfail:
	zswap_entry_cache_destroy();
cachefail:
	zbud_destroy_pool(zswap_pool);
error:
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("Compressed cache for swap pages");