// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */
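
/*
 * Note: zswap_pool_total_size, the page counters above and the event
 * counters below are all exported read-only through debugfs (typically
 * under /sys/kernel/debug/zswap/); see zswap_debugfs_init().
 */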

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);
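
/*
 * Illustrative example: with the 0644 permissions above, these knobs
 * appear under /sys/module/zswap/parameters/ and can be tuned at runtime,
 * e.g.:
 *
 *	echo 25 > /sys/module/zswap/parameters/max_pool_percent
 *	echo 75 > /sys/module/zswap/parameters/accept_threshold_percent
 *
 * On a machine with roughly 8 GiB of RAM the defaults above cap the pool
 * at about 20% of RAM (~1.6 GiB, see zswap_is_full()); once that cap is
 * hit, new stores are rejected until the pool shrinks back below ~90% of
 * the cap (~1.44 GiB, see zswap_can_accept()).
 */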

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

static bool zswap_exclusive_loads_enabled = IS_ENABLED(
		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

/*
 * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
 * The only case where lru_lock is not acquired while holding tree.lock is
 * when a zswap_entry is taken off the lru for writeback, in that case it
 * needs to be verified that it's still valid in the tree.
 */
struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
	struct list_head lru;
	spinlock_t lru_lock;
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is needed
 *            to protect against premature freeing of the entry by concurrent
 *            calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same value filled page length is 0, and both
 *          pool and lru are invalid and must be ignored.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 * lru - handle to the pool's lru used to evict pages.
 */
struct zswap_entry {
	struct rb_node rbnode;
	swp_entry_t swpentry;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
	struct list_head lru;
};
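
/*
 * Note that handle and value share storage in the union above: when
 * length == 0 the entry describes a same-filled page and value holds the
 * repeated word, otherwise handle refers to the compressed data in the
 * zpool.
 */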

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zswap_entry *entry,
				 struct zswap_tree *tree);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;
	pgoff_t entry_offset;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		entry_offset = swp_offset(entry->swpentry);
		if (entry_offset > offset)
			node = node->rb_left;
		else if (entry_offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}
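
/*
 * The search/insert/erase helpers in this section are only called with
 * the owning zswap_tree->lock held; see the struct zswap_tree comment
 * above.
 */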

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;
	pgoff_t myentry_offset, entry_offset = swp_offset(entry->swpentry);

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		myentry_offset = swp_offset(myentry->swpentry);
		if (myentry_offset > entry_offset)
			link = &(*link)->rb_left;
		else if (myentry_offset < entry_offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static bool zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
		return true;
	}
	return false;
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		spin_lock(&entry->pool->lru_lock);
		list_del(&entry->lru);
		spin_unlock(&entry->pool->lru_lock);
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
* remove from the tree and free it, if nobody references the entry
*/
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * zswap is running, zswap can have more than one zpool on one cpu, but they
 * are sharing dstmem. So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);
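
/*
 * CPU hotplug callbacks for the per-CPU scratch buffer and its mutex;
 * registered via CPUHP_MM_ZSWP_MEM_PREPARE in zswap_setup().
 */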

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}
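
/*
 * Per-pool CPU hotplug callbacks: allocate and free the acomp transform
 * and request each CPU uses for (de)compression. The hotplug state
 * CPUHP_MM_ZSWP_POOL_PREPARE is set up in zswap_setup() and each pool adds
 * itself as an instance in zswap_pool_create().
 */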

static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
				pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

/*
 * If the entry is still valid in the tree, drop the initial ref and remove it
 * from the tree. This function must be called with an additional ref held,
 * otherwise it may race with another invalidation freeing the entry.
 */
static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	if (zswap_rb_erase(&tree->rbroot, entry))
		zswap_entry_put(tree, entry);
}

static int zswap_reclaim_entry(struct zswap_pool *pool)
{
	struct zswap_entry *entry;
	struct zswap_tree *tree;
	pgoff_t swpoffset;
	int ret;

	/* Get an entry off the LRU */
	spin_lock(&pool->lru_lock);
	if (list_empty(&pool->lru)) {
		spin_unlock(&pool->lru_lock);
		return -EINVAL;
	}
	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
	list_del_init(&entry->lru);
	/*
	 * Once the lru lock is dropped, the entry might get freed. The
	 * swpoffset is copied to the stack, and entry isn't deref'd again
	 * until the entry is verified to still be alive in the tree.
	 */
	swpoffset = swp_offset(entry->swpentry);
	tree = zswap_trees[swp_type(entry->swpentry)];
	spin_unlock(&pool->lru_lock);

	/* Check for invalidate() race */
	spin_lock(&tree->lock);
	if (entry != zswap_rb_search(&tree->rbroot, swpoffset)) {
		ret = -EAGAIN;
		goto unlock;
	}
	/* Hold a reference to prevent a free during writeback */
	zswap_entry_get(entry);
	spin_unlock(&tree->lock);

	ret = zswap_writeback_entry(entry, tree);

	spin_lock(&tree->lock);
	if (ret) {
		/* Writeback failed, put entry back on LRU */
		spin_lock(&pool->lru_lock);
		list_move(&entry->lru, &pool->lru);
		spin_unlock(&pool->lru_lock);
		goto put_unlock;
	}

	/*
	 * Writeback started successfully, the page now belongs to the
	 * swapcache. Drop the entry from zswap - unless invalidate already
	 * took it out while we had the tree->lock released for IO.
	 */
	zswap_invalidate_entry(tree, entry);

put_unlock:
	/* Drop local reference */
	zswap_entry_put(tree, entry);
unlock:
	spin_unlock(&tree->lock);
	return ret ? -EAGAIN : 0;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);
	int ret, failures = 0;

	do {
		ret = zswap_reclaim_entry(pool);
		if (ret) {
			zswap_reject_reclaim_fail++;
			if (ret != -EAGAIN)
				break;
			if (++failures == MAX_RECLAIM_RETRIES)
				break;
		}
		cond_resched();
	} while (!zswap_can_accept());
	zswap_pool_put(pool);
}

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either is unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_LIST_HEAD(&pool->lru);
	spin_lock_init(&pool->lru_lock);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret = 0;
	bool new_pool = false;

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		/* if this is load-time (pre-init) param setting,
		 * don't create a pool; that's done during init.
		 */
		ret = param_set_charp(s, kp);
		break;
	case ZSWAP_INIT_SUCCEED:
		new_pool = zswap_pool_changed(s, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't set param, initialization failed\n");
		ret = -ENODEV;
	}
	mutex_unlock(&zswap_init_lock);

	/* no need to create a new pool, return directly */
	if (!new_pool)
		return ret;

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	int ret = -ENODEV;

	/* if this is load-time (pre-init) param setting, only set param. */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
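
/*
 * On failure the caller (zswap_reclaim_entry()) moves the entry back to
 * the head of the pool LRU, and the shrink worker may retry with another
 * entry.
 */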
static int zswap_writeback_entry(struct zswap_entry *entry,
				 struct zswap_tree *tree)
{
	swp_entry_t swpentry = entry->swpentry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct zpool *pool = entry->pool->zpool;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
	}

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/*
		 * Having a local reference to the zswap entry doesn't exclude
		 * swapping from invalidating and recycling the swap slot. Once
		 * the swapcache is secured against concurrent swapping to and
		 * from the slot, recheck that the entry is still current before
		 * writing.
		 */
		spin_lock(&tree->lock);
		if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
			spin_unlock(&tree->lock);
			delete_from_swap_cache(page_folio(page));
			ret = -ENOMEM;
			goto fail;
		}
		spin_unlock(&tree->lock);

		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
		if (!zpool_can_sleep_mapped(pool)) {
			memcpy(tmp, src, entry->length);
			src = tmp;
			zpool_unmap_handle(pool, entry->handle);
		}

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		if (!zpool_can_sleep_mapped(pool))
			kfree(tmp);
		else
			zpool_unmap_handle(pool, entry->handle);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc);
	put_page(page);
	zswap_written_back_pages++;

	return ret;
fail:
	if (!zpool_can_sleep_mapped(pool))
		kfree(tmp);

	/*
	 * if we get here due to ZSWAP_SWAPCACHE_EXIST
	 * a load may be happening concurrently.
	 * it is safe and okay to not free the entry.
	 * it is also okay to return !0
	 */
	return ret;
}
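
/*
 * A page is "same-filled" when every machine word in it holds the same
 * value (e.g. a zero-filled page). Such pages are not compressed at all;
 * only that one word is kept in entry->value (see the union in struct
 * zswap_entry) and rematerialized by zswap_fill_page() on load.
 */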
static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return 0;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return 0;
	}

	*value = val;

	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	int ret;
	unsigned int dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	/*
	 * XXX: zswap reclaim does not work with cgroups yet. Without a
	 * cgroup-aware entry LRU, we will push out entries system-wide based on
	 * local cgroup limits.
	 */
	objcg = get_obj_cgroup_from_page(page);
	if (objcg && !obj_cgroup_may_zswap(objcg)) {
		ret = -ENOMEM;
		goto reject;
	}

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto shrink;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->swpentry = swp_entry(type, offset);
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously; in effect the operation
	 * is synchronous.
	 * Theoretically, acomp supports issuing multiple requests on one acomp
	 * instance and having them completed concurrently. But frontswap
	 * stores and loads page by page, so a single thread has no way to
	 * submit a second page before the first one is done.
	 * Different threads running on different CPUs use different acomp
	 * instances, however, so multiple threads can still (de)compress in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->swpentry = swp_entry(type, offset);
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	if (entry->length) {
		spin_lock(&entry->pool->lru_lock);
		list_add(&entry->lru, &entry->pool->lru);
		spin_unlock(&entry->pool->lru_lock);
	}
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return ret;

shrink:
	pool = zswap_pool_last_get();
	if (pool)
		queue_work(shrink_wq, &pool->shrink_work);
	ret = -ENOMEM;
	goto reject;
}

/*
 * returns 0 if the page was successfully decompressed
 * return -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page, bool *exclusive)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_KERNEL);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	if (!ret && zswap_exclusive_loads_enabled) {
		zswap_invalidate_entry(tree, entry);
		*exclusive = true;
	} else if (entry->length) {
		spin_lock(&entry->pool->lru_lock);
		list_move(&entry->lru, &entry->pool->lru);
		spin_unlock(&entry->pool->lru_lock);
	}
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}
	zswap_invalidate_entry(tree, entry);
	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};
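
/*
 * These ops are registered with frontswap in zswap_setup(): either from
 * zswap_init() at late_initcall time when zswap is enabled at boot, or
 * lazily from zswap_enabled_param_set() the first time "enabled" is
 * flipped on at runtime.
 */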

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int zswap_setup(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	if (!zswap_entry_cache) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	zswap_init_state = ZSWAP_INIT_SUCCEED;
	return 0;

destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	kmem_cache_destroy(zswap_entry_cache);
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_state = ZSWAP_INIT_FAILED;
	zswap_enabled = false;
	return -ENOMEM;
}

static int __init zswap_init(void)
{
	if (!zswap_enabled)
		return 0;
	return zswap_setup();
}
/* must be late so crypto has time to come up */
late_initcall(zswap_init);

MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");