// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"
#include "internal.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

static int zswap_setup(void);

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

static bool zswap_exclusive_loads_enabled = IS_ENABLED(
		CONFIG_ZSWAP_EXCLUSIVE_LOADS_DEFAULT_ON);
module_param_named(exclusive_loads, zswap_exclusive_loads_enabled, bool, 0644);

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};
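
/*
 * struct zswap_pool
 *
 * Pairs one zpool (the compressed memory allocator) with one compressor
 * and its per-CPU acomp contexts.  Pools are kept on the RCU-protected
 * zswap_pools list; the first pool on the list is the current pool used
 * for new stores, and each pool is reference counted via kref.
 */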
struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into the red-black tree for the appropriate
 *          swap type
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * refcount - the number of outstanding references to the entry.  This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression.  For a same-value filled page, length is 0.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have the same content
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

enum zswap_init_type {
	ZSWAP_UNINIT,
	ZSWAP_INIT_SUCCEED,
	ZSWAP_INIT_FAILED
};

static enum zswap_init_type zswap_init_state;

/* used to ensure the integrity of initialization */
static DEFINE_MUTEX(zswap_init_lock);

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static const struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};
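
/*
 * zswap_is_full() checks the hard limit (max_pool_percent of total RAM).
 * Once that limit has been hit, new stores are rejected until the pool
 * shrinks back below the acceptance threshold checked by
 * zswap_can_accept() (accept_thr_percent of the maximum pool size).
 */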
static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * while zswap is running, zswap can have more than one zpool on one cpu, but
 * they are sharing dstmem.  So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}

static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
		       pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wake up
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called and crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);
	int ret, failures = 0;

	do {
		ret = zpool_shrink(pool->zpool, 1, NULL);
		if (ret) {
			zswap_reject_reclaim_fail++;
			if (ret != -EAGAIN)
				break;
			if (++failures == MAX_RECLAIM_RETRIES)
				break;
		}
		cond_resched();
	} while (!zswap_can_accept());
	zswap_pool_put(pool);
}
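
/*
 * Allocate and initialize a pool for the given zpool type and compressor:
 * create the backing zpool, allocate the per-CPU compression contexts and
 * register them with CPU hotplug.  The pool is returned with one reference
 * held; the caller is expected to make it the current pool.
 */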
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either is unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

static struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}
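
/*
 * Final teardown after the last reference is dropped: wait for any RCU
 * readers still walking zswap_pools to finish, then destroy the pool.
 * Deferred to a workqueue so that synchronize_rcu() is not called from
 * the (possibly atomic) context that dropped the last reference.
 */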
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

static bool zswap_pool_changed(const char *s, const struct kernel_param *kp)
{
	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return false;
	return true;
}

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret = 0;
	bool new_pool = false;

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		/* if this is load-time (pre-init) param setting,
		 * don't create a pool; that's done during init.
		 */
		ret = param_set_charp(s, kp);
		break;
	case ZSWAP_INIT_SUCCEED:
		new_pool = zswap_pool_changed(s, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't set param, initialization failed\n");
		ret = -ENODEV;
	}
	mutex_unlock(&zswap_init_lock);

	/* no need to create a new pool, return directly */
	if (!new_pool)
		return ret;

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	int ret = -ENODEV;

	/* if this is load-time (pre-init) param setting, only set param. */
	if (system_state != SYSTEM_RUNNING)
		return param_set_bool(val, kp);

	mutex_lock(&zswap_init_lock);
	switch (zswap_init_state) {
	case ZSWAP_UNINIT:
		if (zswap_setup())
			break;
		fallthrough;
	case ZSWAP_INIT_SUCCEED:
		if (!zswap_has_pool)
			pr_err("can't enable, no pool configured\n");
		else
			ret = param_set_bool(val, kp);
		break;
	case ZSWAP_INIT_FAILED:
		pr_err("can't enable, initialization failed\n");
	}
	mutex_unlock(&zswap_init_lock);

	return ret;
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
	}

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);
	zpool_unmap_handle(pool, handle);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		kfree(tmp);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/*
		 * Having a local reference to the zswap entry doesn't exclude
		 * swapping from invalidating and recycling the swap slot. Once
		 * the swapcache is secured against concurrent swapping to and
		 * from the slot, recheck that the entry is still current before
		 * writing.
		 */
		spin_lock(&tree->lock);
		if (zswap_rb_search(&tree->rbroot, entry->offset) != entry) {
			spin_unlock(&tree->lock);
			delete_from_swap_cache(page_folio(page));
			ret = -ENOMEM;
			goto fail;
		}
		spin_unlock(&tree->lock);

		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
		src = (u8 *)zhdr + sizeof(struct zswap_header);
		if (!zpool_can_sleep_mapped(pool)) {
			memcpy(tmp, src, entry->length);
			src = tmp;
			zpool_unmap_handle(pool, handle);
		}

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		if (!zpool_can_sleep_mapped(pool))
			kfree(tmp);
		else
			zpool_unmap_handle(pool, handle);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc);
	put_page(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * search the tree and free the entry if found
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;

fail:
	if (!zpool_can_sleep_mapped(pool))
		kfree(tmp);

	/*
	 * if we get here due to ZSWAP_SWAPCACHE_EXIST,
	 * a load may be happening concurrently.
	 * it is safe and okay to not free the entry.
	 * if we free the entry in the following put,
	 * it is also okay to return !0
	 */
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned long *page;
	unsigned long val;
	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*page) - 1;

	page = (unsigned long *)ptr;
	val = page[0];

	if (val != page[last_pos])
		return 0;

	for (pos = 1; pos < last_pos; pos++) {
		if (val != page[pos])
			return 0;
	}

	*value = val;

	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	int ret;
	unsigned int hlen, dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	objcg = get_obj_cgroup_from_page(page);
	if (objcg && !obj_cgroup_may_zswap(objcg))
		goto shrink;

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto shrink;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->offset = offset;
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously; this makes the process
	 * synchronous in fact.
	 * Theoretically, acomp allows users to send multiple requests to one
	 * acomp instance and have them completed simultaneously, but frontswap
	 * stores and loads page by page, so there is no way for one thread
	 * doing frontswap to send a second page before the first is done.
	 * However, different threads running on different CPUs use different
	 * acomp instances, so multiple threads can do (de)compression in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, &zhdr, hlen);
	memcpy(buf + hlen, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return ret;

shrink:
	pool = zswap_pool_last_get();
	if (pool)
		queue_work(shrink_wq, &pool->shrink_work);
	ret = -ENOMEM;
	goto reject;
}

static void zswap_invalidate_entry(struct zswap_tree *tree,
				   struct zswap_entry *entry)
{
	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);
}

/*
 * returns 0 if the page was successfully decompressed
 * returns -1 if the entry was not found or on error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page, bool *exclusive)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_KERNEL);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
	if (zpool_evictable(entry->pool->zpool))
		src += sizeof(struct zswap_header);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	if (!ret && zswap_exclusive_loads_enabled) {
		zswap_invalidate_entry(tree, entry);
		*exclusive = true;
	}
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}
	zswap_invalidate_entry(tree, entry);
	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}
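
/* called via frontswap when a swap device is enabled; allocate the per-type rbtree */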
static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int zswap_setup(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	if (!zswap_entry_cache) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	zswap_init_state = ZSWAP_INIT_SUCCEED;
	return 0;
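
	/* error path: unwind whatever setup completed before the failure */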
destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	kmem_cache_destroy(zswap_entry_cache);
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_state = ZSWAP_INIT_FAILED;
	zswap_enabled = false;
	return -ENOMEM;
}

static int __init zswap_init(void)
{
	if (!zswap_enabled)
		return 0;
	return zswap_setup();
}
/* must be late so crypto has time to come up */
late_initcall(zswap_init);

MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");