// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012  Seth Jennings <sjenning@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */

/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set =		zswap_enabled_param_set,
	.get =		param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set =		zswap_compressor_param_set,
	.get =		param_get_charp,
	.free =		param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set =		zswap_zpool_param_set,
	.get =		param_get_charp,
	.free =		param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);

/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
};

/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is needed
 *            to protect against premature freeing of the entry by concurrent
 *            calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression. For a same value filled page length is 0.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - value of the same-value filled pages which have same content
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

/* used by param callback function */
static bool zswap_init_started;

/* fatal error during init */
static bool zswap_init_failed;

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static const struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int __init zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void __init zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}

/*********************************
* rbtree functions
**********************************/
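/*
 * Look up the entry with the given swap offset in the rbtree. Returns the
 * entry on success or NULL if no entry with that offset exists. The caller
 * must hold the tree lock.
 */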
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}

/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type and compressor at runtime, i.e.
 * zswap is running, zswap can have more than one zpool on one cpu, but they
 * are sharing dstmem. So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}
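
/*
 * Per-pool cpuhp callbacks: allocate (and later free) the per-CPU acomp
 * transform and request for a zswap_pool. These are registered via
 * cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE, ...) in init_zswap()
 * and an instance is added for each pool in zswap_pool_create().
 */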
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
				pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * if the backend of acomp is async zip, crypto_req_done() will wakeup
	 * crypto_wait_req(); if the backend of acomp is scomp, the callback
	 * won't be called, crypto_wait_req() will return without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);

	if (zpool_shrink(pool->zpool, 1, NULL))
		zswap_reject_reclaim_fail++;
	zswap_pool_put(pool);
}

static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either are unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

static __init struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}
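
/*
 * Deferred pool teardown, scheduled from __zswap_pool_empty() once the last
 * reference has been dropped. The pool has already been removed from
 * zswap_pools; synchronize_rcu() below ensures no RCU reader can still be
 * walking the list entry before the pool is destroyed.
 */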
static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();

	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret;

	if (zswap_init_failed) {
		pr_err("can't set param, initialization failed\n");
		return -ENODEV;
	}

	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return 0;

	/* if this is load-time (pre-init) param setting,
	 * don't create a pool; that's done during init.
	 */
	if (!zswap_init_started)
		return param_set_charp(s, kp);

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	if (zswap_init_failed) {
		pr_err("can't enable, initialization failed\n");
		return -ENODEV;
	}
	if (!zswap_has_pool && zswap_init_started) {
		pr_err("can't enable, no pool configured\n");
		return -ENODEV;
	}

	return param_set_bool(val, kp);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}

/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted with the frontswap_store()
 * in the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
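/*
 * zswap_writeback_entry() is wired up as the ->evict callback in
 * zswap_zpool_ops, so it is invoked by the zpool when zpool_shrink() is
 * called from shrink_worker().
 */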
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (!tmp)
			return -ENOMEM;
	}

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);
	zpool_unmap_handle(pool, handle);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		kfree(tmp);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
		src = (u8 *)zhdr + sizeof(struct zswap_header);
		if (!zpool_can_sleep_mapped(pool)) {
			memcpy(tmp, src, entry->length);
			src = tmp;
			zpool_unmap_handle(pool, handle);
		}

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		if (!zpool_can_sleep_mapped(pool))
			kfree(tmp);
		else
			zpool_unmap_handle(pool, handle);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc);
	put_page(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because an invalidate happened during writeback;
	 *     search the tree and drop the entry if it is still found
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;

fail:
	if (!zpool_can_sleep_mapped(pool))
		kfree(tmp);

	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be happening
	 * concurrently. It is safe and okay to not free the entry.
	 * If we free the entry in the following put, it is also okay to
	 * return !0.
	 */
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;
	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos] != page[0])
			return 0;
	}
	*value = page[0];
	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}

/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	int ret;
	unsigned int hlen, dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	objcg = get_obj_cgroup_from_page(page);
	if (objcg && !obj_cgroup_may_zswap(objcg))
		goto shrink;

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto reject;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->offset = offset;
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is of size (PAGE_SIZE * 2). Reflect same in sg_list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little silly that we send an asynchronous request and
	 * then wait for its completion synchronously; in effect the process is
	 * synchronous.
	 * Theoretically, acomp lets users submit multiple requests to one acomp
	 * instance and have them completed simultaneously. But in this case
	 * frontswap stores and loads pages one at a time, so a single thread
	 * doing frontswap has no way to send a second page before the first
	 * one is done.
	 * Different threads running on different CPUs use different acomp
	 * instances, however, so multiple threads can do (de)compression in
	 * parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, &zhdr, hlen);
	memcpy(buf + hlen, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return ret;

shrink:
	pool = zswap_pool_last_get();
	if (pool)
		queue_work(shrink_wq, &pool->shrink_work);
	ret = -ENOMEM;
	goto reject;
}

/*
 * returns 0 if the page was successfully decompressed
 * returns -1 on entry not found or error
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_KERNEL);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
	if (zpool_evictable(entry->pool->zpool))
		src += sizeof(struct zswap_header);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};

/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_init_started = true;

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;

destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	zswap_entry_cache_destroy();
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_failed = true;
	zswap_enabled = false;
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Compressed cache for swap pages");