// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * zswap.c - zswap driver file
 *
 * zswap is a backend for frontswap that takes pages that are in the process
 * of being swapped out and attempts to compress and store them in a
 * RAM-based memory pool.  This can result in a significant I/O reduction on
 * the swap device and, in the case where decompressing from RAM is faster
 * than reading from the swap device, can also improve workload performance.
 *
 * Copyright (C) 2012 Seth Jennings <sjenning@linux.vnet.ibm.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include <linux/rbtree.h>
#include <linux/swap.h>
#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/zpool.h>
#include <crypto/acompress.h>

#include <linux/mm_types.h>
#include <linux/page-flags.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/workqueue.h>

#include "swap.h"

/*********************************
* statistics
**********************************/
/* Total bytes used by the compressed storage */
u64 zswap_pool_total_size;
/* The number of compressed pages currently stored in zswap */
atomic_t zswap_stored_pages = ATOMIC_INIT(0);
/* The number of same-value filled pages currently stored in zswap */
static atomic_t zswap_same_filled_pages = ATOMIC_INIT(0);

/*
 * The statistics below are not protected from concurrent access for
 * performance reasons so they may not be 100% accurate.  However,
 * they do provide useful information on roughly how many times a
 * certain event is occurring.
 */
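/*
 * All of the counters in this section are exported read-only through
 * debugfs under /sys/kernel/debug/zswap/; see zswap_debugfs_init() below.
 */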
/* Pool limit was hit (see zswap_max_pool_percent) */
static u64 zswap_pool_limit_hit;
/* Pages written back when pool limit was reached */
static u64 zswap_written_back_pages;
/* Store failed due to a reclaim failure after pool limit was reached */
static u64 zswap_reject_reclaim_fail;
/* Compressed page was too big for the allocator to (optimally) store */
static u64 zswap_reject_compress_poor;
/* Store failed because underlying allocator could not get memory */
static u64 zswap_reject_alloc_fail;
/* Store failed because the entry metadata could not be allocated (rare) */
static u64 zswap_reject_kmemcache_fail;
/* Duplicate store was encountered (rare) */
static u64 zswap_duplicate_entry;

/* Shrinker work queue */
static struct workqueue_struct *shrink_wq;
/* Pool limit was hit, we need to calm down */
static bool zswap_pool_reached_full;

/*********************************
* tunables
**********************************/

#define ZSWAP_PARAM_UNSET ""

/* Enable/disable zswap */
static bool zswap_enabled = IS_ENABLED(CONFIG_ZSWAP_DEFAULT_ON);
static int zswap_enabled_param_set(const char *,
				   const struct kernel_param *);
static const struct kernel_param_ops zswap_enabled_param_ops = {
	.set = zswap_enabled_param_set,
	.get = param_get_bool,
};
module_param_cb(enabled, &zswap_enabled_param_ops, &zswap_enabled, 0644);

/* Crypto compressor to use */
static char *zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
static int zswap_compressor_param_set(const char *,
				      const struct kernel_param *);
static const struct kernel_param_ops zswap_compressor_param_ops = {
	.set = zswap_compressor_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(compressor, &zswap_compressor_param_ops,
		&zswap_compressor, 0644);

/* Compressed storage zpool to use */
static char *zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
static int zswap_zpool_param_set(const char *, const struct kernel_param *);
static const struct kernel_param_ops zswap_zpool_param_ops = {
	.set = zswap_zpool_param_set,
	.get = param_get_charp,
	.free = param_free_charp,
};
module_param_cb(zpool, &zswap_zpool_param_ops, &zswap_zpool_type, 0644);

/* The maximum percentage of memory that the compressed pool can occupy */
static unsigned int zswap_max_pool_percent = 20;
module_param_named(max_pool_percent, zswap_max_pool_percent, uint, 0644);

/* The threshold for accepting new pages after the max_pool_percent was hit */
static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
		   uint, 0644);
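/*
 * Example of the resulting hysteresis with the defaults above, assuming a
 * machine with 16 GiB of RAM: the pool may grow to 20% of RAM (~3.2 GiB of
 * compressed data).  Once zswap_is_full() trips, new stores are rejected
 * until the pool shrinks below 90% of that limit (~2.88 GiB), at which
 * point zswap_can_accept() lets stores proceed again.
 */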
/*
 * Enable/disable handling same-value filled pages (enabled by default).
 * If disabled, every page is considered non-same-value filled.
 */
static bool zswap_same_filled_pages_enabled = true;
module_param_named(same_filled_pages_enabled, zswap_same_filled_pages_enabled,
		   bool, 0644);

/* Enable/disable handling non-same-value filled pages (enabled by default) */
static bool zswap_non_same_filled_pages_enabled = true;
module_param_named(non_same_filled_pages_enabled, zswap_non_same_filled_pages_enabled,
		   bool, 0644);

/*********************************
* data structures
**********************************/

struct crypto_acomp_ctx {
	struct crypto_acomp *acomp;
	struct acomp_req *req;
	struct crypto_wait wait;
	u8 *dstmem;
	struct mutex *mutex;
};

struct zswap_pool {
	struct zpool *zpool;
	struct crypto_acomp_ctx __percpu *acomp_ctx;
	struct kref kref;
	struct list_head list;
	struct work_struct release_work;
	struct work_struct shrink_work;
	struct hlist_node node;
	char tfm_name[CRYPTO_MAX_ALG_NAME];
};
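/*
 * Each zswap_pool has one crypto_acomp_ctx per CPU.  The acomp transform
 * and request are allocated in zswap_cpu_comp_prepare() when the pool is
 * registered with the CPU hotplug state below, while dstmem and mutex
 * point at the per-CPU buffer and lock set up by zswap_dstmem_prepare(),
 * which are shared by all pools on that CPU.
 */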
/*
 * struct zswap_entry
 *
 * This structure contains the metadata for tracking a single compressed
 * page within zswap.
 *
 * rbnode - links the entry into red-black tree for the appropriate swap type
 * offset - the swap offset for the entry.  Index into the red-black tree.
 * refcount - the number of outstanding references to the entry. This is
 *            needed to protect against premature freeing of the entry by
 *            concurrent calls to load, invalidate, and writeback.  The lock
 *            for the zswap_tree structure that contains the entry must
 *            be held while changing the refcount.  Since the lock must
 *            be held, there is no reason to also make refcount atomic.
 * length - the length in bytes of the compressed page data.  Needed during
 *          decompression.  For a same-value filled page length is 0.
 * pool - the zswap_pool the entry's data is in
 * handle - zpool allocation handle that stores the compressed page data
 * value - the value of a same-value filled page (stored instead of handle
 *         when length is 0)
 */
struct zswap_entry {
	struct rb_node rbnode;
	pgoff_t offset;
	int refcount;
	unsigned int length;
	struct zswap_pool *pool;
	union {
		unsigned long handle;
		unsigned long value;
	};
	struct obj_cgroup *objcg;
};

struct zswap_header {
	swp_entry_t swpentry;
};

/*
 * The tree lock in the zswap_tree struct protects a few things:
 * - the rbtree
 * - the refcount field of each entry in the tree
 */
struct zswap_tree {
	struct rb_root rbroot;
	spinlock_t lock;
};

static struct zswap_tree *zswap_trees[MAX_SWAPFILES];

/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
static DEFINE_SPINLOCK(zswap_pools_lock);
/* pool counter to provide unique names to zpool */
static atomic_t zswap_pools_count = ATOMIC_INIT(0);

/* used by param callback function */
static bool zswap_init_started;

/* fatal error during init */
static bool zswap_init_failed;

/* init completed, but couldn't create the initial pool */
static bool zswap_has_pool;

/*********************************
* helpers and fwd declarations
**********************************/

#define zswap_pool_debug(msg, p)				\
	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
		 zpool_get_type((p)->zpool))

static int zswap_writeback_entry(struct zpool *pool, unsigned long handle);
static int zswap_pool_get(struct zswap_pool *pool);
static void zswap_pool_put(struct zswap_pool *pool);

static const struct zpool_ops zswap_zpool_ops = {
	.evict = zswap_writeback_entry
};

static bool zswap_is_full(void)
{
	return totalram_pages() * zswap_max_pool_percent / 100 <
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static bool zswap_can_accept(void)
{
	return totalram_pages() * zswap_accept_thr_percent / 100 *
				zswap_max_pool_percent / 100 >
			DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE);
}

static void zswap_update_total_size(void)
{
	struct zswap_pool *pool;
	u64 total = 0;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		total += zpool_get_total_size(pool->zpool);

	rcu_read_unlock();

	zswap_pool_total_size = total;
}

/*********************************
* zswap entry functions
**********************************/
static struct kmem_cache *zswap_entry_cache;

static int __init zswap_entry_cache_create(void)
{
	zswap_entry_cache = KMEM_CACHE(zswap_entry, 0);
	return zswap_entry_cache == NULL;
}

static void __init zswap_entry_cache_destroy(void)
{
	kmem_cache_destroy(zswap_entry_cache);
}

static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp)
{
	struct zswap_entry *entry;
	entry = kmem_cache_alloc(zswap_entry_cache, gfp);
	if (!entry)
		return NULL;
	entry->refcount = 1;
	RB_CLEAR_NODE(&entry->rbnode);
	return entry;
}

static void zswap_entry_cache_free(struct zswap_entry *entry)
{
	kmem_cache_free(zswap_entry_cache, entry);
}
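/*
 * A freshly allocated entry starts with refcount == 1.  That initial
 * reference is owned by the rbtree once the entry is inserted, and is
 * dropped by zswap_frontswap_invalidate_page(), by a duplicate store, or
 * by writeback once the compressed copy is no longer needed; when the
 * last reference goes away, zswap_free_entry() below releases the zpool
 * allocation and the entry itself.
 */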
/*********************************
* rbtree functions
**********************************/
static struct zswap_entry *zswap_rb_search(struct rb_root *root, pgoff_t offset)
{
	struct rb_node *node = root->rb_node;
	struct zswap_entry *entry;

	while (node) {
		entry = rb_entry(node, struct zswap_entry, rbnode);
		if (entry->offset > offset)
			node = node->rb_left;
		else if (entry->offset < offset)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * In the case that an entry with the same offset is found, a pointer to
 * the existing entry is stored in dupentry and the function returns -EEXIST
 */
static int zswap_rb_insert(struct rb_root *root, struct zswap_entry *entry,
			struct zswap_entry **dupentry)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;
	struct zswap_entry *myentry;

	while (*link) {
		parent = *link;
		myentry = rb_entry(parent, struct zswap_entry, rbnode);
		if (myentry->offset > entry->offset)
			link = &(*link)->rb_left;
		else if (myentry->offset < entry->offset)
			link = &(*link)->rb_right;
		else {
			*dupentry = myentry;
			return -EEXIST;
		}
	}
	rb_link_node(&entry->rbnode, parent, link);
	rb_insert_color(&entry->rbnode, root);
	return 0;
}

static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
{
	if (!RB_EMPTY_NODE(&entry->rbnode)) {
		rb_erase(&entry->rbnode, root);
		RB_CLEAR_NODE(&entry->rbnode);
	}
}

/*
 * Carries out the common pattern of freeing an entry's zpool allocation,
 * freeing the entry itself, and decrementing the number of stored pages.
 */
static void zswap_free_entry(struct zswap_entry *entry)
{
	if (entry->objcg) {
		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
		obj_cgroup_put(entry->objcg);
	}
	if (!entry->length)
		atomic_dec(&zswap_same_filled_pages);
	else {
		zpool_free(entry->pool->zpool, entry->handle);
		zswap_pool_put(entry->pool);
	}
	zswap_entry_cache_free(entry);
	atomic_dec(&zswap_stored_pages);
	zswap_update_total_size();
}

/* caller must hold the tree lock */
static void zswap_entry_get(struct zswap_entry *entry)
{
	entry->refcount++;
}

/* caller must hold the tree lock
 * remove from the tree and free it, if nobody references the entry
 */
static void zswap_entry_put(struct zswap_tree *tree,
			struct zswap_entry *entry)
{
	int refcount = --entry->refcount;

	BUG_ON(refcount < 0);
	if (refcount == 0) {
		zswap_rb_erase(&tree->rbroot, entry);
		zswap_free_entry(entry);
	}
}

/* caller must hold the tree lock */
static struct zswap_entry *zswap_entry_find_get(struct rb_root *root,
				pgoff_t offset)
{
	struct zswap_entry *entry;

	entry = zswap_rb_search(root, offset);
	if (entry)
		zswap_entry_get(entry);

	return entry;
}
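/*
 * Typical usage of the helpers above (see zswap_frontswap_load()):
 *
 *	spin_lock(&tree->lock);
 *	entry = zswap_entry_find_get(&tree->rbroot, offset);
 *	spin_unlock(&tree->lock);
 *	if (!entry)
 *		return -1;
 *	... read entry->handle or entry->value ...
 *	spin_lock(&tree->lock);
 *	zswap_entry_put(tree, entry);
 *	spin_unlock(&tree->lock);
 *
 * The extra reference taken under the tree lock keeps the entry (and its
 * zpool allocation) alive even if a concurrent invalidate drops the tree's
 * own reference while the lock is not held.
 */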
/*********************************
* per-cpu code
**********************************/
static DEFINE_PER_CPU(u8 *, zswap_dstmem);
/*
 * If users dynamically change the zpool type or compressor while zswap is
 * running, zswap can have more than one zpool on one cpu, but they are
 * sharing dstmem.  So we need this mutex to be per-cpu.
 */
static DEFINE_PER_CPU(struct mutex *, zswap_mutex);

static int zswap_dstmem_prepare(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu));
	if (!dst)
		return -ENOMEM;

	mutex = kmalloc_node(sizeof(*mutex), GFP_KERNEL, cpu_to_node(cpu));
	if (!mutex) {
		kfree(dst);
		return -ENOMEM;
	}

	mutex_init(mutex);
	per_cpu(zswap_dstmem, cpu) = dst;
	per_cpu(zswap_mutex, cpu) = mutex;
	return 0;
}

static int zswap_dstmem_dead(unsigned int cpu)
{
	struct mutex *mutex;
	u8 *dst;

	mutex = per_cpu(zswap_mutex, cpu);
	kfree(mutex);
	per_cpu(zswap_mutex, cpu) = NULL;

	dst = per_cpu(zswap_dstmem, cpu);
	kfree(dst);
	per_cpu(zswap_dstmem, cpu) = NULL;

	return 0;
}
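/*
 * Note that dstmem is two pages: the compressed output for an incompressible
 * page can exceed PAGE_SIZE, and zswap_frontswap_store() maps the whole
 * 2 * PAGE_SIZE buffer into its output scatterlist.  Results that are still
 * too big for the allocator are rejected later when zpool_malloc() returns
 * -ENOSPC and are counted in zswap_reject_compress_poor.
 */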
static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);
	struct crypto_acomp *acomp;
	struct acomp_req *req;

	acomp = crypto_alloc_acomp_node(pool->tfm_name, 0, 0, cpu_to_node(cpu));
	if (IS_ERR(acomp)) {
		pr_err("could not alloc crypto acomp %s : %ld\n",
		       pool->tfm_name, PTR_ERR(acomp));
		return PTR_ERR(acomp);
	}
	acomp_ctx->acomp = acomp;

	req = acomp_request_alloc(acomp_ctx->acomp);
	if (!req) {
		pr_err("could not alloc crypto acomp_request %s\n",
		       pool->tfm_name);
		crypto_free_acomp(acomp_ctx->acomp);
		return -ENOMEM;
	}
	acomp_ctx->req = req;

	crypto_init_wait(&acomp_ctx->wait);
	/*
	 * If the backend of acomp is an async zip driver, crypto_req_done()
	 * will wake up crypto_wait_req(); if the backend is a synchronous
	 * scomp, the callback won't be called and crypto_wait_req() returns
	 * without blocking.
	 */
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &acomp_ctx->wait);

	acomp_ctx->mutex = per_cpu(zswap_mutex, cpu);
	acomp_ctx->dstmem = per_cpu(zswap_dstmem, cpu);

	return 0;
}

static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
{
	struct zswap_pool *pool = hlist_entry(node, struct zswap_pool, node);
	struct crypto_acomp_ctx *acomp_ctx = per_cpu_ptr(pool->acomp_ctx, cpu);

	if (!IS_ERR_OR_NULL(acomp_ctx)) {
		if (!IS_ERR_OR_NULL(acomp_ctx->req))
			acomp_request_free(acomp_ctx->req);
		if (!IS_ERR_OR_NULL(acomp_ctx->acomp))
			crypto_free_acomp(acomp_ctx->acomp);
	}

	return 0;
}

/*********************************
* pool functions
**********************************/

static struct zswap_pool *__zswap_pool_current(void)
{
	struct zswap_pool *pool;

	pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list);
	WARN_ONCE(!pool && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);

	return pool;
}

static struct zswap_pool *zswap_pool_current(void)
{
	assert_spin_locked(&zswap_pools_lock);

	return __zswap_pool_current();
}

static struct zswap_pool *zswap_pool_current_get(void)
{
	struct zswap_pool *pool;

	rcu_read_lock();

	pool = __zswap_pool_current();
	if (!zswap_pool_get(pool))
		pool = NULL;

	rcu_read_unlock();

	return pool;
}

static struct zswap_pool *zswap_pool_last_get(void)
{
	struct zswap_pool *pool, *last = NULL;

	rcu_read_lock();

	list_for_each_entry_rcu(pool, &zswap_pools, list)
		last = pool;
	WARN_ONCE(!last && zswap_has_pool,
		  "%s: no page storage pool!\n", __func__);
	if (!zswap_pool_get(last))
		last = NULL;

	rcu_read_unlock();

	return last;
}

/* type and compressor must be null-terminated */
static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
{
	struct zswap_pool *pool;

	assert_spin_locked(&zswap_pools_lock);

	list_for_each_entry_rcu(pool, &zswap_pools, list) {
		if (strcmp(pool->tfm_name, compressor))
			continue;
		if (strcmp(zpool_get_type(pool->zpool), type))
			continue;
		/* if we can't get it, it's about to be destroyed */
		if (!zswap_pool_get(pool))
			continue;
		return pool;
	}

	return NULL;
}

static void shrink_worker(struct work_struct *w)
{
	struct zswap_pool *pool = container_of(w, typeof(*pool),
						shrink_work);

	if (zpool_shrink(pool->zpool, 1, NULL))
		zswap_reject_reclaim_fail++;
	zswap_pool_put(pool);
}
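/*
 * shrink_worker() runs on shrink_wq and is queued from the shrink path of
 * zswap_frontswap_store() when the pool limit is hit.  zpool_shrink() asks
 * the zpool driver to evict an allocation, which calls back into
 * zswap_writeback_entry() through zswap_zpool_ops.evict to decompress the
 * page and write it to the backing swap device.
 */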
static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
{
	struct zswap_pool *pool;
	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	int ret;

	if (!zswap_has_pool) {
		/* if either is unset, pool initialization failed, and we
		 * need both params to be set correctly before trying to
		 * create a pool.
		 */
		if (!strcmp(type, ZSWAP_PARAM_UNSET))
			return NULL;
		if (!strcmp(compressor, ZSWAP_PARAM_UNSET))
			return NULL;
	}

	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return NULL;

	/* unique name for each pool specifically required by zsmalloc */
	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));

	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
	if (!pool->zpool) {
		pr_err("%s zpool not available\n", type);
		goto error;
	}
	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));

	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));

	pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
	if (!pool->acomp_ctx) {
		pr_err("percpu alloc failed\n");
		goto error;
	}

	ret = cpuhp_state_add_instance(CPUHP_MM_ZSWP_POOL_PREPARE,
				       &pool->node);
	if (ret)
		goto error;
	pr_debug("using %s compressor\n", pool->tfm_name);

	/* being the current pool takes 1 ref; this func expects the
	 * caller to always add the new pool as the current pool
	 */
	kref_init(&pool->kref);
	INIT_LIST_HEAD(&pool->list);
	INIT_WORK(&pool->shrink_work, shrink_worker);

	zswap_pool_debug("created", pool);

	return pool;

error:
	if (pool->acomp_ctx)
		free_percpu(pool->acomp_ctx);
	if (pool->zpool)
		zpool_destroy_pool(pool->zpool);
	kfree(pool);
	return NULL;
}

static __init struct zswap_pool *__zswap_pool_create_fallback(void)
{
	bool has_comp, has_zpool;

	has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	if (!has_comp && strcmp(zswap_compressor,
				CONFIG_ZSWAP_COMPRESSOR_DEFAULT)) {
		pr_err("compressor %s not available, using default %s\n",
		       zswap_compressor, CONFIG_ZSWAP_COMPRESSOR_DEFAULT);
		param_free_charp(&zswap_compressor);
		zswap_compressor = CONFIG_ZSWAP_COMPRESSOR_DEFAULT;
		has_comp = crypto_has_acomp(zswap_compressor, 0, 0);
	}
	if (!has_comp) {
		pr_err("default compressor %s not available\n",
		       zswap_compressor);
		param_free_charp(&zswap_compressor);
		zswap_compressor = ZSWAP_PARAM_UNSET;
	}

	has_zpool = zpool_has_pool(zswap_zpool_type);
	if (!has_zpool && strcmp(zswap_zpool_type,
				 CONFIG_ZSWAP_ZPOOL_DEFAULT)) {
		pr_err("zpool %s not available, using default %s\n",
		       zswap_zpool_type, CONFIG_ZSWAP_ZPOOL_DEFAULT);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = CONFIG_ZSWAP_ZPOOL_DEFAULT;
		has_zpool = zpool_has_pool(zswap_zpool_type);
	}
	if (!has_zpool) {
		pr_err("default zpool %s not available\n",
		       zswap_zpool_type);
		param_free_charp(&zswap_zpool_type);
		zswap_zpool_type = ZSWAP_PARAM_UNSET;
	}

	if (!has_comp || !has_zpool)
		return NULL;

	return zswap_pool_create(zswap_zpool_type, zswap_compressor);
}

static void zswap_pool_destroy(struct zswap_pool *pool)
{
	zswap_pool_debug("destroying", pool);

	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
	free_percpu(pool->acomp_ctx);
	zpool_destroy_pool(pool->zpool);
	kfree(pool);
}

static int __must_check zswap_pool_get(struct zswap_pool *pool)
{
	if (!pool)
		return 0;

	return kref_get_unless_zero(&pool->kref);
}

static void __zswap_pool_release(struct work_struct *work)
{
	struct zswap_pool *pool = container_of(work, typeof(*pool),
						release_work);

	synchronize_rcu();
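	/*
	 * Readers iterate zswap_pools under rcu_read_lock() and may still
	 * hold a pointer to this pool even though it has been removed from
	 * the list.  The grace period above guarantees all such readers have
	 * finished (and either taken a reference or given up) before we
	 * check the refcount and tear the pool down.
	 */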
	/* nobody should have been able to get a kref... */
	WARN_ON(kref_get_unless_zero(&pool->kref));

	/* pool is now off zswap_pools list and has no references. */
	zswap_pool_destroy(pool);
}

static void __zswap_pool_empty(struct kref *kref)
{
	struct zswap_pool *pool;

	pool = container_of(kref, typeof(*pool), kref);

	spin_lock(&zswap_pools_lock);

	WARN_ON(pool == zswap_pool_current());

	list_del_rcu(&pool->list);

	INIT_WORK(&pool->release_work, __zswap_pool_release);
	schedule_work(&pool->release_work);

	spin_unlock(&zswap_pools_lock);
}

static void zswap_pool_put(struct zswap_pool *pool)
{
	kref_put(&pool->kref, __zswap_pool_empty);
}

/*********************************
* param callbacks
**********************************/

/* val must be a null-terminated string */
static int __zswap_param_set(const char *val, const struct kernel_param *kp,
			     char *type, char *compressor)
{
	struct zswap_pool *pool, *put_pool = NULL;
	char *s = strstrip((char *)val);
	int ret;

	if (zswap_init_failed) {
		pr_err("can't set param, initialization failed\n");
		return -ENODEV;
	}

	/* no change required */
	if (!strcmp(s, *(char **)kp->arg) && zswap_has_pool)
		return 0;

	/* if this is load-time (pre-init) param setting,
	 * don't create a pool; that's done during init.
	 */
	if (!zswap_init_started)
		return param_set_charp(s, kp);

	if (!type) {
		if (!zpool_has_pool(s)) {
			pr_err("zpool %s not available\n", s);
			return -ENOENT;
		}
		type = s;
	} else if (!compressor) {
		if (!crypto_has_acomp(s, 0, 0)) {
			pr_err("compressor %s not available\n", s);
			return -ENOENT;
		}
		compressor = s;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	spin_lock(&zswap_pools_lock);

	pool = zswap_pool_find_get(type, compressor);
	if (pool) {
		zswap_pool_debug("using existing", pool);
		WARN_ON(pool == zswap_pool_current());
		list_del_rcu(&pool->list);
	}

	spin_unlock(&zswap_pools_lock);

	if (!pool)
		pool = zswap_pool_create(type, compressor);

	if (pool)
		ret = param_set_charp(s, kp);
	else
		ret = -EINVAL;

	spin_lock(&zswap_pools_lock);

	if (!ret) {
		put_pool = zswap_pool_current();
		list_add_rcu(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else if (pool) {
		/* add the possibly pre-existing pool to the end of the pools
		 * list; if it's new (and empty) then it'll be removed and
		 * destroyed by the put after we drop the lock
		 */
		list_add_tail_rcu(&pool->list, &zswap_pools);
		put_pool = pool;
	}

	spin_unlock(&zswap_pools_lock);

	if (!zswap_has_pool && !pool) {
		/* if initial pool creation failed, and this pool creation also
		 * failed, maybe both compressor and zpool params were bad.
		 * Allow changing this param, so pool creation will succeed
		 * when the other param is changed. We already verified this
		 * param is ok in the zpool_has_pool() or crypto_has_acomp()
		 * checks above.
		 */
		ret = param_set_charp(s, kp);
	}

	/* drop the ref from either the old current pool,
	 * or the new pool we failed to add
	 */
	if (put_pool)
		zswap_pool_put(put_pool);

	return ret;
}

static int zswap_compressor_param_set(const char *val,
				      const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, zswap_zpool_type, NULL);
}

static int zswap_zpool_param_set(const char *val,
				 const struct kernel_param *kp)
{
	return __zswap_param_set(val, kp, NULL, zswap_compressor);
}

static int zswap_enabled_param_set(const char *val,
				   const struct kernel_param *kp)
{
	if (zswap_init_failed) {
		pr_err("can't enable, initialization failed\n");
		return -ENODEV;
	}
	if (!zswap_has_pool && zswap_init_started) {
		pr_err("can't enable, no pool configured\n");
		return -ENODEV;
	}

	return param_set_bool(val, kp);
}

/*********************************
* writeback code
**********************************/
/* return enum for zswap_get_swap_cache_page */
enum zswap_get_swap_ret {
	ZSWAP_SWAPCACHE_NEW,
	ZSWAP_SWAPCACHE_EXIST,
	ZSWAP_SWAPCACHE_FAIL,
};

/*
 * zswap_get_swap_cache_page
 *
 * This is an adaptation of read_swap_cache_async()
 *
 * This function tries to find a page with the given swap entry
 * in the swapper_space address space (the swap cache).  If the page
 * is found, it is returned in retpage.  Otherwise, a page is allocated,
 * added to the swap cache, and returned in retpage.
 *
 * On success, the swap cache page is returned in retpage
 * Returns ZSWAP_SWAPCACHE_EXIST if page was already in the swap cache
 * Returns ZSWAP_SWAPCACHE_NEW if the new page needs to be populated,
 *     the new page is added to swapcache and locked
 * Returns ZSWAP_SWAPCACHE_FAIL on error
 */
static int zswap_get_swap_cache_page(swp_entry_t entry,
				struct page **retpage)
{
	bool page_was_allocated;

	*retpage = __read_swap_cache_async(entry, GFP_KERNEL,
			NULL, 0, &page_was_allocated);
	if (page_was_allocated)
		return ZSWAP_SWAPCACHE_NEW;
	if (!*retpage)
		return ZSWAP_SWAPCACHE_FAIL;
	return ZSWAP_SWAPCACHE_EXIST;
}
/*
 * Attempts to free an entry by adding a page to the swap cache,
 * decompressing the entry data into the page, and issuing a
 * bio write to write the page back to the swap device.
 *
 * This can be thought of as a "resumed writeback" of the page
 * to the swap device.  We are basically resuming the same swap
 * writeback path that was intercepted by frontswap_store() in
 * the first place.  After the page has been decompressed into
 * the swap cache, the compressed version stored by zswap can be
 * freed.
 */
static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
{
	struct zswap_header *zhdr;
	swp_entry_t swpentry;
	struct zswap_tree *tree;
	pgoff_t offset;
	struct zswap_entry *entry;
	struct page *page;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;

	u8 *src, *tmp = NULL;
	unsigned int dlen;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	if (!zpool_can_sleep_mapped(pool)) {
		tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
		if (!tmp)
			return -ENOMEM;
	}

	/* extract swpentry from data */
	zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
	swpentry = zhdr->swpentry; /* here */
	tree = zswap_trees[swp_type(swpentry)];
	offset = swp_offset(swpentry);

	/* find and ref zswap entry */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was invalidated */
		spin_unlock(&tree->lock);
		zpool_unmap_handle(pool, handle);
		kfree(tmp);
		return 0;
	}
	spin_unlock(&tree->lock);
	BUG_ON(offset != entry->offset);

	src = (u8 *)zhdr + sizeof(struct zswap_header);
	if (!zpool_can_sleep_mapped(pool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(pool, handle);
	}

	/* try to allocate swap cache page */
	switch (zswap_get_swap_cache_page(swpentry, &page)) {
	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
		ret = -ENOMEM;
		goto fail;

	case ZSWAP_SWAPCACHE_EXIST:
		/* page is already in the swap cache, ignore for now */
		put_page(page);
		ret = -EEXIST;
		goto fail;

	case ZSWAP_SWAPCACHE_NEW: /* page is locked */
		/* decompress */
		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
		dlen = PAGE_SIZE;

		mutex_lock(acomp_ctx->mutex);
		sg_init_one(&input, src, entry->length);
		sg_init_table(&output, 1);
		sg_set_page(&output, page, PAGE_SIZE, 0);
		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
		dlen = acomp_ctx->req->dlen;
		mutex_unlock(acomp_ctx->mutex);

		BUG_ON(ret);
		BUG_ON(dlen != PAGE_SIZE);

		/* page is up to date */
		SetPageUptodate(page);
	}

	/* move it to the tail of the inactive list after end_writeback */
	SetPageReclaim(page);

	/* start writeback */
	__swap_writepage(page, &wbc, end_swap_bio_write);
	put_page(page);
	zswap_written_back_pages++;

	spin_lock(&tree->lock);
	/* drop local reference */
	zswap_entry_put(tree, entry);

	/*
	 * There are two possible situations for entry here:
	 * (1) refcount is 1 (normal case), entry is valid and on the tree
	 * (2) refcount is 0, entry is freed and not on the tree
	 *     because invalidate happened during writeback
	 * search the tree and free the entry if we find it
	 */
	if (entry == zswap_rb_search(&tree->rbroot, offset))
		zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	goto end;
	/*
	 * If we get here due to ZSWAP_SWAPCACHE_EXIST, a load may be
	 * happening concurrently.  It is safe and okay to not free the
	 * entry.  If we free the entry in the following put, it is also
	 * okay to return !0.
	 */
fail:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

end:
	if (zpool_can_sleep_mapped(pool))
		zpool_unmap_handle(pool, handle);
	else
		kfree(tmp);

	return ret;
}

static int zswap_is_page_same_filled(void *ptr, unsigned long *value)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;
	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos] != page[0])
			return 0;
	}
	*value = page[0];
	return 1;
}

static void zswap_fill_page(void *ptr, unsigned long value)
{
	unsigned long *page;

	page = (unsigned long *)ptr;
	memset_l(page, value, PAGE_SIZE / sizeof(unsigned long));
}
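/*
 * For example, a page of all zeroes (the common case) or one filled with a
 * single repeating unsigned long pattern is detected by
 * zswap_is_page_same_filled() and stored as just entry->value with
 * entry->length == 0, so no compression or zpool allocation is needed;
 * zswap_fill_page() reconstructs it with memset_l() on load.
 */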
/*********************************
* frontswap hooks
**********************************/
/* attempts to compress and store a single page */
static int zswap_frontswap_store(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *dupentry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	struct obj_cgroup *objcg = NULL;
	struct zswap_pool *pool;
	int ret;
	unsigned int hlen, dlen = PAGE_SIZE;
	unsigned long handle, value;
	char *buf;
	u8 *src, *dst;
	struct zswap_header zhdr = { .swpentry = swp_entry(type, offset) };
	gfp_t gfp;

	/* THP isn't supported */
	if (PageTransHuge(page)) {
		ret = -EINVAL;
		goto reject;
	}

	if (!zswap_enabled || !tree) {
		ret = -ENODEV;
		goto reject;
	}

	objcg = get_obj_cgroup_from_page(page);
	if (objcg && !obj_cgroup_may_zswap(objcg))
		goto shrink;

	/* reclaim space if needed */
	if (zswap_is_full()) {
		zswap_pool_limit_hit++;
		zswap_pool_reached_full = true;
		goto shrink;
	}

	if (zswap_pool_reached_full) {
		if (!zswap_can_accept()) {
			ret = -ENOMEM;
			goto reject;
		} else
			zswap_pool_reached_full = false;
	}

	/* allocate entry */
	entry = zswap_entry_cache_alloc(GFP_KERNEL);
	if (!entry) {
		zswap_reject_kmemcache_fail++;
		ret = -ENOMEM;
		goto reject;
	}

	if (zswap_same_filled_pages_enabled) {
		src = kmap_atomic(page);
		if (zswap_is_page_same_filled(src, &value)) {
			kunmap_atomic(src);
			entry->offset = offset;
			entry->length = 0;
			entry->value = value;
			atomic_inc(&zswap_same_filled_pages);
			goto insert_entry;
		}
		kunmap_atomic(src);
	}

	if (!zswap_non_same_filled_pages_enabled) {
		ret = -EINVAL;
		goto freepage;
	}

	/* if entry is successfully added, it keeps the reference */
	entry->pool = zswap_pool_current_get();
	if (!entry->pool) {
		ret = -EINVAL;
		goto freepage;
	}

	/* compress */
	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);

	mutex_lock(acomp_ctx->mutex);

	dst = acomp_ctx->dstmem;
	sg_init_table(&input, 1);
	sg_set_page(&input, page, PAGE_SIZE, 0);

	/* zswap_dstmem is PAGE_SIZE * 2 in size; reflect that in the sg list */
	sg_init_one(&output, dst, PAGE_SIZE * 2);
	acomp_request_set_params(acomp_ctx->req, &input, &output, PAGE_SIZE, dlen);
	/*
	 * It may look a little odd that we send an asynchronous request and
	 * then wait for its completion synchronously; the process is in fact
	 * synchronous.
	 * Theoretically, acomp supports submitting multiple requests on one
	 * acomp instance and having them complete concurrently, but frontswap
	 * stores and loads pages one at a time, so there is no way for a
	 * single thread to send a second page before the first one is done.
	 * Different threads running on different CPUs use different acomp
	 * instances, however, so multiple threads can (de)compress in parallel.
	 */
	ret = crypto_wait_req(crypto_acomp_compress(acomp_ctx->req), &acomp_ctx->wait);
	dlen = acomp_ctx->req->dlen;

	if (ret) {
		ret = -EINVAL;
		goto put_dstmem;
	}

	/* store */
	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
	if (zpool_malloc_support_movable(entry->pool->zpool))
		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
	ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle);
	if (ret == -ENOSPC) {
		zswap_reject_compress_poor++;
		goto put_dstmem;
	}
	if (ret) {
		zswap_reject_alloc_fail++;
		goto put_dstmem;
	}
	buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO);
	memcpy(buf, &zhdr, hlen);
	memcpy(buf + hlen, dst, dlen);
	zpool_unmap_handle(entry->pool->zpool, handle);
	mutex_unlock(acomp_ctx->mutex);

	/* populate entry */
	entry->offset = offset;
	entry->handle = handle;
	entry->length = dlen;

insert_entry:
	entry->objcg = objcg;
	if (objcg) {
		obj_cgroup_charge_zswap(objcg, entry->length);
		/* Account before objcg ref is moved to tree */
		count_objcg_event(objcg, ZSWPOUT);
	}

	/* map */
	spin_lock(&tree->lock);
	do {
		ret = zswap_rb_insert(&tree->rbroot, entry, &dupentry);
		if (ret == -EEXIST) {
			zswap_duplicate_entry++;
			/* remove from rbtree */
			zswap_rb_erase(&tree->rbroot, dupentry);
			zswap_entry_put(tree, dupentry);
		}
	} while (ret == -EEXIST);
	spin_unlock(&tree->lock);

	/* update stats */
	atomic_inc(&zswap_stored_pages);
	zswap_update_total_size();
	count_vm_event(ZSWPOUT);

	return 0;

put_dstmem:
	mutex_unlock(acomp_ctx->mutex);
	zswap_pool_put(entry->pool);
freepage:
	zswap_entry_cache_free(entry);
reject:
	if (objcg)
		obj_cgroup_put(objcg);
	return ret;

shrink:
	pool = zswap_pool_last_get();
	if (pool)
		queue_work(shrink_wq, &pool->shrink_work);
	ret = -ENOMEM;
	goto reject;
}
/*
 * Returns 0 if the page was successfully decompressed.
 * Returns -1 if the entry was not found or on error.
 */
static int zswap_frontswap_load(unsigned type, pgoff_t offset,
				struct page *page)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;
	struct scatterlist input, output;
	struct crypto_acomp_ctx *acomp_ctx;
	u8 *src, *dst, *tmp;
	unsigned int dlen;
	int ret;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_entry_find_get(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return -1;
	}
	spin_unlock(&tree->lock);

	if (!entry->length) {
		dst = kmap_atomic(page);
		zswap_fill_page(dst, entry->value);
		kunmap_atomic(dst);
		ret = 0;
		goto stats;
	}

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		tmp = kmalloc(entry->length, GFP_ATOMIC);
		if (!tmp) {
			ret = -ENOMEM;
			goto freeentry;
		}
	}

	/* decompress */
	dlen = PAGE_SIZE;
	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
	if (zpool_evictable(entry->pool->zpool))
		src += sizeof(struct zswap_header);

	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
		memcpy(tmp, src, entry->length);
		src = tmp;
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	}

	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
	mutex_lock(acomp_ctx->mutex);
	sg_init_one(&input, src, entry->length);
	sg_init_table(&output, 1);
	sg_set_page(&output, page, PAGE_SIZE, 0);
	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
	ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
	mutex_unlock(acomp_ctx->mutex);

	if (zpool_can_sleep_mapped(entry->pool->zpool))
		zpool_unmap_handle(entry->pool->zpool, entry->handle);
	else
		kfree(tmp);

	BUG_ON(ret);
stats:
	count_vm_event(ZSWPIN);
	if (entry->objcg)
		count_objcg_event(entry->objcg, ZSWPIN);
freeentry:
	spin_lock(&tree->lock);
	zswap_entry_put(tree, entry);
	spin_unlock(&tree->lock);

	return ret;
}

/* frees an entry in zswap */
static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry;

	/* find */
	spin_lock(&tree->lock);
	entry = zswap_rb_search(&tree->rbroot, offset);
	if (!entry) {
		/* entry was written back */
		spin_unlock(&tree->lock);
		return;
	}

	/* remove from rbtree */
	zswap_rb_erase(&tree->rbroot, entry);

	/* drop the initial reference from entry creation */
	zswap_entry_put(tree, entry);

	spin_unlock(&tree->lock);
}

/* frees all zswap entries for the given swap type */
static void zswap_frontswap_invalidate_area(unsigned type)
{
	struct zswap_tree *tree = zswap_trees[type];
	struct zswap_entry *entry, *n;

	if (!tree)
		return;

	/* walk the tree and free everything */
	spin_lock(&tree->lock);
	rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
		zswap_free_entry(entry);
	tree->rbroot = RB_ROOT;
	spin_unlock(&tree->lock);
	kfree(tree);
	zswap_trees[type] = NULL;
}

static void zswap_frontswap_init(unsigned type)
{
	struct zswap_tree *tree;

	tree = kzalloc(sizeof(*tree), GFP_KERNEL);
	if (!tree) {
		pr_err("alloc failed, zswap disabled for swap type %d\n", type);
		return;
	}

	tree->rbroot = RB_ROOT;
	spin_lock_init(&tree->lock);
	zswap_trees[type] = tree;
}

static const struct frontswap_ops zswap_frontswap_ops = {
	.store = zswap_frontswap_store,
	.load = zswap_frontswap_load,
	.invalidate_page = zswap_frontswap_invalidate_page,
	.invalidate_area = zswap_frontswap_invalidate_area,
	.init = zswap_frontswap_init
};
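/*
 * These ops are registered with frontswap in init_zswap() below.  Roughly
 * speaking, from that point on swap writeout calls zswap_frontswap_store()
 * before falling back to the backing device, and swapin tries
 * zswap_frontswap_load() before issuing a read bio.
 */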
/*********************************
* debugfs functions
**********************************/
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static struct dentry *zswap_debugfs_root;

static int __init zswap_debugfs_init(void)
{
	if (!debugfs_initialized())
		return -ENODEV;

	zswap_debugfs_root = debugfs_create_dir("zswap", NULL);

	debugfs_create_u64("pool_limit_hit", 0444,
			   zswap_debugfs_root, &zswap_pool_limit_hit);
	debugfs_create_u64("reject_reclaim_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_reclaim_fail);
	debugfs_create_u64("reject_alloc_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_alloc_fail);
	debugfs_create_u64("reject_kmemcache_fail", 0444,
			   zswap_debugfs_root, &zswap_reject_kmemcache_fail);
	debugfs_create_u64("reject_compress_poor", 0444,
			   zswap_debugfs_root, &zswap_reject_compress_poor);
	debugfs_create_u64("written_back_pages", 0444,
			   zswap_debugfs_root, &zswap_written_back_pages);
	debugfs_create_u64("duplicate_entry", 0444,
			   zswap_debugfs_root, &zswap_duplicate_entry);
	debugfs_create_u64("pool_total_size", 0444,
			   zswap_debugfs_root, &zswap_pool_total_size);
	debugfs_create_atomic_t("stored_pages", 0444,
				zswap_debugfs_root, &zswap_stored_pages);
	debugfs_create_atomic_t("same_filled_pages", 0444,
				zswap_debugfs_root, &zswap_same_filled_pages);

	return 0;
}
#else
static int __init zswap_debugfs_init(void)
{
	return 0;
}
#endif

/*********************************
* module init and exit
**********************************/
static int __init init_zswap(void)
{
	struct zswap_pool *pool;
	int ret;

	zswap_init_started = true;

	if (zswap_entry_cache_create()) {
		pr_err("entry cache creation failed\n");
		goto cache_fail;
	}

	ret = cpuhp_setup_state(CPUHP_MM_ZSWP_MEM_PREPARE, "mm/zswap:prepare",
				zswap_dstmem_prepare, zswap_dstmem_dead);
	if (ret) {
		pr_err("dstmem alloc failed\n");
		goto dstmem_fail;
	}

	ret = cpuhp_setup_state_multi(CPUHP_MM_ZSWP_POOL_PREPARE,
				      "mm/zswap_pool:prepare",
				      zswap_cpu_comp_prepare,
				      zswap_cpu_comp_dead);
	if (ret)
		goto hp_fail;

	pool = __zswap_pool_create_fallback();
	if (pool) {
		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
			zpool_get_type(pool->zpool));
		list_add(&pool->list, &zswap_pools);
		zswap_has_pool = true;
	} else {
		pr_err("pool creation failed\n");
		zswap_enabled = false;
	}

	shrink_wq = create_workqueue("zswap-shrink");
	if (!shrink_wq)
		goto fallback_fail;

	ret = frontswap_register_ops(&zswap_frontswap_ops);
	if (ret)
		goto destroy_wq;
	if (zswap_debugfs_init())
		pr_warn("debugfs initialization failed\n");
	return 0;

destroy_wq:
	destroy_workqueue(shrink_wq);
fallback_fail:
	if (pool)
		zswap_pool_destroy(pool);
hp_fail:
	cpuhp_remove_state(CPUHP_MM_ZSWP_MEM_PREPARE);
dstmem_fail:
	zswap_entry_cache_destroy();
cache_fail:
	/* if built-in, we aren't unloaded on failure; don't allow use */
	zswap_init_failed = true;
	zswap_enabled = false;
	return -ENOMEM;
}
/* must be late so crypto has time to come up */
late_initcall(init_zswap);
<sjennings@variantweb.net>"); 1542 MODULE_DESCRIPTION("Compressed cache for swap pages"); 1543