// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
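
/*
 * Illustrative sketch (not part of the original kernel source): the usual
 * way a cache drives list_lru_add()/list_lru_del().  The structure and
 * helper names below are hypothetical; real users such as the dentry and
 * inode caches follow the same pattern of parking an object on the LRU
 * when its last reference goes away and pulling it back off when it is
 * reused.
 *
 *	struct my_object {
 *		spinlock_t lock;		// protects the object itself
 *		struct list_head lru;		// linkage used by the list_lru
 *	};
 *
 *	static struct list_lru my_object_lru;	// set up via __list_lru_init()
 *
 *	// Last reference dropped: make the object reclaimable.  list_lru_add()
 *	// is a no-op (returns false) if the item is already on a list.
 *	static void my_object_make_unused(struct my_object *obj)
 *	{
 *		spin_lock(&obj->lock);
 *		list_lru_add(&my_object_lru, &obj->lru);
 *		spin_unlock(&obj->lock);
 *	}
 *
 *	// Object looked up again: take it off the LRU before reusing it.
 *	static void my_object_make_used(struct my_object *obj)
 *	{
 *		spin_lock(&obj->lock);
 *		list_lru_del(&my_object_lru, &obj->lru);
 *		spin_unlock(&obj->lock);
 *	}
 *
 * Note the lock ordering this establishes: obj->lock is taken outside the
 * per-node nlru->lock that list_lru_add()/del() acquire internally, which
 * is why the isolate callback sketched further below may only trylock
 * obj->lock while running under the lru lock.
 */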

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}
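
/*
 * Illustrative sketch (not part of the original kernel source): an isolate
 * callback as __list_lru_walk_one() expects it, continuing the hypothetical
 * my_object example above.  The contract mirrors the switch statement in the
 * walker: LRU_REMOVED_RETRY and LRU_RETRY may drop the lru lock, but must
 * reacquire it before returning so the walker can restart its traversal.
 *
 *	static enum lru_status my_object_isolate(struct list_head *item,
 *						 struct list_lru_one *lru,
 *						 spinlock_t *lru_lock,
 *						 void *cb_arg)
 *	{
 *		struct list_head *freeable = cb_arg;
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *
 *		// obj->lock is taken outside the lru lock on the add/del path,
 *		// so only trylock it here to avoid a lock-order inversion.
 *		if (!spin_trylock(&obj->lock))
 *			return LRU_SKIP;
 *
 *		if (my_object_is_referenced(obj)) {
 *			spin_unlock(&obj->lock);
 *			return LRU_ROTATE;	// keep recently used objects
 *		}
 *
 *		// Move the item to a private list for disposal after the walk;
 *		// list_lru_isolate_move() also updates lru->nr_items.
 *		list_lru_isolate_move(lru, item, freeable);
 *		spin_unlock(&obj->lock);
 *		return LRU_REMOVED;
 *	}
 */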

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* Free only the lists allocated so far; index i itself failed. */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	lru->memcg_aware = memcg_aware;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
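
/*
 * Illustrative sketch (not part of the original kernel source): hooking the
 * hypothetical my_object_lru up to a memcg-aware shrinker.  The
 * prealloc_shrinker() / list_lru_init_memcg() / register_shrinker_prepared()
 * ordering follows the pattern used by callers such as the superblock cache,
 * so that shrinker->id is already assigned when __list_lru_init() records it
 * in lru->shrinker_id; treat this as a sketch, not a drop-in implementation.
 *
 *	static unsigned long my_cache_count(struct shrinker *shrink,
 *					    struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_object_lru, sc);
 *	}
 *
 *	static unsigned long my_cache_scan(struct shrinker *shrink,
 *					   struct shrink_control *sc)
 *	{
 *		LIST_HEAD(freeable);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&my_object_lru, sc,
 *					     my_object_isolate, &freeable);
 *		my_object_dispose_list(&freeable);	// hypothetical helper
 *		return freed;
 *	}
 *
 *	static struct shrinker my_cache_shrinker = {
 *		.count_objects	= my_cache_count,
 *		.scan_objects	= my_cache_scan,
 *		.seeks		= DEFAULT_SEEKS,
 *		.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
 *	};
 *
 *	static int __init my_cache_init(void)
 *	{
 *		int err;
 *
 *		err = prealloc_shrinker(&my_cache_shrinker);
 *		if (err)
 *			return err;
 *		err = list_lru_init_memcg(&my_object_lru, &my_cache_shrinker);
 *		if (err) {
 *			free_prealloced_shrinker(&my_cache_shrinker);
 *			return err;
 *		}
 *		register_shrinker_prepared(&my_cache_shrinker);
 *		return 0;
 *	}
 */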