// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * on systems supporting sparse NUMA ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

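/*
 * Illustrative usage sketch (not part of the original file): a cache
 * typically embeds a struct list_head in its objects, adds the object with
 * list_lru_add() once it becomes unreferenced, and removes it with
 * list_lru_del() when it is reused.  The names "my_object", "my_lru" and
 * "obj" below are hypothetical.
 *
 *	struct my_object {
 *		struct list_head lru;	// linked into my_lru while idle
 *		...
 *	};
 *
 *	// object dropped its last user: make it visible to the shrinker
 *	list_lru_add(&my_lru, &obj->lru);
 *
 *	// object is referenced again: take it back off the LRU
 *	list_lru_del(&my_lru, &obj->lru);
 *
 * Both calls return true only if they actually changed list membership, so
 * callers may invoke them lazily without tracking whether the object is
 * currently queued.
 */
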
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* Free only the entries that were actually allocated: [begin, i). */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
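
/*
 * Illustrative sketch of how a shrinker is usually wired up to a list_lru
 * (not part of the original file; "my_lru", "my_isolate", "my_count" and
 * "my_scan" are hypothetical).  count_objects reports the LRU size via
 * list_lru_shrink_count(), scan_objects walks it via list_lru_shrink_walk(),
 * and the isolate callback removes items with list_lru_isolate() while the
 * per-node lru lock passed in is held.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *list,
 *					  spinlock_t *lock, void *arg)
 *	{
 *		// item is still on the list and *lock is held here
 *		list_lru_isolate(list, item);
 *		// a real callback would also arrange for the object
 *		// containing @item to be freed after the walk
 *		return LRU_REMOVED;
 *	}
 *
 *	static unsigned long my_count(struct shrinker *s,
 *				      struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&my_lru, sc);
 *	}
 *
 *	static unsigned long my_scan(struct shrinker *s,
 *				     struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_walk(&my_lru, sc, my_isolate, NULL);
 *	}
 */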