// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &memcg_list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	if (!list_lru_memcg_aware(lru))
		return;

	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_obj(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			set_shrinker_bit(memcg, nid,
					 lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = READ_ONCE(l->nr_items);
	rcu_read_unlock();

	if (unlikely(count < 0))
		count = 0;

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			fallthrough;
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(struct_size(memcg_lrus, lru, size), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(struct_size(new, lru, new_size), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, flex_array_size(new, lru, old_size));
	rcu_assign_pointer(nlru->memcg_lrus, new);
	kvfree_rcu(old, rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/* do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	lru->memcg_aware = memcg_aware;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &memcg_list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);

	if (src->nr_items) {
		dst->nr_items += src->nr_items;
		set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
		src->nr_items = 0;
	}

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &memcg_list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
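
/*
 * Illustrative sketch (not part of this file): a typical list_lru user pairs
 * the LRU with a memcg-aware shrinker, adds slab-allocated objects when they
 * become idle, deletes them when they are reused, and reclaims them from the
 * shrinker's scan callback via the list_lru_shrink_*() helpers declared in
 * <linux/list_lru.h>.  All demo_* names below are hypothetical, and shrinker
 * setup/registration is elided since the exact ordering and flags (e.g.
 * SHRINKER_MEMCG_AWARE, SHRINKER_NUMA_AWARE) depend on the kernel version.
 *
 *	static struct list_lru demo_lru;
 *
 *	struct demo_object {
 *		struct list_head lru;	linked into demo_lru while idle
 *	};
 *
 *	static enum lru_status demo_isolate(struct list_head *item,
 *					    struct list_lru_one *list,
 *					    spinlock_t *lru_lock, void *arg)
 *	{
 *		struct list_head *dispose = arg;
 *
 *		list_lru_isolate_move(list, item, dispose);
 *		return LRU_REMOVED;
 *	}
 *
 *	static unsigned long demo_count(struct shrinker *s,
 *					struct shrink_control *sc)
 *	{
 *		return list_lru_shrink_count(&demo_lru, sc);
 *	}
 *
 *	static unsigned long demo_scan(struct shrinker *s,
 *				       struct shrink_control *sc)
 *	{
 *		LIST_HEAD(dispose);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&demo_lru, sc, demo_isolate,
 *					     &dispose);
 *		while (!list_empty(&dispose)) {
 *			struct demo_object *obj;
 *
 *			obj = list_first_entry(&dispose, struct demo_object,
 *					       lru);
 *			list_del_init(&obj->lru);
 *			kfree(obj);
 *		}
 *		return freed;
 *	}
 *
 * Objects must come from the slab allocator so that list_lru_add() can derive
 * the NUMA node and memcg from the object's address; callers then simply do
 * list_lru_add(&demo_lru, &obj->lru) when an object goes idle and
 * list_lru_del(&demo_lru, &obj->lru) when it is taken back into use.
 */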