/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * on systems supporting sparse NUMA ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either the lock or RCU protects the array of per-cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);
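/*
 * Illustrative sketch (not part of this file): the typical caller
 * pattern for list_lru_add()/list_lru_del().  "struct my_object" and
 * "my_lru" are hypothetical names; the only real requirement is an
 * embedded list_head that is INIT_LIST_HEAD()-initialized before first
 * use, since list_lru_add() keys off list_empty(item).
 */
#if 0
struct my_object {
	struct list_head lru_link;	/* linkage owned by the list_lru */
	/* ... payload ... */
};

static struct list_lru my_lru;

/* Object dropped its last user: make it visible to reclaim. */
static void my_object_make_reclaimable(struct my_object *obj)
{
	list_lru_add(&my_lru, &obj->lru_link);
}

/* Object picked up a new user: hide it from reclaim again. */
static void my_object_reactivate(struct my_object *obj)
{
	list_lru_del(&my_lru, &obj->lru_link);
}
#endif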
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * Decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items.
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock(&nlru->lock);
	return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		      list_lru_walk_cb isolate, void *cb_arg,
		      unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	unsigned long ret;

	spin_lock_irq(&nlru->lock);
	ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
				  nr_to_walk);
	spin_unlock_irq(&nlru->lock);
	return ret;
}

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
				      nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			struct list_lru_node *nlru = &lru->node[nid];

			spin_lock(&nlru->lock);
			isolated += __list_lru_walk_one(nlru, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			spin_unlock(&nlru->lock);

			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
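/*
 * Illustrative sketch (not part of this file): a minimal
 * list_lru_walk_cb, showing how the lru_status protocol above is meant
 * to be used.  "struct my_object" and my_object_is_referenced() are
 * hypothetical; real callers (e.g. the dentry and inode shrinkers)
 * follow the same shape.
 */
#if 0
static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list,
				  spinlock_t *lock, void *cb_arg)
{
	struct list_head *freeable = cb_arg;
	struct my_object *obj = container_of(item, struct my_object, lru_link);

	/* Recently used objects stay cached: move them to the tail. */
	if (my_object_is_referenced(obj))
		return LRU_ROTATE;

	/*
	 * Remove the object under the lru lock; the actual freeing happens
	 * later from the private "freeable" list, outside the lock.
	 */
	list_lru_isolate_move(list, item, freeable);
	return LRU_REMOVED;
}
#endif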
static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/*
	 * The destroy helper frees indices [begin, end), so pass i, not
	 * i - 1: entries begin .. i-1 were successfully allocated and all
	 * of them must be freed.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * so nobody can be using the array anymore. There is no need to
	 * defer the freeing via kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}
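/*
 * Illustrative sketch (not part of this file): the two legal reader-side
 * patterns for nlru->memcg_lrus implied by the publication above.
 * Either is sufficient; list_lru_from_memcg_idx() accepts both via
 * rcu_dereference_check().  peek_nr_items*() are hypothetical helpers.
 */
#if 0
static unsigned long peek_nr_items(struct list_lru_node *nlru, int idx)
{
	unsigned long nr;

	/* Pattern 1: pure RCU reader (see list_lru_count_one()). */
	rcu_read_lock();
	nr = list_lru_from_memcg_idx(nlru, idx)->nr_items;
	rcu_read_unlock();

	return nr;
}

static unsigned long peek_nr_items_locked(struct list_lru_node *nlru, int idx)
{
	unsigned long nr;

	/*
	 * Pattern 2: hold nlru->lock, which the updater also takes while
	 * swapping the array (see list_lru_{add,del} and the walkers).
	 */
	spin_lock(&nlru->lock);
	nr = list_lru_from_memcg_idx(nlru, idx)->nr_items;
	spin_unlock(&nlru->lock);

	return nr;
}
#endif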
static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because
	 * we cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */
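/*
 * Illustrative sketch (not part of this file): roughly how the resize
 * entry point above is driven.  In this era the real caller is
 * memcg_alloc_cache_id() in mm/memcontrol.c; grow_cache_ids() below is
 * a simplified, hypothetical approximation of that caller, not the
 * exact upstream code.
 */
#if 0
static int grow_cache_ids(int new_size)
{
	int err;

	err = memcg_update_all_caches(new_size);	/* kmem caches first */
	if (!err)
		err = memcg_update_all_list_lrus(new_size);
	if (!err)
		memcg_nr_cache_ids = new_size;		/* publish new size */
	return err;
}
#endif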
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
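/*
 * Illustrative sketch (not part of this file): wiring a list_lru to a
 * memcg-aware shrinker, in the same order the superblock code uses (the
 * shrinker id must exist before list_lru_init_memcg() so that
 * lru->shrinker_id is valid).  "my_lru", "my_shrinker", "my_cache_*"
 * and my_isolate() (sketched earlier) are hypothetical names.
 */
#if 0
static struct list_lru my_lru;

static unsigned long my_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_shrink_count(&my_lru, sc);
}

static unsigned long my_scan(struct shrinker *s, struct shrink_control *sc)
{
	LIST_HEAD(freeable);
	unsigned long freed;

	freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &freeable);
	/* ... free everything on "freeable" outside the lru lock ... */
	return freed;
}

static struct shrinker my_shrinker = {
	.count_objects	= my_count,
	.scan_objects	= my_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_MEMCG_AWARE | SHRINKER_NUMA_AWARE,
};

static int __init my_cache_init(void)
{
	int err;

	err = prealloc_shrinker(&my_shrinker);	/* assigns shrinker->id */
	if (err)
		return err;
	err = list_lru_init_memcg(&my_lru, &my_shrinker);
	if (err) {
		free_prealloced_shrinker(&my_shrinker);
		return err;
	}
	register_shrinker_prepared(&my_shrinker);
	return 0;
}

static void __exit my_cache_exit(void)
{
	unregister_shrinker(&my_shrinker);	/* before destroying the lru */
	list_lru_destroy(&my_lru);
}
#endif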