/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * in the systems supporting sparse numa ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	struct list_lru_one *l = &nlru->lru;
	struct mem_cgroup *memcg = NULL;

	if (!nlru->memcg_lrus)
		goto out;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		goto out;

	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
	if (memcg_ptr)
		*memcg_ptr = memcg;
	return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
	return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
		   struct mem_cgroup **memcg_ptr)
{
	if (memcg_ptr)
		*memcg_ptr = NULL;
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct mem_cgroup *memcg;
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, &memcg);
		list_add_tail(item, &l->list);
		/* Set shrinker bit if the first element was added */
		if (!l->nr_items++)
			memcg_set_shrinker_bit(memcg, nid,
					       lru_shrinker_id(lru));
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item, NULL);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/*
	 * Entries [begin, i) were allocated above, so free exactly those;
	 * passing "i - 1" here would leak the last allocated entry.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because
	 * we cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
				      int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	int dst_idx = dst_memcg->kmemcg_id;
	struct list_lru_one *src, *dst;
	bool set;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	set = (!dst->nr_items && src->nr_items);
	dst->nr_items += src->nr_items;
	if (set)
		memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, struct mem_cgroup *dst_memcg)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_memcg);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
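
/*
 * Illustrative usage sketch (editor's addition, not part of the original
 * file): how a typical client of this API, e.g. an object cache sitting
 * behind a shrinker, might drive a list_lru. The type "demo_obj", the
 * callback "demo_isolate" and the helper "demo_usage" are hypothetical
 * names invented for this sketch, and the whole block is compiled out
 * with #if 0 so it cannot affect the build.
 */
#if 0
struct demo_obj {
	struct list_head lru;	/* linkage used by list_lru_{add,del} */
	/* ... object payload ... */
};

static struct list_lru demo_lru;

/*
 * Isolate callback: invoked by __list_lru_walk_one() with the per-node
 * lru lock held. It must return one of the enum lru_status values
 * handled in the switch statement above.
 */
static enum lru_status demo_isolate(struct list_head *item,
				    struct list_lru_one *list,
				    spinlock_t *lock, void *cb_arg)
{
	struct list_head *dispose = cb_arg;

	/* Take the item off the LRU and hand it to the caller's list. */
	list_lru_isolate_move(list, item, dispose);
	return LRU_REMOVED;
}

static void demo_usage(struct demo_obj *obj)
{
	LIST_HEAD(dispose);
	unsigned long nr_to_walk = 32;

	/* Memcg-unaware setup via the list_lru_init() wrapper. */
	if (list_lru_init(&demo_lru))
		return;

	/* Add when the object becomes unused, delete when it is reused. */
	list_lru_add(&demo_lru, &obj->lru);
	list_lru_del(&demo_lru, &obj->lru);

	/* Reclaim up to nr_to_walk items from node 0 onto a private list. */
	list_lru_add(&demo_lru, &obj->lru);
	list_lru_walk_node(&demo_lru, 0, demo_isolate, &dispose, &nr_to_walk);
	/* The caller would now free everything left on "dispose". */

	list_lru_destroy(&demo_lru);
}
#endif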