/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * in the systems supporting sparse numa ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

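/*
 * Add @item to the lru list it belongs to, unless it is already on one.
 * The target node is taken from the page backing @item and, when the lru
 * is memcg aware, the per-memcg sub-list is picked from the object's kmem
 * cgroup. Returns true if the item was added, false if it was already on
 * a list.
 */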
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_add_tail(item, &l->list);
		l->nr_items++;
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

static unsigned long __list_lru_count_one(struct list_lru *lru,
					  int nid, int memcg_idx)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

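/*
 * Walk at most *nr_to_walk items on the list selected by @nid and @memcg,
 * calling @isolate on each item under nlru->lock. The callback's
 * lru_status return value tells the walker whether the item was removed,
 * rotated to the tail, skipped, or whether the lock was dropped and the
 * traversal has to restart. Returns the number of items removed.
 */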
unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* only the lists in [begin, i) were allocated above */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

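/*
 * RCU callback used when the per-node array of per-memcg lists is
 * replaced: the old array is freed only after a grace period, so that
 * lockless readers in list_lru_from_memcg_idx() cannot still be using it.
 */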
static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

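/*
 * Grow the per-memcg list arrays of every registered lru from the current
 * memcg_nr_cache_ids to new_size. If one lru fails to grow, the lrus
 * updated so far are rolled back and -ENOMEM is returned.
 */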
int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);