/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * on systems supporting sparse NUMA ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	/*
	 * The lock protects the array of per cgroup lists from relocation
	 * (see memcg_update_list_lru_node).
	 */
	lockdep_assert_held(&nlru->lock);
	if (nlru->memcg_lrus && idx >= 0)
		return nlru->memcg_lrus->lru[idx];

	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_add_tail(item, &l->list);
		l->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_del_init(item);
		l->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
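
/*
 * Illustrative usage sketch (not part of the original file): a hypothetical
 * cache embeds a list_head in its objects and moves them on and off the LRU
 * as they become unused or are reused.  "struct my_object", "my_lru" and the
 * my_object_* helpers are made-up names for illustration only.  Note that
 * list_lru_add()/list_lru_del() key off list_empty(item), so the list_head
 * must be initialized with INIT_LIST_HEAD() before first use, and the object
 * must come from a page/slab allocation (its node is derived from its page).
 *
 *	struct my_object {
 *		struct list_head lru;
 *		unsigned long data;
 *	};
 *
 *	static struct list_lru my_lru;
 *
 *	static void my_object_init(struct my_object *obj)
 *	{
 *		INIT_LIST_HEAD(&obj->lru);
 *	}
 *
 *	static void my_object_unused(struct my_object *obj)
 *	{
 *		// Returns true only if the item was actually added,
 *		// i.e. obj->lru was empty.
 *		list_lru_add(&my_lru, &obj->lru);
 *	}
 *
 *	static void my_object_reused(struct my_object *obj)
 *	{
 *		// Safe even if the object is not on the LRU; returns
 *		// true only if it was removed here.
 *		list_lru_del(&my_lru, &obj->lru);
 *	}
 */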

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

static unsigned long __list_lru_count_one(struct list_lru *lru,
					  int nid, int memcg_idx)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
	count = l->nr_items;
	spin_unlock(&nlru->lock);

	return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	long count = 0;
	int memcg_idx;

	count += __list_lru_count_one(lru, nid, -1);
	if (list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx)
			count += __list_lru_count_one(lru, nid, memcg_idx);
	}
	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{

	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
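
/*
 * Illustrative sketch of a walk callback (not part of the original file):
 * my_object, my_isolate, my_object_is_busy and my_shrink_node are made-up
 * names used only to show the shape of a list_lru_walk_cb.  The callback is
 * invoked under the per-node lru lock and reports what it did with each item
 * via enum lru_status; the nr_to_walk argument bounds how many items are
 * examined per call.
 *
 *	static enum lru_status my_isolate(struct list_head *item,
 *					  struct list_lru_one *list,
 *					  spinlock_t *lock, void *cb_arg)
 *	{
 *		struct my_object *obj = container_of(item, struct my_object, lru);
 *		struct list_head *freeable = cb_arg;
 *
 *		if (my_object_is_busy(obj))	// hypothetical check
 *			return LRU_ROTATE;	// keep it, move to list tail
 *
 *		// Take the item off the LRU while still holding "lock";
 *		// list_lru_isolate_move() also fixes up list->nr_items.
 *		list_lru_isolate_move(list, item, freeable);
 *		return LRU_REMOVED;
 *	}
 *
 *	static unsigned long my_shrink_node(struct list_lru *lru, int nid)
 *	{
 *		unsigned long nr_to_walk = 128;	// arbitrary scan budget
 *		LIST_HEAD(freeable);
 *		unsigned long nr;
 *
 *		nr = list_lru_walk_node(lru, nid, my_isolate, &freeable,
 *					&nr_to_walk);
 *		// dispose of everything collected on "freeable" ...
 *		return nr;
 *	}
 */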

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* free only the entries that were actually allocated, i.e. [begin, i) */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	int size = memcg_nr_cache_ids;

	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
	if (!nlru->memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
		kfree(nlru->memcg_lrus);
		return -ENOMEM;
	}

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
	kfree(nlru->memcg_lrus);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = nlru->memcg_lrus;
	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kfree(new);
		return -ENOMEM;
	}

	memcpy(new, old, old_size * sizeof(void *));

	/*
	 * The lock guarantees that we won't race with a reader
	 * (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	nlru->memcg_lrus = new;
	spin_unlock_irq(&nlru->lock);

	kfree(old);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);
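
/*
 * Illustrative lifecycle sketch (not part of the original file): setting up
 * a list_lru, querying it, and tearing it down.  "my_lru" and the my_cache_*
 * helpers are made-up names; error handling is kept minimal on purpose.
 *
 *	static struct list_lru my_lru;
 *
 *	static int my_cache_init(void)
 *	{
 *		// Pass memcg_aware = true to get one list per kmem-active
 *		// cgroup per node; pass false for a plain per-node LRU.
 *		return __list_lru_init(&my_lru, true, NULL);
 *	}
 *
 *	static unsigned long my_cache_count(void)
 *	{
 *		unsigned long count = 0;
 *		int nid;
 *
 *		for_each_node_state(nid, N_NORMAL_MEMORY)
 *			count += list_lru_count_node(&my_lru, nid);
 *		return count;
 *	}
 *
 *	static void my_cache_exit(void)
 *	{
 *		list_lru_destroy(&my_lru);
 *	}
 */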