/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	/*
	 * The lock protects the array of per cgroup lists from relocation
	 * (see memcg_update_list_lru_node).
	 */
	lockdep_assert_held(&nlru->lock);
	if (nlru->memcg_lrus && idx >= 0)
		return nlru->memcg_lrus->lru[idx];

	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (list_empty(item)) {
		list_add_tail(item, &l->list);
		l->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	l = list_lru_from_kmem(nlru, item);
	if (!list_empty(item)) {
		list_del_init(item);
		l->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
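
/*
 * list_lru_isolate() and list_lru_isolate_move() update nr_items without
 * taking the per-node lock themselves: they are meant to be called from a
 * list_lru_walk_cb callback, which __list_lru_walk_one() invokes with
 * nlru->lock already held.
 *
 * A minimal callback might look like the following (hypothetical sketch,
 * not a callback defined in this file):
 *
 *	static enum lru_status demo_isolate(struct list_head *item,
 *					    struct list_lru_one *list,
 *					    spinlock_t *lock, void *cb_arg)
 *	{
 *		list_lru_isolate(list, item);
 *		return LRU_REMOVED;
 *	}
 */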
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

static unsigned long __list_lru_count_one(struct list_lru *lru,
					  int nid, int memcg_idx)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
	count = l->nr_items;
	spin_unlock(&nlru->lock);

	return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	long count = 0;
	int memcg_idx;

	count += __list_lru_count_one(lru, nid, -1);
	if (list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx)
			count += __list_lru_count_one(lru, nid, memcg_idx);
	}
	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);
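
/*
 * The walker below interprets the callback's return value as follows (see
 * the switch statement): LRU_REMOVED means the item was isolated;
 * LRU_REMOVED_RETRY means it was isolated but the lru lock was dropped and
 * reacquired, so the traversal must restart; LRU_ROTATE moves the item to
 * the tail of the list; LRU_SKIP leaves it in place; LRU_RETRY restarts
 * the traversal after the lock has been dropped without removing the item.
 */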
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
							isolate, cb_arg,
							nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}
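
/*
 * The rest of this file manages the per-memcg lists: each list_lru_node
 * carries a memcg_lrus array of list_lru_one pointers indexed by
 * memcg_cache_id(), sized to memcg_nr_cache_ids. When that id space grows,
 * memcg_update_all_list_lrus() reallocates every array and swaps it in
 * under the node lock, so list_lru_from_memcg_idx() never sees a stale
 * pointer.
 */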
#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	/* only the entries in [begin, i) were successfully allocated */
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	int size = memcg_nr_cache_ids;

	nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
	if (!nlru->memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
		kfree(nlru->memcg_lrus);
		return -ENOMEM;
	}

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
	kfree(nlru->memcg_lrus);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = nlru->memcg_lrus;
	new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kfree(new);
		return -ENOMEM;
	}

	memcpy(new, old, old_size * sizeof(void *));

	/*
	 * The lock guarantees that we won't race with a reader
	 * (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	nlru->memcg_lrus = new;
	spin_unlock_irq(&nlru->lock);

	kfree(old);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	/*
	 * Do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here.
	 */
	__memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	for (i = 0; i < nr_node_ids; i++) {
		if (!memcg_aware)
			lru->node[i].memcg_lrus = NULL;
		else if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_destroy_list_lru_node(&lru->node[i]);
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for (i = 0; i < nr_node_ids; i++) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}
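
/*
 * Draining splices every item from the list at src_idx onto the list at
 * dst_idx on each node, so nothing is left behind on the source index.
 * Which index the items are moved to is up to the caller of
 * memcg_drain_all_list_lrus(); in practice this is used when a memcg's
 * kmem accounting is torn down, with the destination typically being the
 * parent cgroup's index.
 */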
static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for (i = 0; i < nr_node_ids; i++)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for (i = 0; i < nr_node_ids; i++) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);