memcontrol.c before, at a9dd0a83104c01269ea36a9b4ec42b51edf85427 (per-zone memcg accounting) | memcontrol.c after, at ef8f2327996b5c20f11420f64e439e87c7a01604 (per-node memcg accounting) |
---|---|
1/* memcontrol.c - Memory Controller 2 * 3 * Copyright IBM Corporation, 2007 4 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 5 * 6 * Copyright 2007 OpenVZ SWsoft Inc 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * --- 118 unchanged lines hidden (view full) --- 127#define SOFTLIMIT_EVENTS_TARGET 1024 128#define NUMAINFO_EVENTS_TARGET 1024 129 130/* 131 * Cgroups above their limits are maintained in a RB-Tree, independent of 132 * their hierarchy representation 133 */ 134 | 1/* memcontrol.c - Memory Controller 2 * 3 * Copyright IBM Corporation, 2007 4 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 5 * 6 * Copyright 2007 OpenVZ SWsoft Inc 7 * Author: Pavel Emelianov <xemul@openvz.org> 8 * --- 118 unchanged lines hidden (view full) --- 127#define SOFTLIMIT_EVENTS_TARGET 1024 128#define NUMAINFO_EVENTS_TARGET 1024 129 130/* 131 * Cgroups above their limits are maintained in a RB-Tree, independent of 132 * their hierarchy representation 133 */ 134 |
135struct mem_cgroup_tree_per_zone { | 135struct mem_cgroup_tree_per_node { |
136 struct rb_root rb_root; 137 spinlock_t lock; 138}; 139 | 136 struct rb_root rb_root; 137 spinlock_t lock; 138}; 139 |
140struct mem_cgroup_tree_per_node { 141 struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; 142}; 143 | |
144struct mem_cgroup_tree { 145 struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; 146}; 147 148static struct mem_cgroup_tree soft_limit_tree __read_mostly; 149 150/* for OOM */ 151struct mem_cgroup_eventfd_list { --- 217 unchanged lines hidden (view full) --- 369 while (memcg && !(memcg->css.flags & CSS_ONLINE)) 370 memcg = parent_mem_cgroup(memcg); 371 if (memcg) 372 ino = cgroup_ino(memcg->css.cgroup); 373 rcu_read_unlock(); 374 return ino; 375} 376 | 140struct mem_cgroup_tree { 141 struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; 142}; 143 144static struct mem_cgroup_tree soft_limit_tree __read_mostly; 145 146/* for OOM */ 147struct mem_cgroup_eventfd_list { --- 217 unchanged lines hidden (view full) --- 365 while (memcg && !(memcg->css.flags & CSS_ONLINE)) 366 memcg = parent_mem_cgroup(memcg); 367 if (memcg) 368 ino = cgroup_ino(memcg->css.cgroup); 369 rcu_read_unlock(); 370 return ino; 371} 372 |
377static struct mem_cgroup_per_zone * 378mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) | 373static struct mem_cgroup_per_node * 374mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) |
379{ 380 int nid = page_to_nid(page); | 375{ 376 int nid = page_to_nid(page); |
381 int zid = page_zonenum(page); | |
382 | 377 |
383 return &memcg->nodeinfo[nid]->zoneinfo[zid]; | 378 return memcg->nodeinfo[nid]; |
384} 385 | 379} 380 |
386static struct mem_cgroup_tree_per_zone * 387soft_limit_tree_node_zone(int nid, int zid) | 381static struct mem_cgroup_tree_per_node * 382soft_limit_tree_node(int nid) |
388{ | 383{ |
389 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | 384 return soft_limit_tree.rb_tree_per_node[nid]; |
390} 391 | 385} 386 |
392static struct mem_cgroup_tree_per_zone * | 387static struct mem_cgroup_tree_per_node * |
393soft_limit_tree_from_page(struct page *page) 394{ 395 int nid = page_to_nid(page); | 388soft_limit_tree_from_page(struct page *page) 389{ 390 int nid = page_to_nid(page); |
396 int zid = page_zonenum(page); | |
397 | 391 |
398 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | 392 return soft_limit_tree.rb_tree_per_node[nid]; |
399} 400 | 393} 394 |
401static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, 402 struct mem_cgroup_tree_per_zone *mctz, | 395static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, 396 struct mem_cgroup_tree_per_node *mctz, |
403 unsigned long new_usage_in_excess) 404{ 405 struct rb_node **p = &mctz->rb_root.rb_node; 406 struct rb_node *parent = NULL; | 397 unsigned long new_usage_in_excess) 398{ 399 struct rb_node **p = &mctz->rb_root.rb_node; 400 struct rb_node *parent = NULL; |
407 struct mem_cgroup_per_zone *mz_node; | 401 struct mem_cgroup_per_node *mz_node; |
408 409 if (mz->on_tree) 410 return; 411 412 mz->usage_in_excess = new_usage_in_excess; 413 if (!mz->usage_in_excess) 414 return; 415 while (*p) { 416 parent = *p; | 402 403 if (mz->on_tree) 404 return; 405 406 mz->usage_in_excess = new_usage_in_excess; 407 if (!mz->usage_in_excess) 408 return; 409 while (*p) { 410 parent = *p; |
417 mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | 411 mz_node = rb_entry(parent, struct mem_cgroup_per_node, |
418 tree_node); 419 if (mz->usage_in_excess < mz_node->usage_in_excess) 420 p = &(*p)->rb_left; 421 /* 422 * We can't avoid mem cgroups that are over their soft 423 * limit by the same amount 424 */ 425 else if (mz->usage_in_excess >= mz_node->usage_in_excess) 426 p = &(*p)->rb_right; 427 } 428 rb_link_node(&mz->tree_node, parent, p); 429 rb_insert_color(&mz->tree_node, &mctz->rb_root); 430 mz->on_tree = true; 431} 432 | 412 tree_node); 413 if (mz->usage_in_excess < mz_node->usage_in_excess) 414 p = &(*p)->rb_left; 415 /* 416 * We can't avoid mem cgroups that are over their soft 417 * limit by the same amount 418 */ 419 else if (mz->usage_in_excess >= mz_node->usage_in_excess) 420 p = &(*p)->rb_right; 421 } 422 rb_link_node(&mz->tree_node, parent, p); 423 rb_insert_color(&mz->tree_node, &mctz->rb_root); 424 mz->on_tree = true; 425} 426 |
433static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, 434 struct mem_cgroup_tree_per_zone *mctz) | 427static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, 428 struct mem_cgroup_tree_per_node *mctz) |
435{ 436 if (!mz->on_tree) 437 return; 438 rb_erase(&mz->tree_node, &mctz->rb_root); 439 mz->on_tree = false; 440} 441 | 429{ 430 if (!mz->on_tree) 431 return; 432 rb_erase(&mz->tree_node, &mctz->rb_root); 433 mz->on_tree = false; 434} 435 |
442static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, 443 struct mem_cgroup_tree_per_zone *mctz) | 436static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, 437 struct mem_cgroup_tree_per_node *mctz) |
444{ 445 unsigned long flags; 446 447 spin_lock_irqsave(&mctz->lock, flags); 448 __mem_cgroup_remove_exceeded(mz, mctz); 449 spin_unlock_irqrestore(&mctz->lock, flags); 450} 451 --- 7 unchanged lines hidden (view full) --- 459 excess = nr_pages - soft_limit; 460 461 return excess; 462} 463 464static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) 465{ 466 unsigned long excess; | 438{ 439 unsigned long flags; 440 441 spin_lock_irqsave(&mctz->lock, flags); 442 __mem_cgroup_remove_exceeded(mz, mctz); 443 spin_unlock_irqrestore(&mctz->lock, flags); 444} 445 --- 7 unchanged lines hidden (view full) --- 453 excess = nr_pages - soft_limit; 454 455 return excess; 456} 457 458static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) 459{ 460 unsigned long excess; |
467 struct mem_cgroup_per_zone *mz; 468 struct mem_cgroup_tree_per_zone *mctz; | 461 struct mem_cgroup_per_node *mz; 462 struct mem_cgroup_tree_per_node *mctz; |
469 470 mctz = soft_limit_tree_from_page(page); 471 /* 472 * Necessary to update all ancestors when hierarchy is used. 473 * because their event counter is not touched. 474 */ 475 for (; memcg; memcg = parent_mem_cgroup(memcg)) { | 463 464 mctz = soft_limit_tree_from_page(page); 465 /* 466 * Necessary to update all ancestors when hierarchy is used. 467 * because their event counter is not touched. 468 */ 469 for (; memcg; memcg = parent_mem_cgroup(memcg)) { |
476 mz = mem_cgroup_page_zoneinfo(memcg, page); | 470 mz = mem_cgroup_page_nodeinfo(memcg, page); |
477 excess = soft_limit_excess(memcg); 478 /* 479 * We have to update the tree if mz is on RB-tree or 480 * mem is over its softlimit. 481 */ 482 if (excess || mz->on_tree) { 483 unsigned long flags; 484 --- 8 unchanged lines hidden (view full) --- 493 __mem_cgroup_insert_exceeded(mz, mctz, excess); 494 spin_unlock_irqrestore(&mctz->lock, flags); 495 } 496 } 497} 498 499static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) 500{ | 471 excess = soft_limit_excess(memcg); 472 /* 473 * We have to update the tree if mz is on RB-tree or 474 * mem is over its softlimit. 475 */ 476 if (excess || mz->on_tree) { 477 unsigned long flags; 478 --- 8 unchanged lines hidden (view full) --- 487 __mem_cgroup_insert_exceeded(mz, mctz, excess); 488 spin_unlock_irqrestore(&mctz->lock, flags); 489 } 490 } 491} 492 493static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) 494{ |
501 struct mem_cgroup_tree_per_zone *mctz; 502 struct mem_cgroup_per_zone *mz; 503 int nid, zid; | 495 struct mem_cgroup_tree_per_node *mctz; 496 struct mem_cgroup_per_node *mz; 497 int nid; |
504 505 for_each_node(nid) { | 498 499 for_each_node(nid) { |
506 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 507 mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; 508 mctz = soft_limit_tree_node_zone(nid, zid); 509 mem_cgroup_remove_exceeded(mz, mctz); 510 } | 500 mz = mem_cgroup_nodeinfo(memcg, nid); 501 mctz = soft_limit_tree_node(nid); 502 mem_cgroup_remove_exceeded(mz, mctz); |
511 } 512} 513 | 503 } 504} 505 |
514static struct mem_cgroup_per_zone * 515__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 506static struct mem_cgroup_per_node * 507__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) |
516{ 517 struct rb_node *rightmost = NULL; | 508{ 509 struct rb_node *rightmost = NULL; |
518 struct mem_cgroup_per_zone *mz; | 510 struct mem_cgroup_per_node *mz; |
519 520retry: 521 mz = NULL; 522 rightmost = rb_last(&mctz->rb_root); 523 if (!rightmost) 524 goto done; /* Nothing to reclaim from */ 525 | 511 512retry: 513 mz = NULL; 514 rightmost = rb_last(&mctz->rb_root); 515 if (!rightmost) 516 goto done; /* Nothing to reclaim from */ 517 |
526 mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); | 518 mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node); |
527 /* 528 * Remove the node now but someone else can add it back, 529 * we will to add it back at the end of reclaim to its correct 530 * position in the tree. 531 */ 532 __mem_cgroup_remove_exceeded(mz, mctz); 533 if (!soft_limit_excess(mz->memcg) || 534 !css_tryget_online(&mz->memcg->css)) 535 goto retry; 536done: 537 return mz; 538} 539 | 519 /* 520 * Remove the node now but someone else can add it back, 521 * we will to add it back at the end of reclaim to its correct 522 * position in the tree. 523 */ 524 __mem_cgroup_remove_exceeded(mz, mctz); 525 if (!soft_limit_excess(mz->memcg) || 526 !css_tryget_online(&mz->memcg->css)) 527 goto retry; 528done: 529 return mz; 530} 531 |
540static struct mem_cgroup_per_zone * 541mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 532static struct mem_cgroup_per_node * 533mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) |
542{ | 534{ |
543 struct mem_cgroup_per_zone *mz; | 535 struct mem_cgroup_per_node *mz; |
544 545 spin_lock_irq(&mctz->lock); 546 mz = __mem_cgroup_largest_soft_limit_node(mctz); 547 spin_unlock_irq(&mctz->lock); 548 return mz; 549} 550 551/* --- 77 unchanged lines hidden (view full) --- 629 630 __this_cpu_add(memcg->stat->nr_page_events, nr_pages); 631} 632 633unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, 634 int nid, unsigned int lru_mask) 635{ 636 unsigned long nr = 0; | 536 537 spin_lock_irq(&mctz->lock); 538 mz = __mem_cgroup_largest_soft_limit_node(mctz); 539 spin_unlock_irq(&mctz->lock); 540 return mz; 541} 542 543/* --- 77 unchanged lines hidden (view full) --- 621 622 __this_cpu_add(memcg->stat->nr_page_events, nr_pages); 623} 624 625unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, 626 int nid, unsigned int lru_mask) 627{ 628 unsigned long nr = 0; |
637 int zid; | 629 struct mem_cgroup_per_node *mz; 630 enum lru_list lru; |
638 639 VM_BUG_ON((unsigned)nid >= nr_node_ids); 640 | 631 632 VM_BUG_ON((unsigned)nid >= nr_node_ids); 633 |
641 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 642 struct mem_cgroup_per_zone *mz; 643 enum lru_list lru; 644 645 for_each_lru(lru) { 646 if (!(BIT(lru) & lru_mask)) 647 continue; 648 mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; 649 nr += mz->lru_size[lru]; 650 } | 634 for_each_lru(lru) { 635 if (!(BIT(lru) & lru_mask)) 636 continue; 637 mz = mem_cgroup_nodeinfo(memcg, nid); 638 nr += mz->lru_size[lru]; |
651 } 652 return nr; 653} 654 655static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, 656 unsigned int lru_mask) 657{ 658 unsigned long nr = 0; --- 136 unchanged lines hidden (view full) --- 795 if (prev) 796 goto out; 797 return root; 798 } 799 800 rcu_read_lock(); 801 802 if (reclaim) { | 639 } 640 return nr; 641} 642 643static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg, 644 unsigned int lru_mask) 645{ 646 unsigned long nr = 0; --- 136 unchanged lines hidden (view full) --- 783 if (prev) 784 goto out; 785 return root; 786 } 787 788 rcu_read_lock(); 789 790 if (reclaim) { |
803 struct mem_cgroup_per_zone *mz; | 791 struct mem_cgroup_per_node *mz; |
804 | 792 |
805 mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone); | 793 mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id); |
806 iter = &mz->iter[reclaim->priority]; 807 808 if (prev && reclaim->generation != iter->generation) 809 goto out_unlock; 810 811 while (1) { 812 pos = READ_ONCE(iter->position); 813 if (!pos || css_tryget(&pos->css)) --- 82 unchanged lines hidden (view full) --- 896 if (prev && prev != root) 897 css_put(&prev->css); 898} 899 900static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) 901{ 902 struct mem_cgroup *memcg = dead_memcg; 903 struct mem_cgroup_reclaim_iter *iter; | 794 iter = &mz->iter[reclaim->priority]; 795 796 if (prev && reclaim->generation != iter->generation) 797 goto out_unlock; 798 799 while (1) { 800 pos = READ_ONCE(iter->position); 801 if (!pos || css_tryget(&pos->css)) --- 82 unchanged lines hidden (view full) --- 884 if (prev && prev != root) 885 css_put(&prev->css); 886} 887 888static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) 889{ 890 struct mem_cgroup *memcg = dead_memcg; 891 struct mem_cgroup_reclaim_iter *iter; |
904 struct mem_cgroup_per_zone *mz; 905 int nid, zid; | 892 struct mem_cgroup_per_node *mz; 893 int nid; |
906 int i; 907 908 while ((memcg = parent_mem_cgroup(memcg))) { 909 for_each_node(nid) { | 894 int i; 895 896 while ((memcg = parent_mem_cgroup(memcg))) { 897 for_each_node(nid) { |
910 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 911 mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; 912 for (i = 0; i <= DEF_PRIORITY; i++) { 913 iter = &mz->iter[i]; 914 cmpxchg(&iter->position, 915 dead_memcg, NULL); 916 } | 898 mz = mem_cgroup_nodeinfo(memcg, nid); 899 for (i = 0; i <= DEF_PRIORITY; i++) { 900 iter = &mz->iter[i]; 901 cmpxchg(&iter->position, 902 dead_memcg, NULL); |
917 } 918 } 919 } 920} 921 922/* 923 * Iteration constructs for visiting all cgroups (under a tree). If 924 * loops are exited prematurely (break), mem_cgroup_iter_break() must --- 15 unchanged lines hidden (view full) --- 940 * @zone: zone of the page 941 * 942 * This function is only safe when following the LRU page isolation 943 * and putback protocol: the LRU lock must be held, and the page must 944 * either be PageLRU() or the caller must have isolated/allocated it. 945 */ 946struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) 947{ | 903 } 904 } 905 } 906} 907 908/* 909 * Iteration constructs for visiting all cgroups (under a tree). If 910 * loops are exited prematurely (break), mem_cgroup_iter_break() must --- 15 unchanged lines hidden (view full) --- 926 * @zone: zone of the page 927 * 928 * This function is only safe when following the LRU page isolation 929 * and putback protocol: the LRU lock must be held, and the page must 930 * either be PageLRU() or the caller must have isolated/allocated it. 931 */ 932struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) 933{ |
948 struct mem_cgroup_per_zone *mz; | 934 struct mem_cgroup_per_node *mz; |
949 struct mem_cgroup *memcg; 950 struct lruvec *lruvec; 951 952 if (mem_cgroup_disabled()) { 953 lruvec = &pgdat->lruvec; 954 goto out; 955 } 956 957 memcg = page->mem_cgroup; 958 /* 959 * Swapcache readahead pages are added to the LRU - and 960 * possibly migrated - before they are charged. 961 */ 962 if (!memcg) 963 memcg = root_mem_cgroup; 964 | 935 struct mem_cgroup *memcg; 936 struct lruvec *lruvec; 937 938 if (mem_cgroup_disabled()) { 939 lruvec = &pgdat->lruvec; 940 goto out; 941 } 942 943 memcg = page->mem_cgroup; 944 /* 945 * Swapcache readahead pages are added to the LRU - and 946 * possibly migrated - before they are charged. 947 */ 948 if (!memcg) 949 memcg = root_mem_cgroup; 950 |
965 mz = mem_cgroup_page_zoneinfo(memcg, page); | 951 mz = mem_cgroup_page_nodeinfo(memcg, page); |
966 lruvec = &mz->lruvec; 967out: 968 /* 969 * Since a node can be onlined after the mem_cgroup was created, 970 * we have to be prepared to initialize lruvec->zone here; 971 * and if offlined then reonlined, we need to reinitialize it. 972 */ 973 if (unlikely(lruvec->pgdat != pgdat)) --- 10 unchanged lines hidden (view full) --- 984 * 985 * This function must be called under lru_lock, just before a page is added 986 * to or just after a page is removed from an lru list (that ordering being 987 * so as to allow it to check that lru_size 0 is consistent with list_empty). 988 */ 989void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, 990 enum zone_type zid, int nr_pages) 991{ | 952 lruvec = &mz->lruvec; 953out: 954 /* 955 * Since a node can be onlined after the mem_cgroup was created, 956 * we have to be prepared to initialize lruvec->zone here; 957 * and if offlined then reonlined, we need to reinitialize it. 958 */ 959 if (unlikely(lruvec->pgdat != pgdat)) --- 10 unchanged lines hidden (view full) --- 970 * 971 * This function must be called under lru_lock, just before a page is added 972 * to or just after a page is removed from an lru list (that ordering being 973 * so as to allow it to check that lru_size 0 is consistent with list_empty). 974 */ 975void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, 976 enum zone_type zid, int nr_pages) 977{ |
992 struct mem_cgroup_per_zone *mz; | 978 struct mem_cgroup_per_node *mz; |
993 unsigned long *lru_size; 994 long size; 995 bool empty; 996 997 __update_lru_size(lruvec, lru, zid, nr_pages); 998 999 if (mem_cgroup_disabled()) 1000 return; 1001 | 979 unsigned long *lru_size; 980 long size; 981 bool empty; 982 983 __update_lru_size(lruvec, lru, zid, nr_pages); 984 985 if (mem_cgroup_disabled()) 986 return; 987 |
1002 mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); | 988 mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); |
1003 lru_size = mz->lru_size + lru; 1004 empty = list_empty(lruvec->lists + lru); 1005 1006 if (nr_pages < 0) 1007 *lru_size += nr_pages; 1008 1009 size = *lru_size; 1010 if (WARN_ONCE(size < 0 || empty != !size, --- 376 unchanged lines hidden (view full) --- 1387#else 1388int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) 1389{ 1390 return 0; 1391} 1392#endif 1393 1394static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, | 989 lru_size = mz->lru_size + lru; 990 empty = list_empty(lruvec->lists + lru); 991 992 if (nr_pages < 0) 993 *lru_size += nr_pages; 994 995 size = *lru_size; 996 if (WARN_ONCE(size < 0 || empty != !size, --- 376 unchanged lines hidden (view full) --- 1373#else 1374int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) 1375{ 1376 return 0; 1377} 1378#endif 1379 1380static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, |
1395 struct zone *zone, | 1381 pg_data_t *pgdat, |
1396 gfp_t gfp_mask, 1397 unsigned long *total_scanned) 1398{ 1399 struct mem_cgroup *victim = NULL; 1400 int total = 0; 1401 int loop = 0; 1402 unsigned long excess; 1403 unsigned long nr_scanned; 1404 struct mem_cgroup_reclaim_cookie reclaim = { | 1382 gfp_t gfp_mask, 1383 unsigned long *total_scanned) 1384{ 1385 struct mem_cgroup *victim = NULL; 1386 int total = 0; 1387 int loop = 0; 1388 unsigned long excess; 1389 unsigned long nr_scanned; 1390 struct mem_cgroup_reclaim_cookie reclaim = { |
1405 .zone = zone, | 1391 .pgdat = pgdat, |
1406 .priority = 0, 1407 }; 1408 1409 excess = soft_limit_excess(root_memcg); 1410 1411 while (1) { 1412 victim = mem_cgroup_iter(root_memcg, victim, &reclaim); 1413 if (!victim) { --- 14 unchanged lines hidden (view full) --- 1428 */ 1429 if (total >= (excess >> 2) || 1430 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) 1431 break; 1432 } 1433 continue; 1434 } 1435 total += mem_cgroup_shrink_node(victim, gfp_mask, false, | 1392 .priority = 0, 1393 }; 1394 1395 excess = soft_limit_excess(root_memcg); 1396 1397 while (1) { 1398 victim = mem_cgroup_iter(root_memcg, victim, &reclaim); 1399 if (!victim) { --- 14 unchanged lines hidden (view full) --- 1414 */ 1415 if (total >= (excess >> 2) || 1416 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) 1417 break; 1418 } 1419 continue; 1420 } 1421 total += mem_cgroup_shrink_node(victim, gfp_mask, false, |
1436 zone, &nr_scanned); | 1422 pgdat, &nr_scanned); |
1437 *total_scanned += nr_scanned; 1438 if (!soft_limit_excess(root_memcg)) 1439 break; 1440 } 1441 mem_cgroup_iter_break(root_memcg, victim); 1442 return total; 1443} 1444 --- 1110 unchanged lines hidden (view full) --- 2555 } while (retry_count); 2556 2557 if (!ret && enlarge) 2558 memcg_oom_recover(memcg); 2559 2560 return ret; 2561} 2562 | 1423 *total_scanned += nr_scanned; 1424 if (!soft_limit_excess(root_memcg)) 1425 break; 1426 } 1427 mem_cgroup_iter_break(root_memcg, victim); 1428 return total; 1429} 1430 --- 1110 unchanged lines hidden (view full) --- 2541 } while (retry_count); 2542 2543 if (!ret && enlarge) 2544 memcg_oom_recover(memcg); 2545 2546 return ret; 2547} 2548 |
2563unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 2549unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, |
2564 gfp_t gfp_mask, 2565 unsigned long *total_scanned) 2566{ 2567 unsigned long nr_reclaimed = 0; | 2550 gfp_t gfp_mask, 2551 unsigned long *total_scanned) 2552{ 2553 unsigned long nr_reclaimed = 0; |
2568 struct mem_cgroup_per_zone *mz, *next_mz = NULL; | 2554 struct mem_cgroup_per_node *mz, *next_mz = NULL; |
2569 unsigned long reclaimed; 2570 int loop = 0; | 2555 unsigned long reclaimed; 2556 int loop = 0; |
2571 struct mem_cgroup_tree_per_zone *mctz; | 2557 struct mem_cgroup_tree_per_node *mctz; |
2572 unsigned long excess; 2573 unsigned long nr_scanned; 2574 2575 if (order > 0) 2576 return 0; 2577 | 2558 unsigned long excess; 2559 unsigned long nr_scanned; 2560 2561 if (order > 0) 2562 return 0; 2563 |
2578 mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); | 2564 mctz = soft_limit_tree_node(pgdat->node_id); |
2579 /* 2580 * This loop can run a while, specially if mem_cgroup's continuously 2581 * keep exceeding their soft limit and putting the system under 2582 * pressure 2583 */ 2584 do { 2585 if (next_mz) 2586 mz = next_mz; 2587 else 2588 mz = mem_cgroup_largest_soft_limit_node(mctz); 2589 if (!mz) 2590 break; 2591 2592 nr_scanned = 0; | 2565 /* 2566 * This loop can run a while, specially if mem_cgroup's continuously 2567 * keep exceeding their soft limit and putting the system under 2568 * pressure 2569 */ 2570 do { 2571 if (next_mz) 2572 mz = next_mz; 2573 else 2574 mz = mem_cgroup_largest_soft_limit_node(mctz); 2575 if (!mz) 2576 break; 2577 2578 nr_scanned = 0; |
2593 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, | 2579 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat, |
2594 gfp_mask, &nr_scanned); 2595 nr_reclaimed += reclaimed; 2596 *total_scanned += nr_scanned; 2597 spin_lock_irq(&mctz->lock); 2598 __mem_cgroup_remove_exceeded(mz, mctz); 2599 2600 /* 2601 * If we failed to reclaim anything from this memory cgroup --- 604 unchanged lines hidden (view full) --- 3206 3207 for_each_mem_cgroup_tree(mi, memcg) 3208 val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE; 3209 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val); 3210 } 3211 3212#ifdef CONFIG_DEBUG_VM 3213 { | 2580 gfp_mask, &nr_scanned); 2581 nr_reclaimed += reclaimed; 2582 *total_scanned += nr_scanned; 2583 spin_lock_irq(&mctz->lock); 2584 __mem_cgroup_remove_exceeded(mz, mctz); 2585 2586 /* 2587 * If we failed to reclaim anything from this memory cgroup --- 604 unchanged lines hidden (view full) --- 3192 3193 for_each_mem_cgroup_tree(mi, memcg) 3194 val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE; 3195 seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val); 3196 } 3197 3198#ifdef CONFIG_DEBUG_VM 3199 { |
3214 int nid, zid; 3215 struct mem_cgroup_per_zone *mz; | 3200 pg_data_t *pgdat; 3201 struct mem_cgroup_per_node *mz; |
3216 struct zone_reclaim_stat *rstat; 3217 unsigned long recent_rotated[2] = {0, 0}; 3218 unsigned long recent_scanned[2] = {0, 0}; 3219 | 3202 struct zone_reclaim_stat *rstat; 3203 unsigned long recent_rotated[2] = {0, 0}; 3204 unsigned long recent_scanned[2] = {0, 0}; 3205 |
3220 for_each_online_node(nid) 3221 for (zid = 0; zid < MAX_NR_ZONES; zid++) { 3222 mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; 3223 rstat = &mz->lruvec.reclaim_stat; | 3206 for_each_online_pgdat(pgdat) { 3207 mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); 3208 rstat = &mz->lruvec.reclaim_stat; |
3224 | 3209 |
3225 recent_rotated[0] += rstat->recent_rotated[0]; 3226 recent_rotated[1] += rstat->recent_rotated[1]; 3227 recent_scanned[0] += rstat->recent_scanned[0]; 3228 recent_scanned[1] += rstat->recent_scanned[1]; 3229 } | 3210 recent_rotated[0] += rstat->recent_rotated[0]; 3211 recent_rotated[1] += rstat->recent_rotated[1]; 3212 recent_scanned[0] += rstat->recent_scanned[0]; 3213 recent_scanned[1] += rstat->recent_scanned[1]; 3214 } |
3230 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); 3231 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); 3232 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); 3233 seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); 3234 } 3235#endif 3236 3237 return 0; --- 863 unchanged lines hidden (view full) --- 4101 * Caller must hold rcu_read_lock(). 4102 */ 4103struct mem_cgroup *mem_cgroup_from_id(unsigned short id) 4104{ 4105 WARN_ON_ONCE(!rcu_read_lock_held()); 4106 return idr_find(&mem_cgroup_idr, id); 4107} 4108 | 3215 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); 3216 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); 3217 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); 3218 seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); 3219 } 3220#endif 3221 3222 return 0; --- 863 unchanged lines hidden (view full) --- 4086 * Caller must hold rcu_read_lock(). 4087 */ 4088struct mem_cgroup *mem_cgroup_from_id(unsigned short id) 4089{ 4090 WARN_ON_ONCE(!rcu_read_lock_held()); 4091 return idr_find(&mem_cgroup_idr, id); 4092} 4093 |
4109static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | 4094static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) |
4110{ 4111 struct mem_cgroup_per_node *pn; | 4095{ 4096 struct mem_cgroup_per_node *pn; |
4112 struct mem_cgroup_per_zone *mz; 4113 int zone, tmp = node; | 4097 int tmp = node; |
4114 /* 4115 * This routine is called against possible nodes. 4116 * But it's BUG to call kmalloc() against offline node. 4117 * 4118 * TODO: this routine can waste much memory for nodes which will 4119 * never be onlined. It's better to use memory hotplug callback 4120 * function. 4121 */ 4122 if (!node_state(node, N_NORMAL_MEMORY)) 4123 tmp = -1; 4124 pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, tmp); 4125 if (!pn) 4126 return 1; 4127 | 4098 /* 4099 * This routine is called against possible nodes. 4100 * But it's BUG to call kmalloc() against offline node. 4101 * 4102 * TODO: this routine can waste much memory for nodes which will 4103 * never be onlined. It's better to use memory hotplug callback 4104 * function. 4105 */ 4106 if (!node_state(node, N_NORMAL_MEMORY)) 4107 tmp = -1; 4108 pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, tmp); 4109 if (!pn) 4110 return 1; 4111 |
4128 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 4129 mz = &pn->zoneinfo[zone]; 4130 lruvec_init(&mz->lruvec); 4131 mz->usage_in_excess = 0; 4132 mz->on_tree = false; 4133 mz->memcg = memcg; 4134 } | 4112 lruvec_init(&pn->lruvec); 4113 pn->usage_in_excess = 0; 4114 pn->on_tree = false; 4115 pn->memcg = memcg; 4116 |
4135 memcg->nodeinfo[node] = pn; 4136 return 0; 4137} 4138 | 4117 memcg->nodeinfo[node] = pn; 4118 return 0; 4119} 4120 |
4139static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | 4121static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) |
4140{ 4141 kfree(memcg->nodeinfo[node]); 4142} 4143 4144static void mem_cgroup_free(struct mem_cgroup *memcg) 4145{ 4146 int node; 4147 4148 memcg_wb_domain_exit(memcg); 4149 for_each_node(node) | 4122{ 4123 kfree(memcg->nodeinfo[node]); 4124} 4125 4126static void mem_cgroup_free(struct mem_cgroup *memcg) 4127{ 4128 int node; 4129 4130 memcg_wb_domain_exit(memcg); 4131 for_each_node(node) |
4150 free_mem_cgroup_per_zone_info(memcg, node); | 4132 free_mem_cgroup_per_node_info(memcg, node); |
4151 free_percpu(memcg->stat); 4152 kfree(memcg); 4153} 4154 4155static struct mem_cgroup *mem_cgroup_alloc(void) 4156{ 4157 struct mem_cgroup *memcg; 4158 size_t size; --- 12 unchanged lines hidden (view full) --- 4171 if (memcg->id.id < 0) 4172 goto fail; 4173 4174 memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); 4175 if (!memcg->stat) 4176 goto fail; 4177 4178 for_each_node(node) | 4133 free_percpu(memcg->stat); 4134 kfree(memcg); 4135} 4136 4137static struct mem_cgroup *mem_cgroup_alloc(void) 4138{ 4139 struct mem_cgroup *memcg; 4140 size_t size; --- 12 unchanged lines hidden (view full) --- 4153 if (memcg->id.id < 0) 4154 goto fail; 4155 4156 memcg->stat = alloc_percpu(struct mem_cgroup_stat_cpu); 4157 if (!memcg->stat) 4158 goto fail; 4159 4160 for_each_node(node) |
4179 if (alloc_mem_cgroup_per_zone_info(memcg, node)) | 4161 if (alloc_mem_cgroup_per_node_info(memcg, node)) |
4180 goto fail; 4181 4182 if (memcg_wb_domain_init(memcg, GFP_KERNEL)) 4183 goto fail; 4184 4185 INIT_WORK(&memcg->high_work, high_work_func); 4186 memcg->last_scanned_node = MAX_NUMNODES; 4187 INIT_LIST_HEAD(&memcg->oom_notify); --- 1586 unchanged lines hidden (view full) --- 5774 hotcpu_notifier(memcg_cpu_hotplug_callback, 0); 5775 5776 for_each_possible_cpu(cpu) 5777 INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, 5778 drain_local_stock); 5779 5780 for_each_node(node) { 5781 struct mem_cgroup_tree_per_node *rtpn; | 4162 goto fail; 4163 4164 if (memcg_wb_domain_init(memcg, GFP_KERNEL)) 4165 goto fail; 4166 4167 INIT_WORK(&memcg->high_work, high_work_func); 4168 memcg->last_scanned_node = MAX_NUMNODES; 4169 INIT_LIST_HEAD(&memcg->oom_notify); --- 1586 unchanged lines hidden (view full) --- 5756 hotcpu_notifier(memcg_cpu_hotplug_callback, 0); 5757 5758 for_each_possible_cpu(cpu) 5759 INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, 5760 drain_local_stock); 5761 5762 for_each_node(node) { 5763 struct mem_cgroup_tree_per_node *rtpn; |
5782 int zone; | |
5783 5784 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, 5785 node_online(node) ? node : NUMA_NO_NODE); 5786 | 5764 5765 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, 5766 node_online(node) ? node : NUMA_NO_NODE); 5767 |
5787 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 5788 struct mem_cgroup_tree_per_zone *rtpz; 5789 5790 rtpz = &rtpn->rb_tree_per_zone[zone]; 5791 rtpz->rb_root = RB_ROOT; 5792 spin_lock_init(&rtpz->lock); 5793 } | 5768 rtpn->rb_root = RB_ROOT; 5769 spin_lock_init(&rtpn->lock); |
5794 soft_limit_tree.rb_tree_per_node[node] = rtpn; 5795 } 5796 5797 return 0; 5798} 5799subsys_initcall(mem_cgroup_init); 5800 5801#ifdef CONFIG_MEMCG_SWAP --- 264 unchanged lines hidden --- | 5770 soft_limit_tree.rb_tree_per_node[node] = rtpn; 5771 } 5772 5773 return 0; 5774} 5775subsys_initcall(mem_cgroup_init); 5776 5777#ifdef CONFIG_MEMCG_SWAP --- 264 unchanged lines hidden --- |
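
The diff above is the memory-cgroup half of moving reclaim accounting from zones to nodes: `struct mem_cgroup_per_zone` and `struct mem_cgroup_tree_per_zone` disappear, each memory cgroup keeps a single `mem_cgroup_per_node` entry per NUMA node instead of one entry per zone, and the soft-limit forest keeps one RB-tree (plus lock) per node instead of one per (node, zone) pair. Callers change accordingly, passing `pg_data_t *pgdat` where they used to pass `struct zone *` (see `mem_cgroup_soft_limit_reclaim()` and the reclaim cookie). The sketch below only illustrates the shape of that collapse; the field lists are abridged and the rbtree/lock types are stand-ins, so it is a model, not the kernel definitions.

```c
/* Simplified sketch of the shape change only; these are NOT the kernel
 * definitions (fields are abridged, types are stand-ins, MAX_NR_ZONES is
 * a placeholder value). */
#include <stdio.h>

#define MAX_NR_ZONES 4                       /* placeholder for illustration */

struct rb_root_sketch { void *rb_node; };
struct spinlock_sketch { int locked; };

/* Before: one accounting entry and one soft-limit tree per (node, zone). */
struct mem_cgroup_per_zone_sketch {
	unsigned long usage_in_excess;       /* pages over the soft limit */
	int on_tree;                         /* linked into the soft-limit tree? */
	/* ... lruvec, lru_size[], reclaim iterators, memcg back-pointer ... */
};
struct mem_cgroup_per_node_before {
	struct mem_cgroup_per_zone_sketch zoneinfo[MAX_NR_ZONES];
};
struct mem_cgroup_tree_per_zone_sketch {
	struct rb_root_sketch rb_root;
	struct spinlock_sketch lock;
};
struct mem_cgroup_tree_per_node_before {
	struct mem_cgroup_tree_per_zone_sketch rb_tree_per_zone[MAX_NR_ZONES];
};

/* After: the zone dimension disappears; one entry and one tree per node. */
struct mem_cgroup_per_node_after {
	unsigned long usage_in_excess;
	int on_tree;
	/* ... lruvec, lru_size[], reclaim iterators, memcg back-pointer ... */
};
struct mem_cgroup_tree_per_node_after {
	struct rb_root_sketch rb_root;
	struct spinlock_sketch lock;
};

int main(void)
{
	printf("soft-limit trees per node, before: %d\n", MAX_NR_ZONES);
	printf("soft-limit trees per node, after : 1\n");
	return 0;
}
```

The initialization in `mem_cgroup_init()` shrinks the same way: the inner `for (zone = 0; ...)` loop that set up `MAX_NR_ZONES` roots and locks per node becomes a single `RB_ROOT` assignment and `spin_lock_init()` per node.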
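
The ordering rules of the soft-limit tree are unchanged by the move; only its granularity is. `__mem_cgroup_insert_exceeded()` keys entries by `usage_in_excess` (how far the group is over its soft limit, in pages), never inserts groups with zero excess, sends equal keys to the right, and `__mem_cgroup_largest_soft_limit_node()` takes the rightmost entry, the group furthest over its limit, off the tree while it is being reclaimed. The userspace toy below models just that victim-selection rule with a linear scan instead of an rbtree; the group names and page counts are made up for illustration.

```c
/* Userspace toy modelling how soft-limit reclaim picks its victim.
 * The kernel keeps an rbtree keyed by usage_in_excess and takes rb_last();
 * here a linear scan over made-up numbers stands in for the tree. */
#include <stdio.h>

struct memcg_toy {
	const char   *name;        /* hypothetical cgroup name */
	unsigned long nr_pages;    /* current usage, in pages */
	unsigned long soft_limit;  /* soft limit, in pages */
};

/* Mirrors soft_limit_excess(): 0 when at or under the limit. */
static unsigned long excess(const struct memcg_toy *m)
{
	return m->nr_pages > m->soft_limit ? m->nr_pages - m->soft_limit : 0;
}

int main(void)
{
	struct memcg_toy groups[] = {
		{ "groupA", 5000, 4000 },  /* 1000 pages over          */
		{ "groupB", 3000, 3000 },  /*    0 - never on the tree */
		{ "groupC", 9000, 2000 },  /* 7000 pages over          */
	};
	const struct memcg_toy *victim = NULL;

	for (unsigned int i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		if (!excess(&groups[i]))
			continue;              /* zero excess is never inserted */
		if (!victim || excess(&groups[i]) >= excess(victim))
			victim = &groups[i];   /* ">=" mirrors ties going right */
	}

	if (victim)
		printf("reclaim from %s (%lu pages over its soft limit)\n",
		       victim->name, excess(victim));
	return 0;
}
```

In the kernel the tree is kept current incrementally: `mem_cgroup_update_tree()` removes a group and re-inserts it at its new excess as usage changes, so the rightmost node remains the worst offender when `mem_cgroup_soft_limit_reclaim()` comes looking.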
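
The same collapse shows up on the statistics side: `mem_cgroup_node_nr_lru_pages()` used to loop over every zone of the node and add up the per-zone `lru_size[lru]` counters, while the per-node version reads one counter per LRU list. Below is a minimal before/after sketch of that summation; the array sizes and page counts are placeholders rather than kernel values.

```c
/* Sketch of the lru_size summation before and after the per-node change.
 * Array sizes and page counts are placeholders for illustration only. */
#include <stdio.h>

#define MAX_NR_ZONES 4
#define NR_LRU_LISTS 5

/* Before: counts were kept per (node, zone, lru). */
static unsigned long lru_size_per_zone[MAX_NR_ZONES][NR_LRU_LISTS];

/* After: counts are kept per (node, lru) only. */
static unsigned long lru_size_per_node[NR_LRU_LISTS];

static unsigned long node_nr_lru_pages_before(unsigned int lru_mask)
{
	unsigned long nr = 0;

	for (int zid = 0; zid < MAX_NR_ZONES; zid++)
		for (int lru = 0; lru < NR_LRU_LISTS; lru++)
			if (lru_mask & (1U << lru))
				nr += lru_size_per_zone[zid][lru];
	return nr;
}

static unsigned long node_nr_lru_pages_after(unsigned int lru_mask)
{
	unsigned long nr = 0;

	for (int lru = 0; lru < NR_LRU_LISTS; lru++)
		if (lru_mask & (1U << lru))
			nr += lru_size_per_node[lru];
	return nr;
}

int main(void)
{
	lru_size_per_zone[0][0] = 10;
	lru_size_per_zone[3][0] = 5;
	lru_size_per_node[0] = 15;           /* same pages, one counter */

	printf("before: %lu\n", node_nr_lru_pages_before(1U << 0));
	printf("after : %lu\n", node_nr_lru_pages_after(1U << 0));
	return 0;
}
```

Either way the function answers the same question, how many pages this cgroup has on the selected LRU lists of a given node; the per-node form simply no longer needs the inner zone loop.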