memcontrol.c (b7c15a3ce6fea5da3aa836c897a78ac628467d54) | memcontrol.c (5c041f5d1f23d3a172dd0db3215634c484b4acd6) |
---|---|
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* memcontrol.c - Memory Controller 3 * 4 * Copyright IBM Corporation, 2007 5 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 6 * 7 * Copyright 2007 OpenVZ SWsoft Inc 8 * Author: Pavel Emelianov <xemul@openvz.org> --- 53 unchanged lines hidden (view full) --- 62#include <linux/file.h> 63#include <linux/resume_user_mode.h> 64#include <linux/psi.h> 65#include <linux/seq_buf.h> 66#include "internal.h" 67#include <net/sock.h> 68#include <net/ip.h> 69#include "slab.h" | 1// SPDX-License-Identifier: GPL-2.0-or-later 2/* memcontrol.c - Memory Controller 3 * 4 * Copyright IBM Corporation, 2007 5 * Author Balbir Singh <balbir@linux.vnet.ibm.com> 6 * 7 * Copyright 2007 OpenVZ SWsoft Inc 8 * Author: Pavel Emelianov <xemul@openvz.org> --- 53 unchanged lines hidden (view full) --- 62#include <linux/file.h> 63#include <linux/resume_user_mode.h> 64#include <linux/psi.h> 65#include <linux/seq_buf.h> 66#include "internal.h" 67#include <net/sock.h> 68#include <net/ip.h> 69#include "slab.h" |
70#include "swap.h" |
|
70 71#include <linux/uaccess.h> 72 73#include <trace/events/vmscan.h> 74 75struct cgroup_subsys memory_cgrp_subsys __read_mostly; 76EXPORT_SYMBOL(memory_cgrp_subsys); 77 --- 6 unchanged lines hidden (view full) --- 84/* Socket memory accounting disabled? */ 85static bool cgroup_memory_nosocket __ro_after_init; 86 87/* Kernel memory accounting disabled? */ 88static bool cgroup_memory_nokmem __ro_after_init; 89 90/* Whether the swap controller is active */ 91#ifdef CONFIG_MEMCG_SWAP | 71 72#include <linux/uaccess.h> 73 74#include <trace/events/vmscan.h> 75 76struct cgroup_subsys memory_cgrp_subsys __read_mostly; 77EXPORT_SYMBOL(memory_cgrp_subsys); 78 --- 6 unchanged lines hidden (view full) --- 85/* Socket memory accounting disabled? */ 86static bool cgroup_memory_nosocket __ro_after_init; 87 88/* Kernel memory accounting disabled? */ 89static bool cgroup_memory_nokmem __ro_after_init; 90 91/* Whether the swap controller is active */ 92#ifdef CONFIG_MEMCG_SWAP |
92bool cgroup_memory_noswap __ro_after_init; | 93static bool cgroup_memory_noswap __ro_after_init; |
93#else 94#define cgroup_memory_noswap 1 95#endif 96 97#ifdef CONFIG_CGROUP_WRITEBACK 98static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); 99#endif 100 --- 103 unchanged lines hidden (view full) --- 204 */ 205#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 206#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 207 208/* for encoding cft->private value on file */ 209enum res_type { 210 _MEM, 211 _MEMSWAP, | 94#else 95#define cgroup_memory_noswap 1 96#endif 97 98#ifdef CONFIG_CGROUP_WRITEBACK 99static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); 100#endif 101 --- 103 unchanged lines hidden (view full) --- 205 */ 206#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 207#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 208 209/* for encoding cft->private value on file */ 210enum res_type { 211 _MEM, 212 _MEMSWAP, |
212 _OOM_TYPE, | |
213 _KMEM, 214 _TCP, 215}; 216 217#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) 218#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) 219#define MEMFILE_ATTR(val) ((val) & 0xffff) | 213 _KMEM, 214 _TCP, 215}; 216 217#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) 218#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) 219#define MEMFILE_ATTR(val) ((val) & 0xffff) |
220/* Used for OOM notifier */ 221#define OOM_CONTROL (0) | |
222 223/* 224 * Iteration constructs for visiting all cgroups (under a tree). If 225 * loops are exited prematurely (break), mem_cgroup_iter_break() must 226 * be used for reference counting. 227 */ 228#define for_each_mem_cgroup_tree(iter, root) \ 229 for (iter = mem_cgroup_iter(root, NULL, NULL); \ --- 778 unchanged lines hidden (view full) --- 1008 struct mem_cgroup *pos = NULL; 1009 1010 if (mem_cgroup_disabled()) 1011 return NULL; 1012 1013 if (!root) 1014 root = root_mem_cgroup; 1015 | 220 221/* 222 * Iteration constructs for visiting all cgroups (under a tree). If 223 * loops are exited prematurely (break), mem_cgroup_iter_break() must 224 * be used for reference counting. 225 */ 226#define for_each_mem_cgroup_tree(iter, root) \ 227 for (iter = mem_cgroup_iter(root, NULL, NULL); \ --- 778 unchanged lines hidden (view full) --- 1006 struct mem_cgroup *pos = NULL; 1007 1008 if (mem_cgroup_disabled()) 1009 return NULL; 1010 1011 if (!root) 1012 root = root_mem_cgroup; 1013 |
1016 if (prev && !reclaim) 1017 pos = prev; 1018 | |
1019 rcu_read_lock(); 1020 1021 if (reclaim) { 1022 struct mem_cgroup_per_node *mz; 1023 1024 mz = root->nodeinfo[reclaim->pgdat->node_id]; 1025 iter = &mz->iter; 1026 | 1014 rcu_read_lock(); 1015 1016 if (reclaim) { 1017 struct mem_cgroup_per_node *mz; 1018 1019 mz = root->nodeinfo[reclaim->pgdat->node_id]; 1020 iter = &mz->iter; 1021 |
1027 if (prev && reclaim->generation != iter->generation) | 1022 /* 1023 * On start, join the current reclaim iteration cycle. 1024 * Exit when a concurrent walker completes it. 1025 */ 1026 if (!prev) 1027 reclaim->generation = iter->generation; 1028 else if (reclaim->generation != iter->generation) |
1028 goto out_unlock; 1029 1030 while (1) { 1031 pos = READ_ONCE(iter->position); 1032 if (!pos || css_tryget(&pos->css)) 1033 break; 1034 /* 1035 * css reference reached zero, so iter->position will 1036 * be cleared by ->css_released. However, we should not 1037 * rely on this happening soon, because ->css_released 1038 * is called from a work queue, and by busy-waiting we 1039 * might block it. So we clear iter->position right 1040 * away. 1041 */ 1042 (void)cmpxchg(&iter->position, pos, NULL); 1043 } | 1029 goto out_unlock; 1030 1031 while (1) { 1032 pos = READ_ONCE(iter->position); 1033 if (!pos || css_tryget(&pos->css)) 1034 break; 1035 /* 1036 * css reference reached zero, so iter->position will 1037 * be cleared by ->css_released. However, we should not 1038 * rely on this happening soon, because ->css_released 1039 * is called from a work queue, and by busy-waiting we 1040 * might block it. So we clear iter->position right 1041 * away. 1042 */ 1043 (void)cmpxchg(&iter->position, pos, NULL); 1044 } |
1045 } else if (prev) { 1046 pos = prev; |
|
1044 } 1045 1046 if (pos) 1047 css = &pos->css; 1048 1049 for (;;) { 1050 css = css_next_descendant_pre(css, &root->css); 1051 if (!css) { --- 8 unchanged lines hidden (view full) --- 1060 break; 1061 } 1062 1063 /* 1064 * Verify the css and acquire a reference. The root 1065 * is provided by the caller, so we know it's alive 1066 * and kicking, and don't take an extra reference. 1067 */ | 1047 } 1048 1049 if (pos) 1050 css = &pos->css; 1051 1052 for (;;) { 1053 css = css_next_descendant_pre(css, &root->css); 1054 if (!css) { --- 8 unchanged lines hidden (view full) --- 1063 break; 1064 } 1065 1066 /* 1067 * Verify the css and acquire a reference. The root 1068 * is provided by the caller, so we know it's alive 1069 * and kicking, and don't take an extra reference. 1070 */ |
1068 memcg = mem_cgroup_from_css(css); 1069 1070 if (css == &root->css) | 1071 if (css == &root->css || css_tryget(css)) { 1072 memcg = mem_cgroup_from_css(css); |
1071 break; | 1073 break; |
1072 1073 if (css_tryget(css)) 1074 break; 1075 1076 memcg = NULL; | 1074 } |
1077 } 1078 1079 if (reclaim) { 1080 /* 1081 * The position could have already been updated by a competing 1082 * thread, so check that the value hasn't changed since we read 1083 * it to avoid reclaiming from the same cgroup twice. 1084 */ 1085 (void)cmpxchg(&iter->position, pos, memcg); 1086 1087 if (pos) 1088 css_put(&pos->css); 1089 1090 if (!memcg) 1091 iter->generation++; | 1075 } 1076 1077 if (reclaim) { 1078 /* 1079 * The position could have already been updated by a competing 1080 * thread, so check that the value hasn't changed since we read 1081 * it to avoid reclaiming from the same cgroup twice. 1082 */ 1083 (void)cmpxchg(&iter->position, pos, memcg); 1084 1085 if (pos) 1086 css_put(&pos->css); 1087 1088 if (!memcg) 1089 iter->generation++; |
1092 else if (!prev) 1093 reclaim->generation = iter->generation; | |
1094 } 1095 1096out_unlock: 1097 rcu_read_unlock(); 1098 if (prev && prev != root) 1099 css_put(&prev->css); 1100 1101 return memcg; --- 2280 unchanged lines hidden (view full) --- 3382 unsigned long *total_scanned) 3383{ 3384 unsigned long nr_reclaimed = 0; 3385 struct mem_cgroup_per_node *mz, *next_mz = NULL; 3386 unsigned long reclaimed; 3387 int loop = 0; 3388 struct mem_cgroup_tree_per_node *mctz; 3389 unsigned long excess; | 1090 } 1091 1092out_unlock: 1093 rcu_read_unlock(); 1094 if (prev && prev != root) 1095 css_put(&prev->css); 1096 1097 return memcg; --- 2280 unchanged lines hidden (view full) --- 3378 unsigned long *total_scanned) 3379{ 3380 unsigned long nr_reclaimed = 0; 3381 struct mem_cgroup_per_node *mz, *next_mz = NULL; 3382 unsigned long reclaimed; 3383 int loop = 0; 3384 struct mem_cgroup_tree_per_node *mctz; 3385 unsigned long excess; |
3390 unsigned long nr_scanned; | |
3391 3392 if (order > 0) 3393 return 0; 3394 3395 mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; 3396 3397 /* 3398 * Do not even bother to check the largest node if the root --- 11 unchanged lines hidden (view full) --- 3410 do { 3411 if (next_mz) 3412 mz = next_mz; 3413 else 3414 mz = mem_cgroup_largest_soft_limit_node(mctz); 3415 if (!mz) 3416 break; 3417 | 3386 3387 if (order > 0) 3388 return 0; 3389 3390 mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id]; 3391 3392 /* 3393 * Do not even bother to check the largest node if the root --- 11 unchanged lines hidden (view full) --- 3405 do { 3406 if (next_mz) 3407 mz = next_mz; 3408 else 3409 mz = mem_cgroup_largest_soft_limit_node(mctz); 3410 if (!mz) 3411 break; 3412 |
3418 nr_scanned = 0; | |
3419 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat, | 3413 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat, |
3420 gfp_mask, &nr_scanned); | 3414 gfp_mask, total_scanned); |
3421 nr_reclaimed += reclaimed; | 3415 nr_reclaimed += reclaimed; |
3422 *total_scanned += nr_scanned; | |
3423 spin_lock_irq(&mctz->lock); | 3416 spin_lock_irq(&mctz->lock); |
3424 __mem_cgroup_remove_exceeded(mz, mctz); | |
3425 3426 /* 3427 * If we failed to reclaim anything from this memory cgroup 3428 * it is time to move on to the next cgroup 3429 */ 3430 next_mz = NULL; 3431 if (!reclaimed) 3432 next_mz = __mem_cgroup_largest_soft_limit_node(mctz); --- 1455 unchanged lines hidden (view full) --- 4888 .name = "move_charge_at_immigrate", 4889 .read_u64 = mem_cgroup_move_charge_read, 4890 .write_u64 = mem_cgroup_move_charge_write, 4891 }, 4892 { 4893 .name = "oom_control", 4894 .seq_show = mem_cgroup_oom_control_read, 4895 .write_u64 = mem_cgroup_oom_control_write, | 3417 3418 /* 3419 * If we failed to reclaim anything from this memory cgroup 3420 * it is time to move on to the next cgroup 3421 */ 3422 next_mz = NULL; 3423 if (!reclaimed) 3424 next_mz = __mem_cgroup_largest_soft_limit_node(mctz); --- 1455 unchanged lines hidden (view full) --- 4880 .name = "move_charge_at_immigrate", 4881 .read_u64 = mem_cgroup_move_charge_read, 4882 .write_u64 = mem_cgroup_move_charge_write, 4883 }, 4884 { 4885 .name = "oom_control", 4886 .seq_show = mem_cgroup_oom_control_read, 4887 .write_u64 = mem_cgroup_oom_control_write, |
4896 .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), | |
4897 }, 4898 { 4899 .name = "pressure_level", 4900 }, 4901#ifdef CONFIG_NUMA 4902 { 4903 .name = "numa_stat", 4904 .seq_show = memcg_numa_stat_show, --- 739 unchanged lines hidden (view full) --- 5644 unsigned long addr, pte_t ptent, union mc_target *target) 5645{ 5646 struct page *page = NULL; 5647 enum mc_target_type ret = MC_TARGET_NONE; 5648 swp_entry_t ent = { .val = 0 }; 5649 5650 if (pte_present(ptent)) 5651 page = mc_handle_present_pte(vma, addr, ptent); | 4888 }, 4889 { 4890 .name = "pressure_level", 4891 }, 4892#ifdef CONFIG_NUMA 4893 { 4894 .name = "numa_stat", 4895 .seq_show = memcg_numa_stat_show, --- 739 unchanged lines hidden (view full) --- 5635 unsigned long addr, pte_t ptent, union mc_target *target) 5636{ 5637 struct page *page = NULL; 5638 enum mc_target_type ret = MC_TARGET_NONE; 5639 swp_entry_t ent = { .val = 0 }; 5640 5641 if (pte_present(ptent)) 5642 page = mc_handle_present_pte(vma, addr, ptent); |
5643 else if (pte_none_mostly(ptent)) 5644 /* 5645 * PTE markers should be treated as a none pte here, separated 5646 * from other swap handling below. 5647 */ 5648 page = mc_handle_file_pte(vma, addr, ptent); |
|
5652 else if (is_swap_pte(ptent)) 5653 page = mc_handle_swap_pte(vma, ptent, &ent); | 5649 else if (is_swap_pte(ptent)) 5650 page = mc_handle_swap_pte(vma, ptent, &ent); |
5654 else if (pte_none(ptent)) 5655 page = mc_handle_file_pte(vma, addr, ptent); | |
5656 5657 if (!page && !ent.val) 5658 return ret; 5659 if (page) { 5660 /* 5661 * Do only loose check w/o serialization. 5662 * mem_cgroup_move_account() checks the page is valid or 5663 * not under LRU exclusion. --- 696 unchanged lines hidden (view full) --- 6360 if (oom_group != 0 && oom_group != 1) 6361 return -EINVAL; 6362 6363 memcg->oom_group = oom_group; 6364 6365 return nbytes; 6366} 6367 | 5651 5652 if (!page && !ent.val) 5653 return ret; 5654 if (page) { 5655 /* 5656 * Do only loose check w/o serialization. 5657 * mem_cgroup_move_account() checks the page is valid or 5658 * not under LRU exclusion. --- 696 unchanged lines hidden (view full) --- 6355 if (oom_group != 0 && oom_group != 1) 6356 return -EINVAL; 6357 6358 memcg->oom_group = oom_group; 6359 6360 return nbytes; 6361} 6362 |
6363static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, 6364 size_t nbytes, loff_t off) 6365{ 6366 struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); 6367 unsigned int nr_retries = MAX_RECLAIM_RETRIES; 6368 unsigned long nr_to_reclaim, nr_reclaimed = 0; 6369 int err; 6370 6371 buf = strstrip(buf); 6372 err = page_counter_memparse(buf, "", &nr_to_reclaim); 6373 if (err) 6374 return err; 6375 6376 while (nr_reclaimed < nr_to_reclaim) { 6377 unsigned long reclaimed; 6378 6379 if (signal_pending(current)) 6380 return -EINTR; 6381 6382 /* 6383 * This is the final attempt, drain percpu lru caches in the 6384 * hope of introducing more evictable pages for 6385 * try_to_free_mem_cgroup_pages(). 6386 */ 6387 if (!nr_retries) 6388 lru_add_drain_all(); 6389 6390 reclaimed = try_to_free_mem_cgroup_pages(memcg, 6391 nr_to_reclaim - nr_reclaimed, 6392 GFP_KERNEL, true); 6393 6394 if (!reclaimed && !nr_retries--) 6395 return -EAGAIN; 6396 6397 nr_reclaimed += reclaimed; 6398 } 6399 6400 return nbytes; 6401} 6402 |
|
6368static struct cftype memory_files[] = { 6369 { 6370 .name = "current", 6371 .flags = CFTYPE_NOT_ON_ROOT, 6372 .read_u64 = memory_current_read, 6373 }, 6374 { 6375 .name = "min", --- 42 unchanged lines hidden (view full) --- 6418 }, 6419#endif 6420 { 6421 .name = "oom.group", 6422 .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE, 6423 .seq_show = memory_oom_group_show, 6424 .write = memory_oom_group_write, 6425 }, | 6403static struct cftype memory_files[] = { 6404 { 6405 .name = "current", 6406 .flags = CFTYPE_NOT_ON_ROOT, 6407 .read_u64 = memory_current_read, 6408 }, 6409 { 6410 .name = "min", --- 42 unchanged lines hidden (view full) --- 6453 }, 6454#endif 6455 { 6456 .name = "oom.group", 6457 .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE, 6458 .seq_show = memory_oom_group_show, 6459 .write = memory_oom_group_write, 6460 }, |
6461 { 6462 .name = "reclaim", 6463 .flags = CFTYPE_NS_DELEGATABLE, 6464 .write = memory_reclaim, 6465 }, |
|
6426 { } /* terminate */ 6427}; 6428 6429struct cgroup_subsys memory_cgrp_subsys = { 6430 .css_alloc = mem_cgroup_css_alloc, 6431 .css_online = mem_cgroup_css_online, 6432 .css_offline = mem_cgroup_css_offline, 6433 .css_released = mem_cgroup_css_released, --- 154 unchanged lines hidden (view full) --- 6588 if (memcg == root) 6589 return; 6590 6591 usage = page_counter_read(&memcg->memory); 6592 if (!usage) 6593 return; 6594 6595 parent = parent_mem_cgroup(memcg); | 6466 { } /* terminate */ 6467}; 6468 6469struct cgroup_subsys memory_cgrp_subsys = { 6470 .css_alloc = mem_cgroup_css_alloc, 6471 .css_online = mem_cgroup_css_online, 6472 .css_offline = mem_cgroup_css_offline, 6473 .css_released = mem_cgroup_css_released, --- 154 unchanged lines hidden (view full) --- 6628 if (memcg == root) 6629 return; 6630 6631 usage = page_counter_read(&memcg->memory); 6632 if (!usage) 6633 return; 6634 6635 parent = parent_mem_cgroup(memcg); |
6596 /* No parent means a non-hierarchical mode on v1 memcg */ 6597 if (!parent) 6598 return; | |
6599 6600 if (parent == root) { 6601 memcg->memory.emin = READ_ONCE(memcg->memory.min); 6602 memcg->memory.elow = READ_ONCE(memcg->memory.low); 6603 return; 6604 } 6605 6606 parent_usage = page_counter_read(&parent->memory); --- 791 unchanged lines hidden --- | 6636 6637 if (parent == root) { 6638 memcg->memory.emin = READ_ONCE(memcg->memory.min); 6639 memcg->memory.elow = READ_ONCE(memcg->memory.low); 6640 return; 6641 } 6642 6643 parent_usage = page_counter_read(&parent->memory); --- 791 unchanged lines hidden --- |