1992bf775SAneesh Kumar K.V // SPDX-License-Identifier: GPL-2.0
2992bf775SAneesh Kumar K.V #include <linux/slab.h>
3992bf775SAneesh Kumar K.V #include <linux/lockdep.h>
491952440SAneesh Kumar K.V #include <linux/sysfs.h>
591952440SAneesh Kumar K.V #include <linux/kobject.h>
6c6123a19SAneesh Kumar K.V #include <linux/memory.h>
7992bf775SAneesh Kumar K.V #include <linux/memory-tiers.h>
8992bf775SAneesh Kumar K.V
96c542ab7SAneesh Kumar K.V #include "internal.h"
106c542ab7SAneesh Kumar K.V
/*
 * A memory tier groups all memory_dev_types whose abstract distance
 * falls into the same MEMTIER_CHUNK_SIZE chunk. Tiers live on the
 * global memory_tiers list, sorted by adistance_start, and are exposed
 * to userspace via the embedded device on the memory_tiering bus.
 */
struct memory_tier {
	/* hierarchy of memory tiers */
	struct list_head list;
	/* list of all memory types part of this tier */
	struct list_head memory_types;
	/*
	 * start value of abstract distance. memory tier maps
	 * an abstract distance range,
	 * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE
	 */
	int adistance_start;
	/* sysfs representation; freed from memory_tier_device_release() */
	struct device dev;
	/* All the nodes that are part of all the lower memory tiers. */
	nodemask_t lower_tier_mask;
};
26992bf775SAneesh Kumar K.V
/*
 * Per-node demotion state: the set of equally-good target nodes pages
 * may be demoted to (see node_demotion[] examples below).
 */
struct demotion_nodes {
	nodemask_t preferred;
};
306c542ab7SAneesh Kumar K.V
/*
 * Which memory_dev_type a NUMA node is mapped to, plus how many
 * devices on that node share the mapping. Only one kref is taken per
 * node regardless of map_count (see __init_node_memory_type()).
 */
struct node_memory_type_map {
	struct memory_dev_type *memtype;
	int map_count;
};
35992bf775SAneesh Kumar K.V
/* Serializes updates to tiers, node<->type maps and demotion state. */
static DEFINE_MUTEX(memory_tier_lock);
/* All tiers, kept sorted by adistance_start (see find_create_memory_tier()). */
static LIST_HEAD(memory_tiers);
/* Per-node type mapping, indexed by NUMA node id. */
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
/* Type used for ordinary DRAM nodes; presumably set at init — not visible in this chunk. */
static struct memory_dev_type *default_dram_type;

/* sysfs bus; each tier appears as /sys/devices/virtual/memory_tiering/memory_tier<N>. */
static struct bus_type memory_tier_subsys = {
	.name = "memory_tiering",
	.dev_name = "memory_tier",
};

#ifdef CONFIG_MIGRATION
/* Largest abstract distance still considered top tier (no promotion above it). */
static int top_tier_adistance;
/*
 * node_demotion[] examples:
 *
 * Example 1:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 & 3 are PMEM nodes.
 *
 * node distances:
 * node   0    1    2    3
 *    0  10   20   30   40
 *    1  20   10   40   30
 *    2  30   40   10   40
 *    3  40   30   40   10
 *
 * memory_tiers0 = 0-1
 * memory_tiers1 = 2-3
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 3
 * node_demotion[2].preferred = <empty>
 * node_demotion[3].preferred = <empty>
 *
 * Example 2:
 *
 * Node 0 & 1 are CPU + DRAM nodes, node 2 is memory-only DRAM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   30
 *    2  30   30   10
 *
 * memory_tiers0 = 0-2
 *
 * node_demotion[0].preferred = <empty>
 * node_demotion[1].preferred = <empty>
 * node_demotion[2].preferred = <empty>
 *
 * Example 3:
 *
 * Node 0 is CPU + DRAM nodes, Node 1 is HBM node, node 2 is PMEM node.
 *
 * node distances:
 * node   0    1    2
 *    0  10   20   30
 *    1  20   10   40
 *    2  30   40   10
 *
 * memory_tiers0 = 1
 * memory_tiers1 = 0
 * memory_tiers2 = 2
 *
 * node_demotion[0].preferred = 2
 * node_demotion[1].preferred = 0
 * node_demotion[2].preferred = <empty>
 *
 */
static struct demotion_nodes *node_demotion __read_mostly;
#endif /* CONFIG_MIGRATION */
107992bf775SAneesh Kumar K.V
/* Map a memory_tiering bus device back to its containing memory_tier. */
static inline struct memory_tier *to_memory_tier(struct device *device)
{
	return container_of(device, struct memory_tier, dev);
}
1129832fb87SAneesh Kumar K.V
get_memtier_nodemask(struct memory_tier * memtier)1139832fb87SAneesh Kumar K.V static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
1149832fb87SAneesh Kumar K.V {
1159832fb87SAneesh Kumar K.V nodemask_t nodes = NODE_MASK_NONE;
1169832fb87SAneesh Kumar K.V struct memory_dev_type *memtype;
1179832fb87SAneesh Kumar K.V
1189832fb87SAneesh Kumar K.V list_for_each_entry(memtype, &memtier->memory_types, tier_sibiling)
1199832fb87SAneesh Kumar K.V nodes_or(nodes, nodes, memtype->nodes);
1209832fb87SAneesh Kumar K.V
1219832fb87SAneesh Kumar K.V return nodes;
1229832fb87SAneesh Kumar K.V }
1239832fb87SAneesh Kumar K.V
/* Device-model release callback: final free of a memory_tier. */
static void memory_tier_device_release(struct device *dev)
{
	/*
	 * clear_node_memory_tier() issues synchronize_rcu() before the
	 * tier device goes away, so no RCU reader can still see this
	 * tier and a plain kfree is safe here.
	 */
	kfree(to_memory_tier(dev));
}
1339832fb87SAneesh Kumar K.V
nodelist_show(struct device * dev,struct device_attribute * attr,char * buf)13427d676a1SHuang Ying static ssize_t nodelist_show(struct device *dev,
1359832fb87SAneesh Kumar K.V struct device_attribute *attr, char *buf)
1369832fb87SAneesh Kumar K.V {
1379832fb87SAneesh Kumar K.V int ret;
1389832fb87SAneesh Kumar K.V nodemask_t nmask;
1399832fb87SAneesh Kumar K.V
1409832fb87SAneesh Kumar K.V mutex_lock(&memory_tier_lock);
1419832fb87SAneesh Kumar K.V nmask = get_memtier_nodemask(to_memory_tier(dev));
1429832fb87SAneesh Kumar K.V ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&nmask));
1439832fb87SAneesh Kumar K.V mutex_unlock(&memory_tier_lock);
1449832fb87SAneesh Kumar K.V return ret;
1459832fb87SAneesh Kumar K.V }
14627d676a1SHuang Ying static DEVICE_ATTR_RO(nodelist);
1479832fb87SAneesh Kumar K.V
/* Attributes attached to every memory_tier device (currently just "nodelist"). */
static struct attribute *memtier_dev_attrs[] = {
	&dev_attr_nodelist.attr,
	NULL
};

static const struct attribute_group memtier_dev_group = {
	.attrs = memtier_dev_attrs,
};

/* NULL-terminated group list wired into new_memtier->dev.groups. */
static const struct attribute_group *memtier_dev_groups[] = {
	&memtier_dev_group,
	NULL
};
1619832fb87SAneesh Kumar K.V
/*
 * Return the memory tier covering @memtype's abstract distance chunk,
 * creating and registering a new tier (including its sysfs device) if
 * none exists yet, and link @memtype onto that tier's memory_types
 * list. Returns ERR_PTR(-ENOMEM) or the device_register() error on
 * failure. Caller must hold memory_tier_lock.
 */
static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
{
	int ret;
	bool found_slot = false;
	struct memory_tier *memtier, *new_memtier;
	int adistance = memtype->adistance;
	unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;

	lockdep_assert_held_once(&memory_tier_lock);

	/* Tiers are identified by the start of their adistance chunk. */
	adistance = round_down(adistance, memtier_adistance_chunk_size);
	/*
	 * If the memtype is already part of a memory tier,
	 * just return that.
	 */
	if (!list_empty(&memtype->tier_sibiling)) {
		list_for_each_entry(memtier, &memory_tiers, list) {
			if (adistance == memtier->adistance_start)
				return memtier;
		}
		/* Linked memtype with no matching tier: inconsistent state. */
		WARN_ON(1);
		return ERR_PTR(-EINVAL);
	}

	/* Find an existing tier, or the sorted insertion point for a new one. */
	list_for_each_entry(memtier, &memory_tiers, list) {
		if (adistance == memtier->adistance_start) {
			goto link_memtype;
		} else if (adistance < memtier->adistance_start) {
			found_slot = true;
			break;
		}
	}

	new_memtier = kzalloc(sizeof(struct memory_tier), GFP_KERNEL);
	if (!new_memtier)
		return ERR_PTR(-ENOMEM);

	new_memtier->adistance_start = adistance;
	INIT_LIST_HEAD(&new_memtier->list);
	INIT_LIST_HEAD(&new_memtier->memory_types);
	/* Insert before the first tier with a larger adistance, else at the tail. */
	if (found_slot)
		list_add_tail(&new_memtier->list, &memtier->list);
	else
		list_add_tail(&new_memtier->list, &memory_tiers);

	new_memtier->dev.id = adistance >> MEMTIER_CHUNK_BITS;
	new_memtier->dev.bus = &memory_tier_subsys;
	new_memtier->dev.release = memory_tier_device_release;
	new_memtier->dev.groups = memtier_dev_groups;

	ret = device_register(&new_memtier->dev);
	if (ret) {
		/*
		 * device_register() may have initialized the device, so
		 * unwind via put_device() (which ends in our release
		 * callback) rather than kfree.
		 */
		list_del(&new_memtier->list);
		put_device(&new_memtier->dev);
		return ERR_PTR(ret);
	}
	memtier = new_memtier;

link_memtype:
	list_add(&memtype->tier_sibiling, &memtier->memory_types);
	return memtier;
}
224992bf775SAneesh Kumar K.V
/*
 * Return the memory tier currently published for @node, or NULL if the
 * node has no pgdat or no tier assigned. Caller must hold
 * memory_tier_lock (enforced by the rcu_dereference_check() below).
 */
static struct memory_tier *__node_get_memory_tier(int node)
{
	pg_data_t *pgdat;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return NULL;
	/*
	 * Since we hold memory_tier_lock, we can avoid
	 * RCU read locks when accessing the details. No
	 * parallel updates are possible here.
	 */
	return rcu_dereference_check(pgdat->memtier,
				     lockdep_is_held(&memory_tier_lock));
}
2406c542ab7SAneesh Kumar K.V
2416c542ab7SAneesh Kumar K.V #ifdef CONFIG_MIGRATION
node_is_toptier(int node)242467b171aSAneesh Kumar K.V bool node_is_toptier(int node)
243467b171aSAneesh Kumar K.V {
244467b171aSAneesh Kumar K.V bool toptier;
245467b171aSAneesh Kumar K.V pg_data_t *pgdat;
246467b171aSAneesh Kumar K.V struct memory_tier *memtier;
247467b171aSAneesh Kumar K.V
248467b171aSAneesh Kumar K.V pgdat = NODE_DATA(node);
249467b171aSAneesh Kumar K.V if (!pgdat)
250467b171aSAneesh Kumar K.V return false;
251467b171aSAneesh Kumar K.V
252467b171aSAneesh Kumar K.V rcu_read_lock();
253467b171aSAneesh Kumar K.V memtier = rcu_dereference(pgdat->memtier);
254467b171aSAneesh Kumar K.V if (!memtier) {
255467b171aSAneesh Kumar K.V toptier = true;
256467b171aSAneesh Kumar K.V goto out;
257467b171aSAneesh Kumar K.V }
258467b171aSAneesh Kumar K.V if (memtier->adistance_start <= top_tier_adistance)
259467b171aSAneesh Kumar K.V toptier = true;
260467b171aSAneesh Kumar K.V else
261467b171aSAneesh Kumar K.V toptier = false;
262467b171aSAneesh Kumar K.V out:
263467b171aSAneesh Kumar K.V rcu_read_unlock();
264467b171aSAneesh Kumar K.V return toptier;
265467b171aSAneesh Kumar K.V }
266467b171aSAneesh Kumar K.V
/* Fill *@targets with all nodes in tiers strictly below @pgdat's tier. */
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
{
	struct memory_tier *memtier;

	/*
	 * pg_data_t.memtier updates include a synchronize_rcu(), which
	 * guarantees we observe either NULL or a fully valid memtier
	 * here; guard the access with rcu_read_lock().
	 */
	rcu_read_lock();
	memtier = rcu_dereference(pgdat->memtier);
	*targets = memtier ? memtier->lower_tier_mask : NODE_MASK_NONE;
	rcu_read_unlock();
}
28432008027SJagdish Gediya
2856c542ab7SAneesh Kumar K.V /**
2866c542ab7SAneesh Kumar K.V * next_demotion_node() - Get the next node in the demotion path
2876c542ab7SAneesh Kumar K.V * @node: The starting node to lookup the next node
2886c542ab7SAneesh Kumar K.V *
2896c542ab7SAneesh Kumar K.V * Return: node id for next memory node in the demotion path hierarchy
2906c542ab7SAneesh Kumar K.V * from @node; NUMA_NO_NODE if @node is terminal. This does not keep
2916c542ab7SAneesh Kumar K.V * @node online or guarantee that it *continues* to be the next demotion
2926c542ab7SAneesh Kumar K.V * target.
2936c542ab7SAneesh Kumar K.V */
next_demotion_node(int node)2946c542ab7SAneesh Kumar K.V int next_demotion_node(int node)
2956c542ab7SAneesh Kumar K.V {
2966c542ab7SAneesh Kumar K.V struct demotion_nodes *nd;
2976c542ab7SAneesh Kumar K.V int target;
2986c542ab7SAneesh Kumar K.V
2996c542ab7SAneesh Kumar K.V if (!node_demotion)
3006c542ab7SAneesh Kumar K.V return NUMA_NO_NODE;
3016c542ab7SAneesh Kumar K.V
3026c542ab7SAneesh Kumar K.V nd = &node_demotion[node];
3036c542ab7SAneesh Kumar K.V
3046c542ab7SAneesh Kumar K.V /*
3056c542ab7SAneesh Kumar K.V * node_demotion[] is updated without excluding this
3066c542ab7SAneesh Kumar K.V * function from running.
3076c542ab7SAneesh Kumar K.V *
3086c542ab7SAneesh Kumar K.V * Make sure to use RCU over entire code blocks if
3096c542ab7SAneesh Kumar K.V * node_demotion[] reads need to be consistent.
3106c542ab7SAneesh Kumar K.V */
3116c542ab7SAneesh Kumar K.V rcu_read_lock();
3126c542ab7SAneesh Kumar K.V /*
3136c542ab7SAneesh Kumar K.V * If there are multiple target nodes, just select one
3146c542ab7SAneesh Kumar K.V * target node randomly.
3156c542ab7SAneesh Kumar K.V *
3166c542ab7SAneesh Kumar K.V * In addition, we can also use round-robin to select
3176c542ab7SAneesh Kumar K.V * target node, but we should introduce another variable
3186c542ab7SAneesh Kumar K.V * for node_demotion[] to record last selected target node,
3196c542ab7SAneesh Kumar K.V * that may cause cache ping-pong due to the changing of
3206c542ab7SAneesh Kumar K.V * last target node. Or introducing per-cpu data to avoid
3216c542ab7SAneesh Kumar K.V * caching issue, which seems more complicated. So selecting
3226c542ab7SAneesh Kumar K.V * target node randomly seems better until now.
3236c542ab7SAneesh Kumar K.V */
3246c542ab7SAneesh Kumar K.V target = node_random(&nd->preferred);
3256c542ab7SAneesh Kumar K.V rcu_read_unlock();
3266c542ab7SAneesh Kumar K.V
3276c542ab7SAneesh Kumar K.V return target;
3286c542ab7SAneesh Kumar K.V }
3296c542ab7SAneesh Kumar K.V
/*
 * Clear every node's preferred demotion mask and every tier's
 * lower_tier_mask, then wait out current RCU readers so no reader
 * mixes pre-disable and post-rebuild state. Caller must hold
 * memory_tier_lock.
 */
static void disable_all_demotion_targets(void)
{
	struct memory_tier *memtier;
	int node;

	for_each_node_state(node, N_MEMORY) {
		node_demotion[node].preferred = NODE_MASK_NONE;
		/*
		 * We are holding memory_tier_lock, it is safe
		 * to access pgda->memtier.
		 */
		memtier = __node_get_memory_tier(node);
		if (memtier)
			memtier->lower_tier_mask = NODE_MASK_NONE;
	}
	/*
	 * Ensure that the "disable" is visible across the system.
	 * Readers will see either a combination of before+disable
	 * state or disable+after. They will never see before and
	 * after state together.
	 */
	synchronize_rcu();
}
3536c542ab7SAneesh Kumar K.V
/*
 * Find an automatic demotion target for all memory
 * nodes. Failing here is OK. It might just indicate
 * being at the end of a chain.
 *
 * Rebuilds, for every N_MEMORY node: the preferred demotion mask
 * (nearest nodes in the next-lower tier), the top_tier_adistance
 * cutoff, and each tier's lower_tier_mask. Caller must hold
 * memory_tier_lock.
 */
static void establish_demotion_targets(void)
{
	struct memory_tier *memtier;
	struct demotion_nodes *nd;
	int target = NUMA_NO_NODE, node;
	int distance, best_distance;
	nodemask_t tier_nodes, lower_tier;

	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_demotion)
		return;

	/* Start from a clean slate; readers see disabled or rebuilt state. */
	disable_all_demotion_targets();

	for_each_node_state(node, N_MEMORY) {
		best_distance = -1;
		nd = &node_demotion[node];

		memtier = __node_get_memory_tier(node);
		/* Nodes in the lowest tier (or unassigned) have no demotion target. */
		if (!memtier || list_is_last(&memtier->list, &memory_tiers))
			continue;
		/*
		 * Get the lower memtier to find the demotion node list.
		 */
		memtier = list_next_entry(memtier, list);
		tier_nodes = get_memtier_nodemask(memtier);
		/*
		 * find_next_best_node, use 'used' nodemask as a skip list.
		 * Add all memory nodes except the selected memory tier
		 * nodelist to skip list so that we find the best node from the
		 * memtier nodelist.
		 */
		nodes_andnot(tier_nodes, node_states[N_MEMORY], tier_nodes);

		/*
		 * Find all the nodes in the memory tier node list of same best distance.
		 * add them to the preferred mask. We randomly select between nodes
		 * in the preferred mask when allocating pages during demotion.
		 */
		do {
			target = find_next_best_node(node, &tier_nodes);
			if (target == NUMA_NO_NODE)
				break;

			distance = node_distance(node, target);
			if (distance == best_distance || best_distance == -1) {
				best_distance = distance;
				node_set(target, nd->preferred);
			} else {
				break;
			}
		} while (1);
	}
	/*
	 * Promotion is allowed from a memory tier to higher
	 * memory tier only if the memory tier doesn't include
	 * compute. We want to skip promotion from a memory tier,
	 * if any node that is part of the memory tier have CPUs.
	 * Once we detect such a memory tier, we consider that tier
	 * as top tiper from which promotion is not allowed.
	 */
	list_for_each_entry_reverse(memtier, &memory_tiers, list) {
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
		if (!nodes_empty(tier_nodes)) {
			/*
			 * abstract distance below the max value of this memtier
			 * is considered toptier.
			 */
			top_tier_adistance = memtier->adistance_start +
						MEMTIER_CHUNK_SIZE - 1;
			break;
		}
	}
	/*
	 * Now build the lower_tier mask for each node collecting node mask from
	 * all memory tier below it. This allows us to fallback demotion page
	 * allocation to a set of nodes that is closer the above selected
	 * perferred node.
	 */
	lower_tier = node_states[N_MEMORY];
	list_for_each_entry(memtier, &memory_tiers, list) {
		/*
		 * Keep removing current tier from lower_tier nodes,
		 * This will remove all nodes in current and above
		 * memory tier from the lower_tier mask.
		 */
		tier_nodes = get_memtier_nodemask(memtier);
		nodes_andnot(lower_tier, lower_tier, tier_nodes);
		memtier->lower_tier_mask = lower_tier;
	}
}
4526c542ab7SAneesh Kumar K.V
#else
/* Without CONFIG_MIGRATION there are no demotion targets to compute. */
static inline void establish_demotion_targets(void) {}
#endif /* CONFIG_MIGRATION */
4566c542ab7SAneesh Kumar K.V
/*
 * Associate @node with @memtype if the node has no type yet, and count
 * the mapping. Caller must hold memory_tier_lock.
 */
static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype)
{
	if (!node_memory_types[node].memtype)
		node_memory_types[node].memtype = memtype;
	/*
	 * For each device getting added in the same NUMA node
	 * with this specific memtype, bump the map count. We
	 * only take the memtype device reference once, so that
	 * changing a node's memtype can be done by dropping the
	 * single reference count taken here.
	 */

	if (node_memory_types[node].memtype == memtype) {
		if (!node_memory_types[node].map_count++)
			kref_get(&memtype->kref);
	}
}
4747b88bda3SAneesh Kumar K.V
/*
 * Place @node (which must be in N_MEMORY) into a memory tier based on
 * its memory type — defaulting to default_dram_type — and publish the
 * tier in pgdat->memtier for RCU readers. Returns the tier, or an
 * ERR_PTR on failure. Caller must hold memory_tier_lock.
 */
static struct memory_tier *set_node_memory_tier(int node)
{
	struct memory_tier *memtier;
	struct memory_dev_type *memtype;
	pg_data_t *pgdat = NODE_DATA(node);


	lockdep_assert_held_once(&memory_tier_lock);

	if (!node_state(node, N_MEMORY))
		return ERR_PTR(-EINVAL);

	/* Falls back to default_dram_type when no driver set a type. */
	__init_node_memory_type(node, default_dram_type);

	memtype = node_memory_types[node].memtype;
	node_set(node, memtype->nodes);
	memtier = find_create_memory_tier(memtype);
	if (!IS_ERR(memtier))
		rcu_assign_pointer(pgdat->memtier, memtier);
	return memtier;
}
496992bf775SAneesh Kumar K.V
/*
 * Unlink an empty tier and drop its sysfs device; the actual kfree
 * happens in memory_tier_device_release() on the final put.
 */
static void destroy_memory_tier(struct memory_tier *memtier)
{
	list_del(&memtier->list);
	device_unregister(&memtier->dev);
}
502c6123a19SAneesh Kumar K.V
/*
 * Detach @node from its memory tier: unpublish pgdat->memtier, remove
 * the node from its memory type's mask, and tear down the type/tier
 * linkage when they become empty. Returns true if the node actually
 * had a tier. Caller must hold memory_tier_lock.
 */
static bool clear_node_memory_tier(int node)
{
	bool cleared = false;
	pg_data_t *pgdat;
	struct memory_tier *memtier;

	pgdat = NODE_DATA(node);
	if (!pgdat)
		return false;

	/*
	 * Make sure that anybody looking at NODE_DATA who finds
	 * a valid memtier finds memory_dev_types with nodes still
	 * linked to the memtier. We achieve this by waiting for
	 * rcu read section to finish using synchronize_rcu.
	 * This also enables us to free the destroyed memory tier
	 * with kfree instead of kfree_rcu
	 */
	memtier = __node_get_memory_tier(node);
	if (memtier) {
		struct memory_dev_type *memtype;

		rcu_assign_pointer(pgdat->memtier, NULL);
		synchronize_rcu();
		memtype = node_memory_types[node].memtype;
		node_clear(node, memtype->nodes);
		if (nodes_empty(memtype->nodes)) {
			/* Last node of this type: unlink it, and drop the tier if empty. */
			list_del_init(&memtype->tier_sibiling);
			if (list_empty(&memtier->memory_types))
				destroy_memory_tier(memtier);
		}
		cleared = true;
	}
	return cleared;
}
538c6123a19SAneesh Kumar K.V
release_memtype(struct kref * kref)5397b88bda3SAneesh Kumar K.V static void release_memtype(struct kref *kref)
5407b88bda3SAneesh Kumar K.V {
5417b88bda3SAneesh Kumar K.V struct memory_dev_type *memtype;
5427b88bda3SAneesh Kumar K.V
5437b88bda3SAneesh Kumar K.V memtype = container_of(kref, struct memory_dev_type, kref);
5447b88bda3SAneesh Kumar K.V kfree(memtype);
5457b88bda3SAneesh Kumar K.V }
5467b88bda3SAneesh Kumar K.V
alloc_memory_type(int adistance)5477b88bda3SAneesh Kumar K.V struct memory_dev_type *alloc_memory_type(int adistance)
5487b88bda3SAneesh Kumar K.V {
5497b88bda3SAneesh Kumar K.V struct memory_dev_type *memtype;
5507b88bda3SAneesh Kumar K.V
5517b88bda3SAneesh Kumar K.V memtype = kmalloc(sizeof(*memtype), GFP_KERNEL);
5527b88bda3SAneesh Kumar K.V if (!memtype)
5537b88bda3SAneesh Kumar K.V return ERR_PTR(-ENOMEM);
5547b88bda3SAneesh Kumar K.V
5557b88bda3SAneesh Kumar K.V memtype->adistance = adistance;
5567b88bda3SAneesh Kumar K.V INIT_LIST_HEAD(&memtype->tier_sibiling);
5577b88bda3SAneesh Kumar K.V memtype->nodes = NODE_MASK_NONE;
5587b88bda3SAneesh Kumar K.V kref_init(&memtype->kref);
5597b88bda3SAneesh Kumar K.V return memtype;
5607b88bda3SAneesh Kumar K.V }
5617b88bda3SAneesh Kumar K.V EXPORT_SYMBOL_GPL(alloc_memory_type);
5627b88bda3SAneesh Kumar K.V
/* Drop a reference on @memtype; freed via release_memtype() on the last put. */
void put_memory_type(struct memory_dev_type *memtype)
{
	kref_put(&memtype->kref, release_memtype);
}
EXPORT_SYMBOL_GPL(put_memory_type);
5687b88bda3SAneesh Kumar K.V
/* Locked wrapper around __init_node_memory_type() for use by drivers. */
void init_node_memory_type(int node, struct memory_dev_type *memtype)
{

	mutex_lock(&memory_tier_lock);
	__init_node_memory_type(node, memtype);
	mutex_unlock(&memory_tier_lock);
}
EXPORT_SYMBOL_GPL(init_node_memory_type);
5777b88bda3SAneesh Kumar K.V
clear_node_memory_type(int node,struct memory_dev_type * memtype)5787b88bda3SAneesh Kumar K.V void clear_node_memory_type(int node, struct memory_dev_type *memtype)
5797b88bda3SAneesh Kumar K.V {
5807b88bda3SAneesh Kumar K.V mutex_lock(&memory_tier_lock);
5817b88bda3SAneesh Kumar K.V if (node_memory_types[node].memtype == memtype)
5827b88bda3SAneesh Kumar K.V node_memory_types[node].map_count--;
5837b88bda3SAneesh Kumar K.V /*
5847b88bda3SAneesh Kumar K.V * If we umapped all the attached devices to this node,
5857b88bda3SAneesh Kumar K.V * clear the node memory type.
5867b88bda3SAneesh Kumar K.V */
5877b88bda3SAneesh Kumar K.V if (!node_memory_types[node].map_count) {
5887b88bda3SAneesh Kumar K.V node_memory_types[node].memtype = NULL;
589bded67f8SMiaohe Lin put_memory_type(memtype);
5907b88bda3SAneesh Kumar K.V }
5917b88bda3SAneesh Kumar K.V mutex_unlock(&memory_tier_lock);
5927b88bda3SAneesh Kumar K.V }
5937b88bda3SAneesh Kumar K.V EXPORT_SYMBOL_GPL(clear_node_memory_type);
5947b88bda3SAneesh Kumar K.V
memtier_hotplug_callback(struct notifier_block * self,unsigned long action,void * _arg)595c6123a19SAneesh Kumar K.V static int __meminit memtier_hotplug_callback(struct notifier_block *self,
596c6123a19SAneesh Kumar K.V unsigned long action, void *_arg)
597c6123a19SAneesh Kumar K.V {
5986c542ab7SAneesh Kumar K.V struct memory_tier *memtier;
599c6123a19SAneesh Kumar K.V struct memory_notify *arg = _arg;
600c6123a19SAneesh Kumar K.V
601c6123a19SAneesh Kumar K.V /*
602c6123a19SAneesh Kumar K.V * Only update the node migration order when a node is
603c6123a19SAneesh Kumar K.V * changing status, like online->offline.
604c6123a19SAneesh Kumar K.V */
605c6123a19SAneesh Kumar K.V if (arg->status_change_nid < 0)
606c6123a19SAneesh Kumar K.V return notifier_from_errno(0);
607c6123a19SAneesh Kumar K.V
608c6123a19SAneesh Kumar K.V switch (action) {
609c6123a19SAneesh Kumar K.V case MEM_OFFLINE:
610c6123a19SAneesh Kumar K.V mutex_lock(&memory_tier_lock);
6116c542ab7SAneesh Kumar K.V if (clear_node_memory_tier(arg->status_change_nid))
6126c542ab7SAneesh Kumar K.V establish_demotion_targets();
613c6123a19SAneesh Kumar K.V mutex_unlock(&memory_tier_lock);
614c6123a19SAneesh Kumar K.V break;
615c6123a19SAneesh Kumar K.V case MEM_ONLINE:
616c6123a19SAneesh Kumar K.V mutex_lock(&memory_tier_lock);
6176c542ab7SAneesh Kumar K.V memtier = set_node_memory_tier(arg->status_change_nid);
6186c542ab7SAneesh Kumar K.V if (!IS_ERR(memtier))
6196c542ab7SAneesh Kumar K.V establish_demotion_targets();
620c6123a19SAneesh Kumar K.V mutex_unlock(&memory_tier_lock);
621c6123a19SAneesh Kumar K.V break;
622c6123a19SAneesh Kumar K.V }
623c6123a19SAneesh Kumar K.V
624c6123a19SAneesh Kumar K.V return notifier_from_errno(0);
625c6123a19SAneesh Kumar K.V }
626c6123a19SAneesh Kumar K.V
/*
 * memory_tier_init() - Boot-time setup of the memory tiering machinery.
 *
 * Registers the tier sysfs subsystem, allocates the per-node demotion
 * table (CONFIG_MIGRATION only), creates the default DRAM tier, places
 * every currently-present N_MEMORY node into a tier, computes demotion
 * targets, and finally registers the hotplug notifier so later node
 * on/offline events keep everything in sync.
 */
static int __init memory_tier_init(void)
{
	int ret, node;
	struct memory_tier *memtier;

	/* Tier devices are exposed through this virtual subsystem. */
	ret = subsys_virtual_register(&memory_tier_subsys, NULL);
	if (ret)
		panic("%s() failed to register memory tier subsystem\n", __func__);

#ifdef CONFIG_MIGRATION
	/*
	 * Allocation failure only WARNs rather than panics — presumably
	 * demotion is simply unavailable without node_demotion; confirm
	 * against the users of node_demotion.
	 */
	node_demotion = kcalloc(nr_node_ids, sizeof(struct demotion_nodes),
				GFP_KERNEL);
	WARN_ON(!node_demotion);
#endif
	mutex_lock(&memory_tier_lock);
	/*
	 * For now we can have 4 faster memory tiers with smaller adistance
	 * than default DRAM tier.
	 */
	default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM);
	if (IS_ERR(default_dram_type))
		panic("%s() failed to allocate default DRAM tier\n", __func__);

	/*
	 * Look at all the existing N_MEMORY nodes and add them to
	 * default memory tier or to a tier if we already have memory
	 * types assigned.
	 */
	for_each_node_state(node, N_MEMORY) {
		memtier = set_node_memory_tier(node);
		if (IS_ERR(memtier))
			/*
			 * Stop at the first failure; the memtiers set up
			 * so far remain usable.
			 */
			break;
	}
	establish_demotion_targets();
	mutex_unlock(&memory_tier_lock);

	hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
	return 0;
}
subsys_initcall(memory_tier_init);
67091952440SAneesh Kumar K.V
67191952440SAneesh Kumar K.V bool numa_demotion_enabled = false;
67291952440SAneesh Kumar K.V
67391952440SAneesh Kumar K.V #ifdef CONFIG_MIGRATION
67491952440SAneesh Kumar K.V #ifdef CONFIG_SYSFS
demotion_enabled_show(struct kobject * kobj,struct kobj_attribute * attr,char * buf)675*8d3a7d79SMiaohe Lin static ssize_t demotion_enabled_show(struct kobject *kobj,
67691952440SAneesh Kumar K.V struct kobj_attribute *attr, char *buf)
67791952440SAneesh Kumar K.V {
67891952440SAneesh Kumar K.V return sysfs_emit(buf, "%s\n",
67991952440SAneesh Kumar K.V numa_demotion_enabled ? "true" : "false");
68091952440SAneesh Kumar K.V }
68191952440SAneesh Kumar K.V
demotion_enabled_store(struct kobject * kobj,struct kobj_attribute * attr,const char * buf,size_t count)682*8d3a7d79SMiaohe Lin static ssize_t demotion_enabled_store(struct kobject *kobj,
68391952440SAneesh Kumar K.V struct kobj_attribute *attr,
68491952440SAneesh Kumar K.V const char *buf, size_t count)
68591952440SAneesh Kumar K.V {
68691952440SAneesh Kumar K.V ssize_t ret;
68791952440SAneesh Kumar K.V
68891952440SAneesh Kumar K.V ret = kstrtobool(buf, &numa_demotion_enabled);
68991952440SAneesh Kumar K.V if (ret)
69091952440SAneesh Kumar K.V return ret;
69191952440SAneesh Kumar K.V
69291952440SAneesh Kumar K.V return count;
69391952440SAneesh Kumar K.V }
69491952440SAneesh Kumar K.V
/* Read/write attribute backed by demotion_enabled_show()/_store(). */
static struct kobj_attribute numa_demotion_enabled_attr =
	__ATTR_RW(demotion_enabled);

/* Attributes hung under the "numa" kobject created in numa_init_sysfs(). */
static struct attribute *numa_attrs[] = {
	&numa_demotion_enabled_attr.attr,
	NULL,
};

static const struct attribute_group numa_attr_group = {
	.attrs = numa_attrs,
};
70691952440SAneesh Kumar K.V
numa_init_sysfs(void)70791952440SAneesh Kumar K.V static int __init numa_init_sysfs(void)
70891952440SAneesh Kumar K.V {
70991952440SAneesh Kumar K.V int err;
71091952440SAneesh Kumar K.V struct kobject *numa_kobj;
71191952440SAneesh Kumar K.V
71291952440SAneesh Kumar K.V numa_kobj = kobject_create_and_add("numa", mm_kobj);
71391952440SAneesh Kumar K.V if (!numa_kobj) {
71491952440SAneesh Kumar K.V pr_err("failed to create numa kobject\n");
71591952440SAneesh Kumar K.V return -ENOMEM;
71691952440SAneesh Kumar K.V }
71791952440SAneesh Kumar K.V err = sysfs_create_group(numa_kobj, &numa_attr_group);
71891952440SAneesh Kumar K.V if (err) {
71991952440SAneesh Kumar K.V pr_err("failed to register numa group\n");
72091952440SAneesh Kumar K.V goto delete_obj;
72191952440SAneesh Kumar K.V }
72291952440SAneesh Kumar K.V return 0;
72391952440SAneesh Kumar K.V
72491952440SAneesh Kumar K.V delete_obj:
72591952440SAneesh Kumar K.V kobject_put(numa_kobj);
72691952440SAneesh Kumar K.V return err;
72791952440SAneesh Kumar K.V }
72891952440SAneesh Kumar K.V subsys_initcall(numa_init_sysfs);
72991952440SAneesh Kumar K.V #endif /* CONFIG_SYSFS */
73091952440SAneesh Kumar K.V #endif
731