xref: /openbmc/linux/drivers/base/node.c (revision 628d06a4)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
310fbcf4cSKay Sievers  * Basic Node interface support
41da177e4SLinus Torvalds  */
51da177e4SLinus Torvalds 
61da177e4SLinus Torvalds #include <linux/module.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/mm.h>
9c04fc586SGary Hade #include <linux/memory.h>
10fa25c503SKOSAKI Motohiro #include <linux/vmstat.h>
116e259e7dSAndrew Morton #include <linux/notifier.h>
121da177e4SLinus Torvalds #include <linux/node.h>
131da177e4SLinus Torvalds #include <linux/hugetlb.h>
14ed4a6d7fSMel Gorman #include <linux/compaction.h>
151da177e4SLinus Torvalds #include <linux/cpumask.h>
161da177e4SLinus Torvalds #include <linux/topology.h>
171da177e4SLinus Torvalds #include <linux/nodemask.h>
1876b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h>
19bde631a5SLee Schermerhorn #include <linux/device.h>
2008d9dbe7SKeith Busch #include <linux/pm_runtime.h>
21af936a16SLee Schermerhorn #include <linux/swap.h>
2218e5b539STejun Heo #include <linux/slab.h>
231da177e4SLinus Torvalds 
2410fbcf4cSKay Sievers static struct bus_type node_subsys = {
25af5ca3f4SKay Sievers 	.name = "node",
2610fbcf4cSKay Sievers 	.dev_name = "node",
271da177e4SLinus Torvalds };
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds 
305aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
311da177e4SLinus Torvalds {
32064f0e93SZhen Lei 	ssize_t n;
33064f0e93SZhen Lei 	cpumask_var_t mask;
341da177e4SLinus Torvalds 	struct node *node_dev = to_node(dev);
351da177e4SLinus Torvalds 
3639106dcfSMike Travis 	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
3739106dcfSMike Travis 	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
381da177e4SLinus Torvalds 
39064f0e93SZhen Lei 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
40064f0e93SZhen Lei 		return 0;
41064f0e93SZhen Lei 
42064f0e93SZhen Lei 	cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
43064f0e93SZhen Lei 	n = cpumap_print_to_pagebuf(list, buf, mask);
44064f0e93SZhen Lei 	free_cpumask_var(mask);
45064f0e93SZhen Lei 
46064f0e93SZhen Lei 	return n;
471da177e4SLinus Torvalds }
481da177e4SLinus Torvalds 
4910fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev,
5010fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
5139106dcfSMike Travis {
525aaba363SSudeep Holla 	return node_read_cpumap(dev, false, buf);
5339106dcfSMike Travis }
5410fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev,
5510fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
5639106dcfSMike Travis {
575aaba363SSudeep Holla 	return node_read_cpumap(dev, true, buf);
5839106dcfSMike Travis }
5939106dcfSMike Travis 
6010fbcf4cSKay Sievers static DEVICE_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
6110fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
621da177e4SLinus Torvalds 
6308d9dbe7SKeith Busch /**
6408d9dbe7SKeith Busch  * struct node_access_nodes - Access class device to hold user visible
6508d9dbe7SKeith Busch  * 			      relationships to other nodes.
6608d9dbe7SKeith Busch  * @dev:	Device for this memory access class
6708d9dbe7SKeith Busch  * @list_node:	List element in the node's access list
6808d9dbe7SKeith Busch  * @access:	The access class rank
6958cb346cSMauro Carvalho Chehab  * @hmem_attrs: Heterogeneous memory performance attributes
7008d9dbe7SKeith Busch  */
7108d9dbe7SKeith Busch struct node_access_nodes {
7208d9dbe7SKeith Busch 	struct device		dev;
7308d9dbe7SKeith Busch 	struct list_head	list_node;
7408d9dbe7SKeith Busch 	unsigned		access;
75e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING
76e1cf33aaSKeith Busch 	struct node_hmem_attrs	hmem_attrs;
77e1cf33aaSKeith Busch #endif
7808d9dbe7SKeith Busch };
7908d9dbe7SKeith Busch #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
8008d9dbe7SKeith Busch 
8108d9dbe7SKeith Busch static struct attribute *node_init_access_node_attrs[] = {
8208d9dbe7SKeith Busch 	NULL,
8308d9dbe7SKeith Busch };
8408d9dbe7SKeith Busch 
8508d9dbe7SKeith Busch static struct attribute *node_targ_access_node_attrs[] = {
8608d9dbe7SKeith Busch 	NULL,
8708d9dbe7SKeith Busch };
8808d9dbe7SKeith Busch 
8908d9dbe7SKeith Busch static const struct attribute_group initiators = {
9008d9dbe7SKeith Busch 	.name	= "initiators",
9108d9dbe7SKeith Busch 	.attrs	= node_init_access_node_attrs,
9208d9dbe7SKeith Busch };
9308d9dbe7SKeith Busch 
9408d9dbe7SKeith Busch static const struct attribute_group targets = {
9508d9dbe7SKeith Busch 	.name	= "targets",
9608d9dbe7SKeith Busch 	.attrs	= node_targ_access_node_attrs,
9708d9dbe7SKeith Busch };
9808d9dbe7SKeith Busch 
9908d9dbe7SKeith Busch static const struct attribute_group *node_access_node_groups[] = {
10008d9dbe7SKeith Busch 	&initiators,
10108d9dbe7SKeith Busch 	&targets,
10208d9dbe7SKeith Busch 	NULL,
10308d9dbe7SKeith Busch };
10408d9dbe7SKeith Busch 
10508d9dbe7SKeith Busch static void node_remove_accesses(struct node *node)
10608d9dbe7SKeith Busch {
10708d9dbe7SKeith Busch 	struct node_access_nodes *c, *cnext;
10808d9dbe7SKeith Busch 
10908d9dbe7SKeith Busch 	list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
11008d9dbe7SKeith Busch 		list_del(&c->list_node);
11108d9dbe7SKeith Busch 		device_unregister(&c->dev);
11208d9dbe7SKeith Busch 	}
11308d9dbe7SKeith Busch }
11408d9dbe7SKeith Busch 
/* Device release callback: free the node_access_nodes that embeds @dev. */
static void node_access_release(struct device *dev)
{
	struct node_access_nodes *access_node = to_access_nodes(dev);

	kfree(access_node);
}
11908d9dbe7SKeith Busch 
12008d9dbe7SKeith Busch static struct node_access_nodes *node_init_node_access(struct node *node,
12108d9dbe7SKeith Busch 						       unsigned access)
12208d9dbe7SKeith Busch {
12308d9dbe7SKeith Busch 	struct node_access_nodes *access_node;
12408d9dbe7SKeith Busch 	struct device *dev;
12508d9dbe7SKeith Busch 
12608d9dbe7SKeith Busch 	list_for_each_entry(access_node, &node->access_list, list_node)
12708d9dbe7SKeith Busch 		if (access_node->access == access)
12808d9dbe7SKeith Busch 			return access_node;
12908d9dbe7SKeith Busch 
13008d9dbe7SKeith Busch 	access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
13108d9dbe7SKeith Busch 	if (!access_node)
13208d9dbe7SKeith Busch 		return NULL;
13308d9dbe7SKeith Busch 
13408d9dbe7SKeith Busch 	access_node->access = access;
13508d9dbe7SKeith Busch 	dev = &access_node->dev;
13608d9dbe7SKeith Busch 	dev->parent = &node->dev;
13708d9dbe7SKeith Busch 	dev->release = node_access_release;
13808d9dbe7SKeith Busch 	dev->groups = node_access_node_groups;
13908d9dbe7SKeith Busch 	if (dev_set_name(dev, "access%u", access))
14008d9dbe7SKeith Busch 		goto free;
14108d9dbe7SKeith Busch 
14208d9dbe7SKeith Busch 	if (device_register(dev))
14308d9dbe7SKeith Busch 		goto free_name;
14408d9dbe7SKeith Busch 
14508d9dbe7SKeith Busch 	pm_runtime_no_callbacks(dev);
14608d9dbe7SKeith Busch 	list_add_tail(&access_node->list_node, &node->access_list);
14708d9dbe7SKeith Busch 	return access_node;
14808d9dbe7SKeith Busch free_name:
14908d9dbe7SKeith Busch 	kfree_const(dev->kobj.name);
15008d9dbe7SKeith Busch free:
15108d9dbe7SKeith Busch 	kfree(access_node);
15208d9dbe7SKeith Busch 	return NULL;
15308d9dbe7SKeith Busch }
15408d9dbe7SKeith Busch 
155e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING
156e1cf33aaSKeith Busch #define ACCESS_ATTR(name) 						   \
157e1cf33aaSKeith Busch static ssize_t name##_show(struct device *dev,				   \
158e1cf33aaSKeith Busch 			   struct device_attribute *attr,		   \
159e1cf33aaSKeith Busch 			   char *buf)					   \
160e1cf33aaSKeith Busch {									   \
161e1cf33aaSKeith Busch 	return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \
162e1cf33aaSKeith Busch }									   \
163e1cf33aaSKeith Busch static DEVICE_ATTR_RO(name);
164e1cf33aaSKeith Busch 
165e1cf33aaSKeith Busch ACCESS_ATTR(read_bandwidth)
166e1cf33aaSKeith Busch ACCESS_ATTR(read_latency)
167e1cf33aaSKeith Busch ACCESS_ATTR(write_bandwidth)
168e1cf33aaSKeith Busch ACCESS_ATTR(write_latency)
169e1cf33aaSKeith Busch 
170e1cf33aaSKeith Busch static struct attribute *access_attrs[] = {
171e1cf33aaSKeith Busch 	&dev_attr_read_bandwidth.attr,
172e1cf33aaSKeith Busch 	&dev_attr_read_latency.attr,
173e1cf33aaSKeith Busch 	&dev_attr_write_bandwidth.attr,
174e1cf33aaSKeith Busch 	&dev_attr_write_latency.attr,
175e1cf33aaSKeith Busch 	NULL,
176e1cf33aaSKeith Busch };
177e1cf33aaSKeith Busch 
178e1cf33aaSKeith Busch /**
179e1cf33aaSKeith Busch  * node_set_perf_attrs - Set the performance values for given access class
180e1cf33aaSKeith Busch  * @nid: Node identifier to be set
181e1cf33aaSKeith Busch  * @hmem_attrs: Heterogeneous memory performance attributes
182e1cf33aaSKeith Busch  * @access: The access class the for the given attributes
183e1cf33aaSKeith Busch  */
184e1cf33aaSKeith Busch void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
185e1cf33aaSKeith Busch 			 unsigned access)
186e1cf33aaSKeith Busch {
187e1cf33aaSKeith Busch 	struct node_access_nodes *c;
188e1cf33aaSKeith Busch 	struct node *node;
189e1cf33aaSKeith Busch 	int i;
190e1cf33aaSKeith Busch 
191e1cf33aaSKeith Busch 	if (WARN_ON_ONCE(!node_online(nid)))
192e1cf33aaSKeith Busch 		return;
193e1cf33aaSKeith Busch 
194e1cf33aaSKeith Busch 	node = node_devices[nid];
195e1cf33aaSKeith Busch 	c = node_init_node_access(node, access);
196e1cf33aaSKeith Busch 	if (!c)
197e1cf33aaSKeith Busch 		return;
198e1cf33aaSKeith Busch 
199e1cf33aaSKeith Busch 	c->hmem_attrs = *hmem_attrs;
200e1cf33aaSKeith Busch 	for (i = 0; access_attrs[i] != NULL; i++) {
201e1cf33aaSKeith Busch 		if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i],
202e1cf33aaSKeith Busch 					    "initiators")) {
203e1cf33aaSKeith Busch 			pr_info("failed to add performance attribute to node %d\n",
204e1cf33aaSKeith Busch 				nid);
205e1cf33aaSKeith Busch 			break;
206e1cf33aaSKeith Busch 		}
207e1cf33aaSKeith Busch 	}
208e1cf33aaSKeith Busch }
209acc02a10SKeith Busch 
210acc02a10SKeith Busch /**
211acc02a10SKeith Busch  * struct node_cache_info - Internal tracking for memory node caches
212acc02a10SKeith Busch  * @dev:	Device represeting the cache level
213acc02a10SKeith Busch  * @node:	List element for tracking in the node
214acc02a10SKeith Busch  * @cache_attrs:Attributes for this cache level
215acc02a10SKeith Busch  */
216acc02a10SKeith Busch struct node_cache_info {
217acc02a10SKeith Busch 	struct device dev;
218acc02a10SKeith Busch 	struct list_head node;
219acc02a10SKeith Busch 	struct node_cache_attrs cache_attrs;
220acc02a10SKeith Busch };
221acc02a10SKeith Busch #define to_cache_info(device) container_of(device, struct node_cache_info, dev)
222acc02a10SKeith Busch 
223acc02a10SKeith Busch #define CACHE_ATTR(name, fmt) 						\
224acc02a10SKeith Busch static ssize_t name##_show(struct device *dev,				\
225acc02a10SKeith Busch 			   struct device_attribute *attr,		\
226acc02a10SKeith Busch 			   char *buf)					\
227acc02a10SKeith Busch {									\
228acc02a10SKeith Busch 	return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\
229acc02a10SKeith Busch }									\
230acc02a10SKeith Busch DEVICE_ATTR_RO(name);
231acc02a10SKeith Busch 
232acc02a10SKeith Busch CACHE_ATTR(size, "%llu")
233acc02a10SKeith Busch CACHE_ATTR(line_size, "%u")
234acc02a10SKeith Busch CACHE_ATTR(indexing, "%u")
235acc02a10SKeith Busch CACHE_ATTR(write_policy, "%u")
236acc02a10SKeith Busch 
237acc02a10SKeith Busch static struct attribute *cache_attrs[] = {
238acc02a10SKeith Busch 	&dev_attr_indexing.attr,
239acc02a10SKeith Busch 	&dev_attr_size.attr,
240acc02a10SKeith Busch 	&dev_attr_line_size.attr,
241acc02a10SKeith Busch 	&dev_attr_write_policy.attr,
242acc02a10SKeith Busch 	NULL,
243acc02a10SKeith Busch };
244acc02a10SKeith Busch ATTRIBUTE_GROUPS(cache);
245acc02a10SKeith Busch 
/*
 * Release callback for the "memory_side_cache" device, which is allocated
 * as a bare struct device, so the device pointer itself is what is freed.
 */
static void node_cache_release(struct device *dev)
{
	kfree(dev);
}
250acc02a10SKeith Busch 
/* Release callback for a cache level device: free the enclosing node_cache_info. */
static void node_cacheinfo_release(struct device *dev)
{
	kfree(to_cache_info(dev));
}
256acc02a10SKeith Busch 
257acc02a10SKeith Busch static void node_init_cache_dev(struct node *node)
258acc02a10SKeith Busch {
259acc02a10SKeith Busch 	struct device *dev;
260acc02a10SKeith Busch 
261acc02a10SKeith Busch 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
262acc02a10SKeith Busch 	if (!dev)
263acc02a10SKeith Busch 		return;
264acc02a10SKeith Busch 
265acc02a10SKeith Busch 	dev->parent = &node->dev;
266acc02a10SKeith Busch 	dev->release = node_cache_release;
267acc02a10SKeith Busch 	if (dev_set_name(dev, "memory_side_cache"))
268acc02a10SKeith Busch 		goto free_dev;
269acc02a10SKeith Busch 
270acc02a10SKeith Busch 	if (device_register(dev))
271acc02a10SKeith Busch 		goto free_name;
272acc02a10SKeith Busch 
273acc02a10SKeith Busch 	pm_runtime_no_callbacks(dev);
274acc02a10SKeith Busch 	node->cache_dev = dev;
275acc02a10SKeith Busch 	return;
276acc02a10SKeith Busch free_name:
277acc02a10SKeith Busch 	kfree_const(dev->kobj.name);
278acc02a10SKeith Busch free_dev:
279acc02a10SKeith Busch 	kfree(dev);
280acc02a10SKeith Busch }
281acc02a10SKeith Busch 
282acc02a10SKeith Busch /**
283acc02a10SKeith Busch  * node_add_cache() - add cache attribute to a memory node
284acc02a10SKeith Busch  * @nid: Node identifier that has new cache attributes
285acc02a10SKeith Busch  * @cache_attrs: Attributes for the cache being added
286acc02a10SKeith Busch  */
287acc02a10SKeith Busch void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
288acc02a10SKeith Busch {
289acc02a10SKeith Busch 	struct node_cache_info *info;
290acc02a10SKeith Busch 	struct device *dev;
291acc02a10SKeith Busch 	struct node *node;
292acc02a10SKeith Busch 
293acc02a10SKeith Busch 	if (!node_online(nid) || !node_devices[nid])
294acc02a10SKeith Busch 		return;
295acc02a10SKeith Busch 
296acc02a10SKeith Busch 	node = node_devices[nid];
297acc02a10SKeith Busch 	list_for_each_entry(info, &node->cache_attrs, node) {
298acc02a10SKeith Busch 		if (info->cache_attrs.level == cache_attrs->level) {
299acc02a10SKeith Busch 			dev_warn(&node->dev,
300acc02a10SKeith Busch 				"attempt to add duplicate cache level:%d\n",
301acc02a10SKeith Busch 				cache_attrs->level);
302acc02a10SKeith Busch 			return;
303acc02a10SKeith Busch 		}
304acc02a10SKeith Busch 	}
305acc02a10SKeith Busch 
306acc02a10SKeith Busch 	if (!node->cache_dev)
307acc02a10SKeith Busch 		node_init_cache_dev(node);
308acc02a10SKeith Busch 	if (!node->cache_dev)
309acc02a10SKeith Busch 		return;
310acc02a10SKeith Busch 
311acc02a10SKeith Busch 	info = kzalloc(sizeof(*info), GFP_KERNEL);
312acc02a10SKeith Busch 	if (!info)
313acc02a10SKeith Busch 		return;
314acc02a10SKeith Busch 
315acc02a10SKeith Busch 	dev = &info->dev;
316acc02a10SKeith Busch 	dev->parent = node->cache_dev;
317acc02a10SKeith Busch 	dev->release = node_cacheinfo_release;
318acc02a10SKeith Busch 	dev->groups = cache_groups;
319acc02a10SKeith Busch 	if (dev_set_name(dev, "index%d", cache_attrs->level))
320acc02a10SKeith Busch 		goto free_cache;
321acc02a10SKeith Busch 
322acc02a10SKeith Busch 	info->cache_attrs = *cache_attrs;
323acc02a10SKeith Busch 	if (device_register(dev)) {
324acc02a10SKeith Busch 		dev_warn(&node->dev, "failed to add cache level:%d\n",
325acc02a10SKeith Busch 			 cache_attrs->level);
326acc02a10SKeith Busch 		goto free_name;
327acc02a10SKeith Busch 	}
328acc02a10SKeith Busch 	pm_runtime_no_callbacks(dev);
329acc02a10SKeith Busch 	list_add_tail(&info->node, &node->cache_attrs);
330acc02a10SKeith Busch 	return;
331acc02a10SKeith Busch free_name:
332acc02a10SKeith Busch 	kfree_const(dev->kobj.name);
333acc02a10SKeith Busch free_cache:
334acc02a10SKeith Busch 	kfree(info);
335acc02a10SKeith Busch }
336acc02a10SKeith Busch 
337acc02a10SKeith Busch static void node_remove_caches(struct node *node)
338acc02a10SKeith Busch {
339acc02a10SKeith Busch 	struct node_cache_info *info, *next;
340acc02a10SKeith Busch 
341acc02a10SKeith Busch 	if (!node->cache_dev)
342acc02a10SKeith Busch 		return;
343acc02a10SKeith Busch 
344acc02a10SKeith Busch 	list_for_each_entry_safe(info, next, &node->cache_attrs, node) {
345acc02a10SKeith Busch 		list_del(&info->node);
346acc02a10SKeith Busch 		device_unregister(&info->dev);
347acc02a10SKeith Busch 	}
348acc02a10SKeith Busch 	device_unregister(node->cache_dev);
349acc02a10SKeith Busch }
350acc02a10SKeith Busch 
351acc02a10SKeith Busch static void node_init_caches(unsigned int nid)
352acc02a10SKeith Busch {
353acc02a10SKeith Busch 	INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
354acc02a10SKeith Busch }
355acc02a10SKeith Busch #else
/* Without CONFIG_HMEM_REPORTING there are no cache devices to manage. */
static void node_init_caches(unsigned int nid)
{
}

static void node_remove_caches(struct node *node)
{
}
358e1cf33aaSKeith Busch #endif
359e1cf33aaSKeith Busch 
3601da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10))
36110fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev,
36210fbcf4cSKay Sievers 			struct device_attribute *attr, char *buf)
3631da177e4SLinus Torvalds {
3641da177e4SLinus Torvalds 	int n;
3651da177e4SLinus Torvalds 	int nid = dev->id;
366599d0c95SMel Gorman 	struct pglist_data *pgdat = NODE_DATA(nid);
3671da177e4SLinus Torvalds 	struct sysinfo i;
36861f94e18SVlastimil Babka 	unsigned long sreclaimable, sunreclaimable;
3691da177e4SLinus Torvalds 
3701da177e4SLinus Torvalds 	si_meminfo_node(&i, nid);
37161f94e18SVlastimil Babka 	sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
37261f94e18SVlastimil Babka 	sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
3737ee92255SKOSAKI Motohiro 	n = sprintf(buf,
3741da177e4SLinus Torvalds 		       "Node %d MemTotal:       %8lu kB\n"
3751da177e4SLinus Torvalds 		       "Node %d MemFree:        %8lu kB\n"
3761da177e4SLinus Torvalds 		       "Node %d MemUsed:        %8lu kB\n"
3771da177e4SLinus Torvalds 		       "Node %d Active:         %8lu kB\n"
3781da177e4SLinus Torvalds 		       "Node %d Inactive:       %8lu kB\n"
3794f98a2feSRik van Riel 		       "Node %d Active(anon):   %8lu kB\n"
3804f98a2feSRik van Riel 		       "Node %d Inactive(anon): %8lu kB\n"
3814f98a2feSRik van Riel 		       "Node %d Active(file):   %8lu kB\n"
3824f98a2feSRik van Riel 		       "Node %d Inactive(file): %8lu kB\n"
3835344b7e6SNick Piggin 		       "Node %d Unevictable:    %8lu kB\n"
3847ee92255SKOSAKI Motohiro 		       "Node %d Mlocked:        %8lu kB\n",
3857ee92255SKOSAKI Motohiro 		       nid, K(i.totalram),
3867ee92255SKOSAKI Motohiro 		       nid, K(i.freeram),
3877ee92255SKOSAKI Motohiro 		       nid, K(i.totalram - i.freeram),
388599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
389599d0c95SMel Gorman 				node_page_state(pgdat, NR_ACTIVE_FILE)),
390599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
391599d0c95SMel Gorman 				node_page_state(pgdat, NR_INACTIVE_FILE)),
392599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
393599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
394599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
395599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
396599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
39775ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
3987ee92255SKOSAKI Motohiro 
399182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM
4007ee92255SKOSAKI Motohiro 	n += sprintf(buf + n,
4011da177e4SLinus Torvalds 		       "Node %d HighTotal:      %8lu kB\n"
4021da177e4SLinus Torvalds 		       "Node %d HighFree:       %8lu kB\n"
4031da177e4SLinus Torvalds 		       "Node %d LowTotal:       %8lu kB\n"
4047ee92255SKOSAKI Motohiro 		       "Node %d LowFree:        %8lu kB\n",
4057ee92255SKOSAKI Motohiro 		       nid, K(i.totalhigh),
4067ee92255SKOSAKI Motohiro 		       nid, K(i.freehigh),
4077ee92255SKOSAKI Motohiro 		       nid, K(i.totalram - i.totalhigh),
4087ee92255SKOSAKI Motohiro 		       nid, K(i.freeram - i.freehigh));
409182e8e23SChristoph Lameter #endif
4107ee92255SKOSAKI Motohiro 	n += sprintf(buf + n,
411c07e02dbSMartin Hicks 		       "Node %d Dirty:          %8lu kB\n"
412c07e02dbSMartin Hicks 		       "Node %d Writeback:      %8lu kB\n"
413347ce434SChristoph Lameter 		       "Node %d FilePages:      %8lu kB\n"
414c07e02dbSMartin Hicks 		       "Node %d Mapped:         %8lu kB\n"
415f3dbd344SChristoph Lameter 		       "Node %d AnonPages:      %8lu kB\n"
4164b02108aSKOSAKI Motohiro 		       "Node %d Shmem:          %8lu kB\n"
417c6a7f572SKOSAKI Motohiro 		       "Node %d KernelStack:    %8lu kB\n"
418628d06a4SSami Tolvanen #ifdef CONFIG_SHADOW_CALL_STACK
419628d06a4SSami Tolvanen 		       "Node %d ShadowCallStack:%8lu kB\n"
420628d06a4SSami Tolvanen #endif
421df849a15SChristoph Lameter 		       "Node %d PageTables:     %8lu kB\n"
422f5ef68daSAndrew Morton 		       "Node %d NFS_Unstable:   %8lu kB\n"
423d2c5e30cSChristoph Lameter 		       "Node %d Bounce:         %8lu kB\n"
424fc3ba692SMiklos Szeredi 		       "Node %d WritebackTmp:   %8lu kB\n"
42561f94e18SVlastimil Babka 		       "Node %d KReclaimable:   %8lu kB\n"
426972d1a7bSChristoph Lameter 		       "Node %d Slab:           %8lu kB\n"
427972d1a7bSChristoph Lameter 		       "Node %d SReclaimable:   %8lu kB\n"
42805b258e9SDavid Rientjes 		       "Node %d SUnreclaim:     %8lu kB\n"
42905b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE
43005b258e9SDavid Rientjes 		       "Node %d AnonHugePages:  %8lu kB\n"
43165c45377SKirill A. Shutemov 		       "Node %d ShmemHugePages: %8lu kB\n"
43265c45377SKirill A. Shutemov 		       "Node %d ShmemPmdMapped: %8lu kB\n"
43360fbf0abSSong Liu 		       "Node %d FileHugePages: %8lu kB\n"
43460fbf0abSSong Liu 		       "Node %d FilePmdMapped: %8lu kB\n"
43505b258e9SDavid Rientjes #endif
43605b258e9SDavid Rientjes 			,
43711fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
43811fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_WRITEBACK)),
43911fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
44050658e2eSMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
4414b9d0fabSMel Gorman 		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
442cc7452b6SRafael Aquini 		       nid, K(i.sharedram),
443d30dd8beSAndy Lutomirski 		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
444628d06a4SSami Tolvanen #ifdef CONFIG_SHADOW_CALL_STACK
445628d06a4SSami Tolvanen 		       nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
446628d06a4SSami Tolvanen #endif
44775ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
44811fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
44975ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
45011fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
45161f94e18SVlastimil Babka 		       nid, K(sreclaimable +
45261f94e18SVlastimil Babka 			      node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
45361f94e18SVlastimil Babka 		       nid, K(sreclaimable + sunreclaimable),
45461f94e18SVlastimil Babka 		       nid, K(sreclaimable),
45561f94e18SVlastimil Babka 		       nid, K(sunreclaimable)
45605b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE
45761f94e18SVlastimil Babka 		       ,
45811fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ANON_THPS) *
45965c45377SKirill A. Shutemov 				       HPAGE_PMD_NR),
46011fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
46165c45377SKirill A. Shutemov 				       HPAGE_PMD_NR),
46211fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
46360fbf0abSSong Liu 				       HPAGE_PMD_NR),
46460fbf0abSSong Liu 		       nid, K(node_page_state(pgdat, NR_FILE_THPS) *
46560fbf0abSSong Liu 				       HPAGE_PMD_NR),
46660fbf0abSSong Liu 		       nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
46761f94e18SVlastimil Babka 				       HPAGE_PMD_NR)
46805b258e9SDavid Rientjes #endif
46961f94e18SVlastimil Babka 		       );
4701da177e4SLinus Torvalds 	n += hugetlb_report_node_meminfo(nid, buf + n);
4711da177e4SLinus Torvalds 	return n;
4721da177e4SLinus Torvalds }
4731da177e4SLinus Torvalds 
4741da177e4SLinus Torvalds #undef K
47510fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
4761da177e4SLinus Torvalds 
47710fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev,
47810fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
4791da177e4SLinus Torvalds {
4801da177e4SLinus Torvalds 	return sprintf(buf,
4811da177e4SLinus Torvalds 		       "numa_hit %lu\n"
4821da177e4SLinus Torvalds 		       "numa_miss %lu\n"
4831da177e4SLinus Torvalds 		       "numa_foreign %lu\n"
4841da177e4SLinus Torvalds 		       "interleave_hit %lu\n"
4851da177e4SLinus Torvalds 		       "local_node %lu\n"
4861da177e4SLinus Torvalds 		       "other_node %lu\n",
4873a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_HIT),
4883a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_MISS),
4893a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_FOREIGN),
4903a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
4913a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_LOCAL),
4923a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_OTHER));
4931da177e4SLinus Torvalds }
49410fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
4951da177e4SLinus Torvalds 
49610fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev,
49710fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
4982ac39037SMichael Rubin {
4992ac39037SMichael Rubin 	int nid = dev->id;
50075ef7184SMel Gorman 	struct pglist_data *pgdat = NODE_DATA(nid);
501fa25c503SKOSAKI Motohiro 	int i;
502fa25c503SKOSAKI Motohiro 	int n = 0;
503fa25c503SKOSAKI Motohiro 
504fa25c503SKOSAKI Motohiro 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
5059d7ea9a2SKonstantin Khlebnikov 		n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i),
50675ef7184SMel Gorman 			     sum_zone_node_page_state(nid, i));
50775ef7184SMel Gorman 
5083a321d2aSKemi Wang #ifdef CONFIG_NUMA
5093a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
5109d7ea9a2SKonstantin Khlebnikov 		n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i),
5113a321d2aSKemi Wang 			     sum_zone_numa_state(nid, i));
5123a321d2aSKemi Wang #endif
5133a321d2aSKemi Wang 
5143a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
5159d7ea9a2SKonstantin Khlebnikov 		n += sprintf(buf+n, "%s %lu\n", node_stat_name(i),
51675ef7184SMel Gorman 			     node_page_state(pgdat, i));
517fa25c503SKOSAKI Motohiro 
518fa25c503SKOSAKI Motohiro 	return n;
5192ac39037SMichael Rubin }
52010fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
5212ac39037SMichael Rubin 
52210fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev,
52310fbcf4cSKay Sievers 			struct device_attribute *attr, char *buf)
5241da177e4SLinus Torvalds {
5251da177e4SLinus Torvalds 	int nid = dev->id;
5261da177e4SLinus Torvalds 	int len = 0;
5271da177e4SLinus Torvalds 	int i;
5281da177e4SLinus Torvalds 
52912ee3c0aSDavid Rientjes 	/*
53012ee3c0aSDavid Rientjes 	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
53112ee3c0aSDavid Rientjes 	 * at the most (distance + space or newline).
53212ee3c0aSDavid Rientjes 	 */
53312ee3c0aSDavid Rientjes 	BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds 	for_each_online_node(i)
5361da177e4SLinus Torvalds 		len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i));
5371da177e4SLinus Torvalds 
5381da177e4SLinus Torvalds 	len += sprintf(buf + len, "\n");
5391da177e4SLinus Torvalds 	return len;
5401da177e4SLinus Torvalds }
54110fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
5421da177e4SLinus Torvalds 
5433c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = {
5443c9b8aafSTakashi Iwai 	&dev_attr_cpumap.attr,
5453c9b8aafSTakashi Iwai 	&dev_attr_cpulist.attr,
5463c9b8aafSTakashi Iwai 	&dev_attr_meminfo.attr,
5473c9b8aafSTakashi Iwai 	&dev_attr_numastat.attr,
5483c9b8aafSTakashi Iwai 	&dev_attr_distance.attr,
5493c9b8aafSTakashi Iwai 	&dev_attr_vmstat.attr,
5503c9b8aafSTakashi Iwai 	NULL
5513c9b8aafSTakashi Iwai };
5527ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev);
5533c9b8aafSTakashi Iwai 
5549a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS
5559a305230SLee Schermerhorn /*
5569a305230SLee Schermerhorn  * hugetlbfs per node attributes registration interface:
5579a305230SLee Schermerhorn  * When/if hugetlb[fs] subsystem initializes [sometime after this module],
5584faf8d95SLee Schermerhorn  * it will register its per node attributes for all online nodes with
5594faf8d95SLee Schermerhorn  * memory.  It will also call register_hugetlbfs_with_node(), below, to
5609a305230SLee Schermerhorn  * register its attribute registration functions with this node driver.
5619a305230SLee Schermerhorn  * Once these hooks have been initialized, the node driver will call into
5629a305230SLee Schermerhorn  * the hugetlb module to [un]register attributes for hot-plugged nodes.
5639a305230SLee Schermerhorn  */
5649a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node;
5659a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node;
5669a305230SLee Schermerhorn 
56739da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node)
5689a305230SLee Schermerhorn {
5694faf8d95SLee Schermerhorn 	if (__hugetlb_register_node &&
5708cebfcd0SLai Jiangshan 			node_state(node->dev.id, N_MEMORY)) {
5719a305230SLee Schermerhorn 		__hugetlb_register_node(node);
57239da08cbSLee Schermerhorn 		return true;
57339da08cbSLee Schermerhorn 	}
57439da08cbSLee Schermerhorn 	return false;
5759a305230SLee Schermerhorn }
5769a305230SLee Schermerhorn 
5779a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node)
5789a305230SLee Schermerhorn {
5799a305230SLee Schermerhorn 	if (__hugetlb_unregister_node)
5809a305230SLee Schermerhorn 		__hugetlb_unregister_node(node);
5819a305230SLee Schermerhorn }
5829a305230SLee Schermerhorn 
5839a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister,
5849a305230SLee Schermerhorn 				  node_registration_func_t unregister)
5859a305230SLee Schermerhorn {
5869a305230SLee Schermerhorn 	__hugetlb_register_node   = doregister;
5879a305230SLee Schermerhorn 	__hugetlb_unregister_node = unregister;
5889a305230SLee Schermerhorn }
5899a305230SLee Schermerhorn #else
5909a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {}
5919a305230SLee Schermerhorn 
5929a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {}
5939a305230SLee Schermerhorn #endif
5949a305230SLee Schermerhorn 
/* Release callback of a node device: frees the struct node embedding @dev. */
static void node_device_release(struct device *dev)
{
	struct node *node = to_node(dev);

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
	/*
	 * node->node_work is only scheduled when a memory section on this
	 * node is onlined/offlined.  By the time we get here all memory on
	 * the node has been offlined, so nothing new will be queued; but an
	 * already queued work item still uses node->node_work, so it must
	 * be flushed before the memory is freed.
	 */
	flush_work(&node->node_work);
#endif

	kfree(node);
}
6131da177e4SLinus Torvalds 
6141da177e4SLinus Torvalds /*
615405ae7d3SRobert P. J. Day  * register_node - Setup a sysfs device for a node.
6161da177e4SLinus Torvalds  * @num - Node number to use when creating the device.
6171da177e4SLinus Torvalds  *
6181da177e4SLinus Torvalds  * Initialize and register the node device.
6191da177e4SLinus Torvalds  */
620a7be6e5aSDou Liyang static int register_node(struct node *node, int num)
6211da177e4SLinus Torvalds {
6221da177e4SLinus Torvalds 	int error;
6231da177e4SLinus Torvalds 
62410fbcf4cSKay Sievers 	node->dev.id = num;
62510fbcf4cSKay Sievers 	node->dev.bus = &node_subsys;
6268c7b5b4eSYasuaki Ishimatsu 	node->dev.release = node_device_release;
6277ca7ec40SGreg Kroah-Hartman 	node->dev.groups = node_dev_groups;
62810fbcf4cSKay Sievers 	error = device_register(&node->dev);
6291da177e4SLinus Torvalds 
630c1cc0d51SArvind Yadav 	if (error)
631c1cc0d51SArvind Yadav 		put_device(&node->dev);
632c1cc0d51SArvind Yadav 	else {
6339a305230SLee Schermerhorn 		hugetlb_register_node(node);
634ed4a6d7fSMel Gorman 
635ed4a6d7fSMel Gorman 		compaction_register_node(node);
6361da177e4SLinus Torvalds 	}
6371da177e4SLinus Torvalds 	return error;
6381da177e4SLinus Torvalds }
6391da177e4SLinus Torvalds 
6404b45099bSKeiichiro Tokunaga /**
6414b45099bSKeiichiro Tokunaga  * unregister_node - unregister a node device
6424b45099bSKeiichiro Tokunaga  * @node: node going away
6434b45099bSKeiichiro Tokunaga  *
6444b45099bSKeiichiro Tokunaga  * Unregisters a node device @node.  All the devices on the node must be
6454b45099bSKeiichiro Tokunaga  * unregistered before calling this function.
6464b45099bSKeiichiro Tokunaga  */
6474b45099bSKeiichiro Tokunaga void unregister_node(struct node *node)
6484b45099bSKeiichiro Tokunaga {
6494faf8d95SLee Schermerhorn 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
65008d9dbe7SKeith Busch 	node_remove_accesses(node);
651acc02a10SKeith Busch 	node_remove_caches(node);
65210fbcf4cSKay Sievers 	device_unregister(&node->dev);
6534b45099bSKeiichiro Tokunaga }
6544b45099bSKeiichiro Tokunaga 
/*
 * Table of node objects, indexed by node id.  A slot is filled in by
 * __register_one_node() and reset to NULL by unregister_one_node().
 */
6558732794bSWen Congyang struct node *node_devices[MAX_NUMNODES];
6560fc44159SYasunori Goto 
65776b67ed9SKAMEZAWA Hiroyuki /*
65876b67ed9SKAMEZAWA Hiroyuki  * register cpu under node
65976b67ed9SKAMEZAWA Hiroyuki  */
66076b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid)
66176b67ed9SKAMEZAWA Hiroyuki {
6621830794aSAlex Chiang 	int ret;
6638a25a2fdSKay Sievers 	struct device *obj;
664f8246f31SAlex Chiang 
665f8246f31SAlex Chiang 	if (!node_online(nid))
666f8246f31SAlex Chiang 		return 0;
667f8246f31SAlex Chiang 
6688a25a2fdSKay Sievers 	obj = get_cpu_device(cpu);
66976b67ed9SKAMEZAWA Hiroyuki 	if (!obj)
67076b67ed9SKAMEZAWA Hiroyuki 		return 0;
671f8246f31SAlex Chiang 
6728732794bSWen Congyang 	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
67376b67ed9SKAMEZAWA Hiroyuki 				&obj->kobj,
67476b67ed9SKAMEZAWA Hiroyuki 				kobject_name(&obj->kobj));
6751830794aSAlex Chiang 	if (ret)
6761830794aSAlex Chiang 		return ret;
6771830794aSAlex Chiang 
6781830794aSAlex Chiang 	return sysfs_create_link(&obj->kobj,
6798732794bSWen Congyang 				 &node_devices[nid]->dev.kobj,
6808732794bSWen Congyang 				 kobject_name(&node_devices[nid]->dev.kobj));
68176b67ed9SKAMEZAWA Hiroyuki }
68276b67ed9SKAMEZAWA Hiroyuki 
68308d9dbe7SKeith Busch /**
68408d9dbe7SKeith Busch  * register_memory_node_under_compute_node - link memory node to its compute
68508d9dbe7SKeith Busch  *					     node for a given access class.
68658cb346cSMauro Carvalho Chehab  * @mem_nid:	Memory node number
68758cb346cSMauro Carvalho Chehab  * @cpu_nid:	Cpu  node number
68808d9dbe7SKeith Busch  * @access:	Access class to register
68908d9dbe7SKeith Busch  *
69008d9dbe7SKeith Busch  * Description:
69108d9dbe7SKeith Busch  * 	For use with platforms that may have separate memory and compute nodes.
69208d9dbe7SKeith Busch  * 	This function will export node relationships linking which memory
69308d9dbe7SKeith Busch  * 	initiator nodes can access memory targets at a given ranked access
69408d9dbe7SKeith Busch  * 	class.
69508d9dbe7SKeith Busch  */
69608d9dbe7SKeith Busch int register_memory_node_under_compute_node(unsigned int mem_nid,
69708d9dbe7SKeith Busch 					    unsigned int cpu_nid,
69808d9dbe7SKeith Busch 					    unsigned access)
69908d9dbe7SKeith Busch {
70008d9dbe7SKeith Busch 	struct node *init_node, *targ_node;
70108d9dbe7SKeith Busch 	struct node_access_nodes *initiator, *target;
70208d9dbe7SKeith Busch 	int ret;
70308d9dbe7SKeith Busch 
70408d9dbe7SKeith Busch 	if (!node_online(cpu_nid) || !node_online(mem_nid))
70508d9dbe7SKeith Busch 		return -ENODEV;
70608d9dbe7SKeith Busch 
70708d9dbe7SKeith Busch 	init_node = node_devices[cpu_nid];
70808d9dbe7SKeith Busch 	targ_node = node_devices[mem_nid];
70908d9dbe7SKeith Busch 	initiator = node_init_node_access(init_node, access);
71008d9dbe7SKeith Busch 	target = node_init_node_access(targ_node, access);
71108d9dbe7SKeith Busch 	if (!initiator || !target)
71208d9dbe7SKeith Busch 		return -ENOMEM;
71308d9dbe7SKeith Busch 
71408d9dbe7SKeith Busch 	ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
71508d9dbe7SKeith Busch 				      &targ_node->dev.kobj,
71608d9dbe7SKeith Busch 				      dev_name(&targ_node->dev));
71708d9dbe7SKeith Busch 	if (ret)
71808d9dbe7SKeith Busch 		return ret;
71908d9dbe7SKeith Busch 
72008d9dbe7SKeith Busch 	ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
72108d9dbe7SKeith Busch 				      &init_node->dev.kobj,
72208d9dbe7SKeith Busch 				      dev_name(&init_node->dev));
72308d9dbe7SKeith Busch 	if (ret)
72408d9dbe7SKeith Busch 		goto err;
72508d9dbe7SKeith Busch 
72608d9dbe7SKeith Busch 	return 0;
72708d9dbe7SKeith Busch  err:
72808d9dbe7SKeith Busch 	sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
72908d9dbe7SKeith Busch 				     dev_name(&targ_node->dev));
73008d9dbe7SKeith Busch 	return ret;
73108d9dbe7SKeith Busch }
73208d9dbe7SKeith Busch 
73376b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
73476b67ed9SKAMEZAWA Hiroyuki {
7358a25a2fdSKay Sievers 	struct device *obj;
736b9d52dadSAlex Chiang 
737b9d52dadSAlex Chiang 	if (!node_online(nid))
738b9d52dadSAlex Chiang 		return 0;
739b9d52dadSAlex Chiang 
7408a25a2fdSKay Sievers 	obj = get_cpu_device(cpu);
741b9d52dadSAlex Chiang 	if (!obj)
742b9d52dadSAlex Chiang 		return 0;
743b9d52dadSAlex Chiang 
7448732794bSWen Congyang 	sysfs_remove_link(&node_devices[nid]->dev.kobj,
74576b67ed9SKAMEZAWA Hiroyuki 			  kobject_name(&obj->kobj));
7461830794aSAlex Chiang 	sysfs_remove_link(&obj->kobj,
7478732794bSWen Congyang 			  kobject_name(&node_devices[nid]->dev.kobj));
748b9d52dadSAlex Chiang 
74976b67ed9SKAMEZAWA Hiroyuki 	return 0;
75076b67ed9SKAMEZAWA Hiroyuki }
75176b67ed9SKAMEZAWA Hiroyuki 
752c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
753bd721ea7SFabian Frederick static int __ref get_nid_for_pfn(unsigned long pfn)
754c04fc586SGary Hade {
755c04fc586SGary Hade 	if (!pfn_valid_within(pfn))
756c04fc586SGary Hade 		return -1;
7573a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
7588cdde385SThomas Gleixner 	if (system_state < SYSTEM_RUNNING)
7593a80a7faSMel Gorman 		return early_pfn_to_nid(pfn);
7603a80a7faSMel Gorman #endif
761c04fc586SGary Hade 	return pfn_to_nid(pfn);
762c04fc586SGary Hade }
763c04fc586SGary Hade 
764c04fc586SGary Hade /* register memory section under specified node if it spans that node */
7658d595c4cSDavid Hildenbrand static int register_mem_sect_under_node(struct memory_block *mem_blk,
7668d595c4cSDavid Hildenbrand 					 void *arg)
767c04fc586SGary Hade {
768b6c88d3bSDavid Hildenbrand 	unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
769b6c88d3bSDavid Hildenbrand 	unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
770b6c88d3bSDavid Hildenbrand 	unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
7714fbce633SOscar Salvador 	int ret, nid = *(int *)arg;
772b6c88d3bSDavid Hildenbrand 	unsigned long pfn;
773c04fc586SGary Hade 
774b6c88d3bSDavid Hildenbrand 	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
775c04fc586SGary Hade 		int page_nid;
776c04fc586SGary Hade 
77704697858SYinghai Lu 		/*
77804697858SYinghai Lu 		 * memory block could have several absent sections from start.
77904697858SYinghai Lu 		 * skip pfn range from absent section
78004697858SYinghai Lu 		 */
781e03d1f78SPingfan Liu 		if (!pfn_in_present_section(pfn)) {
78204697858SYinghai Lu 			pfn = round_down(pfn + PAGES_PER_SECTION,
78304697858SYinghai Lu 					 PAGES_PER_SECTION) - 1;
78404697858SYinghai Lu 			continue;
78504697858SYinghai Lu 		}
78604697858SYinghai Lu 
787fc44f7f9SPavel Tatashin 		/*
788fc44f7f9SPavel Tatashin 		 * We need to check if page belongs to nid only for the boot
789fc44f7f9SPavel Tatashin 		 * case, during hotplug we know that all pages in the memory
790fc44f7f9SPavel Tatashin 		 * block belong to the same node.
791fc44f7f9SPavel Tatashin 		 */
7924fbce633SOscar Salvador 		if (system_state == SYSTEM_BOOTING) {
793c04fc586SGary Hade 			page_nid = get_nid_for_pfn(pfn);
794c04fc586SGary Hade 			if (page_nid < 0)
795c04fc586SGary Hade 				continue;
796c04fc586SGary Hade 			if (page_nid != nid)
797c04fc586SGary Hade 				continue;
798fc44f7f9SPavel Tatashin 		}
799d84f2f5aSDavid Hildenbrand 
800d84f2f5aSDavid Hildenbrand 		/*
801d84f2f5aSDavid Hildenbrand 		 * If this memory block spans multiple nodes, we only indicate
802d84f2f5aSDavid Hildenbrand 		 * the last processed node.
803d84f2f5aSDavid Hildenbrand 		 */
804d84f2f5aSDavid Hildenbrand 		mem_blk->nid = nid;
805d84f2f5aSDavid Hildenbrand 
8068732794bSWen Congyang 		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
80710fbcf4cSKay Sievers 					&mem_blk->dev.kobj,
80810fbcf4cSKay Sievers 					kobject_name(&mem_blk->dev.kobj));
809dee5d0d5SAlex Chiang 		if (ret)
810dee5d0d5SAlex Chiang 			return ret;
811dee5d0d5SAlex Chiang 
81210fbcf4cSKay Sievers 		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
8138732794bSWen Congyang 				&node_devices[nid]->dev.kobj,
8148732794bSWen Congyang 				kobject_name(&node_devices[nid]->dev.kobj));
815c04fc586SGary Hade 	}
816c04fc586SGary Hade 	/* mem section does not span the specified node */
817c04fc586SGary Hade 	return 0;
818c04fc586SGary Hade }
819c04fc586SGary Hade 
8204c4b7f9bSDavid Hildenbrand /*
821d84f2f5aSDavid Hildenbrand  * Unregister a memory block device under the node it spans. Memory blocks
822d84f2f5aSDavid Hildenbrand  * with multiple nodes cannot be offlined and therefore also never be removed.
8234c4b7f9bSDavid Hildenbrand  */
824a31b264cSDavid Hildenbrand void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
825c04fc586SGary Hade {
826d84f2f5aSDavid Hildenbrand 	if (mem_blk->nid == NUMA_NO_NODE)
827d84f2f5aSDavid Hildenbrand 		return;
828c04fc586SGary Hade 
829d84f2f5aSDavid Hildenbrand 	sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
83010fbcf4cSKay Sievers 			  kobject_name(&mem_blk->dev.kobj));
83110fbcf4cSKay Sievers 	sysfs_remove_link(&mem_blk->dev.kobj,
832d84f2f5aSDavid Hildenbrand 			  kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
833c04fc586SGary Hade }
834c04fc586SGary Hade 
8354fbce633SOscar Salvador int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
836c04fc586SGary Hade {
837fbcf73ceSDavid Hildenbrand 	return walk_memory_blocks(PFN_PHYS(start_pfn),
838fbcf73ceSDavid Hildenbrand 				  PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
8394fbce633SOscar Salvador 				  register_mem_sect_under_node);
840c04fc586SGary Hade }
8414faf8d95SLee Schermerhorn 
84239da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS
8434faf8d95SLee Schermerhorn /*
8444faf8d95SLee Schermerhorn  * Handle per node hstate attribute [un]registration on transistions
8454faf8d95SLee Schermerhorn  * to/from memoryless state.
8464faf8d95SLee Schermerhorn  */
84739da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work)
84839da08cbSLee Schermerhorn {
84939da08cbSLee Schermerhorn 	struct node *node = container_of(work, struct node, node_work);
85039da08cbSLee Schermerhorn 
85139da08cbSLee Schermerhorn 	/*
85239da08cbSLee Schermerhorn 	 * We only get here when a node transitions to/from memoryless state.
85339da08cbSLee Schermerhorn 	 * We can detect which transition occurred by examining whether the
85439da08cbSLee Schermerhorn 	 * node has memory now.  hugetlb_register_node() already check this
85539da08cbSLee Schermerhorn 	 * so we try to register the attributes.  If that fails, then the
85639da08cbSLee Schermerhorn 	 * node has transitioned to memoryless, try to unregister the
85739da08cbSLee Schermerhorn 	 * attributes.
85839da08cbSLee Schermerhorn 	 */
85939da08cbSLee Schermerhorn 	if (!hugetlb_register_node(node))
86039da08cbSLee Schermerhorn 		hugetlb_unregister_node(node);
86139da08cbSLee Schermerhorn }
86239da08cbSLee Schermerhorn 
86339da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid)
86439da08cbSLee Schermerhorn {
8658732794bSWen Congyang 	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
86639da08cbSLee Schermerhorn }
8674faf8d95SLee Schermerhorn 
8684faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self,
8694faf8d95SLee Schermerhorn 				unsigned long action, void *arg)
8704faf8d95SLee Schermerhorn {
8714faf8d95SLee Schermerhorn 	struct memory_notify *mnb = arg;
8724faf8d95SLee Schermerhorn 	int nid = mnb->status_change_nid;
8734faf8d95SLee Schermerhorn 
8744faf8d95SLee Schermerhorn 	switch (action) {
87539da08cbSLee Schermerhorn 	case MEM_ONLINE:
87639da08cbSLee Schermerhorn 	case MEM_OFFLINE:
87739da08cbSLee Schermerhorn 		/*
87839da08cbSLee Schermerhorn 		 * offload per node hstate [un]registration to a work thread
87939da08cbSLee Schermerhorn 		 * when transitioning to/from memoryless state.
88039da08cbSLee Schermerhorn 		 */
8814faf8d95SLee Schermerhorn 		if (nid != NUMA_NO_NODE)
8828732794bSWen Congyang 			schedule_work(&node_devices[nid]->node_work);
8834faf8d95SLee Schermerhorn 		break;
88439da08cbSLee Schermerhorn 
8854faf8d95SLee Schermerhorn 	case MEM_GOING_ONLINE:
8864faf8d95SLee Schermerhorn 	case MEM_GOING_OFFLINE:
8874faf8d95SLee Schermerhorn 	case MEM_CANCEL_ONLINE:
8884faf8d95SLee Schermerhorn 	case MEM_CANCEL_OFFLINE:
8894faf8d95SLee Schermerhorn 	default:
8904faf8d95SLee Schermerhorn 		break;
8914faf8d95SLee Schermerhorn 	}
8924faf8d95SLee Schermerhorn 
8934faf8d95SLee Schermerhorn 	return NOTIFY_OK;
8944faf8d95SLee Schermerhorn }
89539da08cbSLee Schermerhorn #endif	/* CONFIG_HUGETLBFS */
89639da08cbSLee Schermerhorn #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
89739da08cbSLee Schermerhorn 
89839da08cbSLee Schermerhorn #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
89939da08cbSLee Schermerhorn     !defined(CONFIG_HUGETLBFS)
9004faf8d95SLee Schermerhorn static inline int node_memory_callback(struct notifier_block *self,
9014faf8d95SLee Schermerhorn 				unsigned long action, void *arg)
9024faf8d95SLee Schermerhorn {
9034faf8d95SLee Schermerhorn 	return NOTIFY_OK;
9044faf8d95SLee Schermerhorn }
90539da08cbSLee Schermerhorn 
/* Stub: there is no hugetlb work item to set up in this configuration. */
static void init_node_hugetlb_work(int nid)
{
}
90739da08cbSLee Schermerhorn 
90839da08cbSLee Schermerhorn #endif
909c04fc586SGary Hade 
9109037a993SMichal Hocko int __register_one_node(int nid)
9110fc44159SYasunori Goto {
9129037a993SMichal Hocko 	int error;
9139037a993SMichal Hocko 	int cpu;
9140fc44159SYasunori Goto 
9158732794bSWen Congyang 	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
9168732794bSWen Congyang 	if (!node_devices[nid])
9178732794bSWen Congyang 		return -ENOMEM;
9188732794bSWen Congyang 
919a7be6e5aSDou Liyang 	error = register_node(node_devices[nid], nid);
92076b67ed9SKAMEZAWA Hiroyuki 
92176b67ed9SKAMEZAWA Hiroyuki 	/* link cpu under this node */
92276b67ed9SKAMEZAWA Hiroyuki 	for_each_present_cpu(cpu) {
92376b67ed9SKAMEZAWA Hiroyuki 		if (cpu_to_node(cpu) == nid)
92476b67ed9SKAMEZAWA Hiroyuki 			register_cpu_under_node(cpu, nid);
92576b67ed9SKAMEZAWA Hiroyuki 	}
926c04fc586SGary Hade 
92708d9dbe7SKeith Busch 	INIT_LIST_HEAD(&node_devices[nid]->access_list);
92839da08cbSLee Schermerhorn 	/* initialize work queue for memory hot plug */
92939da08cbSLee Schermerhorn 	init_node_hugetlb_work(nid);
930acc02a10SKeith Busch 	node_init_caches(nid);
9310fc44159SYasunori Goto 
9320fc44159SYasunori Goto 	return error;
9330fc44159SYasunori Goto }
9340fc44159SYasunori Goto 
9350fc44159SYasunori Goto void unregister_one_node(int nid)
9360fc44159SYasunori Goto {
93792d585efSXishi Qiu 	if (!node_devices[nid])
93892d585efSXishi Qiu 		return;
93992d585efSXishi Qiu 
9408732794bSWen Congyang 	unregister_node(node_devices[nid]);
9418732794bSWen Congyang 	node_devices[nid] = NULL;
9420fc44159SYasunori Goto }
9430fc44159SYasunori Goto 
944bde631a5SLee Schermerhorn /*
945bde631a5SLee Schermerhorn  * node states attributes
946bde631a5SLee Schermerhorn  */
947bde631a5SLee Schermerhorn 
948bde631a5SLee Schermerhorn static ssize_t print_nodes_state(enum node_states state, char *buf)
949bde631a5SLee Schermerhorn {
950bde631a5SLee Schermerhorn 	int n;
951bde631a5SLee Schermerhorn 
952f799b1a7STejun Heo 	n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
953f799b1a7STejun Heo 		      nodemask_pr_args(&node_states[state]));
954f6238818SRyota Ozaki 	buf[n++] = '\n';
955f6238818SRyota Ozaki 	buf[n] = '\0';
956bde631a5SLee Schermerhorn 	return n;
957bde631a5SLee Schermerhorn }
958bde631a5SLee Schermerhorn 
/*
 * A device attribute paired with the node state it reports.
 * show_node_state() recovers this wrapper from the embedded attribute via
 * container_of().
 */
959b15f562fSAndi Kleen struct node_attr {
96010fbcf4cSKay Sievers 	struct device_attribute attr;
	/* which node_states[] mask this attribute prints */
961b15f562fSAndi Kleen 	enum node_states state;
962b15f562fSAndi Kleen };
963b15f562fSAndi Kleen 
96410fbcf4cSKay Sievers static ssize_t show_node_state(struct device *dev,
96510fbcf4cSKay Sievers 			       struct device_attribute *attr, char *buf)
966bde631a5SLee Schermerhorn {
967b15f562fSAndi Kleen 	struct node_attr *na = container_of(attr, struct node_attr, attr);
968b15f562fSAndi Kleen 	return print_nodes_state(na->state, buf);
969bde631a5SLee Schermerhorn }
970bde631a5SLee Schermerhorn 
/*
 * Initializer for one struct node_attr: a 0444 attribute called @name that
 * is shown by show_node_state() (no store callback) and reports @state.
 */
971b15f562fSAndi Kleen #define _NODE_ATTR(name, state) \
97210fbcf4cSKay Sievers 	{ __ATTR(name, 0444, show_node_state, NULL), state }
973bde631a5SLee Schermerhorn 
/*
 * One attribute per node state, indexed by the state itself.
 * register_node_type() checks at build time that this array has exactly
 * NR_NODE_STATES entries.
 */
974b15f562fSAndi Kleen static struct node_attr node_state_attr[] = {
975fcf07d22SLai Jiangshan 	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
976fcf07d22SLai Jiangshan 	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
977fcf07d22SLai Jiangshan 	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
978bde631a5SLee Schermerhorn #ifdef CONFIG_HIGHMEM
979fcf07d22SLai Jiangshan 	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
980bde631a5SLee Schermerhorn #endif
98120b2f52bSLai Jiangshan 	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
982fcf07d22SLai Jiangshan 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
983bde631a5SLee Schermerhorn };
984bde631a5SLee Schermerhorn 
/*
 * NULL-terminated list of the attributes defined in node_state_attr[], in
 * the form expected by struct attribute_group.  register_node_type() checks
 * at build time that it holds NR_NODE_STATES entries plus the terminator.
 */
98510fbcf4cSKay Sievers static struct attribute *node_state_attrs[] = {
986fcf07d22SLai Jiangshan 	&node_state_attr[N_POSSIBLE].attr.attr,
987fcf07d22SLai Jiangshan 	&node_state_attr[N_ONLINE].attr.attr,
988fcf07d22SLai Jiangshan 	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
9893701cde6SAndi Kleen #ifdef CONFIG_HIGHMEM
990fcf07d22SLai Jiangshan 	&node_state_attr[N_HIGH_MEMORY].attr.attr,
9913701cde6SAndi Kleen #endif
99220b2f52bSLai Jiangshan 	&node_state_attr[N_MEMORY].attr.attr,
993fcf07d22SLai Jiangshan 	&node_state_attr[N_CPU].attr.attr,
9943701cde6SAndi Kleen 	NULL
9953701cde6SAndi Kleen };
996bde631a5SLee Schermerhorn 
/* Attribute group holding all node state attributes. */
99710fbcf4cSKay Sievers static struct attribute_group memory_root_attr_group = {
99810fbcf4cSKay Sievers 	.attrs = node_state_attrs,
99910fbcf4cSKay Sievers };
100010fbcf4cSKay Sievers 
/*
 * NULL-terminated group list handed to subsys_system_register() for the
 * node subsystem in register_node_type().  Despite the "cpu" in its name it
 * only contains the node state attribute group.
 */
100110fbcf4cSKay Sievers static const struct attribute_group *cpu_root_attr_groups[] = {
100210fbcf4cSKay Sievers 	&memory_root_attr_group,
100310fbcf4cSKay Sievers 	NULL,
100410fbcf4cSKay Sievers };
100510fbcf4cSKay Sievers 
10064faf8d95SLee Schermerhorn #define NODE_CALLBACK_PRI	2	/* lower than SLAB */
10074b45099bSKeiichiro Tokunaga static int __init register_node_type(void)
10081da177e4SLinus Torvalds {
1009bde631a5SLee Schermerhorn 	int ret;
1010bde631a5SLee Schermerhorn 
10113701cde6SAndi Kleen  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
10123701cde6SAndi Kleen  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
10133701cde6SAndi Kleen 
101410fbcf4cSKay Sievers 	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
10154faf8d95SLee Schermerhorn 	if (!ret) {
10166e259e7dSAndrew Morton 		static struct notifier_block node_memory_callback_nb = {
10176e259e7dSAndrew Morton 			.notifier_call = node_memory_callback,
10186e259e7dSAndrew Morton 			.priority = NODE_CALLBACK_PRI,
10196e259e7dSAndrew Morton 		};
10206e259e7dSAndrew Morton 		register_hotmemory_notifier(&node_memory_callback_nb);
10214faf8d95SLee Schermerhorn 	}
1022bde631a5SLee Schermerhorn 
1023bde631a5SLee Schermerhorn 	/*
1024bde631a5SLee Schermerhorn 	 * Note:  we're not going to unregister the node class if we fail
1025bde631a5SLee Schermerhorn 	 * to register the node state class attribute files.
1026bde631a5SLee Schermerhorn 	 */
1027bde631a5SLee Schermerhorn 	return ret;
10281da177e4SLinus Torvalds }
10291da177e4SLinus Torvalds postcore_initcall(register_node_type);
1030