xref: /openbmc/linux/drivers/base/node.c (revision 4c4b7f9b)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
21da177e4SLinus Torvalds /*
310fbcf4cSKay Sievers  * Basic Node interface support
41da177e4SLinus Torvalds  */
51da177e4SLinus Torvalds 
61da177e4SLinus Torvalds #include <linux/module.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/mm.h>
9c04fc586SGary Hade #include <linux/memory.h>
10fa25c503SKOSAKI Motohiro #include <linux/vmstat.h>
116e259e7dSAndrew Morton #include <linux/notifier.h>
121da177e4SLinus Torvalds #include <linux/node.h>
131da177e4SLinus Torvalds #include <linux/hugetlb.h>
14ed4a6d7fSMel Gorman #include <linux/compaction.h>
151da177e4SLinus Torvalds #include <linux/cpumask.h>
161da177e4SLinus Torvalds #include <linux/topology.h>
171da177e4SLinus Torvalds #include <linux/nodemask.h>
1876b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h>
19bde631a5SLee Schermerhorn #include <linux/device.h>
2008d9dbe7SKeith Busch #include <linux/pm_runtime.h>
21af936a16SLee Schermerhorn #include <linux/swap.h>
2218e5b539STejun Heo #include <linux/slab.h>
231da177e4SLinus Torvalds 
2410fbcf4cSKay Sievers static struct bus_type node_subsys = {
25af5ca3f4SKay Sievers 	.name = "node",
2610fbcf4cSKay Sievers 	.dev_name = "node",
271da177e4SLinus Torvalds };
281da177e4SLinus Torvalds 
291da177e4SLinus Torvalds 
305aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
311da177e4SLinus Torvalds {
32064f0e93SZhen Lei 	ssize_t n;
33064f0e93SZhen Lei 	cpumask_var_t mask;
341da177e4SLinus Torvalds 	struct node *node_dev = to_node(dev);
351da177e4SLinus Torvalds 
3639106dcfSMike Travis 	/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
3739106dcfSMike Travis 	BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
381da177e4SLinus Torvalds 
39064f0e93SZhen Lei 	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
40064f0e93SZhen Lei 		return 0;
41064f0e93SZhen Lei 
42064f0e93SZhen Lei 	cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
43064f0e93SZhen Lei 	n = cpumap_print_to_pagebuf(list, buf, mask);
44064f0e93SZhen Lei 	free_cpumask_var(mask);
45064f0e93SZhen Lei 
46064f0e93SZhen Lei 	return n;
471da177e4SLinus Torvalds }
481da177e4SLinus Torvalds 
4910fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev,
5010fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
5139106dcfSMike Travis {
525aaba363SSudeep Holla 	return node_read_cpumap(dev, false, buf);
5339106dcfSMike Travis }
5410fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev,
5510fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
5639106dcfSMike Travis {
575aaba363SSudeep Holla 	return node_read_cpumap(dev, true, buf);
5839106dcfSMike Travis }
5939106dcfSMike Travis 
6010fbcf4cSKay Sievers static DEVICE_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
6110fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
621da177e4SLinus Torvalds 
6308d9dbe7SKeith Busch /**
6408d9dbe7SKeith Busch  * struct node_access_nodes - Access class device to hold user visible
6508d9dbe7SKeith Busch  * 			      relationships to other nodes.
6608d9dbe7SKeith Busch  * @dev:	Device for this memory access class
6708d9dbe7SKeith Busch  * @list_node:	List element in the node's access list
6808d9dbe7SKeith Busch  * @access:	The access class rank
6958cb346cSMauro Carvalho Chehab  * @hmem_attrs: Heterogeneous memory performance attributes
7008d9dbe7SKeith Busch  */
7108d9dbe7SKeith Busch struct node_access_nodes {
7208d9dbe7SKeith Busch 	struct device		dev;
7308d9dbe7SKeith Busch 	struct list_head	list_node;
7408d9dbe7SKeith Busch 	unsigned		access;
75e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING
76e1cf33aaSKeith Busch 	struct node_hmem_attrs	hmem_attrs;
77e1cf33aaSKeith Busch #endif
7808d9dbe7SKeith Busch };
7908d9dbe7SKeith Busch #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
8008d9dbe7SKeith Busch 
8108d9dbe7SKeith Busch static struct attribute *node_init_access_node_attrs[] = {
8208d9dbe7SKeith Busch 	NULL,
8308d9dbe7SKeith Busch };
8408d9dbe7SKeith Busch 
8508d9dbe7SKeith Busch static struct attribute *node_targ_access_node_attrs[] = {
8608d9dbe7SKeith Busch 	NULL,
8708d9dbe7SKeith Busch };
8808d9dbe7SKeith Busch 
8908d9dbe7SKeith Busch static const struct attribute_group initiators = {
9008d9dbe7SKeith Busch 	.name	= "initiators",
9108d9dbe7SKeith Busch 	.attrs	= node_init_access_node_attrs,
9208d9dbe7SKeith Busch };
9308d9dbe7SKeith Busch 
9408d9dbe7SKeith Busch static const struct attribute_group targets = {
9508d9dbe7SKeith Busch 	.name	= "targets",
9608d9dbe7SKeith Busch 	.attrs	= node_targ_access_node_attrs,
9708d9dbe7SKeith Busch };
9808d9dbe7SKeith Busch 
9908d9dbe7SKeith Busch static const struct attribute_group *node_access_node_groups[] = {
10008d9dbe7SKeith Busch 	&initiators,
10108d9dbe7SKeith Busch 	&targets,
10208d9dbe7SKeith Busch 	NULL,
10308d9dbe7SKeith Busch };
10408d9dbe7SKeith Busch 
10508d9dbe7SKeith Busch static void node_remove_accesses(struct node *node)
10608d9dbe7SKeith Busch {
10708d9dbe7SKeith Busch 	struct node_access_nodes *c, *cnext;
10808d9dbe7SKeith Busch 
10908d9dbe7SKeith Busch 	list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
11008d9dbe7SKeith Busch 		list_del(&c->list_node);
11108d9dbe7SKeith Busch 		device_unregister(&c->dev);
11208d9dbe7SKeith Busch 	}
11308d9dbe7SKeith Busch }
11408d9dbe7SKeith Busch 
/* Device release callback: free the containing struct node_access_nodes. */
static void node_access_release(struct device *dev)
{
	struct node_access_nodes *access_node = to_access_nodes(dev);

	kfree(access_node);
}
11908d9dbe7SKeith Busch 
12008d9dbe7SKeith Busch static struct node_access_nodes *node_init_node_access(struct node *node,
12108d9dbe7SKeith Busch 						       unsigned access)
12208d9dbe7SKeith Busch {
12308d9dbe7SKeith Busch 	struct node_access_nodes *access_node;
12408d9dbe7SKeith Busch 	struct device *dev;
12508d9dbe7SKeith Busch 
12608d9dbe7SKeith Busch 	list_for_each_entry(access_node, &node->access_list, list_node)
12708d9dbe7SKeith Busch 		if (access_node->access == access)
12808d9dbe7SKeith Busch 			return access_node;
12908d9dbe7SKeith Busch 
13008d9dbe7SKeith Busch 	access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
13108d9dbe7SKeith Busch 	if (!access_node)
13208d9dbe7SKeith Busch 		return NULL;
13308d9dbe7SKeith Busch 
13408d9dbe7SKeith Busch 	access_node->access = access;
13508d9dbe7SKeith Busch 	dev = &access_node->dev;
13608d9dbe7SKeith Busch 	dev->parent = &node->dev;
13708d9dbe7SKeith Busch 	dev->release = node_access_release;
13808d9dbe7SKeith Busch 	dev->groups = node_access_node_groups;
13908d9dbe7SKeith Busch 	if (dev_set_name(dev, "access%u", access))
14008d9dbe7SKeith Busch 		goto free;
14108d9dbe7SKeith Busch 
14208d9dbe7SKeith Busch 	if (device_register(dev))
14308d9dbe7SKeith Busch 		goto free_name;
14408d9dbe7SKeith Busch 
14508d9dbe7SKeith Busch 	pm_runtime_no_callbacks(dev);
14608d9dbe7SKeith Busch 	list_add_tail(&access_node->list_node, &node->access_list);
14708d9dbe7SKeith Busch 	return access_node;
14808d9dbe7SKeith Busch free_name:
14908d9dbe7SKeith Busch 	kfree_const(dev->kobj.name);
15008d9dbe7SKeith Busch free:
15108d9dbe7SKeith Busch 	kfree(access_node);
15208d9dbe7SKeith Busch 	return NULL;
15308d9dbe7SKeith Busch }
15408d9dbe7SKeith Busch 
155e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING
156e1cf33aaSKeith Busch #define ACCESS_ATTR(name) 						   \
157e1cf33aaSKeith Busch static ssize_t name##_show(struct device *dev,				   \
158e1cf33aaSKeith Busch 			   struct device_attribute *attr,		   \
159e1cf33aaSKeith Busch 			   char *buf)					   \
160e1cf33aaSKeith Busch {									   \
161e1cf33aaSKeith Busch 	return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \
162e1cf33aaSKeith Busch }									   \
163e1cf33aaSKeith Busch static DEVICE_ATTR_RO(name);
164e1cf33aaSKeith Busch 
165e1cf33aaSKeith Busch ACCESS_ATTR(read_bandwidth)
166e1cf33aaSKeith Busch ACCESS_ATTR(read_latency)
167e1cf33aaSKeith Busch ACCESS_ATTR(write_bandwidth)
168e1cf33aaSKeith Busch ACCESS_ATTR(write_latency)
169e1cf33aaSKeith Busch 
170e1cf33aaSKeith Busch static struct attribute *access_attrs[] = {
171e1cf33aaSKeith Busch 	&dev_attr_read_bandwidth.attr,
172e1cf33aaSKeith Busch 	&dev_attr_read_latency.attr,
173e1cf33aaSKeith Busch 	&dev_attr_write_bandwidth.attr,
174e1cf33aaSKeith Busch 	&dev_attr_write_latency.attr,
175e1cf33aaSKeith Busch 	NULL,
176e1cf33aaSKeith Busch };
177e1cf33aaSKeith Busch 
178e1cf33aaSKeith Busch /**
179e1cf33aaSKeith Busch  * node_set_perf_attrs - Set the performance values for given access class
180e1cf33aaSKeith Busch  * @nid: Node identifier to be set
181e1cf33aaSKeith Busch  * @hmem_attrs: Heterogeneous memory performance attributes
182e1cf33aaSKeith Busch  * @access: The access class the for the given attributes
183e1cf33aaSKeith Busch  */
184e1cf33aaSKeith Busch void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
185e1cf33aaSKeith Busch 			 unsigned access)
186e1cf33aaSKeith Busch {
187e1cf33aaSKeith Busch 	struct node_access_nodes *c;
188e1cf33aaSKeith Busch 	struct node *node;
189e1cf33aaSKeith Busch 	int i;
190e1cf33aaSKeith Busch 
191e1cf33aaSKeith Busch 	if (WARN_ON_ONCE(!node_online(nid)))
192e1cf33aaSKeith Busch 		return;
193e1cf33aaSKeith Busch 
194e1cf33aaSKeith Busch 	node = node_devices[nid];
195e1cf33aaSKeith Busch 	c = node_init_node_access(node, access);
196e1cf33aaSKeith Busch 	if (!c)
197e1cf33aaSKeith Busch 		return;
198e1cf33aaSKeith Busch 
199e1cf33aaSKeith Busch 	c->hmem_attrs = *hmem_attrs;
200e1cf33aaSKeith Busch 	for (i = 0; access_attrs[i] != NULL; i++) {
201e1cf33aaSKeith Busch 		if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i],
202e1cf33aaSKeith Busch 					    "initiators")) {
203e1cf33aaSKeith Busch 			pr_info("failed to add performance attribute to node %d\n",
204e1cf33aaSKeith Busch 				nid);
205e1cf33aaSKeith Busch 			break;
206e1cf33aaSKeith Busch 		}
207e1cf33aaSKeith Busch 	}
208e1cf33aaSKeith Busch }
209acc02a10SKeith Busch 
210acc02a10SKeith Busch /**
211acc02a10SKeith Busch  * struct node_cache_info - Internal tracking for memory node caches
212acc02a10SKeith Busch  * @dev:	Device represeting the cache level
213acc02a10SKeith Busch  * @node:	List element for tracking in the node
214acc02a10SKeith Busch  * @cache_attrs:Attributes for this cache level
215acc02a10SKeith Busch  */
216acc02a10SKeith Busch struct node_cache_info {
217acc02a10SKeith Busch 	struct device dev;
218acc02a10SKeith Busch 	struct list_head node;
219acc02a10SKeith Busch 	struct node_cache_attrs cache_attrs;
220acc02a10SKeith Busch };
221acc02a10SKeith Busch #define to_cache_info(device) container_of(device, struct node_cache_info, dev)
222acc02a10SKeith Busch 
223acc02a10SKeith Busch #define CACHE_ATTR(name, fmt) 						\
224acc02a10SKeith Busch static ssize_t name##_show(struct device *dev,				\
225acc02a10SKeith Busch 			   struct device_attribute *attr,		\
226acc02a10SKeith Busch 			   char *buf)					\
227acc02a10SKeith Busch {									\
228acc02a10SKeith Busch 	return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\
229acc02a10SKeith Busch }									\
230acc02a10SKeith Busch DEVICE_ATTR_RO(name);
231acc02a10SKeith Busch 
232acc02a10SKeith Busch CACHE_ATTR(size, "%llu")
233acc02a10SKeith Busch CACHE_ATTR(line_size, "%u")
234acc02a10SKeith Busch CACHE_ATTR(indexing, "%u")
235acc02a10SKeith Busch CACHE_ATTR(write_policy, "%u")
236acc02a10SKeith Busch 
237acc02a10SKeith Busch static struct attribute *cache_attrs[] = {
238acc02a10SKeith Busch 	&dev_attr_indexing.attr,
239acc02a10SKeith Busch 	&dev_attr_size.attr,
240acc02a10SKeith Busch 	&dev_attr_line_size.attr,
241acc02a10SKeith Busch 	&dev_attr_write_policy.attr,
242acc02a10SKeith Busch 	NULL,
243acc02a10SKeith Busch };
244acc02a10SKeith Busch ATTRIBUTE_GROUPS(cache);
245acc02a10SKeith Busch 
/*
 * Release callback for the device allocated by node_init_cache_dev(); that
 * device is a bare struct device, so it is freed directly.
 */
static void node_cache_release(struct device *dev)
{
	kfree(dev);
}
250acc02a10SKeith Busch 
/* Release callback for a cache level device: free its node_cache_info. */
static void node_cacheinfo_release(struct device *dev)
{
	kfree(to_cache_info(dev));
}
256acc02a10SKeith Busch 
257acc02a10SKeith Busch static void node_init_cache_dev(struct node *node)
258acc02a10SKeith Busch {
259acc02a10SKeith Busch 	struct device *dev;
260acc02a10SKeith Busch 
261acc02a10SKeith Busch 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
262acc02a10SKeith Busch 	if (!dev)
263acc02a10SKeith Busch 		return;
264acc02a10SKeith Busch 
265acc02a10SKeith Busch 	dev->parent = &node->dev;
266acc02a10SKeith Busch 	dev->release = node_cache_release;
267acc02a10SKeith Busch 	if (dev_set_name(dev, "memory_side_cache"))
268acc02a10SKeith Busch 		goto free_dev;
269acc02a10SKeith Busch 
270acc02a10SKeith Busch 	if (device_register(dev))
271acc02a10SKeith Busch 		goto free_name;
272acc02a10SKeith Busch 
273acc02a10SKeith Busch 	pm_runtime_no_callbacks(dev);
274acc02a10SKeith Busch 	node->cache_dev = dev;
275acc02a10SKeith Busch 	return;
276acc02a10SKeith Busch free_name:
277acc02a10SKeith Busch 	kfree_const(dev->kobj.name);
278acc02a10SKeith Busch free_dev:
279acc02a10SKeith Busch 	kfree(dev);
280acc02a10SKeith Busch }
281acc02a10SKeith Busch 
282acc02a10SKeith Busch /**
283acc02a10SKeith Busch  * node_add_cache() - add cache attribute to a memory node
284acc02a10SKeith Busch  * @nid: Node identifier that has new cache attributes
285acc02a10SKeith Busch  * @cache_attrs: Attributes for the cache being added
286acc02a10SKeith Busch  */
287acc02a10SKeith Busch void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
288acc02a10SKeith Busch {
289acc02a10SKeith Busch 	struct node_cache_info *info;
290acc02a10SKeith Busch 	struct device *dev;
291acc02a10SKeith Busch 	struct node *node;
292acc02a10SKeith Busch 
293acc02a10SKeith Busch 	if (!node_online(nid) || !node_devices[nid])
294acc02a10SKeith Busch 		return;
295acc02a10SKeith Busch 
296acc02a10SKeith Busch 	node = node_devices[nid];
297acc02a10SKeith Busch 	list_for_each_entry(info, &node->cache_attrs, node) {
298acc02a10SKeith Busch 		if (info->cache_attrs.level == cache_attrs->level) {
299acc02a10SKeith Busch 			dev_warn(&node->dev,
300acc02a10SKeith Busch 				"attempt to add duplicate cache level:%d\n",
301acc02a10SKeith Busch 				cache_attrs->level);
302acc02a10SKeith Busch 			return;
303acc02a10SKeith Busch 		}
304acc02a10SKeith Busch 	}
305acc02a10SKeith Busch 
306acc02a10SKeith Busch 	if (!node->cache_dev)
307acc02a10SKeith Busch 		node_init_cache_dev(node);
308acc02a10SKeith Busch 	if (!node->cache_dev)
309acc02a10SKeith Busch 		return;
310acc02a10SKeith Busch 
311acc02a10SKeith Busch 	info = kzalloc(sizeof(*info), GFP_KERNEL);
312acc02a10SKeith Busch 	if (!info)
313acc02a10SKeith Busch 		return;
314acc02a10SKeith Busch 
315acc02a10SKeith Busch 	dev = &info->dev;
316acc02a10SKeith Busch 	dev->parent = node->cache_dev;
317acc02a10SKeith Busch 	dev->release = node_cacheinfo_release;
318acc02a10SKeith Busch 	dev->groups = cache_groups;
319acc02a10SKeith Busch 	if (dev_set_name(dev, "index%d", cache_attrs->level))
320acc02a10SKeith Busch 		goto free_cache;
321acc02a10SKeith Busch 
322acc02a10SKeith Busch 	info->cache_attrs = *cache_attrs;
323acc02a10SKeith Busch 	if (device_register(dev)) {
324acc02a10SKeith Busch 		dev_warn(&node->dev, "failed to add cache level:%d\n",
325acc02a10SKeith Busch 			 cache_attrs->level);
326acc02a10SKeith Busch 		goto free_name;
327acc02a10SKeith Busch 	}
328acc02a10SKeith Busch 	pm_runtime_no_callbacks(dev);
329acc02a10SKeith Busch 	list_add_tail(&info->node, &node->cache_attrs);
330acc02a10SKeith Busch 	return;
331acc02a10SKeith Busch free_name:
332acc02a10SKeith Busch 	kfree_const(dev->kobj.name);
333acc02a10SKeith Busch free_cache:
334acc02a10SKeith Busch 	kfree(info);
335acc02a10SKeith Busch }
336acc02a10SKeith Busch 
337acc02a10SKeith Busch static void node_remove_caches(struct node *node)
338acc02a10SKeith Busch {
339acc02a10SKeith Busch 	struct node_cache_info *info, *next;
340acc02a10SKeith Busch 
341acc02a10SKeith Busch 	if (!node->cache_dev)
342acc02a10SKeith Busch 		return;
343acc02a10SKeith Busch 
344acc02a10SKeith Busch 	list_for_each_entry_safe(info, next, &node->cache_attrs, node) {
345acc02a10SKeith Busch 		list_del(&info->node);
346acc02a10SKeith Busch 		device_unregister(&info->dev);
347acc02a10SKeith Busch 	}
348acc02a10SKeith Busch 	device_unregister(node->cache_dev);
349acc02a10SKeith Busch }
350acc02a10SKeith Busch 
351acc02a10SKeith Busch static void node_init_caches(unsigned int nid)
352acc02a10SKeith Busch {
353acc02a10SKeith Busch 	INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
354acc02a10SKeith Busch }
355acc02a10SKeith Busch #else
/* No cache tracking without CONFIG_HMEM_REPORTING: both hooks are no-ops. */
static void node_init_caches(unsigned int nid)
{
}

static void node_remove_caches(struct node *node)
{
}
358e1cf33aaSKeith Busch #endif
359e1cf33aaSKeith Busch 
3601da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10))
36110fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev,
36210fbcf4cSKay Sievers 			struct device_attribute *attr, char *buf)
3631da177e4SLinus Torvalds {
3641da177e4SLinus Torvalds 	int n;
3651da177e4SLinus Torvalds 	int nid = dev->id;
366599d0c95SMel Gorman 	struct pglist_data *pgdat = NODE_DATA(nid);
3671da177e4SLinus Torvalds 	struct sysinfo i;
36861f94e18SVlastimil Babka 	unsigned long sreclaimable, sunreclaimable;
3691da177e4SLinus Torvalds 
3701da177e4SLinus Torvalds 	si_meminfo_node(&i, nid);
37161f94e18SVlastimil Babka 	sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE);
37261f94e18SVlastimil Babka 	sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE);
3737ee92255SKOSAKI Motohiro 	n = sprintf(buf,
3741da177e4SLinus Torvalds 		       "Node %d MemTotal:       %8lu kB\n"
3751da177e4SLinus Torvalds 		       "Node %d MemFree:        %8lu kB\n"
3761da177e4SLinus Torvalds 		       "Node %d MemUsed:        %8lu kB\n"
3771da177e4SLinus Torvalds 		       "Node %d Active:         %8lu kB\n"
3781da177e4SLinus Torvalds 		       "Node %d Inactive:       %8lu kB\n"
3794f98a2feSRik van Riel 		       "Node %d Active(anon):   %8lu kB\n"
3804f98a2feSRik van Riel 		       "Node %d Inactive(anon): %8lu kB\n"
3814f98a2feSRik van Riel 		       "Node %d Active(file):   %8lu kB\n"
3824f98a2feSRik van Riel 		       "Node %d Inactive(file): %8lu kB\n"
3835344b7e6SNick Piggin 		       "Node %d Unevictable:    %8lu kB\n"
3847ee92255SKOSAKI Motohiro 		       "Node %d Mlocked:        %8lu kB\n",
3857ee92255SKOSAKI Motohiro 		       nid, K(i.totalram),
3867ee92255SKOSAKI Motohiro 		       nid, K(i.freeram),
3877ee92255SKOSAKI Motohiro 		       nid, K(i.totalram - i.freeram),
388599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
389599d0c95SMel Gorman 				node_page_state(pgdat, NR_ACTIVE_FILE)),
390599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
391599d0c95SMel Gorman 				node_page_state(pgdat, NR_INACTIVE_FILE)),
392599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
393599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
394599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
395599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
396599d0c95SMel Gorman 		       nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
39775ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
3987ee92255SKOSAKI Motohiro 
399182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM
4007ee92255SKOSAKI Motohiro 	n += sprintf(buf + n,
4011da177e4SLinus Torvalds 		       "Node %d HighTotal:      %8lu kB\n"
4021da177e4SLinus Torvalds 		       "Node %d HighFree:       %8lu kB\n"
4031da177e4SLinus Torvalds 		       "Node %d LowTotal:       %8lu kB\n"
4047ee92255SKOSAKI Motohiro 		       "Node %d LowFree:        %8lu kB\n",
4057ee92255SKOSAKI Motohiro 		       nid, K(i.totalhigh),
4067ee92255SKOSAKI Motohiro 		       nid, K(i.freehigh),
4077ee92255SKOSAKI Motohiro 		       nid, K(i.totalram - i.totalhigh),
4087ee92255SKOSAKI Motohiro 		       nid, K(i.freeram - i.freehigh));
409182e8e23SChristoph Lameter #endif
4107ee92255SKOSAKI Motohiro 	n += sprintf(buf + n,
411c07e02dbSMartin Hicks 		       "Node %d Dirty:          %8lu kB\n"
412c07e02dbSMartin Hicks 		       "Node %d Writeback:      %8lu kB\n"
413347ce434SChristoph Lameter 		       "Node %d FilePages:      %8lu kB\n"
414c07e02dbSMartin Hicks 		       "Node %d Mapped:         %8lu kB\n"
415f3dbd344SChristoph Lameter 		       "Node %d AnonPages:      %8lu kB\n"
4164b02108aSKOSAKI Motohiro 		       "Node %d Shmem:          %8lu kB\n"
417c6a7f572SKOSAKI Motohiro 		       "Node %d KernelStack:    %8lu kB\n"
418df849a15SChristoph Lameter 		       "Node %d PageTables:     %8lu kB\n"
419f5ef68daSAndrew Morton 		       "Node %d NFS_Unstable:   %8lu kB\n"
420d2c5e30cSChristoph Lameter 		       "Node %d Bounce:         %8lu kB\n"
421fc3ba692SMiklos Szeredi 		       "Node %d WritebackTmp:   %8lu kB\n"
42261f94e18SVlastimil Babka 		       "Node %d KReclaimable:   %8lu kB\n"
423972d1a7bSChristoph Lameter 		       "Node %d Slab:           %8lu kB\n"
424972d1a7bSChristoph Lameter 		       "Node %d SReclaimable:   %8lu kB\n"
42505b258e9SDavid Rientjes 		       "Node %d SUnreclaim:     %8lu kB\n"
42605b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE
42705b258e9SDavid Rientjes 		       "Node %d AnonHugePages:  %8lu kB\n"
42865c45377SKirill A. Shutemov 		       "Node %d ShmemHugePages: %8lu kB\n"
42965c45377SKirill A. Shutemov 		       "Node %d ShmemPmdMapped: %8lu kB\n"
43005b258e9SDavid Rientjes #endif
43105b258e9SDavid Rientjes 			,
43211fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
43311fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_WRITEBACK)),
43411fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
43550658e2eSMel Gorman 		       nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
4364b9d0fabSMel Gorman 		       nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
437cc7452b6SRafael Aquini 		       nid, K(i.sharedram),
438d30dd8beSAndy Lutomirski 		       nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
43975ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
44011fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
44175ef7184SMel Gorman 		       nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
44211fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
44361f94e18SVlastimil Babka 		       nid, K(sreclaimable +
44461f94e18SVlastimil Babka 			      node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
44561f94e18SVlastimil Babka 		       nid, K(sreclaimable + sunreclaimable),
44661f94e18SVlastimil Babka 		       nid, K(sreclaimable),
44761f94e18SVlastimil Babka 		       nid, K(sunreclaimable)
44805b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE
44961f94e18SVlastimil Babka 		       ,
45011fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_ANON_THPS) *
45165c45377SKirill A. Shutemov 				       HPAGE_PMD_NR),
45211fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
45365c45377SKirill A. Shutemov 				       HPAGE_PMD_NR),
45411fb9989SMel Gorman 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
45561f94e18SVlastimil Babka 				       HPAGE_PMD_NR)
45605b258e9SDavid Rientjes #endif
45761f94e18SVlastimil Babka 		       );
4581da177e4SLinus Torvalds 	n += hugetlb_report_node_meminfo(nid, buf + n);
4591da177e4SLinus Torvalds 	return n;
4601da177e4SLinus Torvalds }
4611da177e4SLinus Torvalds 
4621da177e4SLinus Torvalds #undef K
46310fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
4641da177e4SLinus Torvalds 
46510fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev,
46610fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
4671da177e4SLinus Torvalds {
4681da177e4SLinus Torvalds 	return sprintf(buf,
4691da177e4SLinus Torvalds 		       "numa_hit %lu\n"
4701da177e4SLinus Torvalds 		       "numa_miss %lu\n"
4711da177e4SLinus Torvalds 		       "numa_foreign %lu\n"
4721da177e4SLinus Torvalds 		       "interleave_hit %lu\n"
4731da177e4SLinus Torvalds 		       "local_node %lu\n"
4741da177e4SLinus Torvalds 		       "other_node %lu\n",
4753a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_HIT),
4763a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_MISS),
4773a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_FOREIGN),
4783a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
4793a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_LOCAL),
4803a321d2aSKemi Wang 		       sum_zone_numa_state(dev->id, NUMA_OTHER));
4811da177e4SLinus Torvalds }
48210fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
4831da177e4SLinus Torvalds 
48410fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev,
48510fbcf4cSKay Sievers 				struct device_attribute *attr, char *buf)
4862ac39037SMichael Rubin {
4872ac39037SMichael Rubin 	int nid = dev->id;
48875ef7184SMel Gorman 	struct pglist_data *pgdat = NODE_DATA(nid);
489fa25c503SKOSAKI Motohiro 	int i;
490fa25c503SKOSAKI Motohiro 	int n = 0;
491fa25c503SKOSAKI Motohiro 
492fa25c503SKOSAKI Motohiro 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
493fa25c503SKOSAKI Motohiro 		n += sprintf(buf+n, "%s %lu\n", vmstat_text[i],
49475ef7184SMel Gorman 			     sum_zone_node_page_state(nid, i));
49575ef7184SMel Gorman 
4963a321d2aSKemi Wang #ifdef CONFIG_NUMA
4973a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
49875ef7184SMel Gorman 		n += sprintf(buf+n, "%s %lu\n",
49975ef7184SMel Gorman 			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS],
5003a321d2aSKemi Wang 			     sum_zone_numa_state(nid, i));
5013a321d2aSKemi Wang #endif
5023a321d2aSKemi Wang 
5033a321d2aSKemi Wang 	for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
5043a321d2aSKemi Wang 		n += sprintf(buf+n, "%s %lu\n",
5053a321d2aSKemi Wang 			     vmstat_text[i + NR_VM_ZONE_STAT_ITEMS +
5063a321d2aSKemi Wang 			     NR_VM_NUMA_STAT_ITEMS],
50775ef7184SMel Gorman 			     node_page_state(pgdat, i));
508fa25c503SKOSAKI Motohiro 
509fa25c503SKOSAKI Motohiro 	return n;
5102ac39037SMichael Rubin }
51110fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL);
5122ac39037SMichael Rubin 
51310fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev,
51410fbcf4cSKay Sievers 			struct device_attribute *attr, char *buf)
5151da177e4SLinus Torvalds {
5161da177e4SLinus Torvalds 	int nid = dev->id;
5171da177e4SLinus Torvalds 	int len = 0;
5181da177e4SLinus Torvalds 	int i;
5191da177e4SLinus Torvalds 
52012ee3c0aSDavid Rientjes 	/*
52112ee3c0aSDavid Rientjes 	 * buf is currently PAGE_SIZE in length and each node needs 4 chars
52212ee3c0aSDavid Rientjes 	 * at the most (distance + space or newline).
52312ee3c0aSDavid Rientjes 	 */
52412ee3c0aSDavid Rientjes 	BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
5251da177e4SLinus Torvalds 
5261da177e4SLinus Torvalds 	for_each_online_node(i)
5271da177e4SLinus Torvalds 		len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i));
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 	len += sprintf(buf + len, "\n");
5301da177e4SLinus Torvalds 	return len;
5311da177e4SLinus Torvalds }
53210fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL);
5331da177e4SLinus Torvalds 
5343c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = {
5353c9b8aafSTakashi Iwai 	&dev_attr_cpumap.attr,
5363c9b8aafSTakashi Iwai 	&dev_attr_cpulist.attr,
5373c9b8aafSTakashi Iwai 	&dev_attr_meminfo.attr,
5383c9b8aafSTakashi Iwai 	&dev_attr_numastat.attr,
5393c9b8aafSTakashi Iwai 	&dev_attr_distance.attr,
5403c9b8aafSTakashi Iwai 	&dev_attr_vmstat.attr,
5413c9b8aafSTakashi Iwai 	NULL
5423c9b8aafSTakashi Iwai };
5437ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev);
5443c9b8aafSTakashi Iwai 
5459a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS
5469a305230SLee Schermerhorn /*
5479a305230SLee Schermerhorn  * hugetlbfs per node attributes registration interface:
5489a305230SLee Schermerhorn  * When/if hugetlb[fs] subsystem initializes [sometime after this module],
5494faf8d95SLee Schermerhorn  * it will register its per node attributes for all online nodes with
5504faf8d95SLee Schermerhorn  * memory.  It will also call register_hugetlbfs_with_node(), below, to
5519a305230SLee Schermerhorn  * register its attribute registration functions with this node driver.
5529a305230SLee Schermerhorn  * Once these hooks have been initialized, the node driver will call into
5539a305230SLee Schermerhorn  * the hugetlb module to [un]register attributes for hot-plugged nodes.
5549a305230SLee Schermerhorn  */
5559a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node;
5569a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node;
5579a305230SLee Schermerhorn 
55839da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node)
5599a305230SLee Schermerhorn {
5604faf8d95SLee Schermerhorn 	if (__hugetlb_register_node &&
5618cebfcd0SLai Jiangshan 			node_state(node->dev.id, N_MEMORY)) {
5629a305230SLee Schermerhorn 		__hugetlb_register_node(node);
56339da08cbSLee Schermerhorn 		return true;
56439da08cbSLee Schermerhorn 	}
56539da08cbSLee Schermerhorn 	return false;
5669a305230SLee Schermerhorn }
5679a305230SLee Schermerhorn 
5689a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node)
5699a305230SLee Schermerhorn {
5709a305230SLee Schermerhorn 	if (__hugetlb_unregister_node)
5719a305230SLee Schermerhorn 		__hugetlb_unregister_node(node);
5729a305230SLee Schermerhorn }
5739a305230SLee Schermerhorn 
5749a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister,
5759a305230SLee Schermerhorn 				  node_registration_func_t unregister)
5769a305230SLee Schermerhorn {
5779a305230SLee Schermerhorn 	__hugetlb_register_node   = doregister;
5789a305230SLee Schermerhorn 	__hugetlb_unregister_node = unregister;
5799a305230SLee Schermerhorn }
5809a305230SLee Schermerhorn #else
5819a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {}
5829a305230SLee Schermerhorn 
5839a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {}
5849a305230SLee Schermerhorn #endif
5859a305230SLee Schermerhorn 
/* Release callback for a node device: free the containing struct node. */
static void node_device_release(struct device *dev)
{
	struct node *node = to_node(dev);

#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
	/*
	 * node->node_work lives inside the structure that is about to be
	 * freed, so any pending work has to be flushed first.
	 *
	 * The work is only scheduled when a memory section on this node is
	 * onlined/offlined.  By the time we get here all memory on this node
	 * has been offlined, so no new work will be queued.
	 */
	flush_work(&node->node_work);
#endif
	kfree(node);
}
6041da177e4SLinus Torvalds 
6051da177e4SLinus Torvalds /*
606405ae7d3SRobert P. J. Day  * register_node - Setup a sysfs device for a node.
6071da177e4SLinus Torvalds  * @num - Node number to use when creating the device.
6081da177e4SLinus Torvalds  *
6091da177e4SLinus Torvalds  * Initialize and register the node device.
6101da177e4SLinus Torvalds  */
611a7be6e5aSDou Liyang static int register_node(struct node *node, int num)
6121da177e4SLinus Torvalds {
6131da177e4SLinus Torvalds 	int error;
6141da177e4SLinus Torvalds 
61510fbcf4cSKay Sievers 	node->dev.id = num;
61610fbcf4cSKay Sievers 	node->dev.bus = &node_subsys;
6178c7b5b4eSYasuaki Ishimatsu 	node->dev.release = node_device_release;
6187ca7ec40SGreg Kroah-Hartman 	node->dev.groups = node_dev_groups;
61910fbcf4cSKay Sievers 	error = device_register(&node->dev);
6201da177e4SLinus Torvalds 
621c1cc0d51SArvind Yadav 	if (error)
622c1cc0d51SArvind Yadav 		put_device(&node->dev);
623c1cc0d51SArvind Yadav 	else {
6249a305230SLee Schermerhorn 		hugetlb_register_node(node);
625ed4a6d7fSMel Gorman 
626ed4a6d7fSMel Gorman 		compaction_register_node(node);
6271da177e4SLinus Torvalds 	}
6281da177e4SLinus Torvalds 	return error;
6291da177e4SLinus Torvalds }
6301da177e4SLinus Torvalds 
6314b45099bSKeiichiro Tokunaga /**
6324b45099bSKeiichiro Tokunaga  * unregister_node - unregister a node device
6334b45099bSKeiichiro Tokunaga  * @node: node going away
6344b45099bSKeiichiro Tokunaga  *
6354b45099bSKeiichiro Tokunaga  * Unregisters a node device @node.  All the devices on the node must be
6364b45099bSKeiichiro Tokunaga  * unregistered before calling this function.
6374b45099bSKeiichiro Tokunaga  */
6384b45099bSKeiichiro Tokunaga void unregister_node(struct node *node)
6394b45099bSKeiichiro Tokunaga {
6404faf8d95SLee Schermerhorn 	hugetlb_unregister_node(node);		/* no-op, if memoryless node */
64108d9dbe7SKeith Busch 	node_remove_accesses(node);
642acc02a10SKeith Busch 	node_remove_caches(node);
64310fbcf4cSKay Sievers 	device_unregister(&node->dev);
6444b45099bSKeiichiro Tokunaga }
6454b45099bSKeiichiro Tokunaga 
6468732794bSWen Congyang struct node *node_devices[MAX_NUMNODES];
6470fc44159SYasunori Goto 
64876b67ed9SKAMEZAWA Hiroyuki /*
64976b67ed9SKAMEZAWA Hiroyuki  * register cpu under node
65076b67ed9SKAMEZAWA Hiroyuki  */
65176b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid)
65276b67ed9SKAMEZAWA Hiroyuki {
6531830794aSAlex Chiang 	int ret;
6548a25a2fdSKay Sievers 	struct device *obj;
655f8246f31SAlex Chiang 
656f8246f31SAlex Chiang 	if (!node_online(nid))
657f8246f31SAlex Chiang 		return 0;
658f8246f31SAlex Chiang 
6598a25a2fdSKay Sievers 	obj = get_cpu_device(cpu);
66076b67ed9SKAMEZAWA Hiroyuki 	if (!obj)
66176b67ed9SKAMEZAWA Hiroyuki 		return 0;
662f8246f31SAlex Chiang 
6638732794bSWen Congyang 	ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
66476b67ed9SKAMEZAWA Hiroyuki 				&obj->kobj,
66576b67ed9SKAMEZAWA Hiroyuki 				kobject_name(&obj->kobj));
6661830794aSAlex Chiang 	if (ret)
6671830794aSAlex Chiang 		return ret;
6681830794aSAlex Chiang 
6691830794aSAlex Chiang 	return sysfs_create_link(&obj->kobj,
6708732794bSWen Congyang 				 &node_devices[nid]->dev.kobj,
6718732794bSWen Congyang 				 kobject_name(&node_devices[nid]->dev.kobj));
67276b67ed9SKAMEZAWA Hiroyuki }
67376b67ed9SKAMEZAWA Hiroyuki 
67408d9dbe7SKeith Busch /**
67508d9dbe7SKeith Busch  * register_memory_node_under_compute_node - link memory node to its compute
67608d9dbe7SKeith Busch  *					     node for a given access class.
67758cb346cSMauro Carvalho Chehab  * @mem_nid:	Memory node number
67858cb346cSMauro Carvalho Chehab  * @cpu_nid:	Cpu  node number
67908d9dbe7SKeith Busch  * @access:	Access class to register
68008d9dbe7SKeith Busch  *
68108d9dbe7SKeith Busch  * Description:
68208d9dbe7SKeith Busch  * 	For use with platforms that may have separate memory and compute nodes.
68308d9dbe7SKeith Busch  * 	This function will export node relationships linking which memory
68408d9dbe7SKeith Busch  * 	initiator nodes can access memory targets at a given ranked access
68508d9dbe7SKeith Busch  * 	class.
68608d9dbe7SKeith Busch  */
68708d9dbe7SKeith Busch int register_memory_node_under_compute_node(unsigned int mem_nid,
68808d9dbe7SKeith Busch 					    unsigned int cpu_nid,
68908d9dbe7SKeith Busch 					    unsigned access)
69008d9dbe7SKeith Busch {
69108d9dbe7SKeith Busch 	struct node *init_node, *targ_node;
69208d9dbe7SKeith Busch 	struct node_access_nodes *initiator, *target;
69308d9dbe7SKeith Busch 	int ret;
69408d9dbe7SKeith Busch 
69508d9dbe7SKeith Busch 	if (!node_online(cpu_nid) || !node_online(mem_nid))
69608d9dbe7SKeith Busch 		return -ENODEV;
69708d9dbe7SKeith Busch 
69808d9dbe7SKeith Busch 	init_node = node_devices[cpu_nid];
69908d9dbe7SKeith Busch 	targ_node = node_devices[mem_nid];
70008d9dbe7SKeith Busch 	initiator = node_init_node_access(init_node, access);
70108d9dbe7SKeith Busch 	target = node_init_node_access(targ_node, access);
70208d9dbe7SKeith Busch 	if (!initiator || !target)
70308d9dbe7SKeith Busch 		return -ENOMEM;
70408d9dbe7SKeith Busch 
70508d9dbe7SKeith Busch 	ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
70608d9dbe7SKeith Busch 				      &targ_node->dev.kobj,
70708d9dbe7SKeith Busch 				      dev_name(&targ_node->dev));
70808d9dbe7SKeith Busch 	if (ret)
70908d9dbe7SKeith Busch 		return ret;
71008d9dbe7SKeith Busch 
71108d9dbe7SKeith Busch 	ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
71208d9dbe7SKeith Busch 				      &init_node->dev.kobj,
71308d9dbe7SKeith Busch 				      dev_name(&init_node->dev));
71408d9dbe7SKeith Busch 	if (ret)
71508d9dbe7SKeith Busch 		goto err;
71608d9dbe7SKeith Busch 
71708d9dbe7SKeith Busch 	return 0;
71808d9dbe7SKeith Busch  err:
71908d9dbe7SKeith Busch 	sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
72008d9dbe7SKeith Busch 				     dev_name(&targ_node->dev));
72108d9dbe7SKeith Busch 	return ret;
72208d9dbe7SKeith Busch }
72308d9dbe7SKeith Busch 
72476b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
72576b67ed9SKAMEZAWA Hiroyuki {
7268a25a2fdSKay Sievers 	struct device *obj;
727b9d52dadSAlex Chiang 
728b9d52dadSAlex Chiang 	if (!node_online(nid))
729b9d52dadSAlex Chiang 		return 0;
730b9d52dadSAlex Chiang 
7318a25a2fdSKay Sievers 	obj = get_cpu_device(cpu);
732b9d52dadSAlex Chiang 	if (!obj)
733b9d52dadSAlex Chiang 		return 0;
734b9d52dadSAlex Chiang 
7358732794bSWen Congyang 	sysfs_remove_link(&node_devices[nid]->dev.kobj,
73676b67ed9SKAMEZAWA Hiroyuki 			  kobject_name(&obj->kobj));
7371830794aSAlex Chiang 	sysfs_remove_link(&obj->kobj,
7388732794bSWen Congyang 			  kobject_name(&node_devices[nid]->dev.kobj));
739b9d52dadSAlex Chiang 
74076b67ed9SKAMEZAWA Hiroyuki 	return 0;
74176b67ed9SKAMEZAWA Hiroyuki }
74276b67ed9SKAMEZAWA Hiroyuki 
743c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
744bd721ea7SFabian Frederick static int __ref get_nid_for_pfn(unsigned long pfn)
745c04fc586SGary Hade {
746c04fc586SGary Hade 	if (!pfn_valid_within(pfn))
747c04fc586SGary Hade 		return -1;
7483a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
7498cdde385SThomas Gleixner 	if (system_state < SYSTEM_RUNNING)
7503a80a7faSMel Gorman 		return early_pfn_to_nid(pfn);
7513a80a7faSMel Gorman #endif
752c04fc586SGary Hade 	return pfn_to_nid(pfn);
753c04fc586SGary Hade }
754c04fc586SGary Hade 
755c04fc586SGary Hade /* register memory section under specified node if it spans that node */
7564fbce633SOscar Salvador int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
757c04fc586SGary Hade {
7584fbce633SOscar Salvador 	int ret, nid = *(int *)arg;
759c04fc586SGary Hade 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
760c04fc586SGary Hade 
761d0dc12e8SPavel Tatashin 	mem_blk->nid = nid;
762d3360164SNathan Fontenot 
763d3360164SNathan Fontenot 	sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
764d3360164SNathan Fontenot 	sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
765d3360164SNathan Fontenot 	sect_end_pfn += PAGES_PER_SECTION - 1;
766c04fc586SGary Hade 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
767c04fc586SGary Hade 		int page_nid;
768c04fc586SGary Hade 
76904697858SYinghai Lu 		/*
77004697858SYinghai Lu 		 * memory block could have several absent sections from start.
77104697858SYinghai Lu 		 * skip pfn range from absent section
77204697858SYinghai Lu 		 */
77304697858SYinghai Lu 		if (!pfn_present(pfn)) {
77404697858SYinghai Lu 			pfn = round_down(pfn + PAGES_PER_SECTION,
77504697858SYinghai Lu 					 PAGES_PER_SECTION) - 1;
77604697858SYinghai Lu 			continue;
77704697858SYinghai Lu 		}
77804697858SYinghai Lu 
779fc44f7f9SPavel Tatashin 		/*
780fc44f7f9SPavel Tatashin 		 * We need to check if page belongs to nid only for the boot
781fc44f7f9SPavel Tatashin 		 * case, during hotplug we know that all pages in the memory
782fc44f7f9SPavel Tatashin 		 * block belong to the same node.
783fc44f7f9SPavel Tatashin 		 */
7844fbce633SOscar Salvador 		if (system_state == SYSTEM_BOOTING) {
785c04fc586SGary Hade 			page_nid = get_nid_for_pfn(pfn);
786c04fc586SGary Hade 			if (page_nid < 0)
787c04fc586SGary Hade 				continue;
788c04fc586SGary Hade 			if (page_nid != nid)
789c04fc586SGary Hade 				continue;
790fc44f7f9SPavel Tatashin 		}
7918732794bSWen Congyang 		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
79210fbcf4cSKay Sievers 					&mem_blk->dev.kobj,
79310fbcf4cSKay Sievers 					kobject_name(&mem_blk->dev.kobj));
794dee5d0d5SAlex Chiang 		if (ret)
795dee5d0d5SAlex Chiang 			return ret;
796dee5d0d5SAlex Chiang 
79710fbcf4cSKay Sievers 		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
7988732794bSWen Congyang 				&node_devices[nid]->dev.kobj,
7998732794bSWen Congyang 				kobject_name(&node_devices[nid]->dev.kobj));
800c04fc586SGary Hade 	}
801c04fc586SGary Hade 	/* mem section does not span the specified node */
802c04fc586SGary Hade 	return 0;
803c04fc586SGary Hade }
804c04fc586SGary Hade 
8054c4b7f9bSDavid Hildenbrand /*
8064c4b7f9bSDavid Hildenbrand  * Unregister memory block device under all nodes that it spans.
8074c4b7f9bSDavid Hildenbrand  */
8084c4b7f9bSDavid Hildenbrand int unregister_memory_block_under_nodes(struct memory_block *mem_blk)
809c04fc586SGary Hade {
8109ae49fabSDavid Rientjes 	NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL);
811c04fc586SGary Hade 	unsigned long pfn, sect_start_pfn, sect_end_pfn;
812c04fc586SGary Hade 
8139ae49fabSDavid Rientjes 	if (!mem_blk) {
8149ae49fabSDavid Rientjes 		NODEMASK_FREE(unlinked_nodes);
815c04fc586SGary Hade 		return -EFAULT;
8169ae49fabSDavid Rientjes 	}
8179ae49fabSDavid Rientjes 	if (!unlinked_nodes)
8189ae49fabSDavid Rientjes 		return -ENOMEM;
8199ae49fabSDavid Rientjes 	nodes_clear(*unlinked_nodes);
820d3360164SNathan Fontenot 
8214c4b7f9bSDavid Hildenbrand 	sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
8224c4b7f9bSDavid Hildenbrand 	sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
823c04fc586SGary Hade 	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
82447504980SRoel Kluin 		int nid;
825c04fc586SGary Hade 
826c04fc586SGary Hade 		nid = get_nid_for_pfn(pfn);
827c04fc586SGary Hade 		if (nid < 0)
828c04fc586SGary Hade 			continue;
829c04fc586SGary Hade 		if (!node_online(nid))
830c04fc586SGary Hade 			continue;
8319ae49fabSDavid Rientjes 		if (node_test_and_set(nid, *unlinked_nodes))
832c04fc586SGary Hade 			continue;
8338732794bSWen Congyang 		sysfs_remove_link(&node_devices[nid]->dev.kobj,
83410fbcf4cSKay Sievers 			 kobject_name(&mem_blk->dev.kobj));
83510fbcf4cSKay Sievers 		sysfs_remove_link(&mem_blk->dev.kobj,
8368732794bSWen Congyang 			 kobject_name(&node_devices[nid]->dev.kobj));
837c04fc586SGary Hade 	}
8389ae49fabSDavid Rientjes 	NODEMASK_FREE(unlinked_nodes);
839c04fc586SGary Hade 	return 0;
840c04fc586SGary Hade }
841c04fc586SGary Hade 
8424fbce633SOscar Salvador int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
843c04fc586SGary Hade {
8444fbce633SOscar Salvador 	return walk_memory_range(start_pfn, end_pfn, (void *)&nid,
8454fbce633SOscar Salvador 					register_mem_sect_under_node);
846c04fc586SGary Hade }
8474faf8d95SLee Schermerhorn 
84839da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS
8494faf8d95SLee Schermerhorn /*
8504faf8d95SLee Schermerhorn  * Handle per node hstate attribute [un]registration on transistions
8514faf8d95SLee Schermerhorn  * to/from memoryless state.
8524faf8d95SLee Schermerhorn  */
85339da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work)
85439da08cbSLee Schermerhorn {
85539da08cbSLee Schermerhorn 	struct node *node = container_of(work, struct node, node_work);
85639da08cbSLee Schermerhorn 
85739da08cbSLee Schermerhorn 	/*
85839da08cbSLee Schermerhorn 	 * We only get here when a node transitions to/from memoryless state.
85939da08cbSLee Schermerhorn 	 * We can detect which transition occurred by examining whether the
86039da08cbSLee Schermerhorn 	 * node has memory now.  hugetlb_register_node() already check this
86139da08cbSLee Schermerhorn 	 * so we try to register the attributes.  If that fails, then the
86239da08cbSLee Schermerhorn 	 * node has transitioned to memoryless, try to unregister the
86339da08cbSLee Schermerhorn 	 * attributes.
86439da08cbSLee Schermerhorn 	 */
86539da08cbSLee Schermerhorn 	if (!hugetlb_register_node(node))
86639da08cbSLee Schermerhorn 		hugetlb_unregister_node(node);
86739da08cbSLee Schermerhorn }
86839da08cbSLee Schermerhorn 
86939da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid)
87039da08cbSLee Schermerhorn {
8718732794bSWen Congyang 	INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
87239da08cbSLee Schermerhorn }
8734faf8d95SLee Schermerhorn 
8744faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self,
8754faf8d95SLee Schermerhorn 				unsigned long action, void *arg)
8764faf8d95SLee Schermerhorn {
8774faf8d95SLee Schermerhorn 	struct memory_notify *mnb = arg;
8784faf8d95SLee Schermerhorn 	int nid = mnb->status_change_nid;
8794faf8d95SLee Schermerhorn 
8804faf8d95SLee Schermerhorn 	switch (action) {
88139da08cbSLee Schermerhorn 	case MEM_ONLINE:
88239da08cbSLee Schermerhorn 	case MEM_OFFLINE:
88339da08cbSLee Schermerhorn 		/*
88439da08cbSLee Schermerhorn 		 * offload per node hstate [un]registration to a work thread
88539da08cbSLee Schermerhorn 		 * when transitioning to/from memoryless state.
88639da08cbSLee Schermerhorn 		 */
8874faf8d95SLee Schermerhorn 		if (nid != NUMA_NO_NODE)
8888732794bSWen Congyang 			schedule_work(&node_devices[nid]->node_work);
8894faf8d95SLee Schermerhorn 		break;
89039da08cbSLee Schermerhorn 
8914faf8d95SLee Schermerhorn 	case MEM_GOING_ONLINE:
8924faf8d95SLee Schermerhorn 	case MEM_GOING_OFFLINE:
8934faf8d95SLee Schermerhorn 	case MEM_CANCEL_ONLINE:
8944faf8d95SLee Schermerhorn 	case MEM_CANCEL_OFFLINE:
8954faf8d95SLee Schermerhorn 	default:
8964faf8d95SLee Schermerhorn 		break;
8974faf8d95SLee Schermerhorn 	}
8984faf8d95SLee Schermerhorn 
8994faf8d95SLee Schermerhorn 	return NOTIFY_OK;
9004faf8d95SLee Schermerhorn }
90139da08cbSLee Schermerhorn #endif	/* CONFIG_HUGETLBFS */
90239da08cbSLee Schermerhorn #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
90339da08cbSLee Schermerhorn 
90439da08cbSLee Schermerhorn #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
90539da08cbSLee Schermerhorn     !defined(CONFIG_HUGETLBFS)
9064faf8d95SLee Schermerhorn static inline int node_memory_callback(struct notifier_block *self,
9074faf8d95SLee Schermerhorn 				unsigned long action, void *arg)
9084faf8d95SLee Schermerhorn {
9094faf8d95SLee Schermerhorn 	return NOTIFY_OK;
9104faf8d95SLee Schermerhorn }
91139da08cbSLee Schermerhorn 
/* Stub: there is no per-node hugetlb work item in this configuration. */
static void init_node_hugetlb_work(int nid)
{
}
91339da08cbSLee Schermerhorn 
91439da08cbSLee Schermerhorn #endif
915c04fc586SGary Hade 
9169037a993SMichal Hocko int __register_one_node(int nid)
9170fc44159SYasunori Goto {
9189037a993SMichal Hocko 	int error;
9199037a993SMichal Hocko 	int cpu;
9200fc44159SYasunori Goto 
9218732794bSWen Congyang 	node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
9228732794bSWen Congyang 	if (!node_devices[nid])
9238732794bSWen Congyang 		return -ENOMEM;
9248732794bSWen Congyang 
925a7be6e5aSDou Liyang 	error = register_node(node_devices[nid], nid);
92676b67ed9SKAMEZAWA Hiroyuki 
92776b67ed9SKAMEZAWA Hiroyuki 	/* link cpu under this node */
92876b67ed9SKAMEZAWA Hiroyuki 	for_each_present_cpu(cpu) {
92976b67ed9SKAMEZAWA Hiroyuki 		if (cpu_to_node(cpu) == nid)
93076b67ed9SKAMEZAWA Hiroyuki 			register_cpu_under_node(cpu, nid);
93176b67ed9SKAMEZAWA Hiroyuki 	}
932c04fc586SGary Hade 
93308d9dbe7SKeith Busch 	INIT_LIST_HEAD(&node_devices[nid]->access_list);
93439da08cbSLee Schermerhorn 	/* initialize work queue for memory hot plug */
93539da08cbSLee Schermerhorn 	init_node_hugetlb_work(nid);
936acc02a10SKeith Busch 	node_init_caches(nid);
9370fc44159SYasunori Goto 
9380fc44159SYasunori Goto 	return error;
9390fc44159SYasunori Goto }
9400fc44159SYasunori Goto 
9410fc44159SYasunori Goto void unregister_one_node(int nid)
9420fc44159SYasunori Goto {
94392d585efSXishi Qiu 	if (!node_devices[nid])
94492d585efSXishi Qiu 		return;
94592d585efSXishi Qiu 
9468732794bSWen Congyang 	unregister_node(node_devices[nid]);
9478732794bSWen Congyang 	node_devices[nid] = NULL;
9480fc44159SYasunori Goto }
9490fc44159SYasunori Goto 
950bde631a5SLee Schermerhorn /*
951bde631a5SLee Schermerhorn  * node states attributes
952bde631a5SLee Schermerhorn  */
953bde631a5SLee Schermerhorn 
954bde631a5SLee Schermerhorn static ssize_t print_nodes_state(enum node_states state, char *buf)
955bde631a5SLee Schermerhorn {
956bde631a5SLee Schermerhorn 	int n;
957bde631a5SLee Schermerhorn 
958f799b1a7STejun Heo 	n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
959f799b1a7STejun Heo 		      nodemask_pr_args(&node_states[state]));
960f6238818SRyota Ozaki 	buf[n++] = '\n';
961f6238818SRyota Ozaki 	buf[n] = '\0';
962bde631a5SLee Schermerhorn 	return n;
963bde631a5SLee Schermerhorn }
964bde631a5SLee Schermerhorn 
965b15f562fSAndi Kleen struct node_attr {
96610fbcf4cSKay Sievers 	struct device_attribute attr;
967b15f562fSAndi Kleen 	enum node_states state;
968b15f562fSAndi Kleen };
969b15f562fSAndi Kleen 
97010fbcf4cSKay Sievers static ssize_t show_node_state(struct device *dev,
97110fbcf4cSKay Sievers 			       struct device_attribute *attr, char *buf)
972bde631a5SLee Schermerhorn {
973b15f562fSAndi Kleen 	struct node_attr *na = container_of(attr, struct node_attr, attr);
974b15f562fSAndi Kleen 	return print_nodes_state(na->state, buf);
975bde631a5SLee Schermerhorn }
976bde631a5SLee Schermerhorn 
977b15f562fSAndi Kleen #define _NODE_ATTR(name, state) \
97810fbcf4cSKay Sievers 	{ __ATTR(name, 0444, show_node_state, NULL), state }
979bde631a5SLee Schermerhorn 
980b15f562fSAndi Kleen static struct node_attr node_state_attr[] = {
981fcf07d22SLai Jiangshan 	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
982fcf07d22SLai Jiangshan 	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
983fcf07d22SLai Jiangshan 	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
984bde631a5SLee Schermerhorn #ifdef CONFIG_HIGHMEM
985fcf07d22SLai Jiangshan 	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
986bde631a5SLee Schermerhorn #endif
98720b2f52bSLai Jiangshan 	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
988fcf07d22SLai Jiangshan 	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
989bde631a5SLee Schermerhorn };
990bde631a5SLee Schermerhorn 
99110fbcf4cSKay Sievers static struct attribute *node_state_attrs[] = {
992fcf07d22SLai Jiangshan 	&node_state_attr[N_POSSIBLE].attr.attr,
993fcf07d22SLai Jiangshan 	&node_state_attr[N_ONLINE].attr.attr,
994fcf07d22SLai Jiangshan 	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
9953701cde6SAndi Kleen #ifdef CONFIG_HIGHMEM
996fcf07d22SLai Jiangshan 	&node_state_attr[N_HIGH_MEMORY].attr.attr,
9973701cde6SAndi Kleen #endif
99820b2f52bSLai Jiangshan 	&node_state_attr[N_MEMORY].attr.attr,
999fcf07d22SLai Jiangshan 	&node_state_attr[N_CPU].attr.attr,
10003701cde6SAndi Kleen 	NULL
10013701cde6SAndi Kleen };
1002bde631a5SLee Schermerhorn 
100310fbcf4cSKay Sievers static struct attribute_group memory_root_attr_group = {
100410fbcf4cSKay Sievers 	.attrs = node_state_attrs,
100510fbcf4cSKay Sievers };
100610fbcf4cSKay Sievers 
100710fbcf4cSKay Sievers static const struct attribute_group *cpu_root_attr_groups[] = {
100810fbcf4cSKay Sievers 	&memory_root_attr_group,
100910fbcf4cSKay Sievers 	NULL,
101010fbcf4cSKay Sievers };
101110fbcf4cSKay Sievers 
10124faf8d95SLee Schermerhorn #define NODE_CALLBACK_PRI	2	/* lower than SLAB */
10134b45099bSKeiichiro Tokunaga static int __init register_node_type(void)
10141da177e4SLinus Torvalds {
1015bde631a5SLee Schermerhorn 	int ret;
1016bde631a5SLee Schermerhorn 
10173701cde6SAndi Kleen  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
10183701cde6SAndi Kleen  	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
10193701cde6SAndi Kleen 
102010fbcf4cSKay Sievers 	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
10214faf8d95SLee Schermerhorn 	if (!ret) {
10226e259e7dSAndrew Morton 		static struct notifier_block node_memory_callback_nb = {
10236e259e7dSAndrew Morton 			.notifier_call = node_memory_callback,
10246e259e7dSAndrew Morton 			.priority = NODE_CALLBACK_PRI,
10256e259e7dSAndrew Morton 		};
10266e259e7dSAndrew Morton 		register_hotmemory_notifier(&node_memory_callback_nb);
10274faf8d95SLee Schermerhorn 	}
1028bde631a5SLee Schermerhorn 
1029bde631a5SLee Schermerhorn 	/*
1030bde631a5SLee Schermerhorn 	 * Note:  we're not going to unregister the node class if we fail
1031bde631a5SLee Schermerhorn 	 * to register the node state class attribute files.
1032bde631a5SLee Schermerhorn 	 */
1033bde631a5SLee Schermerhorn 	return ret;
10341da177e4SLinus Torvalds }
10351da177e4SLinus Torvalds postcore_initcall(register_node_type);
1036