1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 21da177e4SLinus Torvalds /* 310fbcf4cSKay Sievers * Basic Node interface support 41da177e4SLinus Torvalds */ 51da177e4SLinus Torvalds 61da177e4SLinus Torvalds #include <linux/module.h> 71da177e4SLinus Torvalds #include <linux/init.h> 81da177e4SLinus Torvalds #include <linux/mm.h> 9c04fc586SGary Hade #include <linux/memory.h> 10fa25c503SKOSAKI Motohiro #include <linux/vmstat.h> 116e259e7dSAndrew Morton #include <linux/notifier.h> 121da177e4SLinus Torvalds #include <linux/node.h> 131da177e4SLinus Torvalds #include <linux/hugetlb.h> 14ed4a6d7fSMel Gorman #include <linux/compaction.h> 151da177e4SLinus Torvalds #include <linux/cpumask.h> 161da177e4SLinus Torvalds #include <linux/topology.h> 171da177e4SLinus Torvalds #include <linux/nodemask.h> 1876b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h> 19bde631a5SLee Schermerhorn #include <linux/device.h> 2008d9dbe7SKeith Busch #include <linux/pm_runtime.h> 21af936a16SLee Schermerhorn #include <linux/swap.h> 2218e5b539STejun Heo #include <linux/slab.h> 231da177e4SLinus Torvalds 2410fbcf4cSKay Sievers static struct bus_type node_subsys = { 25af5ca3f4SKay Sievers .name = "node", 2610fbcf4cSKay Sievers .dev_name = "node", 271da177e4SLinus Torvalds }; 281da177e4SLinus Torvalds 291da177e4SLinus Torvalds 305aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) 311da177e4SLinus Torvalds { 32064f0e93SZhen Lei ssize_t n; 33064f0e93SZhen Lei cpumask_var_t mask; 341da177e4SLinus Torvalds struct node *node_dev = to_node(dev); 351da177e4SLinus Torvalds 3639106dcfSMike Travis /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ 3739106dcfSMike Travis BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 381da177e4SLinus Torvalds 39064f0e93SZhen Lei if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 40064f0e93SZhen Lei return 0; 41064f0e93SZhen Lei 42064f0e93SZhen Lei cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); 43064f0e93SZhen Lei n = cpumap_print_to_pagebuf(list, buf, mask); 44064f0e93SZhen Lei free_cpumask_var(mask); 45064f0e93SZhen Lei 46064f0e93SZhen Lei return n; 471da177e4SLinus Torvalds } 481da177e4SLinus Torvalds 4910fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev, 5010fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5139106dcfSMike Travis { 525aaba363SSudeep Holla return node_read_cpumap(dev, false, buf); 5339106dcfSMike Travis } 5410fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev, 5510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5639106dcfSMike Travis { 575aaba363SSudeep Holla return node_read_cpumap(dev, true, buf); 5839106dcfSMike Travis } 5939106dcfSMike Travis 6010fbcf4cSKay Sievers static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 6110fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 621da177e4SLinus Torvalds 6308d9dbe7SKeith Busch /** 6408d9dbe7SKeith Busch * struct node_access_nodes - Access class device to hold user visible 6508d9dbe7SKeith Busch * relationships to other nodes. 6608d9dbe7SKeith Busch * @dev: Device for this memory access class 6708d9dbe7SKeith Busch * @list_node: List element in the node's access list 6808d9dbe7SKeith Busch * @access: The access class rank 6958cb346cSMauro Carvalho Chehab * @hmem_attrs: Heterogeneous memory performance attributes 7008d9dbe7SKeith Busch */ 7108d9dbe7SKeith Busch struct node_access_nodes { 7208d9dbe7SKeith Busch struct device dev; 7308d9dbe7SKeith Busch struct list_head list_node; 7408d9dbe7SKeith Busch unsigned access; 75e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING 76e1cf33aaSKeith Busch struct node_hmem_attrs hmem_attrs; 77e1cf33aaSKeith Busch #endif 7808d9dbe7SKeith Busch }; 7908d9dbe7SKeith Busch #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) 8008d9dbe7SKeith Busch 8108d9dbe7SKeith Busch static struct attribute *node_init_access_node_attrs[] = { 8208d9dbe7SKeith Busch NULL, 8308d9dbe7SKeith Busch }; 8408d9dbe7SKeith Busch 8508d9dbe7SKeith Busch static struct attribute *node_targ_access_node_attrs[] = { 8608d9dbe7SKeith Busch NULL, 8708d9dbe7SKeith Busch }; 8808d9dbe7SKeith Busch 8908d9dbe7SKeith Busch static const struct attribute_group initiators = { 9008d9dbe7SKeith Busch .name = "initiators", 9108d9dbe7SKeith Busch .attrs = node_init_access_node_attrs, 9208d9dbe7SKeith Busch }; 9308d9dbe7SKeith Busch 9408d9dbe7SKeith Busch static const struct attribute_group targets = { 9508d9dbe7SKeith Busch .name = "targets", 9608d9dbe7SKeith Busch .attrs = node_targ_access_node_attrs, 9708d9dbe7SKeith Busch }; 9808d9dbe7SKeith Busch 9908d9dbe7SKeith Busch static const struct attribute_group *node_access_node_groups[] = { 10008d9dbe7SKeith Busch &initiators, 10108d9dbe7SKeith Busch &targets, 10208d9dbe7SKeith Busch NULL, 10308d9dbe7SKeith Busch }; 10408d9dbe7SKeith Busch 10508d9dbe7SKeith Busch static void node_remove_accesses(struct node *node) 10608d9dbe7SKeith Busch { 10708d9dbe7SKeith Busch struct node_access_nodes *c, *cnext; 10808d9dbe7SKeith Busch 10908d9dbe7SKeith Busch list_for_each_entry_safe(c, cnext, &node->access_list, list_node) { 11008d9dbe7SKeith Busch list_del(&c->list_node); 11108d9dbe7SKeith Busch device_unregister(&c->dev); 11208d9dbe7SKeith Busch } 11308d9dbe7SKeith Busch } 11408d9dbe7SKeith Busch 11508d9dbe7SKeith Busch static void node_access_release(struct device *dev) 11608d9dbe7SKeith Busch { 11708d9dbe7SKeith Busch kfree(to_access_nodes(dev)); 11808d9dbe7SKeith Busch } 11908d9dbe7SKeith Busch 12008d9dbe7SKeith Busch static struct node_access_nodes *node_init_node_access(struct node *node, 12108d9dbe7SKeith Busch unsigned access) 12208d9dbe7SKeith Busch { 12308d9dbe7SKeith Busch struct node_access_nodes *access_node; 12408d9dbe7SKeith Busch struct device *dev; 12508d9dbe7SKeith Busch 12608d9dbe7SKeith Busch list_for_each_entry(access_node, &node->access_list, list_node) 12708d9dbe7SKeith Busch if (access_node->access == access) 12808d9dbe7SKeith Busch return access_node; 12908d9dbe7SKeith Busch 13008d9dbe7SKeith Busch access_node = kzalloc(sizeof(*access_node), GFP_KERNEL); 13108d9dbe7SKeith Busch if (!access_node) 13208d9dbe7SKeith Busch return NULL; 13308d9dbe7SKeith Busch 13408d9dbe7SKeith Busch access_node->access = access; 13508d9dbe7SKeith Busch dev = &access_node->dev; 13608d9dbe7SKeith Busch dev->parent = &node->dev; 13708d9dbe7SKeith Busch dev->release = node_access_release; 13808d9dbe7SKeith Busch dev->groups = node_access_node_groups; 13908d9dbe7SKeith Busch if (dev_set_name(dev, "access%u", access)) 14008d9dbe7SKeith Busch goto free; 14108d9dbe7SKeith Busch 14208d9dbe7SKeith Busch if (device_register(dev)) 14308d9dbe7SKeith Busch goto free_name; 14408d9dbe7SKeith Busch 14508d9dbe7SKeith Busch pm_runtime_no_callbacks(dev); 14608d9dbe7SKeith Busch list_add_tail(&access_node->list_node, &node->access_list); 14708d9dbe7SKeith Busch return access_node; 14808d9dbe7SKeith Busch free_name: 14908d9dbe7SKeith Busch kfree_const(dev->kobj.name); 15008d9dbe7SKeith Busch free: 15108d9dbe7SKeith Busch kfree(access_node); 15208d9dbe7SKeith Busch return NULL; 15308d9dbe7SKeith Busch } 15408d9dbe7SKeith Busch 155e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING 156e1cf33aaSKeith Busch #define ACCESS_ATTR(name) \ 157e1cf33aaSKeith Busch static ssize_t name##_show(struct device *dev, \ 158e1cf33aaSKeith Busch struct device_attribute *attr, \ 159e1cf33aaSKeith Busch char *buf) \ 160e1cf33aaSKeith Busch { \ 161e1cf33aaSKeith Busch return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \ 162e1cf33aaSKeith Busch } \ 163e1cf33aaSKeith Busch static DEVICE_ATTR_RO(name); 164e1cf33aaSKeith Busch 165e1cf33aaSKeith Busch ACCESS_ATTR(read_bandwidth) 166e1cf33aaSKeith Busch ACCESS_ATTR(read_latency) 167e1cf33aaSKeith Busch ACCESS_ATTR(write_bandwidth) 168e1cf33aaSKeith Busch ACCESS_ATTR(write_latency) 169e1cf33aaSKeith Busch 170e1cf33aaSKeith Busch static struct attribute *access_attrs[] = { 171e1cf33aaSKeith Busch &dev_attr_read_bandwidth.attr, 172e1cf33aaSKeith Busch &dev_attr_read_latency.attr, 173e1cf33aaSKeith Busch &dev_attr_write_bandwidth.attr, 174e1cf33aaSKeith Busch &dev_attr_write_latency.attr, 175e1cf33aaSKeith Busch NULL, 176e1cf33aaSKeith Busch }; 177e1cf33aaSKeith Busch 178e1cf33aaSKeith Busch /** 179e1cf33aaSKeith Busch * node_set_perf_attrs - Set the performance values for given access class 180e1cf33aaSKeith Busch * @nid: Node identifier to be set 181e1cf33aaSKeith Busch * @hmem_attrs: Heterogeneous memory performance attributes 182e1cf33aaSKeith Busch * @access: The access class the for the given attributes 183e1cf33aaSKeith Busch */ 184e1cf33aaSKeith Busch void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, 185e1cf33aaSKeith Busch unsigned access) 186e1cf33aaSKeith Busch { 187e1cf33aaSKeith Busch struct node_access_nodes *c; 188e1cf33aaSKeith Busch struct node *node; 189e1cf33aaSKeith Busch int i; 190e1cf33aaSKeith Busch 191e1cf33aaSKeith Busch if (WARN_ON_ONCE(!node_online(nid))) 192e1cf33aaSKeith Busch return; 193e1cf33aaSKeith Busch 194e1cf33aaSKeith Busch node = node_devices[nid]; 195e1cf33aaSKeith Busch c = node_init_node_access(node, access); 196e1cf33aaSKeith Busch if (!c) 197e1cf33aaSKeith Busch return; 198e1cf33aaSKeith Busch 199e1cf33aaSKeith Busch c->hmem_attrs = *hmem_attrs; 200e1cf33aaSKeith Busch for (i = 0; access_attrs[i] != NULL; i++) { 201e1cf33aaSKeith Busch if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], 202e1cf33aaSKeith Busch "initiators")) { 203e1cf33aaSKeith Busch pr_info("failed to add performance attribute to node %d\n", 204e1cf33aaSKeith Busch nid); 205e1cf33aaSKeith Busch break; 206e1cf33aaSKeith Busch } 207e1cf33aaSKeith Busch } 208e1cf33aaSKeith Busch } 209acc02a10SKeith Busch 210acc02a10SKeith Busch /** 211acc02a10SKeith Busch * struct node_cache_info - Internal tracking for memory node caches 212acc02a10SKeith Busch * @dev: Device represeting the cache level 213acc02a10SKeith Busch * @node: List element for tracking in the node 214acc02a10SKeith Busch * @cache_attrs:Attributes for this cache level 215acc02a10SKeith Busch */ 216acc02a10SKeith Busch struct node_cache_info { 217acc02a10SKeith Busch struct device dev; 218acc02a10SKeith Busch struct list_head node; 219acc02a10SKeith Busch struct node_cache_attrs cache_attrs; 220acc02a10SKeith Busch }; 221acc02a10SKeith Busch #define to_cache_info(device) container_of(device, struct node_cache_info, dev) 222acc02a10SKeith Busch 223acc02a10SKeith Busch #define CACHE_ATTR(name, fmt) \ 224acc02a10SKeith Busch static ssize_t name##_show(struct device *dev, \ 225acc02a10SKeith Busch struct device_attribute *attr, \ 226acc02a10SKeith Busch char *buf) \ 227acc02a10SKeith Busch { \ 228acc02a10SKeith Busch return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\ 229acc02a10SKeith Busch } \ 230acc02a10SKeith Busch DEVICE_ATTR_RO(name); 231acc02a10SKeith Busch 232acc02a10SKeith Busch CACHE_ATTR(size, "%llu") 233acc02a10SKeith Busch CACHE_ATTR(line_size, "%u") 234acc02a10SKeith Busch CACHE_ATTR(indexing, "%u") 235acc02a10SKeith Busch CACHE_ATTR(write_policy, "%u") 236acc02a10SKeith Busch 237acc02a10SKeith Busch static struct attribute *cache_attrs[] = { 238acc02a10SKeith Busch &dev_attr_indexing.attr, 239acc02a10SKeith Busch &dev_attr_size.attr, 240acc02a10SKeith Busch &dev_attr_line_size.attr, 241acc02a10SKeith Busch &dev_attr_write_policy.attr, 242acc02a10SKeith Busch NULL, 243acc02a10SKeith Busch }; 244acc02a10SKeith Busch ATTRIBUTE_GROUPS(cache); 245acc02a10SKeith Busch 246acc02a10SKeith Busch static void node_cache_release(struct device *dev) 247acc02a10SKeith Busch { 248acc02a10SKeith Busch kfree(dev); 249acc02a10SKeith Busch } 250acc02a10SKeith Busch 251acc02a10SKeith Busch static void node_cacheinfo_release(struct device *dev) 252acc02a10SKeith Busch { 253acc02a10SKeith Busch struct node_cache_info *info = to_cache_info(dev); 254acc02a10SKeith Busch kfree(info); 255acc02a10SKeith Busch } 256acc02a10SKeith Busch 257acc02a10SKeith Busch static void node_init_cache_dev(struct node *node) 258acc02a10SKeith Busch { 259acc02a10SKeith Busch struct device *dev; 260acc02a10SKeith Busch 261acc02a10SKeith Busch dev = kzalloc(sizeof(*dev), GFP_KERNEL); 262acc02a10SKeith Busch if (!dev) 263acc02a10SKeith Busch return; 264acc02a10SKeith Busch 265acc02a10SKeith Busch dev->parent = &node->dev; 266acc02a10SKeith Busch dev->release = node_cache_release; 267acc02a10SKeith Busch if (dev_set_name(dev, "memory_side_cache")) 268acc02a10SKeith Busch goto free_dev; 269acc02a10SKeith Busch 270acc02a10SKeith Busch if (device_register(dev)) 271acc02a10SKeith Busch goto free_name; 272acc02a10SKeith Busch 273acc02a10SKeith Busch pm_runtime_no_callbacks(dev); 274acc02a10SKeith Busch node->cache_dev = dev; 275acc02a10SKeith Busch return; 276acc02a10SKeith Busch free_name: 277acc02a10SKeith Busch kfree_const(dev->kobj.name); 278acc02a10SKeith Busch free_dev: 279acc02a10SKeith Busch kfree(dev); 280acc02a10SKeith Busch } 281acc02a10SKeith Busch 282acc02a10SKeith Busch /** 283acc02a10SKeith Busch * node_add_cache() - add cache attribute to a memory node 284acc02a10SKeith Busch * @nid: Node identifier that has new cache attributes 285acc02a10SKeith Busch * @cache_attrs: Attributes for the cache being added 286acc02a10SKeith Busch */ 287acc02a10SKeith Busch void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) 288acc02a10SKeith Busch { 289acc02a10SKeith Busch struct node_cache_info *info; 290acc02a10SKeith Busch struct device *dev; 291acc02a10SKeith Busch struct node *node; 292acc02a10SKeith Busch 293acc02a10SKeith Busch if (!node_online(nid) || !node_devices[nid]) 294acc02a10SKeith Busch return; 295acc02a10SKeith Busch 296acc02a10SKeith Busch node = node_devices[nid]; 297acc02a10SKeith Busch list_for_each_entry(info, &node->cache_attrs, node) { 298acc02a10SKeith Busch if (info->cache_attrs.level == cache_attrs->level) { 299acc02a10SKeith Busch dev_warn(&node->dev, 300acc02a10SKeith Busch "attempt to add duplicate cache level:%d\n", 301acc02a10SKeith Busch cache_attrs->level); 302acc02a10SKeith Busch return; 303acc02a10SKeith Busch } 304acc02a10SKeith Busch } 305acc02a10SKeith Busch 306acc02a10SKeith Busch if (!node->cache_dev) 307acc02a10SKeith Busch node_init_cache_dev(node); 308acc02a10SKeith Busch if (!node->cache_dev) 309acc02a10SKeith Busch return; 310acc02a10SKeith Busch 311acc02a10SKeith Busch info = kzalloc(sizeof(*info), GFP_KERNEL); 312acc02a10SKeith Busch if (!info) 313acc02a10SKeith Busch return; 314acc02a10SKeith Busch 315acc02a10SKeith Busch dev = &info->dev; 316acc02a10SKeith Busch dev->parent = node->cache_dev; 317acc02a10SKeith Busch dev->release = node_cacheinfo_release; 318acc02a10SKeith Busch dev->groups = cache_groups; 319acc02a10SKeith Busch if (dev_set_name(dev, "index%d", cache_attrs->level)) 320acc02a10SKeith Busch goto free_cache; 321acc02a10SKeith Busch 322acc02a10SKeith Busch info->cache_attrs = *cache_attrs; 323acc02a10SKeith Busch if (device_register(dev)) { 324acc02a10SKeith Busch dev_warn(&node->dev, "failed to add cache level:%d\n", 325acc02a10SKeith Busch cache_attrs->level); 326acc02a10SKeith Busch goto free_name; 327acc02a10SKeith Busch } 328acc02a10SKeith Busch pm_runtime_no_callbacks(dev); 329acc02a10SKeith Busch list_add_tail(&info->node, &node->cache_attrs); 330acc02a10SKeith Busch return; 331acc02a10SKeith Busch free_name: 332acc02a10SKeith Busch kfree_const(dev->kobj.name); 333acc02a10SKeith Busch free_cache: 334acc02a10SKeith Busch kfree(info); 335acc02a10SKeith Busch } 336acc02a10SKeith Busch 337acc02a10SKeith Busch static void node_remove_caches(struct node *node) 338acc02a10SKeith Busch { 339acc02a10SKeith Busch struct node_cache_info *info, *next; 340acc02a10SKeith Busch 341acc02a10SKeith Busch if (!node->cache_dev) 342acc02a10SKeith Busch return; 343acc02a10SKeith Busch 344acc02a10SKeith Busch list_for_each_entry_safe(info, next, &node->cache_attrs, node) { 345acc02a10SKeith Busch list_del(&info->node); 346acc02a10SKeith Busch device_unregister(&info->dev); 347acc02a10SKeith Busch } 348acc02a10SKeith Busch device_unregister(node->cache_dev); 349acc02a10SKeith Busch } 350acc02a10SKeith Busch 351acc02a10SKeith Busch static void node_init_caches(unsigned int nid) 352acc02a10SKeith Busch { 353acc02a10SKeith Busch INIT_LIST_HEAD(&node_devices[nid]->cache_attrs); 354acc02a10SKeith Busch } 355acc02a10SKeith Busch #else 356acc02a10SKeith Busch static void node_init_caches(unsigned int nid) { } 357acc02a10SKeith Busch static void node_remove_caches(struct node *node) { } 358e1cf33aaSKeith Busch #endif 359e1cf33aaSKeith Busch 3601da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10)) 36110fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev, 36210fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 3631da177e4SLinus Torvalds { 3641da177e4SLinus Torvalds int n; 3651da177e4SLinus Torvalds int nid = dev->id; 366599d0c95SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 3671da177e4SLinus Torvalds struct sysinfo i; 36861f94e18SVlastimil Babka unsigned long sreclaimable, sunreclaimable; 3691da177e4SLinus Torvalds 3701da177e4SLinus Torvalds si_meminfo_node(&i, nid); 37161f94e18SVlastimil Babka sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE); 37261f94e18SVlastimil Babka sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE); 3737ee92255SKOSAKI Motohiro n = sprintf(buf, 3741da177e4SLinus Torvalds "Node %d MemTotal: %8lu kB\n" 3751da177e4SLinus Torvalds "Node %d MemFree: %8lu kB\n" 3761da177e4SLinus Torvalds "Node %d MemUsed: %8lu kB\n" 3771da177e4SLinus Torvalds "Node %d Active: %8lu kB\n" 3781da177e4SLinus Torvalds "Node %d Inactive: %8lu kB\n" 3794f98a2feSRik van Riel "Node %d Active(anon): %8lu kB\n" 3804f98a2feSRik van Riel "Node %d Inactive(anon): %8lu kB\n" 3814f98a2feSRik van Riel "Node %d Active(file): %8lu kB\n" 3824f98a2feSRik van Riel "Node %d Inactive(file): %8lu kB\n" 3835344b7e6SNick Piggin "Node %d Unevictable: %8lu kB\n" 3847ee92255SKOSAKI Motohiro "Node %d Mlocked: %8lu kB\n", 3857ee92255SKOSAKI Motohiro nid, K(i.totalram), 3867ee92255SKOSAKI Motohiro nid, K(i.freeram), 3877ee92255SKOSAKI Motohiro nid, K(i.totalram - i.freeram), 388599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + 389599d0c95SMel Gorman node_page_state(pgdat, NR_ACTIVE_FILE)), 390599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + 391599d0c95SMel Gorman node_page_state(pgdat, NR_INACTIVE_FILE)), 392599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), 393599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), 394599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), 395599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), 396599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), 39775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); 3987ee92255SKOSAKI Motohiro 399182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM 4007ee92255SKOSAKI Motohiro n += sprintf(buf + n, 4011da177e4SLinus Torvalds "Node %d HighTotal: %8lu kB\n" 4021da177e4SLinus Torvalds "Node %d HighFree: %8lu kB\n" 4031da177e4SLinus Torvalds "Node %d LowTotal: %8lu kB\n" 4047ee92255SKOSAKI Motohiro "Node %d LowFree: %8lu kB\n", 4057ee92255SKOSAKI Motohiro nid, K(i.totalhigh), 4067ee92255SKOSAKI Motohiro nid, K(i.freehigh), 4077ee92255SKOSAKI Motohiro nid, K(i.totalram - i.totalhigh), 4087ee92255SKOSAKI Motohiro nid, K(i.freeram - i.freehigh)); 409182e8e23SChristoph Lameter #endif 4107ee92255SKOSAKI Motohiro n += sprintf(buf + n, 411c07e02dbSMartin Hicks "Node %d Dirty: %8lu kB\n" 412c07e02dbSMartin Hicks "Node %d Writeback: %8lu kB\n" 413347ce434SChristoph Lameter "Node %d FilePages: %8lu kB\n" 414c07e02dbSMartin Hicks "Node %d Mapped: %8lu kB\n" 415f3dbd344SChristoph Lameter "Node %d AnonPages: %8lu kB\n" 4164b02108aSKOSAKI Motohiro "Node %d Shmem: %8lu kB\n" 417c6a7f572SKOSAKI Motohiro "Node %d KernelStack: %8lu kB\n" 418628d06a4SSami Tolvanen #ifdef CONFIG_SHADOW_CALL_STACK 419628d06a4SSami Tolvanen "Node %d ShadowCallStack:%8lu kB\n" 420628d06a4SSami Tolvanen #endif 421df849a15SChristoph Lameter "Node %d PageTables: %8lu kB\n" 422f5ef68daSAndrew Morton "Node %d NFS_Unstable: %8lu kB\n" 423d2c5e30cSChristoph Lameter "Node %d Bounce: %8lu kB\n" 424fc3ba692SMiklos Szeredi "Node %d WritebackTmp: %8lu kB\n" 42561f94e18SVlastimil Babka "Node %d KReclaimable: %8lu kB\n" 426972d1a7bSChristoph Lameter "Node %d Slab: %8lu kB\n" 427972d1a7bSChristoph Lameter "Node %d SReclaimable: %8lu kB\n" 42805b258e9SDavid Rientjes "Node %d SUnreclaim: %8lu kB\n" 42905b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 43005b258e9SDavid Rientjes "Node %d AnonHugePages: %8lu kB\n" 43165c45377SKirill A. Shutemov "Node %d ShmemHugePages: %8lu kB\n" 43265c45377SKirill A. Shutemov "Node %d ShmemPmdMapped: %8lu kB\n" 43360fbf0abSSong Liu "Node %d FileHugePages: %8lu kB\n" 43460fbf0abSSong Liu "Node %d FilePmdMapped: %8lu kB\n" 43505b258e9SDavid Rientjes #endif 43605b258e9SDavid Rientjes , 43711fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), 43811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK)), 43911fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_PAGES)), 44050658e2eSMel Gorman nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), 4414b9d0fabSMel Gorman nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), 442cc7452b6SRafael Aquini nid, K(i.sharedram), 443d30dd8beSAndy Lutomirski nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), 444628d06a4SSami Tolvanen #ifdef CONFIG_SHADOW_CALL_STACK 445628d06a4SSami Tolvanen nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB), 446628d06a4SSami Tolvanen #endif 44775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), 44811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), 44975ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), 45011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), 45161f94e18SVlastimil Babka nid, K(sreclaimable + 45261f94e18SVlastimil Babka node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), 45361f94e18SVlastimil Babka nid, K(sreclaimable + sunreclaimable), 45461f94e18SVlastimil Babka nid, K(sreclaimable), 45561f94e18SVlastimil Babka nid, K(sunreclaimable) 45605b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 45761f94e18SVlastimil Babka , 45811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_ANON_THPS) * 45965c45377SKirill A. Shutemov HPAGE_PMD_NR), 46011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * 46165c45377SKirill A. Shutemov HPAGE_PMD_NR), 46211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * 46360fbf0abSSong Liu HPAGE_PMD_NR), 46460fbf0abSSong Liu nid, K(node_page_state(pgdat, NR_FILE_THPS) * 46560fbf0abSSong Liu HPAGE_PMD_NR), 46660fbf0abSSong Liu nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) * 46761f94e18SVlastimil Babka HPAGE_PMD_NR) 46805b258e9SDavid Rientjes #endif 46961f94e18SVlastimil Babka ); 4701da177e4SLinus Torvalds n += hugetlb_report_node_meminfo(nid, buf + n); 4711da177e4SLinus Torvalds return n; 4721da177e4SLinus Torvalds } 4731da177e4SLinus Torvalds 4741da177e4SLinus Torvalds #undef K 47510fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); 4761da177e4SLinus Torvalds 47710fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev, 47810fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4791da177e4SLinus Torvalds { 4801da177e4SLinus Torvalds return sprintf(buf, 4811da177e4SLinus Torvalds "numa_hit %lu\n" 4821da177e4SLinus Torvalds "numa_miss %lu\n" 4831da177e4SLinus Torvalds "numa_foreign %lu\n" 4841da177e4SLinus Torvalds "interleave_hit %lu\n" 4851da177e4SLinus Torvalds "local_node %lu\n" 4861da177e4SLinus Torvalds "other_node %lu\n", 4873a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_HIT), 4883a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_MISS), 4893a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_FOREIGN), 4903a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), 4913a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_LOCAL), 4923a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_OTHER)); 4931da177e4SLinus Torvalds } 49410fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 4951da177e4SLinus Torvalds 49610fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev, 49710fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4982ac39037SMichael Rubin { 4992ac39037SMichael Rubin int nid = dev->id; 50075ef7184SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 501fa25c503SKOSAKI Motohiro int i; 502fa25c503SKOSAKI Motohiro int n = 0; 503fa25c503SKOSAKI Motohiro 504fa25c503SKOSAKI Motohiro for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 5059d7ea9a2SKonstantin Khlebnikov n += sprintf(buf+n, "%s %lu\n", zone_stat_name(i), 50675ef7184SMel Gorman sum_zone_node_page_state(nid, i)); 50775ef7184SMel Gorman 5083a321d2aSKemi Wang #ifdef CONFIG_NUMA 5093a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 5109d7ea9a2SKonstantin Khlebnikov n += sprintf(buf+n, "%s %lu\n", numa_stat_name(i), 5113a321d2aSKemi Wang sum_zone_numa_state(nid, i)); 5123a321d2aSKemi Wang #endif 5133a321d2aSKemi Wang 5143a321d2aSKemi Wang for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 5159d7ea9a2SKonstantin Khlebnikov n += sprintf(buf+n, "%s %lu\n", node_stat_name(i), 51675ef7184SMel Gorman node_page_state(pgdat, i)); 517fa25c503SKOSAKI Motohiro 518fa25c503SKOSAKI Motohiro return n; 5192ac39037SMichael Rubin } 52010fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); 5212ac39037SMichael Rubin 52210fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev, 52310fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5241da177e4SLinus Torvalds { 5251da177e4SLinus Torvalds int nid = dev->id; 5261da177e4SLinus Torvalds int len = 0; 5271da177e4SLinus Torvalds int i; 5281da177e4SLinus Torvalds 52912ee3c0aSDavid Rientjes /* 53012ee3c0aSDavid Rientjes * buf is currently PAGE_SIZE in length and each node needs 4 chars 53112ee3c0aSDavid Rientjes * at the most (distance + space or newline). 53212ee3c0aSDavid Rientjes */ 53312ee3c0aSDavid Rientjes BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds for_each_online_node(i) 5361da177e4SLinus Torvalds len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i)); 5371da177e4SLinus Torvalds 5381da177e4SLinus Torvalds len += sprintf(buf + len, "\n"); 5391da177e4SLinus Torvalds return len; 5401da177e4SLinus Torvalds } 54110fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); 5421da177e4SLinus Torvalds 5433c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = { 5443c9b8aafSTakashi Iwai &dev_attr_cpumap.attr, 5453c9b8aafSTakashi Iwai &dev_attr_cpulist.attr, 5463c9b8aafSTakashi Iwai &dev_attr_meminfo.attr, 5473c9b8aafSTakashi Iwai &dev_attr_numastat.attr, 5483c9b8aafSTakashi Iwai &dev_attr_distance.attr, 5493c9b8aafSTakashi Iwai &dev_attr_vmstat.attr, 5503c9b8aafSTakashi Iwai NULL 5513c9b8aafSTakashi Iwai }; 5527ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev); 5533c9b8aafSTakashi Iwai 5549a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS 5559a305230SLee Schermerhorn /* 5569a305230SLee Schermerhorn * hugetlbfs per node attributes registration interface: 5579a305230SLee Schermerhorn * When/if hugetlb[fs] subsystem initializes [sometime after this module], 5584faf8d95SLee Schermerhorn * it will register its per node attributes for all online nodes with 5594faf8d95SLee Schermerhorn * memory. It will also call register_hugetlbfs_with_node(), below, to 5609a305230SLee Schermerhorn * register its attribute registration functions with this node driver. 5619a305230SLee Schermerhorn * Once these hooks have been initialized, the node driver will call into 5629a305230SLee Schermerhorn * the hugetlb module to [un]register attributes for hot-plugged nodes. 5639a305230SLee Schermerhorn */ 5649a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node; 5659a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node; 5669a305230SLee Schermerhorn 56739da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node) 5689a305230SLee Schermerhorn { 5694faf8d95SLee Schermerhorn if (__hugetlb_register_node && 5708cebfcd0SLai Jiangshan node_state(node->dev.id, N_MEMORY)) { 5719a305230SLee Schermerhorn __hugetlb_register_node(node); 57239da08cbSLee Schermerhorn return true; 57339da08cbSLee Schermerhorn } 57439da08cbSLee Schermerhorn return false; 5759a305230SLee Schermerhorn } 5769a305230SLee Schermerhorn 5779a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) 5789a305230SLee Schermerhorn { 5799a305230SLee Schermerhorn if (__hugetlb_unregister_node) 5809a305230SLee Schermerhorn __hugetlb_unregister_node(node); 5819a305230SLee Schermerhorn } 5829a305230SLee Schermerhorn 5839a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister, 5849a305230SLee Schermerhorn node_registration_func_t unregister) 5859a305230SLee Schermerhorn { 5869a305230SLee Schermerhorn __hugetlb_register_node = doregister; 5879a305230SLee Schermerhorn __hugetlb_unregister_node = unregister; 5889a305230SLee Schermerhorn } 5899a305230SLee Schermerhorn #else 5909a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {} 5919a305230SLee Schermerhorn 5929a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {} 5939a305230SLee Schermerhorn #endif 5949a305230SLee Schermerhorn 5958c7b5b4eSYasuaki Ishimatsu static void node_device_release(struct device *dev) 5968c7b5b4eSYasuaki Ishimatsu { 5978c7b5b4eSYasuaki Ishimatsu struct node *node = to_node(dev); 5988c7b5b4eSYasuaki Ishimatsu 5998c7b5b4eSYasuaki Ishimatsu #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) 6008c7b5b4eSYasuaki Ishimatsu /* 6018c7b5b4eSYasuaki Ishimatsu * We schedule the work only when a memory section is 6028c7b5b4eSYasuaki Ishimatsu * onlined/offlined on this node. When we come here, 6038c7b5b4eSYasuaki Ishimatsu * all the memory on this node has been offlined, 6048c7b5b4eSYasuaki Ishimatsu * so we won't enqueue new work to this work. 6058c7b5b4eSYasuaki Ishimatsu * 6068c7b5b4eSYasuaki Ishimatsu * The work is using node->node_work, so we should 6078c7b5b4eSYasuaki Ishimatsu * flush work before freeing the memory. 6088c7b5b4eSYasuaki Ishimatsu */ 6098c7b5b4eSYasuaki Ishimatsu flush_work(&node->node_work); 6108c7b5b4eSYasuaki Ishimatsu #endif 6118c7b5b4eSYasuaki Ishimatsu kfree(node); 6128c7b5b4eSYasuaki Ishimatsu } 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds /* 615405ae7d3SRobert P. J. Day * register_node - Setup a sysfs device for a node. 6161da177e4SLinus Torvalds * @num - Node number to use when creating the device. 6171da177e4SLinus Torvalds * 6181da177e4SLinus Torvalds * Initialize and register the node device. 6191da177e4SLinus Torvalds */ 620a7be6e5aSDou Liyang static int register_node(struct node *node, int num) 6211da177e4SLinus Torvalds { 6221da177e4SLinus Torvalds int error; 6231da177e4SLinus Torvalds 62410fbcf4cSKay Sievers node->dev.id = num; 62510fbcf4cSKay Sievers node->dev.bus = &node_subsys; 6268c7b5b4eSYasuaki Ishimatsu node->dev.release = node_device_release; 6277ca7ec40SGreg Kroah-Hartman node->dev.groups = node_dev_groups; 62810fbcf4cSKay Sievers error = device_register(&node->dev); 6291da177e4SLinus Torvalds 630c1cc0d51SArvind Yadav if (error) 631c1cc0d51SArvind Yadav put_device(&node->dev); 632c1cc0d51SArvind Yadav else { 6339a305230SLee Schermerhorn hugetlb_register_node(node); 634ed4a6d7fSMel Gorman 635ed4a6d7fSMel Gorman compaction_register_node(node); 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds return error; 6381da177e4SLinus Torvalds } 6391da177e4SLinus Torvalds 6404b45099bSKeiichiro Tokunaga /** 6414b45099bSKeiichiro Tokunaga * unregister_node - unregister a node device 6424b45099bSKeiichiro Tokunaga * @node: node going away 6434b45099bSKeiichiro Tokunaga * 6444b45099bSKeiichiro Tokunaga * Unregisters a node device @node. All the devices on the node must be 6454b45099bSKeiichiro Tokunaga * unregistered before calling this function. 6464b45099bSKeiichiro Tokunaga */ 6474b45099bSKeiichiro Tokunaga void unregister_node(struct node *node) 6484b45099bSKeiichiro Tokunaga { 6494faf8d95SLee Schermerhorn hugetlb_unregister_node(node); /* no-op, if memoryless node */ 65008d9dbe7SKeith Busch node_remove_accesses(node); 651acc02a10SKeith Busch node_remove_caches(node); 65210fbcf4cSKay Sievers device_unregister(&node->dev); 6534b45099bSKeiichiro Tokunaga } 6544b45099bSKeiichiro Tokunaga 6558732794bSWen Congyang struct node *node_devices[MAX_NUMNODES]; 6560fc44159SYasunori Goto 65776b67ed9SKAMEZAWA Hiroyuki /* 65876b67ed9SKAMEZAWA Hiroyuki * register cpu under node 65976b67ed9SKAMEZAWA Hiroyuki */ 66076b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid) 66176b67ed9SKAMEZAWA Hiroyuki { 6621830794aSAlex Chiang int ret; 6638a25a2fdSKay Sievers struct device *obj; 664f8246f31SAlex Chiang 665f8246f31SAlex Chiang if (!node_online(nid)) 666f8246f31SAlex Chiang return 0; 667f8246f31SAlex Chiang 6688a25a2fdSKay Sievers obj = get_cpu_device(cpu); 66976b67ed9SKAMEZAWA Hiroyuki if (!obj) 67076b67ed9SKAMEZAWA Hiroyuki return 0; 671f8246f31SAlex Chiang 6728732794bSWen Congyang ret = sysfs_create_link(&node_devices[nid]->dev.kobj, 67376b67ed9SKAMEZAWA Hiroyuki &obj->kobj, 67476b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 6751830794aSAlex Chiang if (ret) 6761830794aSAlex Chiang return ret; 6771830794aSAlex Chiang 6781830794aSAlex Chiang return sysfs_create_link(&obj->kobj, 6798732794bSWen Congyang &node_devices[nid]->dev.kobj, 6808732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 68176b67ed9SKAMEZAWA Hiroyuki } 68276b67ed9SKAMEZAWA Hiroyuki 68308d9dbe7SKeith Busch /** 68408d9dbe7SKeith Busch * register_memory_node_under_compute_node - link memory node to its compute 68508d9dbe7SKeith Busch * node for a given access class. 68658cb346cSMauro Carvalho Chehab * @mem_nid: Memory node number 68758cb346cSMauro Carvalho Chehab * @cpu_nid: Cpu node number 68808d9dbe7SKeith Busch * @access: Access class to register 68908d9dbe7SKeith Busch * 69008d9dbe7SKeith Busch * Description: 69108d9dbe7SKeith Busch * For use with platforms that may have separate memory and compute nodes. 69208d9dbe7SKeith Busch * This function will export node relationships linking which memory 69308d9dbe7SKeith Busch * initiator nodes can access memory targets at a given ranked access 69408d9dbe7SKeith Busch * class. 69508d9dbe7SKeith Busch */ 69608d9dbe7SKeith Busch int register_memory_node_under_compute_node(unsigned int mem_nid, 69708d9dbe7SKeith Busch unsigned int cpu_nid, 69808d9dbe7SKeith Busch unsigned access) 69908d9dbe7SKeith Busch { 70008d9dbe7SKeith Busch struct node *init_node, *targ_node; 70108d9dbe7SKeith Busch struct node_access_nodes *initiator, *target; 70208d9dbe7SKeith Busch int ret; 70308d9dbe7SKeith Busch 70408d9dbe7SKeith Busch if (!node_online(cpu_nid) || !node_online(mem_nid)) 70508d9dbe7SKeith Busch return -ENODEV; 70608d9dbe7SKeith Busch 70708d9dbe7SKeith Busch init_node = node_devices[cpu_nid]; 70808d9dbe7SKeith Busch targ_node = node_devices[mem_nid]; 70908d9dbe7SKeith Busch initiator = node_init_node_access(init_node, access); 71008d9dbe7SKeith Busch target = node_init_node_access(targ_node, access); 71108d9dbe7SKeith Busch if (!initiator || !target) 71208d9dbe7SKeith Busch return -ENOMEM; 71308d9dbe7SKeith Busch 71408d9dbe7SKeith Busch ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets", 71508d9dbe7SKeith Busch &targ_node->dev.kobj, 71608d9dbe7SKeith Busch dev_name(&targ_node->dev)); 71708d9dbe7SKeith Busch if (ret) 71808d9dbe7SKeith Busch return ret; 71908d9dbe7SKeith Busch 72008d9dbe7SKeith Busch ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators", 72108d9dbe7SKeith Busch &init_node->dev.kobj, 72208d9dbe7SKeith Busch dev_name(&init_node->dev)); 72308d9dbe7SKeith Busch if (ret) 72408d9dbe7SKeith Busch goto err; 72508d9dbe7SKeith Busch 72608d9dbe7SKeith Busch return 0; 72708d9dbe7SKeith Busch err: 72808d9dbe7SKeith Busch sysfs_remove_link_from_group(&initiator->dev.kobj, "targets", 72908d9dbe7SKeith Busch dev_name(&targ_node->dev)); 73008d9dbe7SKeith Busch return ret; 73108d9dbe7SKeith Busch } 73208d9dbe7SKeith Busch 73376b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 73476b67ed9SKAMEZAWA Hiroyuki { 7358a25a2fdSKay Sievers struct device *obj; 736b9d52dadSAlex Chiang 737b9d52dadSAlex Chiang if (!node_online(nid)) 738b9d52dadSAlex Chiang return 0; 739b9d52dadSAlex Chiang 7408a25a2fdSKay Sievers obj = get_cpu_device(cpu); 741b9d52dadSAlex Chiang if (!obj) 742b9d52dadSAlex Chiang return 0; 743b9d52dadSAlex Chiang 7448732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 74576b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 7461830794aSAlex Chiang sysfs_remove_link(&obj->kobj, 7478732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 748b9d52dadSAlex Chiang 74976b67ed9SKAMEZAWA Hiroyuki return 0; 75076b67ed9SKAMEZAWA Hiroyuki } 75176b67ed9SKAMEZAWA Hiroyuki 752c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 753bd721ea7SFabian Frederick static int __ref get_nid_for_pfn(unsigned long pfn) 754c04fc586SGary Hade { 755c04fc586SGary Hade if (!pfn_valid_within(pfn)) 756c04fc586SGary Hade return -1; 7573a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 7588cdde385SThomas Gleixner if (system_state < SYSTEM_RUNNING) 7593a80a7faSMel Gorman return early_pfn_to_nid(pfn); 7603a80a7faSMel Gorman #endif 761c04fc586SGary Hade return pfn_to_nid(pfn); 762c04fc586SGary Hade } 763c04fc586SGary Hade 764c04fc586SGary Hade /* register memory section under specified node if it spans that node */ 7658d595c4cSDavid Hildenbrand static int register_mem_sect_under_node(struct memory_block *mem_blk, 7668d595c4cSDavid Hildenbrand void *arg) 767c04fc586SGary Hade { 768b6c88d3bSDavid Hildenbrand unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; 769b6c88d3bSDavid Hildenbrand unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 770b6c88d3bSDavid Hildenbrand unsigned long end_pfn = start_pfn + memory_block_pfns - 1; 7714fbce633SOscar Salvador int ret, nid = *(int *)arg; 772b6c88d3bSDavid Hildenbrand unsigned long pfn; 773c04fc586SGary Hade 774b6c88d3bSDavid Hildenbrand for (pfn = start_pfn; pfn <= end_pfn; pfn++) { 775c04fc586SGary Hade int page_nid; 776c04fc586SGary Hade 77704697858SYinghai Lu /* 77804697858SYinghai Lu * memory block could have several absent sections from start. 77904697858SYinghai Lu * skip pfn range from absent section 78004697858SYinghai Lu */ 781e03d1f78SPingfan Liu if (!pfn_in_present_section(pfn)) { 78204697858SYinghai Lu pfn = round_down(pfn + PAGES_PER_SECTION, 78304697858SYinghai Lu PAGES_PER_SECTION) - 1; 78404697858SYinghai Lu continue; 78504697858SYinghai Lu } 78604697858SYinghai Lu 787fc44f7f9SPavel Tatashin /* 788fc44f7f9SPavel Tatashin * We need to check if page belongs to nid only for the boot 789fc44f7f9SPavel Tatashin * case, during hotplug we know that all pages in the memory 790fc44f7f9SPavel Tatashin * block belong to the same node. 791fc44f7f9SPavel Tatashin */ 7924fbce633SOscar Salvador if (system_state == SYSTEM_BOOTING) { 793c04fc586SGary Hade page_nid = get_nid_for_pfn(pfn); 794c04fc586SGary Hade if (page_nid < 0) 795c04fc586SGary Hade continue; 796c04fc586SGary Hade if (page_nid != nid) 797c04fc586SGary Hade continue; 798fc44f7f9SPavel Tatashin } 799d84f2f5aSDavid Hildenbrand 800d84f2f5aSDavid Hildenbrand /* 801d84f2f5aSDavid Hildenbrand * If this memory block spans multiple nodes, we only indicate 802d84f2f5aSDavid Hildenbrand * the last processed node. 803d84f2f5aSDavid Hildenbrand */ 804d84f2f5aSDavid Hildenbrand mem_blk->nid = nid; 805d84f2f5aSDavid Hildenbrand 8068732794bSWen Congyang ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, 80710fbcf4cSKay Sievers &mem_blk->dev.kobj, 80810fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 809dee5d0d5SAlex Chiang if (ret) 810dee5d0d5SAlex Chiang return ret; 811dee5d0d5SAlex Chiang 81210fbcf4cSKay Sievers return sysfs_create_link_nowarn(&mem_blk->dev.kobj, 8138732794bSWen Congyang &node_devices[nid]->dev.kobj, 8148732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 815c04fc586SGary Hade } 816c04fc586SGary Hade /* mem section does not span the specified node */ 817c04fc586SGary Hade return 0; 818c04fc586SGary Hade } 819c04fc586SGary Hade 8204c4b7f9bSDavid Hildenbrand /* 821d84f2f5aSDavid Hildenbrand * Unregister a memory block device under the node it spans. Memory blocks 822d84f2f5aSDavid Hildenbrand * with multiple nodes cannot be offlined and therefore also never be removed. 8234c4b7f9bSDavid Hildenbrand */ 824a31b264cSDavid Hildenbrand void unregister_memory_block_under_nodes(struct memory_block *mem_blk) 825c04fc586SGary Hade { 826d84f2f5aSDavid Hildenbrand if (mem_blk->nid == NUMA_NO_NODE) 827d84f2f5aSDavid Hildenbrand return; 828c04fc586SGary Hade 829d84f2f5aSDavid Hildenbrand sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj, 83010fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 83110fbcf4cSKay Sievers sysfs_remove_link(&mem_blk->dev.kobj, 832d84f2f5aSDavid Hildenbrand kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); 833c04fc586SGary Hade } 834c04fc586SGary Hade 8354fbce633SOscar Salvador int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) 836c04fc586SGary Hade { 837fbcf73ceSDavid Hildenbrand return walk_memory_blocks(PFN_PHYS(start_pfn), 838fbcf73ceSDavid Hildenbrand PFN_PHYS(end_pfn - start_pfn), (void *)&nid, 8394fbce633SOscar Salvador register_mem_sect_under_node); 840c04fc586SGary Hade } 8414faf8d95SLee Schermerhorn 84239da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS 8434faf8d95SLee Schermerhorn /* 8444faf8d95SLee Schermerhorn * Handle per node hstate attribute [un]registration on transistions 8454faf8d95SLee Schermerhorn * to/from memoryless state. 8464faf8d95SLee Schermerhorn */ 84739da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work) 84839da08cbSLee Schermerhorn { 84939da08cbSLee Schermerhorn struct node *node = container_of(work, struct node, node_work); 85039da08cbSLee Schermerhorn 85139da08cbSLee Schermerhorn /* 85239da08cbSLee Schermerhorn * We only get here when a node transitions to/from memoryless state. 85339da08cbSLee Schermerhorn * We can detect which transition occurred by examining whether the 85439da08cbSLee Schermerhorn * node has memory now. hugetlb_register_node() already check this 85539da08cbSLee Schermerhorn * so we try to register the attributes. If that fails, then the 85639da08cbSLee Schermerhorn * node has transitioned to memoryless, try to unregister the 85739da08cbSLee Schermerhorn * attributes. 85839da08cbSLee Schermerhorn */ 85939da08cbSLee Schermerhorn if (!hugetlb_register_node(node)) 86039da08cbSLee Schermerhorn hugetlb_unregister_node(node); 86139da08cbSLee Schermerhorn } 86239da08cbSLee Schermerhorn 86339da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) 86439da08cbSLee Schermerhorn { 8658732794bSWen Congyang INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); 86639da08cbSLee Schermerhorn } 8674faf8d95SLee Schermerhorn 8684faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self, 8694faf8d95SLee Schermerhorn unsigned long action, void *arg) 8704faf8d95SLee Schermerhorn { 8714faf8d95SLee Schermerhorn struct memory_notify *mnb = arg; 8724faf8d95SLee Schermerhorn int nid = mnb->status_change_nid; 8734faf8d95SLee Schermerhorn 8744faf8d95SLee Schermerhorn switch (action) { 87539da08cbSLee Schermerhorn case MEM_ONLINE: 87639da08cbSLee Schermerhorn case MEM_OFFLINE: 87739da08cbSLee Schermerhorn /* 87839da08cbSLee Schermerhorn * offload per node hstate [un]registration to a work thread 87939da08cbSLee Schermerhorn * when transitioning to/from memoryless state. 88039da08cbSLee Schermerhorn */ 8814faf8d95SLee Schermerhorn if (nid != NUMA_NO_NODE) 8828732794bSWen Congyang schedule_work(&node_devices[nid]->node_work); 8834faf8d95SLee Schermerhorn break; 88439da08cbSLee Schermerhorn 8854faf8d95SLee Schermerhorn case MEM_GOING_ONLINE: 8864faf8d95SLee Schermerhorn case MEM_GOING_OFFLINE: 8874faf8d95SLee Schermerhorn case MEM_CANCEL_ONLINE: 8884faf8d95SLee Schermerhorn case MEM_CANCEL_OFFLINE: 8894faf8d95SLee Schermerhorn default: 8904faf8d95SLee Schermerhorn break; 8914faf8d95SLee Schermerhorn } 8924faf8d95SLee Schermerhorn 8934faf8d95SLee Schermerhorn return NOTIFY_OK; 8944faf8d95SLee Schermerhorn } 89539da08cbSLee Schermerhorn #endif /* CONFIG_HUGETLBFS */ 89639da08cbSLee Schermerhorn #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 89739da08cbSLee Schermerhorn 89839da08cbSLee Schermerhorn #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ 89939da08cbSLee Schermerhorn !defined(CONFIG_HUGETLBFS) 9004faf8d95SLee Schermerhorn static inline int node_memory_callback(struct notifier_block *self, 9014faf8d95SLee Schermerhorn unsigned long action, void *arg) 9024faf8d95SLee Schermerhorn { 9034faf8d95SLee Schermerhorn return NOTIFY_OK; 9044faf8d95SLee Schermerhorn } 90539da08cbSLee Schermerhorn 90639da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) { } 90739da08cbSLee Schermerhorn 90839da08cbSLee Schermerhorn #endif 909c04fc586SGary Hade 9109037a993SMichal Hocko int __register_one_node(int nid) 9110fc44159SYasunori Goto { 9129037a993SMichal Hocko int error; 9139037a993SMichal Hocko int cpu; 9140fc44159SYasunori Goto 9158732794bSWen Congyang node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); 9168732794bSWen Congyang if (!node_devices[nid]) 9178732794bSWen Congyang return -ENOMEM; 9188732794bSWen Congyang 919a7be6e5aSDou Liyang error = register_node(node_devices[nid], nid); 92076b67ed9SKAMEZAWA Hiroyuki 92176b67ed9SKAMEZAWA Hiroyuki /* link cpu under this node */ 92276b67ed9SKAMEZAWA Hiroyuki for_each_present_cpu(cpu) { 92376b67ed9SKAMEZAWA Hiroyuki if (cpu_to_node(cpu) == nid) 92476b67ed9SKAMEZAWA Hiroyuki register_cpu_under_node(cpu, nid); 92576b67ed9SKAMEZAWA Hiroyuki } 926c04fc586SGary Hade 92708d9dbe7SKeith Busch INIT_LIST_HEAD(&node_devices[nid]->access_list); 92839da08cbSLee Schermerhorn /* initialize work queue for memory hot plug */ 92939da08cbSLee Schermerhorn init_node_hugetlb_work(nid); 930acc02a10SKeith Busch node_init_caches(nid); 9310fc44159SYasunori Goto 9320fc44159SYasunori Goto return error; 9330fc44159SYasunori Goto } 9340fc44159SYasunori Goto 9350fc44159SYasunori Goto void unregister_one_node(int nid) 9360fc44159SYasunori Goto { 93792d585efSXishi Qiu if (!node_devices[nid]) 93892d585efSXishi Qiu return; 93992d585efSXishi Qiu 9408732794bSWen Congyang unregister_node(node_devices[nid]); 9418732794bSWen Congyang node_devices[nid] = NULL; 9420fc44159SYasunori Goto } 9430fc44159SYasunori Goto 944bde631a5SLee Schermerhorn /* 945bde631a5SLee Schermerhorn * node states attributes 946bde631a5SLee Schermerhorn */ 947bde631a5SLee Schermerhorn 948bde631a5SLee Schermerhorn static ssize_t print_nodes_state(enum node_states state, char *buf) 949bde631a5SLee Schermerhorn { 950bde631a5SLee Schermerhorn int n; 951bde631a5SLee Schermerhorn 952f799b1a7STejun Heo n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", 953f799b1a7STejun Heo nodemask_pr_args(&node_states[state])); 954f6238818SRyota Ozaki buf[n++] = '\n'; 955f6238818SRyota Ozaki buf[n] = '\0'; 956bde631a5SLee Schermerhorn return n; 957bde631a5SLee Schermerhorn } 958bde631a5SLee Schermerhorn 959b15f562fSAndi Kleen struct node_attr { 96010fbcf4cSKay Sievers struct device_attribute attr; 961b15f562fSAndi Kleen enum node_states state; 962b15f562fSAndi Kleen }; 963b15f562fSAndi Kleen 96410fbcf4cSKay Sievers static ssize_t show_node_state(struct device *dev, 96510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 966bde631a5SLee Schermerhorn { 967b15f562fSAndi Kleen struct node_attr *na = container_of(attr, struct node_attr, attr); 968b15f562fSAndi Kleen return print_nodes_state(na->state, buf); 969bde631a5SLee Schermerhorn } 970bde631a5SLee Schermerhorn 971b15f562fSAndi Kleen #define _NODE_ATTR(name, state) \ 97210fbcf4cSKay Sievers { __ATTR(name, 0444, show_node_state, NULL), state } 973bde631a5SLee Schermerhorn 974b15f562fSAndi Kleen static struct node_attr node_state_attr[] = { 975fcf07d22SLai Jiangshan [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE), 976fcf07d22SLai Jiangshan [N_ONLINE] = _NODE_ATTR(online, N_ONLINE), 977fcf07d22SLai Jiangshan [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY), 978bde631a5SLee Schermerhorn #ifdef CONFIG_HIGHMEM 979fcf07d22SLai Jiangshan [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), 980bde631a5SLee Schermerhorn #endif 98120b2f52bSLai Jiangshan [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY), 982fcf07d22SLai Jiangshan [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), 983bde631a5SLee Schermerhorn }; 984bde631a5SLee Schermerhorn 98510fbcf4cSKay Sievers static struct attribute *node_state_attrs[] = { 986fcf07d22SLai Jiangshan &node_state_attr[N_POSSIBLE].attr.attr, 987fcf07d22SLai Jiangshan &node_state_attr[N_ONLINE].attr.attr, 988fcf07d22SLai Jiangshan &node_state_attr[N_NORMAL_MEMORY].attr.attr, 9893701cde6SAndi Kleen #ifdef CONFIG_HIGHMEM 990fcf07d22SLai Jiangshan &node_state_attr[N_HIGH_MEMORY].attr.attr, 9913701cde6SAndi Kleen #endif 99220b2f52bSLai Jiangshan &node_state_attr[N_MEMORY].attr.attr, 993fcf07d22SLai Jiangshan &node_state_attr[N_CPU].attr.attr, 9943701cde6SAndi Kleen NULL 9953701cde6SAndi Kleen }; 996bde631a5SLee Schermerhorn 99710fbcf4cSKay Sievers static struct attribute_group memory_root_attr_group = { 99810fbcf4cSKay Sievers .attrs = node_state_attrs, 99910fbcf4cSKay Sievers }; 100010fbcf4cSKay Sievers 100110fbcf4cSKay Sievers static const struct attribute_group *cpu_root_attr_groups[] = { 100210fbcf4cSKay Sievers &memory_root_attr_group, 100310fbcf4cSKay Sievers NULL, 100410fbcf4cSKay Sievers }; 100510fbcf4cSKay Sievers 10064faf8d95SLee Schermerhorn #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ 10074b45099bSKeiichiro Tokunaga static int __init register_node_type(void) 10081da177e4SLinus Torvalds { 1009bde631a5SLee Schermerhorn int ret; 1010bde631a5SLee Schermerhorn 10113701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); 10123701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); 10133701cde6SAndi Kleen 101410fbcf4cSKay Sievers ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); 10154faf8d95SLee Schermerhorn if (!ret) { 10166e259e7dSAndrew Morton static struct notifier_block node_memory_callback_nb = { 10176e259e7dSAndrew Morton .notifier_call = node_memory_callback, 10186e259e7dSAndrew Morton .priority = NODE_CALLBACK_PRI, 10196e259e7dSAndrew Morton }; 10206e259e7dSAndrew Morton register_hotmemory_notifier(&node_memory_callback_nb); 10214faf8d95SLee Schermerhorn } 1022bde631a5SLee Schermerhorn 1023bde631a5SLee Schermerhorn /* 1024bde631a5SLee Schermerhorn * Note: we're not going to unregister the node class if we fail 1025bde631a5SLee Schermerhorn * to register the node state class attribute files. 1026bde631a5SLee Schermerhorn */ 1027bde631a5SLee Schermerhorn return ret; 10281da177e4SLinus Torvalds } 10291da177e4SLinus Torvalds postcore_initcall(register_node_type); 1030