1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 21da177e4SLinus Torvalds /* 310fbcf4cSKay Sievers * Basic Node interface support 41da177e4SLinus Torvalds */ 51da177e4SLinus Torvalds 61da177e4SLinus Torvalds #include <linux/module.h> 71da177e4SLinus Torvalds #include <linux/init.h> 81da177e4SLinus Torvalds #include <linux/mm.h> 9c04fc586SGary Hade #include <linux/memory.h> 10fa25c503SKOSAKI Motohiro #include <linux/vmstat.h> 116e259e7dSAndrew Morton #include <linux/notifier.h> 121da177e4SLinus Torvalds #include <linux/node.h> 131da177e4SLinus Torvalds #include <linux/hugetlb.h> 14ed4a6d7fSMel Gorman #include <linux/compaction.h> 151da177e4SLinus Torvalds #include <linux/cpumask.h> 161da177e4SLinus Torvalds #include <linux/topology.h> 171da177e4SLinus Torvalds #include <linux/nodemask.h> 1876b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h> 19bde631a5SLee Schermerhorn #include <linux/device.h> 2008d9dbe7SKeith Busch #include <linux/pm_runtime.h> 21af936a16SLee Schermerhorn #include <linux/swap.h> 2218e5b539STejun Heo #include <linux/slab.h> 231da177e4SLinus Torvalds 2410fbcf4cSKay Sievers static struct bus_type node_subsys = { 25af5ca3f4SKay Sievers .name = "node", 2610fbcf4cSKay Sievers .dev_name = "node", 271da177e4SLinus Torvalds }; 281da177e4SLinus Torvalds 291da177e4SLinus Torvalds 305aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) 311da177e4SLinus Torvalds { 32064f0e93SZhen Lei ssize_t n; 33064f0e93SZhen Lei cpumask_var_t mask; 341da177e4SLinus Torvalds struct node *node_dev = to_node(dev); 351da177e4SLinus Torvalds 3639106dcfSMike Travis /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ 3739106dcfSMike Travis BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 381da177e4SLinus Torvalds 39064f0e93SZhen Lei if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 40064f0e93SZhen Lei return 0; 41064f0e93SZhen Lei 42064f0e93SZhen Lei cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); 43064f0e93SZhen Lei n = cpumap_print_to_pagebuf(list, buf, mask); 44064f0e93SZhen Lei free_cpumask_var(mask); 45064f0e93SZhen Lei 46064f0e93SZhen Lei return n; 471da177e4SLinus Torvalds } 481da177e4SLinus Torvalds 4910fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev, 5010fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5139106dcfSMike Travis { 525aaba363SSudeep Holla return node_read_cpumap(dev, false, buf); 5339106dcfSMike Travis } 5410fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev, 5510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5639106dcfSMike Travis { 575aaba363SSudeep Holla return node_read_cpumap(dev, true, buf); 5839106dcfSMike Travis } 5939106dcfSMike Travis 6010fbcf4cSKay Sievers static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 6110fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 621da177e4SLinus Torvalds 6308d9dbe7SKeith Busch /** 6408d9dbe7SKeith Busch * struct node_access_nodes - Access class device to hold user visible 6508d9dbe7SKeith Busch * relationships to other nodes. 6608d9dbe7SKeith Busch * @dev: Device for this memory access class 6708d9dbe7SKeith Busch * @list_node: List element in the node's access list 6808d9dbe7SKeith Busch * @access: The access class rank 6958cb346cSMauro Carvalho Chehab * @hmem_attrs: Heterogeneous memory performance attributes 7008d9dbe7SKeith Busch */ 7108d9dbe7SKeith Busch struct node_access_nodes { 7208d9dbe7SKeith Busch struct device dev; 7308d9dbe7SKeith Busch struct list_head list_node; 7408d9dbe7SKeith Busch unsigned access; 75e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING 76e1cf33aaSKeith Busch struct node_hmem_attrs hmem_attrs; 77e1cf33aaSKeith Busch #endif 7808d9dbe7SKeith Busch }; 7908d9dbe7SKeith Busch #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev) 8008d9dbe7SKeith Busch 8108d9dbe7SKeith Busch static struct attribute *node_init_access_node_attrs[] = { 8208d9dbe7SKeith Busch NULL, 8308d9dbe7SKeith Busch }; 8408d9dbe7SKeith Busch 8508d9dbe7SKeith Busch static struct attribute *node_targ_access_node_attrs[] = { 8608d9dbe7SKeith Busch NULL, 8708d9dbe7SKeith Busch }; 8808d9dbe7SKeith Busch 8908d9dbe7SKeith Busch static const struct attribute_group initiators = { 9008d9dbe7SKeith Busch .name = "initiators", 9108d9dbe7SKeith Busch .attrs = node_init_access_node_attrs, 9208d9dbe7SKeith Busch }; 9308d9dbe7SKeith Busch 9408d9dbe7SKeith Busch static const struct attribute_group targets = { 9508d9dbe7SKeith Busch .name = "targets", 9608d9dbe7SKeith Busch .attrs = node_targ_access_node_attrs, 9708d9dbe7SKeith Busch }; 9808d9dbe7SKeith Busch 9908d9dbe7SKeith Busch static const struct attribute_group *node_access_node_groups[] = { 10008d9dbe7SKeith Busch &initiators, 10108d9dbe7SKeith Busch &targets, 10208d9dbe7SKeith Busch NULL, 10308d9dbe7SKeith Busch }; 10408d9dbe7SKeith Busch 10508d9dbe7SKeith Busch static void node_remove_accesses(struct node *node) 10608d9dbe7SKeith Busch { 10708d9dbe7SKeith Busch struct node_access_nodes *c, *cnext; 10808d9dbe7SKeith Busch 10908d9dbe7SKeith Busch list_for_each_entry_safe(c, cnext, &node->access_list, list_node) { 11008d9dbe7SKeith Busch list_del(&c->list_node); 11108d9dbe7SKeith Busch device_unregister(&c->dev); 11208d9dbe7SKeith Busch } 11308d9dbe7SKeith Busch } 11408d9dbe7SKeith Busch 11508d9dbe7SKeith Busch static void node_access_release(struct device *dev) 11608d9dbe7SKeith Busch { 11708d9dbe7SKeith Busch kfree(to_access_nodes(dev)); 11808d9dbe7SKeith Busch } 11908d9dbe7SKeith Busch 12008d9dbe7SKeith Busch static struct node_access_nodes *node_init_node_access(struct node *node, 12108d9dbe7SKeith Busch unsigned access) 12208d9dbe7SKeith Busch { 12308d9dbe7SKeith Busch struct node_access_nodes *access_node; 12408d9dbe7SKeith Busch struct device *dev; 12508d9dbe7SKeith Busch 12608d9dbe7SKeith Busch list_for_each_entry(access_node, &node->access_list, list_node) 12708d9dbe7SKeith Busch if (access_node->access == access) 12808d9dbe7SKeith Busch return access_node; 12908d9dbe7SKeith Busch 13008d9dbe7SKeith Busch access_node = kzalloc(sizeof(*access_node), GFP_KERNEL); 13108d9dbe7SKeith Busch if (!access_node) 13208d9dbe7SKeith Busch return NULL; 13308d9dbe7SKeith Busch 13408d9dbe7SKeith Busch access_node->access = access; 13508d9dbe7SKeith Busch dev = &access_node->dev; 13608d9dbe7SKeith Busch dev->parent = &node->dev; 13708d9dbe7SKeith Busch dev->release = node_access_release; 13808d9dbe7SKeith Busch dev->groups = node_access_node_groups; 13908d9dbe7SKeith Busch if (dev_set_name(dev, "access%u", access)) 14008d9dbe7SKeith Busch goto free; 14108d9dbe7SKeith Busch 14208d9dbe7SKeith Busch if (device_register(dev)) 14308d9dbe7SKeith Busch goto free_name; 14408d9dbe7SKeith Busch 14508d9dbe7SKeith Busch pm_runtime_no_callbacks(dev); 14608d9dbe7SKeith Busch list_add_tail(&access_node->list_node, &node->access_list); 14708d9dbe7SKeith Busch return access_node; 14808d9dbe7SKeith Busch free_name: 14908d9dbe7SKeith Busch kfree_const(dev->kobj.name); 15008d9dbe7SKeith Busch free: 15108d9dbe7SKeith Busch kfree(access_node); 15208d9dbe7SKeith Busch return NULL; 15308d9dbe7SKeith Busch } 15408d9dbe7SKeith Busch 155e1cf33aaSKeith Busch #ifdef CONFIG_HMEM_REPORTING 156e1cf33aaSKeith Busch #define ACCESS_ATTR(name) \ 157e1cf33aaSKeith Busch static ssize_t name##_show(struct device *dev, \ 158e1cf33aaSKeith Busch struct device_attribute *attr, \ 159e1cf33aaSKeith Busch char *buf) \ 160e1cf33aaSKeith Busch { \ 161e1cf33aaSKeith Busch return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \ 162e1cf33aaSKeith Busch } \ 163e1cf33aaSKeith Busch static DEVICE_ATTR_RO(name); 164e1cf33aaSKeith Busch 165e1cf33aaSKeith Busch ACCESS_ATTR(read_bandwidth) 166e1cf33aaSKeith Busch ACCESS_ATTR(read_latency) 167e1cf33aaSKeith Busch ACCESS_ATTR(write_bandwidth) 168e1cf33aaSKeith Busch ACCESS_ATTR(write_latency) 169e1cf33aaSKeith Busch 170e1cf33aaSKeith Busch static struct attribute *access_attrs[] = { 171e1cf33aaSKeith Busch &dev_attr_read_bandwidth.attr, 172e1cf33aaSKeith Busch &dev_attr_read_latency.attr, 173e1cf33aaSKeith Busch &dev_attr_write_bandwidth.attr, 174e1cf33aaSKeith Busch &dev_attr_write_latency.attr, 175e1cf33aaSKeith Busch NULL, 176e1cf33aaSKeith Busch }; 177e1cf33aaSKeith Busch 178e1cf33aaSKeith Busch /** 179e1cf33aaSKeith Busch * node_set_perf_attrs - Set the performance values for given access class 180e1cf33aaSKeith Busch * @nid: Node identifier to be set 181e1cf33aaSKeith Busch * @hmem_attrs: Heterogeneous memory performance attributes 182e1cf33aaSKeith Busch * @access: The access class the for the given attributes 183e1cf33aaSKeith Busch */ 184e1cf33aaSKeith Busch void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs, 185e1cf33aaSKeith Busch unsigned access) 186e1cf33aaSKeith Busch { 187e1cf33aaSKeith Busch struct node_access_nodes *c; 188e1cf33aaSKeith Busch struct node *node; 189e1cf33aaSKeith Busch int i; 190e1cf33aaSKeith Busch 191e1cf33aaSKeith Busch if (WARN_ON_ONCE(!node_online(nid))) 192e1cf33aaSKeith Busch return; 193e1cf33aaSKeith Busch 194e1cf33aaSKeith Busch node = node_devices[nid]; 195e1cf33aaSKeith Busch c = node_init_node_access(node, access); 196e1cf33aaSKeith Busch if (!c) 197e1cf33aaSKeith Busch return; 198e1cf33aaSKeith Busch 199e1cf33aaSKeith Busch c->hmem_attrs = *hmem_attrs; 200e1cf33aaSKeith Busch for (i = 0; access_attrs[i] != NULL; i++) { 201e1cf33aaSKeith Busch if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i], 202e1cf33aaSKeith Busch "initiators")) { 203e1cf33aaSKeith Busch pr_info("failed to add performance attribute to node %d\n", 204e1cf33aaSKeith Busch nid); 205e1cf33aaSKeith Busch break; 206e1cf33aaSKeith Busch } 207e1cf33aaSKeith Busch } 208e1cf33aaSKeith Busch } 209acc02a10SKeith Busch 210acc02a10SKeith Busch /** 211acc02a10SKeith Busch * struct node_cache_info - Internal tracking for memory node caches 212acc02a10SKeith Busch * @dev: Device represeting the cache level 213acc02a10SKeith Busch * @node: List element for tracking in the node 214acc02a10SKeith Busch * @cache_attrs:Attributes for this cache level 215acc02a10SKeith Busch */ 216acc02a10SKeith Busch struct node_cache_info { 217acc02a10SKeith Busch struct device dev; 218acc02a10SKeith Busch struct list_head node; 219acc02a10SKeith Busch struct node_cache_attrs cache_attrs; 220acc02a10SKeith Busch }; 221acc02a10SKeith Busch #define to_cache_info(device) container_of(device, struct node_cache_info, dev) 222acc02a10SKeith Busch 223acc02a10SKeith Busch #define CACHE_ATTR(name, fmt) \ 224acc02a10SKeith Busch static ssize_t name##_show(struct device *dev, \ 225acc02a10SKeith Busch struct device_attribute *attr, \ 226acc02a10SKeith Busch char *buf) \ 227acc02a10SKeith Busch { \ 228acc02a10SKeith Busch return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\ 229acc02a10SKeith Busch } \ 230acc02a10SKeith Busch DEVICE_ATTR_RO(name); 231acc02a10SKeith Busch 232acc02a10SKeith Busch CACHE_ATTR(size, "%llu") 233acc02a10SKeith Busch CACHE_ATTR(line_size, "%u") 234acc02a10SKeith Busch CACHE_ATTR(indexing, "%u") 235acc02a10SKeith Busch CACHE_ATTR(write_policy, "%u") 236acc02a10SKeith Busch 237acc02a10SKeith Busch static struct attribute *cache_attrs[] = { 238acc02a10SKeith Busch &dev_attr_indexing.attr, 239acc02a10SKeith Busch &dev_attr_size.attr, 240acc02a10SKeith Busch &dev_attr_line_size.attr, 241acc02a10SKeith Busch &dev_attr_write_policy.attr, 242acc02a10SKeith Busch NULL, 243acc02a10SKeith Busch }; 244acc02a10SKeith Busch ATTRIBUTE_GROUPS(cache); 245acc02a10SKeith Busch 246acc02a10SKeith Busch static void node_cache_release(struct device *dev) 247acc02a10SKeith Busch { 248acc02a10SKeith Busch kfree(dev); 249acc02a10SKeith Busch } 250acc02a10SKeith Busch 251acc02a10SKeith Busch static void node_cacheinfo_release(struct device *dev) 252acc02a10SKeith Busch { 253acc02a10SKeith Busch struct node_cache_info *info = to_cache_info(dev); 254acc02a10SKeith Busch kfree(info); 255acc02a10SKeith Busch } 256acc02a10SKeith Busch 257acc02a10SKeith Busch static void node_init_cache_dev(struct node *node) 258acc02a10SKeith Busch { 259acc02a10SKeith Busch struct device *dev; 260acc02a10SKeith Busch 261acc02a10SKeith Busch dev = kzalloc(sizeof(*dev), GFP_KERNEL); 262acc02a10SKeith Busch if (!dev) 263acc02a10SKeith Busch return; 264acc02a10SKeith Busch 265acc02a10SKeith Busch dev->parent = &node->dev; 266acc02a10SKeith Busch dev->release = node_cache_release; 267acc02a10SKeith Busch if (dev_set_name(dev, "memory_side_cache")) 268acc02a10SKeith Busch goto free_dev; 269acc02a10SKeith Busch 270acc02a10SKeith Busch if (device_register(dev)) 271acc02a10SKeith Busch goto free_name; 272acc02a10SKeith Busch 273acc02a10SKeith Busch pm_runtime_no_callbacks(dev); 274acc02a10SKeith Busch node->cache_dev = dev; 275acc02a10SKeith Busch return; 276acc02a10SKeith Busch free_name: 277acc02a10SKeith Busch kfree_const(dev->kobj.name); 278acc02a10SKeith Busch free_dev: 279acc02a10SKeith Busch kfree(dev); 280acc02a10SKeith Busch } 281acc02a10SKeith Busch 282acc02a10SKeith Busch /** 283acc02a10SKeith Busch * node_add_cache() - add cache attribute to a memory node 284acc02a10SKeith Busch * @nid: Node identifier that has new cache attributes 285acc02a10SKeith Busch * @cache_attrs: Attributes for the cache being added 286acc02a10SKeith Busch */ 287acc02a10SKeith Busch void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) 288acc02a10SKeith Busch { 289acc02a10SKeith Busch struct node_cache_info *info; 290acc02a10SKeith Busch struct device *dev; 291acc02a10SKeith Busch struct node *node; 292acc02a10SKeith Busch 293acc02a10SKeith Busch if (!node_online(nid) || !node_devices[nid]) 294acc02a10SKeith Busch return; 295acc02a10SKeith Busch 296acc02a10SKeith Busch node = node_devices[nid]; 297acc02a10SKeith Busch list_for_each_entry(info, &node->cache_attrs, node) { 298acc02a10SKeith Busch if (info->cache_attrs.level == cache_attrs->level) { 299acc02a10SKeith Busch dev_warn(&node->dev, 300acc02a10SKeith Busch "attempt to add duplicate cache level:%d\n", 301acc02a10SKeith Busch cache_attrs->level); 302acc02a10SKeith Busch return; 303acc02a10SKeith Busch } 304acc02a10SKeith Busch } 305acc02a10SKeith Busch 306acc02a10SKeith Busch if (!node->cache_dev) 307acc02a10SKeith Busch node_init_cache_dev(node); 308acc02a10SKeith Busch if (!node->cache_dev) 309acc02a10SKeith Busch return; 310acc02a10SKeith Busch 311acc02a10SKeith Busch info = kzalloc(sizeof(*info), GFP_KERNEL); 312acc02a10SKeith Busch if (!info) 313acc02a10SKeith Busch return; 314acc02a10SKeith Busch 315acc02a10SKeith Busch dev = &info->dev; 316acc02a10SKeith Busch dev->parent = node->cache_dev; 317acc02a10SKeith Busch dev->release = node_cacheinfo_release; 318acc02a10SKeith Busch dev->groups = cache_groups; 319acc02a10SKeith Busch if (dev_set_name(dev, "index%d", cache_attrs->level)) 320acc02a10SKeith Busch goto free_cache; 321acc02a10SKeith Busch 322acc02a10SKeith Busch info->cache_attrs = *cache_attrs; 323acc02a10SKeith Busch if (device_register(dev)) { 324acc02a10SKeith Busch dev_warn(&node->dev, "failed to add cache level:%d\n", 325acc02a10SKeith Busch cache_attrs->level); 326acc02a10SKeith Busch goto free_name; 327acc02a10SKeith Busch } 328acc02a10SKeith Busch pm_runtime_no_callbacks(dev); 329acc02a10SKeith Busch list_add_tail(&info->node, &node->cache_attrs); 330acc02a10SKeith Busch return; 331acc02a10SKeith Busch free_name: 332acc02a10SKeith Busch kfree_const(dev->kobj.name); 333acc02a10SKeith Busch free_cache: 334acc02a10SKeith Busch kfree(info); 335acc02a10SKeith Busch } 336acc02a10SKeith Busch 337acc02a10SKeith Busch static void node_remove_caches(struct node *node) 338acc02a10SKeith Busch { 339acc02a10SKeith Busch struct node_cache_info *info, *next; 340acc02a10SKeith Busch 341acc02a10SKeith Busch if (!node->cache_dev) 342acc02a10SKeith Busch return; 343acc02a10SKeith Busch 344acc02a10SKeith Busch list_for_each_entry_safe(info, next, &node->cache_attrs, node) { 345acc02a10SKeith Busch list_del(&info->node); 346acc02a10SKeith Busch device_unregister(&info->dev); 347acc02a10SKeith Busch } 348acc02a10SKeith Busch device_unregister(node->cache_dev); 349acc02a10SKeith Busch } 350acc02a10SKeith Busch 351acc02a10SKeith Busch static void node_init_caches(unsigned int nid) 352acc02a10SKeith Busch { 353acc02a10SKeith Busch INIT_LIST_HEAD(&node_devices[nid]->cache_attrs); 354acc02a10SKeith Busch } 355acc02a10SKeith Busch #else 356acc02a10SKeith Busch static void node_init_caches(unsigned int nid) { } 357acc02a10SKeith Busch static void node_remove_caches(struct node *node) { } 358e1cf33aaSKeith Busch #endif 359e1cf33aaSKeith Busch 3601da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10)) 36110fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev, 36210fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 3631da177e4SLinus Torvalds { 3641da177e4SLinus Torvalds int n; 3651da177e4SLinus Torvalds int nid = dev->id; 366599d0c95SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 3671da177e4SLinus Torvalds struct sysinfo i; 36861f94e18SVlastimil Babka unsigned long sreclaimable, sunreclaimable; 3691da177e4SLinus Torvalds 3701da177e4SLinus Torvalds si_meminfo_node(&i, nid); 37161f94e18SVlastimil Babka sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE); 37261f94e18SVlastimil Babka sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE); 3737ee92255SKOSAKI Motohiro n = sprintf(buf, 3741da177e4SLinus Torvalds "Node %d MemTotal: %8lu kB\n" 3751da177e4SLinus Torvalds "Node %d MemFree: %8lu kB\n" 3761da177e4SLinus Torvalds "Node %d MemUsed: %8lu kB\n" 3771da177e4SLinus Torvalds "Node %d Active: %8lu kB\n" 3781da177e4SLinus Torvalds "Node %d Inactive: %8lu kB\n" 3794f98a2feSRik van Riel "Node %d Active(anon): %8lu kB\n" 3804f98a2feSRik van Riel "Node %d Inactive(anon): %8lu kB\n" 3814f98a2feSRik van Riel "Node %d Active(file): %8lu kB\n" 3824f98a2feSRik van Riel "Node %d Inactive(file): %8lu kB\n" 3835344b7e6SNick Piggin "Node %d Unevictable: %8lu kB\n" 3847ee92255SKOSAKI Motohiro "Node %d Mlocked: %8lu kB\n", 3857ee92255SKOSAKI Motohiro nid, K(i.totalram), 3867ee92255SKOSAKI Motohiro nid, K(i.freeram), 3877ee92255SKOSAKI Motohiro nid, K(i.totalram - i.freeram), 388599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + 389599d0c95SMel Gorman node_page_state(pgdat, NR_ACTIVE_FILE)), 390599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + 391599d0c95SMel Gorman node_page_state(pgdat, NR_INACTIVE_FILE)), 392599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), 393599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), 394599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), 395599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), 396599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), 39775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); 3987ee92255SKOSAKI Motohiro 399182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM 4007ee92255SKOSAKI Motohiro n += sprintf(buf + n, 4011da177e4SLinus Torvalds "Node %d HighTotal: %8lu kB\n" 4021da177e4SLinus Torvalds "Node %d HighFree: %8lu kB\n" 4031da177e4SLinus Torvalds "Node %d LowTotal: %8lu kB\n" 4047ee92255SKOSAKI Motohiro "Node %d LowFree: %8lu kB\n", 4057ee92255SKOSAKI Motohiro nid, K(i.totalhigh), 4067ee92255SKOSAKI Motohiro nid, K(i.freehigh), 4077ee92255SKOSAKI Motohiro nid, K(i.totalram - i.totalhigh), 4087ee92255SKOSAKI Motohiro nid, K(i.freeram - i.freehigh)); 409182e8e23SChristoph Lameter #endif 4107ee92255SKOSAKI Motohiro n += sprintf(buf + n, 411c07e02dbSMartin Hicks "Node %d Dirty: %8lu kB\n" 412c07e02dbSMartin Hicks "Node %d Writeback: %8lu kB\n" 413347ce434SChristoph Lameter "Node %d FilePages: %8lu kB\n" 414c07e02dbSMartin Hicks "Node %d Mapped: %8lu kB\n" 415f3dbd344SChristoph Lameter "Node %d AnonPages: %8lu kB\n" 4164b02108aSKOSAKI Motohiro "Node %d Shmem: %8lu kB\n" 417c6a7f572SKOSAKI Motohiro "Node %d KernelStack: %8lu kB\n" 418df849a15SChristoph Lameter "Node %d PageTables: %8lu kB\n" 419f5ef68daSAndrew Morton "Node %d NFS_Unstable: %8lu kB\n" 420d2c5e30cSChristoph Lameter "Node %d Bounce: %8lu kB\n" 421fc3ba692SMiklos Szeredi "Node %d WritebackTmp: %8lu kB\n" 42261f94e18SVlastimil Babka "Node %d KReclaimable: %8lu kB\n" 423972d1a7bSChristoph Lameter "Node %d Slab: %8lu kB\n" 424972d1a7bSChristoph Lameter "Node %d SReclaimable: %8lu kB\n" 42505b258e9SDavid Rientjes "Node %d SUnreclaim: %8lu kB\n" 42605b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 42705b258e9SDavid Rientjes "Node %d AnonHugePages: %8lu kB\n" 42865c45377SKirill A. Shutemov "Node %d ShmemHugePages: %8lu kB\n" 42965c45377SKirill A. Shutemov "Node %d ShmemPmdMapped: %8lu kB\n" 43005b258e9SDavid Rientjes #endif 43105b258e9SDavid Rientjes , 43211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), 43311fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK)), 43411fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_PAGES)), 43550658e2eSMel Gorman nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), 4364b9d0fabSMel Gorman nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), 437cc7452b6SRafael Aquini nid, K(i.sharedram), 438d30dd8beSAndy Lutomirski nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), 43975ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), 44011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), 44175ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), 44211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), 44361f94e18SVlastimil Babka nid, K(sreclaimable + 44461f94e18SVlastimil Babka node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), 44561f94e18SVlastimil Babka nid, K(sreclaimable + sunreclaimable), 44661f94e18SVlastimil Babka nid, K(sreclaimable), 44761f94e18SVlastimil Babka nid, K(sunreclaimable) 44805b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 44961f94e18SVlastimil Babka , 45011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_ANON_THPS) * 45165c45377SKirill A. Shutemov HPAGE_PMD_NR), 45211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * 45365c45377SKirill A. Shutemov HPAGE_PMD_NR), 45411fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * 45561f94e18SVlastimil Babka HPAGE_PMD_NR) 45605b258e9SDavid Rientjes #endif 45761f94e18SVlastimil Babka ); 4581da177e4SLinus Torvalds n += hugetlb_report_node_meminfo(nid, buf + n); 4591da177e4SLinus Torvalds return n; 4601da177e4SLinus Torvalds } 4611da177e4SLinus Torvalds 4621da177e4SLinus Torvalds #undef K 46310fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); 4641da177e4SLinus Torvalds 46510fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev, 46610fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4671da177e4SLinus Torvalds { 4681da177e4SLinus Torvalds return sprintf(buf, 4691da177e4SLinus Torvalds "numa_hit %lu\n" 4701da177e4SLinus Torvalds "numa_miss %lu\n" 4711da177e4SLinus Torvalds "numa_foreign %lu\n" 4721da177e4SLinus Torvalds "interleave_hit %lu\n" 4731da177e4SLinus Torvalds "local_node %lu\n" 4741da177e4SLinus Torvalds "other_node %lu\n", 4753a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_HIT), 4763a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_MISS), 4773a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_FOREIGN), 4783a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), 4793a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_LOCAL), 4803a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_OTHER)); 4811da177e4SLinus Torvalds } 48210fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 4831da177e4SLinus Torvalds 48410fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev, 48510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4862ac39037SMichael Rubin { 4872ac39037SMichael Rubin int nid = dev->id; 48875ef7184SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 489fa25c503SKOSAKI Motohiro int i; 490fa25c503SKOSAKI Motohiro int n = 0; 491fa25c503SKOSAKI Motohiro 492fa25c503SKOSAKI Motohiro for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 493fa25c503SKOSAKI Motohiro n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], 49475ef7184SMel Gorman sum_zone_node_page_state(nid, i)); 49575ef7184SMel Gorman 4963a321d2aSKemi Wang #ifdef CONFIG_NUMA 4973a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 49875ef7184SMel Gorman n += sprintf(buf+n, "%s %lu\n", 49975ef7184SMel Gorman vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], 5003a321d2aSKemi Wang sum_zone_numa_state(nid, i)); 5013a321d2aSKemi Wang #endif 5023a321d2aSKemi Wang 5033a321d2aSKemi Wang for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 5043a321d2aSKemi Wang n += sprintf(buf+n, "%s %lu\n", 5053a321d2aSKemi Wang vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + 5063a321d2aSKemi Wang NR_VM_NUMA_STAT_ITEMS], 50775ef7184SMel Gorman node_page_state(pgdat, i)); 508fa25c503SKOSAKI Motohiro 509fa25c503SKOSAKI Motohiro return n; 5102ac39037SMichael Rubin } 51110fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); 5122ac39037SMichael Rubin 51310fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev, 51410fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5151da177e4SLinus Torvalds { 5161da177e4SLinus Torvalds int nid = dev->id; 5171da177e4SLinus Torvalds int len = 0; 5181da177e4SLinus Torvalds int i; 5191da177e4SLinus Torvalds 52012ee3c0aSDavid Rientjes /* 52112ee3c0aSDavid Rientjes * buf is currently PAGE_SIZE in length and each node needs 4 chars 52212ee3c0aSDavid Rientjes * at the most (distance + space or newline). 52312ee3c0aSDavid Rientjes */ 52412ee3c0aSDavid Rientjes BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); 5251da177e4SLinus Torvalds 5261da177e4SLinus Torvalds for_each_online_node(i) 5271da177e4SLinus Torvalds len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i)); 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds len += sprintf(buf + len, "\n"); 5301da177e4SLinus Torvalds return len; 5311da177e4SLinus Torvalds } 53210fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); 5331da177e4SLinus Torvalds 5343c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = { 5353c9b8aafSTakashi Iwai &dev_attr_cpumap.attr, 5363c9b8aafSTakashi Iwai &dev_attr_cpulist.attr, 5373c9b8aafSTakashi Iwai &dev_attr_meminfo.attr, 5383c9b8aafSTakashi Iwai &dev_attr_numastat.attr, 5393c9b8aafSTakashi Iwai &dev_attr_distance.attr, 5403c9b8aafSTakashi Iwai &dev_attr_vmstat.attr, 5413c9b8aafSTakashi Iwai NULL 5423c9b8aafSTakashi Iwai }; 5437ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev); 5443c9b8aafSTakashi Iwai 5459a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS 5469a305230SLee Schermerhorn /* 5479a305230SLee Schermerhorn * hugetlbfs per node attributes registration interface: 5489a305230SLee Schermerhorn * When/if hugetlb[fs] subsystem initializes [sometime after this module], 5494faf8d95SLee Schermerhorn * it will register its per node attributes for all online nodes with 5504faf8d95SLee Schermerhorn * memory. It will also call register_hugetlbfs_with_node(), below, to 5519a305230SLee Schermerhorn * register its attribute registration functions with this node driver. 5529a305230SLee Schermerhorn * Once these hooks have been initialized, the node driver will call into 5539a305230SLee Schermerhorn * the hugetlb module to [un]register attributes for hot-plugged nodes. 5549a305230SLee Schermerhorn */ 5559a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node; 5569a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node; 5579a305230SLee Schermerhorn 55839da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node) 5599a305230SLee Schermerhorn { 5604faf8d95SLee Schermerhorn if (__hugetlb_register_node && 5618cebfcd0SLai Jiangshan node_state(node->dev.id, N_MEMORY)) { 5629a305230SLee Schermerhorn __hugetlb_register_node(node); 56339da08cbSLee Schermerhorn return true; 56439da08cbSLee Schermerhorn } 56539da08cbSLee Schermerhorn return false; 5669a305230SLee Schermerhorn } 5679a305230SLee Schermerhorn 5689a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) 5699a305230SLee Schermerhorn { 5709a305230SLee Schermerhorn if (__hugetlb_unregister_node) 5719a305230SLee Schermerhorn __hugetlb_unregister_node(node); 5729a305230SLee Schermerhorn } 5739a305230SLee Schermerhorn 5749a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister, 5759a305230SLee Schermerhorn node_registration_func_t unregister) 5769a305230SLee Schermerhorn { 5779a305230SLee Schermerhorn __hugetlb_register_node = doregister; 5789a305230SLee Schermerhorn __hugetlb_unregister_node = unregister; 5799a305230SLee Schermerhorn } 5809a305230SLee Schermerhorn #else 5819a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {} 5829a305230SLee Schermerhorn 5839a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {} 5849a305230SLee Schermerhorn #endif 5859a305230SLee Schermerhorn 5868c7b5b4eSYasuaki Ishimatsu static void node_device_release(struct device *dev) 5878c7b5b4eSYasuaki Ishimatsu { 5888c7b5b4eSYasuaki Ishimatsu struct node *node = to_node(dev); 5898c7b5b4eSYasuaki Ishimatsu 5908c7b5b4eSYasuaki Ishimatsu #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) 5918c7b5b4eSYasuaki Ishimatsu /* 5928c7b5b4eSYasuaki Ishimatsu * We schedule the work only when a memory section is 5938c7b5b4eSYasuaki Ishimatsu * onlined/offlined on this node. When we come here, 5948c7b5b4eSYasuaki Ishimatsu * all the memory on this node has been offlined, 5958c7b5b4eSYasuaki Ishimatsu * so we won't enqueue new work to this work. 5968c7b5b4eSYasuaki Ishimatsu * 5978c7b5b4eSYasuaki Ishimatsu * The work is using node->node_work, so we should 5988c7b5b4eSYasuaki Ishimatsu * flush work before freeing the memory. 5998c7b5b4eSYasuaki Ishimatsu */ 6008c7b5b4eSYasuaki Ishimatsu flush_work(&node->node_work); 6018c7b5b4eSYasuaki Ishimatsu #endif 6028c7b5b4eSYasuaki Ishimatsu kfree(node); 6038c7b5b4eSYasuaki Ishimatsu } 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds /* 606405ae7d3SRobert P. J. Day * register_node - Setup a sysfs device for a node. 6071da177e4SLinus Torvalds * @num - Node number to use when creating the device. 6081da177e4SLinus Torvalds * 6091da177e4SLinus Torvalds * Initialize and register the node device. 6101da177e4SLinus Torvalds */ 611a7be6e5aSDou Liyang static int register_node(struct node *node, int num) 6121da177e4SLinus Torvalds { 6131da177e4SLinus Torvalds int error; 6141da177e4SLinus Torvalds 61510fbcf4cSKay Sievers node->dev.id = num; 61610fbcf4cSKay Sievers node->dev.bus = &node_subsys; 6178c7b5b4eSYasuaki Ishimatsu node->dev.release = node_device_release; 6187ca7ec40SGreg Kroah-Hartman node->dev.groups = node_dev_groups; 61910fbcf4cSKay Sievers error = device_register(&node->dev); 6201da177e4SLinus Torvalds 621c1cc0d51SArvind Yadav if (error) 622c1cc0d51SArvind Yadav put_device(&node->dev); 623c1cc0d51SArvind Yadav else { 6249a305230SLee Schermerhorn hugetlb_register_node(node); 625ed4a6d7fSMel Gorman 626ed4a6d7fSMel Gorman compaction_register_node(node); 6271da177e4SLinus Torvalds } 6281da177e4SLinus Torvalds return error; 6291da177e4SLinus Torvalds } 6301da177e4SLinus Torvalds 6314b45099bSKeiichiro Tokunaga /** 6324b45099bSKeiichiro Tokunaga * unregister_node - unregister a node device 6334b45099bSKeiichiro Tokunaga * @node: node going away 6344b45099bSKeiichiro Tokunaga * 6354b45099bSKeiichiro Tokunaga * Unregisters a node device @node. All the devices on the node must be 6364b45099bSKeiichiro Tokunaga * unregistered before calling this function. 6374b45099bSKeiichiro Tokunaga */ 6384b45099bSKeiichiro Tokunaga void unregister_node(struct node *node) 6394b45099bSKeiichiro Tokunaga { 6404faf8d95SLee Schermerhorn hugetlb_unregister_node(node); /* no-op, if memoryless node */ 64108d9dbe7SKeith Busch node_remove_accesses(node); 642acc02a10SKeith Busch node_remove_caches(node); 64310fbcf4cSKay Sievers device_unregister(&node->dev); 6444b45099bSKeiichiro Tokunaga } 6454b45099bSKeiichiro Tokunaga 6468732794bSWen Congyang struct node *node_devices[MAX_NUMNODES]; 6470fc44159SYasunori Goto 64876b67ed9SKAMEZAWA Hiroyuki /* 64976b67ed9SKAMEZAWA Hiroyuki * register cpu under node 65076b67ed9SKAMEZAWA Hiroyuki */ 65176b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid) 65276b67ed9SKAMEZAWA Hiroyuki { 6531830794aSAlex Chiang int ret; 6548a25a2fdSKay Sievers struct device *obj; 655f8246f31SAlex Chiang 656f8246f31SAlex Chiang if (!node_online(nid)) 657f8246f31SAlex Chiang return 0; 658f8246f31SAlex Chiang 6598a25a2fdSKay Sievers obj = get_cpu_device(cpu); 66076b67ed9SKAMEZAWA Hiroyuki if (!obj) 66176b67ed9SKAMEZAWA Hiroyuki return 0; 662f8246f31SAlex Chiang 6638732794bSWen Congyang ret = sysfs_create_link(&node_devices[nid]->dev.kobj, 66476b67ed9SKAMEZAWA Hiroyuki &obj->kobj, 66576b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 6661830794aSAlex Chiang if (ret) 6671830794aSAlex Chiang return ret; 6681830794aSAlex Chiang 6691830794aSAlex Chiang return sysfs_create_link(&obj->kobj, 6708732794bSWen Congyang &node_devices[nid]->dev.kobj, 6718732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 67276b67ed9SKAMEZAWA Hiroyuki } 67376b67ed9SKAMEZAWA Hiroyuki 67408d9dbe7SKeith Busch /** 67508d9dbe7SKeith Busch * register_memory_node_under_compute_node - link memory node to its compute 67608d9dbe7SKeith Busch * node for a given access class. 67758cb346cSMauro Carvalho Chehab * @mem_nid: Memory node number 67858cb346cSMauro Carvalho Chehab * @cpu_nid: Cpu node number 67908d9dbe7SKeith Busch * @access: Access class to register 68008d9dbe7SKeith Busch * 68108d9dbe7SKeith Busch * Description: 68208d9dbe7SKeith Busch * For use with platforms that may have separate memory and compute nodes. 68308d9dbe7SKeith Busch * This function will export node relationships linking which memory 68408d9dbe7SKeith Busch * initiator nodes can access memory targets at a given ranked access 68508d9dbe7SKeith Busch * class. 68608d9dbe7SKeith Busch */ 68708d9dbe7SKeith Busch int register_memory_node_under_compute_node(unsigned int mem_nid, 68808d9dbe7SKeith Busch unsigned int cpu_nid, 68908d9dbe7SKeith Busch unsigned access) 69008d9dbe7SKeith Busch { 69108d9dbe7SKeith Busch struct node *init_node, *targ_node; 69208d9dbe7SKeith Busch struct node_access_nodes *initiator, *target; 69308d9dbe7SKeith Busch int ret; 69408d9dbe7SKeith Busch 69508d9dbe7SKeith Busch if (!node_online(cpu_nid) || !node_online(mem_nid)) 69608d9dbe7SKeith Busch return -ENODEV; 69708d9dbe7SKeith Busch 69808d9dbe7SKeith Busch init_node = node_devices[cpu_nid]; 69908d9dbe7SKeith Busch targ_node = node_devices[mem_nid]; 70008d9dbe7SKeith Busch initiator = node_init_node_access(init_node, access); 70108d9dbe7SKeith Busch target = node_init_node_access(targ_node, access); 70208d9dbe7SKeith Busch if (!initiator || !target) 70308d9dbe7SKeith Busch return -ENOMEM; 70408d9dbe7SKeith Busch 70508d9dbe7SKeith Busch ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets", 70608d9dbe7SKeith Busch &targ_node->dev.kobj, 70708d9dbe7SKeith Busch dev_name(&targ_node->dev)); 70808d9dbe7SKeith Busch if (ret) 70908d9dbe7SKeith Busch return ret; 71008d9dbe7SKeith Busch 71108d9dbe7SKeith Busch ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators", 71208d9dbe7SKeith Busch &init_node->dev.kobj, 71308d9dbe7SKeith Busch dev_name(&init_node->dev)); 71408d9dbe7SKeith Busch if (ret) 71508d9dbe7SKeith Busch goto err; 71608d9dbe7SKeith Busch 71708d9dbe7SKeith Busch return 0; 71808d9dbe7SKeith Busch err: 71908d9dbe7SKeith Busch sysfs_remove_link_from_group(&initiator->dev.kobj, "targets", 72008d9dbe7SKeith Busch dev_name(&targ_node->dev)); 72108d9dbe7SKeith Busch return ret; 72208d9dbe7SKeith Busch } 72308d9dbe7SKeith Busch 72476b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 72576b67ed9SKAMEZAWA Hiroyuki { 7268a25a2fdSKay Sievers struct device *obj; 727b9d52dadSAlex Chiang 728b9d52dadSAlex Chiang if (!node_online(nid)) 729b9d52dadSAlex Chiang return 0; 730b9d52dadSAlex Chiang 7318a25a2fdSKay Sievers obj = get_cpu_device(cpu); 732b9d52dadSAlex Chiang if (!obj) 733b9d52dadSAlex Chiang return 0; 734b9d52dadSAlex Chiang 7358732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 73676b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 7371830794aSAlex Chiang sysfs_remove_link(&obj->kobj, 7388732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 739b9d52dadSAlex Chiang 74076b67ed9SKAMEZAWA Hiroyuki return 0; 74176b67ed9SKAMEZAWA Hiroyuki } 74276b67ed9SKAMEZAWA Hiroyuki 743c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 744bd721ea7SFabian Frederick static int __ref get_nid_for_pfn(unsigned long pfn) 745c04fc586SGary Hade { 746c04fc586SGary Hade if (!pfn_valid_within(pfn)) 747c04fc586SGary Hade return -1; 7483a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 7498cdde385SThomas Gleixner if (system_state < SYSTEM_RUNNING) 7503a80a7faSMel Gorman return early_pfn_to_nid(pfn); 7513a80a7faSMel Gorman #endif 752c04fc586SGary Hade return pfn_to_nid(pfn); 753c04fc586SGary Hade } 754c04fc586SGary Hade 755c04fc586SGary Hade /* register memory section under specified node if it spans that node */ 7564fbce633SOscar Salvador int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) 757c04fc586SGary Hade { 7584fbce633SOscar Salvador int ret, nid = *(int *)arg; 759c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 760c04fc586SGary Hade 761d0dc12e8SPavel Tatashin mem_blk->nid = nid; 762d3360164SNathan Fontenot 763d3360164SNathan Fontenot sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 764d3360164SNathan Fontenot sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); 765d3360164SNathan Fontenot sect_end_pfn += PAGES_PER_SECTION - 1; 766c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 767c04fc586SGary Hade int page_nid; 768c04fc586SGary Hade 76904697858SYinghai Lu /* 77004697858SYinghai Lu * memory block could have several absent sections from start. 77104697858SYinghai Lu * skip pfn range from absent section 77204697858SYinghai Lu */ 77304697858SYinghai Lu if (!pfn_present(pfn)) { 77404697858SYinghai Lu pfn = round_down(pfn + PAGES_PER_SECTION, 77504697858SYinghai Lu PAGES_PER_SECTION) - 1; 77604697858SYinghai Lu continue; 77704697858SYinghai Lu } 77804697858SYinghai Lu 779fc44f7f9SPavel Tatashin /* 780fc44f7f9SPavel Tatashin * We need to check if page belongs to nid only for the boot 781fc44f7f9SPavel Tatashin * case, during hotplug we know that all pages in the memory 782fc44f7f9SPavel Tatashin * block belong to the same node. 783fc44f7f9SPavel Tatashin */ 7844fbce633SOscar Salvador if (system_state == SYSTEM_BOOTING) { 785c04fc586SGary Hade page_nid = get_nid_for_pfn(pfn); 786c04fc586SGary Hade if (page_nid < 0) 787c04fc586SGary Hade continue; 788c04fc586SGary Hade if (page_nid != nid) 789c04fc586SGary Hade continue; 790fc44f7f9SPavel Tatashin } 7918732794bSWen Congyang ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, 79210fbcf4cSKay Sievers &mem_blk->dev.kobj, 79310fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 794dee5d0d5SAlex Chiang if (ret) 795dee5d0d5SAlex Chiang return ret; 796dee5d0d5SAlex Chiang 79710fbcf4cSKay Sievers return sysfs_create_link_nowarn(&mem_blk->dev.kobj, 7988732794bSWen Congyang &node_devices[nid]->dev.kobj, 7998732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 800c04fc586SGary Hade } 801c04fc586SGary Hade /* mem section does not span the specified node */ 802c04fc586SGary Hade return 0; 803c04fc586SGary Hade } 804c04fc586SGary Hade 8054c4b7f9bSDavid Hildenbrand /* 8064c4b7f9bSDavid Hildenbrand * Unregister memory block device under all nodes that it spans. 8074c4b7f9bSDavid Hildenbrand */ 8084c4b7f9bSDavid Hildenbrand int unregister_memory_block_under_nodes(struct memory_block *mem_blk) 809c04fc586SGary Hade { 8109ae49fabSDavid Rientjes NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); 811c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 812c04fc586SGary Hade 8139ae49fabSDavid Rientjes if (!mem_blk) { 8149ae49fabSDavid Rientjes NODEMASK_FREE(unlinked_nodes); 815c04fc586SGary Hade return -EFAULT; 8169ae49fabSDavid Rientjes } 8179ae49fabSDavid Rientjes if (!unlinked_nodes) 8189ae49fabSDavid Rientjes return -ENOMEM; 8199ae49fabSDavid Rientjes nodes_clear(*unlinked_nodes); 820d3360164SNathan Fontenot 8214c4b7f9bSDavid Hildenbrand sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 8224c4b7f9bSDavid Hildenbrand sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); 823c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 82447504980SRoel Kluin int nid; 825c04fc586SGary Hade 826c04fc586SGary Hade nid = get_nid_for_pfn(pfn); 827c04fc586SGary Hade if (nid < 0) 828c04fc586SGary Hade continue; 829c04fc586SGary Hade if (!node_online(nid)) 830c04fc586SGary Hade continue; 8319ae49fabSDavid Rientjes if (node_test_and_set(nid, *unlinked_nodes)) 832c04fc586SGary Hade continue; 8338732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 83410fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 83510fbcf4cSKay Sievers sysfs_remove_link(&mem_blk->dev.kobj, 8368732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 837c04fc586SGary Hade } 8389ae49fabSDavid Rientjes NODEMASK_FREE(unlinked_nodes); 839c04fc586SGary Hade return 0; 840c04fc586SGary Hade } 841c04fc586SGary Hade 8424fbce633SOscar Salvador int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) 843c04fc586SGary Hade { 8444fbce633SOscar Salvador return walk_memory_range(start_pfn, end_pfn, (void *)&nid, 8454fbce633SOscar Salvador register_mem_sect_under_node); 846c04fc586SGary Hade } 8474faf8d95SLee Schermerhorn 84839da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS 8494faf8d95SLee Schermerhorn /* 8504faf8d95SLee Schermerhorn * Handle per node hstate attribute [un]registration on transistions 8514faf8d95SLee Schermerhorn * to/from memoryless state. 8524faf8d95SLee Schermerhorn */ 85339da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work) 85439da08cbSLee Schermerhorn { 85539da08cbSLee Schermerhorn struct node *node = container_of(work, struct node, node_work); 85639da08cbSLee Schermerhorn 85739da08cbSLee Schermerhorn /* 85839da08cbSLee Schermerhorn * We only get here when a node transitions to/from memoryless state. 85939da08cbSLee Schermerhorn * We can detect which transition occurred by examining whether the 86039da08cbSLee Schermerhorn * node has memory now. hugetlb_register_node() already check this 86139da08cbSLee Schermerhorn * so we try to register the attributes. If that fails, then the 86239da08cbSLee Schermerhorn * node has transitioned to memoryless, try to unregister the 86339da08cbSLee Schermerhorn * attributes. 86439da08cbSLee Schermerhorn */ 86539da08cbSLee Schermerhorn if (!hugetlb_register_node(node)) 86639da08cbSLee Schermerhorn hugetlb_unregister_node(node); 86739da08cbSLee Schermerhorn } 86839da08cbSLee Schermerhorn 86939da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) 87039da08cbSLee Schermerhorn { 8718732794bSWen Congyang INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); 87239da08cbSLee Schermerhorn } 8734faf8d95SLee Schermerhorn 8744faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self, 8754faf8d95SLee Schermerhorn unsigned long action, void *arg) 8764faf8d95SLee Schermerhorn { 8774faf8d95SLee Schermerhorn struct memory_notify *mnb = arg; 8784faf8d95SLee Schermerhorn int nid = mnb->status_change_nid; 8794faf8d95SLee Schermerhorn 8804faf8d95SLee Schermerhorn switch (action) { 88139da08cbSLee Schermerhorn case MEM_ONLINE: 88239da08cbSLee Schermerhorn case MEM_OFFLINE: 88339da08cbSLee Schermerhorn /* 88439da08cbSLee Schermerhorn * offload per node hstate [un]registration to a work thread 88539da08cbSLee Schermerhorn * when transitioning to/from memoryless state. 88639da08cbSLee Schermerhorn */ 8874faf8d95SLee Schermerhorn if (nid != NUMA_NO_NODE) 8888732794bSWen Congyang schedule_work(&node_devices[nid]->node_work); 8894faf8d95SLee Schermerhorn break; 89039da08cbSLee Schermerhorn 8914faf8d95SLee Schermerhorn case MEM_GOING_ONLINE: 8924faf8d95SLee Schermerhorn case MEM_GOING_OFFLINE: 8934faf8d95SLee Schermerhorn case MEM_CANCEL_ONLINE: 8944faf8d95SLee Schermerhorn case MEM_CANCEL_OFFLINE: 8954faf8d95SLee Schermerhorn default: 8964faf8d95SLee Schermerhorn break; 8974faf8d95SLee Schermerhorn } 8984faf8d95SLee Schermerhorn 8994faf8d95SLee Schermerhorn return NOTIFY_OK; 9004faf8d95SLee Schermerhorn } 90139da08cbSLee Schermerhorn #endif /* CONFIG_HUGETLBFS */ 90239da08cbSLee Schermerhorn #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 90339da08cbSLee Schermerhorn 90439da08cbSLee Schermerhorn #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ 90539da08cbSLee Schermerhorn !defined(CONFIG_HUGETLBFS) 9064faf8d95SLee Schermerhorn static inline int node_memory_callback(struct notifier_block *self, 9074faf8d95SLee Schermerhorn unsigned long action, void *arg) 9084faf8d95SLee Schermerhorn { 9094faf8d95SLee Schermerhorn return NOTIFY_OK; 9104faf8d95SLee Schermerhorn } 91139da08cbSLee Schermerhorn 91239da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) { } 91339da08cbSLee Schermerhorn 91439da08cbSLee Schermerhorn #endif 915c04fc586SGary Hade 9169037a993SMichal Hocko int __register_one_node(int nid) 9170fc44159SYasunori Goto { 9189037a993SMichal Hocko int error; 9199037a993SMichal Hocko int cpu; 9200fc44159SYasunori Goto 9218732794bSWen Congyang node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); 9228732794bSWen Congyang if (!node_devices[nid]) 9238732794bSWen Congyang return -ENOMEM; 9248732794bSWen Congyang 925a7be6e5aSDou Liyang error = register_node(node_devices[nid], nid); 92676b67ed9SKAMEZAWA Hiroyuki 92776b67ed9SKAMEZAWA Hiroyuki /* link cpu under this node */ 92876b67ed9SKAMEZAWA Hiroyuki for_each_present_cpu(cpu) { 92976b67ed9SKAMEZAWA Hiroyuki if (cpu_to_node(cpu) == nid) 93076b67ed9SKAMEZAWA Hiroyuki register_cpu_under_node(cpu, nid); 93176b67ed9SKAMEZAWA Hiroyuki } 932c04fc586SGary Hade 93308d9dbe7SKeith Busch INIT_LIST_HEAD(&node_devices[nid]->access_list); 93439da08cbSLee Schermerhorn /* initialize work queue for memory hot plug */ 93539da08cbSLee Schermerhorn init_node_hugetlb_work(nid); 936acc02a10SKeith Busch node_init_caches(nid); 9370fc44159SYasunori Goto 9380fc44159SYasunori Goto return error; 9390fc44159SYasunori Goto } 9400fc44159SYasunori Goto 9410fc44159SYasunori Goto void unregister_one_node(int nid) 9420fc44159SYasunori Goto { 94392d585efSXishi Qiu if (!node_devices[nid]) 94492d585efSXishi Qiu return; 94592d585efSXishi Qiu 9468732794bSWen Congyang unregister_node(node_devices[nid]); 9478732794bSWen Congyang node_devices[nid] = NULL; 9480fc44159SYasunori Goto } 9490fc44159SYasunori Goto 950bde631a5SLee Schermerhorn /* 951bde631a5SLee Schermerhorn * node states attributes 952bde631a5SLee Schermerhorn */ 953bde631a5SLee Schermerhorn 954bde631a5SLee Schermerhorn static ssize_t print_nodes_state(enum node_states state, char *buf) 955bde631a5SLee Schermerhorn { 956bde631a5SLee Schermerhorn int n; 957bde631a5SLee Schermerhorn 958f799b1a7STejun Heo n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", 959f799b1a7STejun Heo nodemask_pr_args(&node_states[state])); 960f6238818SRyota Ozaki buf[n++] = '\n'; 961f6238818SRyota Ozaki buf[n] = '\0'; 962bde631a5SLee Schermerhorn return n; 963bde631a5SLee Schermerhorn } 964bde631a5SLee Schermerhorn 965b15f562fSAndi Kleen struct node_attr { 96610fbcf4cSKay Sievers struct device_attribute attr; 967b15f562fSAndi Kleen enum node_states state; 968b15f562fSAndi Kleen }; 969b15f562fSAndi Kleen 97010fbcf4cSKay Sievers static ssize_t show_node_state(struct device *dev, 97110fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 972bde631a5SLee Schermerhorn { 973b15f562fSAndi Kleen struct node_attr *na = container_of(attr, struct node_attr, attr); 974b15f562fSAndi Kleen return print_nodes_state(na->state, buf); 975bde631a5SLee Schermerhorn } 976bde631a5SLee Schermerhorn 977b15f562fSAndi Kleen #define _NODE_ATTR(name, state) \ 97810fbcf4cSKay Sievers { __ATTR(name, 0444, show_node_state, NULL), state } 979bde631a5SLee Schermerhorn 980b15f562fSAndi Kleen static struct node_attr node_state_attr[] = { 981fcf07d22SLai Jiangshan [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE), 982fcf07d22SLai Jiangshan [N_ONLINE] = _NODE_ATTR(online, N_ONLINE), 983fcf07d22SLai Jiangshan [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY), 984bde631a5SLee Schermerhorn #ifdef CONFIG_HIGHMEM 985fcf07d22SLai Jiangshan [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), 986bde631a5SLee Schermerhorn #endif 98720b2f52bSLai Jiangshan [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY), 988fcf07d22SLai Jiangshan [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), 989bde631a5SLee Schermerhorn }; 990bde631a5SLee Schermerhorn 99110fbcf4cSKay Sievers static struct attribute *node_state_attrs[] = { 992fcf07d22SLai Jiangshan &node_state_attr[N_POSSIBLE].attr.attr, 993fcf07d22SLai Jiangshan &node_state_attr[N_ONLINE].attr.attr, 994fcf07d22SLai Jiangshan &node_state_attr[N_NORMAL_MEMORY].attr.attr, 9953701cde6SAndi Kleen #ifdef CONFIG_HIGHMEM 996fcf07d22SLai Jiangshan &node_state_attr[N_HIGH_MEMORY].attr.attr, 9973701cde6SAndi Kleen #endif 99820b2f52bSLai Jiangshan &node_state_attr[N_MEMORY].attr.attr, 999fcf07d22SLai Jiangshan &node_state_attr[N_CPU].attr.attr, 10003701cde6SAndi Kleen NULL 10013701cde6SAndi Kleen }; 1002bde631a5SLee Schermerhorn 100310fbcf4cSKay Sievers static struct attribute_group memory_root_attr_group = { 100410fbcf4cSKay Sievers .attrs = node_state_attrs, 100510fbcf4cSKay Sievers }; 100610fbcf4cSKay Sievers 100710fbcf4cSKay Sievers static const struct attribute_group *cpu_root_attr_groups[] = { 100810fbcf4cSKay Sievers &memory_root_attr_group, 100910fbcf4cSKay Sievers NULL, 101010fbcf4cSKay Sievers }; 101110fbcf4cSKay Sievers 10124faf8d95SLee Schermerhorn #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ 10134b45099bSKeiichiro Tokunaga static int __init register_node_type(void) 10141da177e4SLinus Torvalds { 1015bde631a5SLee Schermerhorn int ret; 1016bde631a5SLee Schermerhorn 10173701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); 10183701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); 10193701cde6SAndi Kleen 102010fbcf4cSKay Sievers ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); 10214faf8d95SLee Schermerhorn if (!ret) { 10226e259e7dSAndrew Morton static struct notifier_block node_memory_callback_nb = { 10236e259e7dSAndrew Morton .notifier_call = node_memory_callback, 10246e259e7dSAndrew Morton .priority = NODE_CALLBACK_PRI, 10256e259e7dSAndrew Morton }; 10266e259e7dSAndrew Morton register_hotmemory_notifier(&node_memory_callback_nb); 10274faf8d95SLee Schermerhorn } 1028bde631a5SLee Schermerhorn 1029bde631a5SLee Schermerhorn /* 1030bde631a5SLee Schermerhorn * Note: we're not going to unregister the node class if we fail 1031bde631a5SLee Schermerhorn * to register the node state class attribute files. 1032bde631a5SLee Schermerhorn */ 1033bde631a5SLee Schermerhorn return ret; 10341da177e4SLinus Torvalds } 10351da177e4SLinus Torvalds postcore_initcall(register_node_type); 1036