1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 21da177e4SLinus Torvalds /* 310fbcf4cSKay Sievers * Basic Node interface support 41da177e4SLinus Torvalds */ 51da177e4SLinus Torvalds 61da177e4SLinus Torvalds #include <linux/module.h> 71da177e4SLinus Torvalds #include <linux/init.h> 81da177e4SLinus Torvalds #include <linux/mm.h> 9c04fc586SGary Hade #include <linux/memory.h> 10fa25c503SKOSAKI Motohiro #include <linux/vmstat.h> 116e259e7dSAndrew Morton #include <linux/notifier.h> 121da177e4SLinus Torvalds #include <linux/node.h> 131da177e4SLinus Torvalds #include <linux/hugetlb.h> 14ed4a6d7fSMel Gorman #include <linux/compaction.h> 151da177e4SLinus Torvalds #include <linux/cpumask.h> 161da177e4SLinus Torvalds #include <linux/topology.h> 171da177e4SLinus Torvalds #include <linux/nodemask.h> 1876b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h> 19bde631a5SLee Schermerhorn #include <linux/device.h> 20af936a16SLee Schermerhorn #include <linux/swap.h> 2118e5b539STejun Heo #include <linux/slab.h> 221da177e4SLinus Torvalds 2310fbcf4cSKay Sievers static struct bus_type node_subsys = { 24af5ca3f4SKay Sievers .name = "node", 2510fbcf4cSKay Sievers .dev_name = "node", 261da177e4SLinus Torvalds }; 271da177e4SLinus Torvalds 281da177e4SLinus Torvalds 295aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) 301da177e4SLinus Torvalds { 31064f0e93SZhen Lei ssize_t n; 32064f0e93SZhen Lei cpumask_var_t mask; 331da177e4SLinus Torvalds struct node *node_dev = to_node(dev); 341da177e4SLinus Torvalds 3539106dcfSMike Travis /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ 3639106dcfSMike Travis BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 371da177e4SLinus Torvalds 38064f0e93SZhen Lei if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 39064f0e93SZhen Lei return 0; 40064f0e93SZhen Lei 41064f0e93SZhen Lei cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask); 42064f0e93SZhen Lei n = cpumap_print_to_pagebuf(list, buf, mask); 43064f0e93SZhen Lei free_cpumask_var(mask); 44064f0e93SZhen Lei 45064f0e93SZhen Lei return n; 461da177e4SLinus Torvalds } 471da177e4SLinus Torvalds 4810fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev, 4910fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5039106dcfSMike Travis { 515aaba363SSudeep Holla return node_read_cpumap(dev, false, buf); 5239106dcfSMike Travis } 5310fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev, 5410fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 5539106dcfSMike Travis { 565aaba363SSudeep Holla return node_read_cpumap(dev, true, buf); 5739106dcfSMike Travis } 5839106dcfSMike Travis 5910fbcf4cSKay Sievers static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 6010fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10)) 6310fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev, 6410fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 651da177e4SLinus Torvalds { 661da177e4SLinus Torvalds int n; 671da177e4SLinus Torvalds int nid = dev->id; 68599d0c95SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 691da177e4SLinus Torvalds struct sysinfo i; 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds si_meminfo_node(&i, nid); 727ee92255SKOSAKI Motohiro n = sprintf(buf, 731da177e4SLinus Torvalds "Node %d MemTotal: %8lu kB\n" 741da177e4SLinus Torvalds "Node %d MemFree: %8lu kB\n" 751da177e4SLinus Torvalds "Node %d MemUsed: %8lu kB\n" 761da177e4SLinus Torvalds "Node %d Active: %8lu kB\n" 771da177e4SLinus Torvalds "Node %d Inactive: %8lu kB\n" 784f98a2feSRik van Riel "Node %d Active(anon): %8lu kB\n" 794f98a2feSRik van Riel "Node %d Inactive(anon): %8lu kB\n" 804f98a2feSRik van Riel "Node %d Active(file): %8lu kB\n" 814f98a2feSRik van Riel "Node %d Inactive(file): %8lu kB\n" 825344b7e6SNick Piggin "Node %d Unevictable: %8lu kB\n" 837ee92255SKOSAKI Motohiro "Node %d Mlocked: %8lu kB\n", 847ee92255SKOSAKI Motohiro nid, K(i.totalram), 857ee92255SKOSAKI Motohiro nid, K(i.freeram), 867ee92255SKOSAKI Motohiro nid, K(i.totalram - i.freeram), 87599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + 88599d0c95SMel Gorman node_page_state(pgdat, NR_ACTIVE_FILE)), 89599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + 90599d0c95SMel Gorman node_page_state(pgdat, NR_INACTIVE_FILE)), 91599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), 92599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), 93599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), 94599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), 95599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), 9675ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); 977ee92255SKOSAKI Motohiro 98182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM 997ee92255SKOSAKI Motohiro n += sprintf(buf + n, 1001da177e4SLinus Torvalds "Node %d HighTotal: %8lu kB\n" 1011da177e4SLinus Torvalds "Node %d HighFree: %8lu kB\n" 1021da177e4SLinus Torvalds "Node %d LowTotal: %8lu kB\n" 1037ee92255SKOSAKI Motohiro "Node %d LowFree: %8lu kB\n", 1047ee92255SKOSAKI Motohiro nid, K(i.totalhigh), 1057ee92255SKOSAKI Motohiro nid, K(i.freehigh), 1067ee92255SKOSAKI Motohiro nid, K(i.totalram - i.totalhigh), 1077ee92255SKOSAKI Motohiro nid, K(i.freeram - i.freehigh)); 108182e8e23SChristoph Lameter #endif 1097ee92255SKOSAKI Motohiro n += sprintf(buf + n, 110c07e02dbSMartin Hicks "Node %d Dirty: %8lu kB\n" 111c07e02dbSMartin Hicks "Node %d Writeback: %8lu kB\n" 112347ce434SChristoph Lameter "Node %d FilePages: %8lu kB\n" 113c07e02dbSMartin Hicks "Node %d Mapped: %8lu kB\n" 114f3dbd344SChristoph Lameter "Node %d AnonPages: %8lu kB\n" 1154b02108aSKOSAKI Motohiro "Node %d Shmem: %8lu kB\n" 116c6a7f572SKOSAKI Motohiro "Node %d KernelStack: %8lu kB\n" 117df849a15SChristoph Lameter "Node %d PageTables: %8lu kB\n" 118f5ef68daSAndrew Morton "Node %d NFS_Unstable: %8lu kB\n" 119d2c5e30cSChristoph Lameter "Node %d Bounce: %8lu kB\n" 120fc3ba692SMiklos Szeredi "Node %d WritebackTmp: %8lu kB\n" 121972d1a7bSChristoph Lameter "Node %d Slab: %8lu kB\n" 122972d1a7bSChristoph Lameter "Node %d SReclaimable: %8lu kB\n" 12305b258e9SDavid Rientjes "Node %d SUnreclaim: %8lu kB\n" 12405b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 12505b258e9SDavid Rientjes "Node %d AnonHugePages: %8lu kB\n" 12665c45377SKirill A. Shutemov "Node %d ShmemHugePages: %8lu kB\n" 12765c45377SKirill A. Shutemov "Node %d ShmemPmdMapped: %8lu kB\n" 12805b258e9SDavid Rientjes #endif 12905b258e9SDavid Rientjes , 13011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), 13111fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK)), 13211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_PAGES)), 13350658e2eSMel Gorman nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), 1344b9d0fabSMel Gorman nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), 135cc7452b6SRafael Aquini nid, K(i.sharedram), 136d30dd8beSAndy Lutomirski nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), 13775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), 13811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), 13975ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), 14011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), 141385386cfSJohannes Weiner nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) + 142385386cfSJohannes Weiner node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), 143385386cfSJohannes Weiner nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)), 14405b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 145385386cfSJohannes Weiner nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), 14611fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_ANON_THPS) * 14765c45377SKirill A. Shutemov HPAGE_PMD_NR), 14811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * 14965c45377SKirill A. Shutemov HPAGE_PMD_NR), 15011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * 15191a13c28SClaudio Scordino HPAGE_PMD_NR)); 15291a13c28SClaudio Scordino #else 153385386cfSJohannes Weiner nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE))); 15405b258e9SDavid Rientjes #endif 1551da177e4SLinus Torvalds n += hugetlb_report_node_meminfo(nid, buf + n); 1561da177e4SLinus Torvalds return n; 1571da177e4SLinus Torvalds } 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds #undef K 16010fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); 1611da177e4SLinus Torvalds 16210fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev, 16310fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 1641da177e4SLinus Torvalds { 1651da177e4SLinus Torvalds return sprintf(buf, 1661da177e4SLinus Torvalds "numa_hit %lu\n" 1671da177e4SLinus Torvalds "numa_miss %lu\n" 1681da177e4SLinus Torvalds "numa_foreign %lu\n" 1691da177e4SLinus Torvalds "interleave_hit %lu\n" 1701da177e4SLinus Torvalds "local_node %lu\n" 1711da177e4SLinus Torvalds "other_node %lu\n", 1723a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_HIT), 1733a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_MISS), 1743a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_FOREIGN), 1753a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), 1763a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_LOCAL), 1773a321d2aSKemi Wang sum_zone_numa_state(dev->id, NUMA_OTHER)); 1781da177e4SLinus Torvalds } 17910fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 1801da177e4SLinus Torvalds 18110fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev, 18210fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 1832ac39037SMichael Rubin { 1842ac39037SMichael Rubin int nid = dev->id; 18575ef7184SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 186fa25c503SKOSAKI Motohiro int i; 187fa25c503SKOSAKI Motohiro int n = 0; 188fa25c503SKOSAKI Motohiro 189fa25c503SKOSAKI Motohiro for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 190fa25c503SKOSAKI Motohiro n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], 19175ef7184SMel Gorman sum_zone_node_page_state(nid, i)); 19275ef7184SMel Gorman 1933a321d2aSKemi Wang #ifdef CONFIG_NUMA 1943a321d2aSKemi Wang for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) 19575ef7184SMel Gorman n += sprintf(buf+n, "%s %lu\n", 19675ef7184SMel Gorman vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], 1973a321d2aSKemi Wang sum_zone_numa_state(nid, i)); 1983a321d2aSKemi Wang #endif 1993a321d2aSKemi Wang 2003a321d2aSKemi Wang for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 2013a321d2aSKemi Wang n += sprintf(buf+n, "%s %lu\n", 2023a321d2aSKemi Wang vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + 2033a321d2aSKemi Wang NR_VM_NUMA_STAT_ITEMS], 20475ef7184SMel Gorman node_page_state(pgdat, i)); 205fa25c503SKOSAKI Motohiro 206fa25c503SKOSAKI Motohiro return n; 2072ac39037SMichael Rubin } 20810fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); 2092ac39037SMichael Rubin 21010fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev, 21110fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 2121da177e4SLinus Torvalds { 2131da177e4SLinus Torvalds int nid = dev->id; 2141da177e4SLinus Torvalds int len = 0; 2151da177e4SLinus Torvalds int i; 2161da177e4SLinus Torvalds 21712ee3c0aSDavid Rientjes /* 21812ee3c0aSDavid Rientjes * buf is currently PAGE_SIZE in length and each node needs 4 chars 21912ee3c0aSDavid Rientjes * at the most (distance + space or newline). 22012ee3c0aSDavid Rientjes */ 22112ee3c0aSDavid Rientjes BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); 2221da177e4SLinus Torvalds 2231da177e4SLinus Torvalds for_each_online_node(i) 2241da177e4SLinus Torvalds len += sprintf(buf + len, "%s%d", i ? " " : "", node_distance(nid, i)); 2251da177e4SLinus Torvalds 2261da177e4SLinus Torvalds len += sprintf(buf + len, "\n"); 2271da177e4SLinus Torvalds return len; 2281da177e4SLinus Torvalds } 22910fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); 2301da177e4SLinus Torvalds 2313c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = { 2323c9b8aafSTakashi Iwai &dev_attr_cpumap.attr, 2333c9b8aafSTakashi Iwai &dev_attr_cpulist.attr, 2343c9b8aafSTakashi Iwai &dev_attr_meminfo.attr, 2353c9b8aafSTakashi Iwai &dev_attr_numastat.attr, 2363c9b8aafSTakashi Iwai &dev_attr_distance.attr, 2373c9b8aafSTakashi Iwai &dev_attr_vmstat.attr, 2383c9b8aafSTakashi Iwai NULL 2393c9b8aafSTakashi Iwai }; 2407ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev); 2413c9b8aafSTakashi Iwai 2429a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS 2439a305230SLee Schermerhorn /* 2449a305230SLee Schermerhorn * hugetlbfs per node attributes registration interface: 2459a305230SLee Schermerhorn * When/if hugetlb[fs] subsystem initializes [sometime after this module], 2464faf8d95SLee Schermerhorn * it will register its per node attributes for all online nodes with 2474faf8d95SLee Schermerhorn * memory. It will also call register_hugetlbfs_with_node(), below, to 2489a305230SLee Schermerhorn * register its attribute registration functions with this node driver. 2499a305230SLee Schermerhorn * Once these hooks have been initialized, the node driver will call into 2509a305230SLee Schermerhorn * the hugetlb module to [un]register attributes for hot-plugged nodes. 2519a305230SLee Schermerhorn */ 2529a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node; 2539a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node; 2549a305230SLee Schermerhorn 25539da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node) 2569a305230SLee Schermerhorn { 2574faf8d95SLee Schermerhorn if (__hugetlb_register_node && 2588cebfcd0SLai Jiangshan node_state(node->dev.id, N_MEMORY)) { 2599a305230SLee Schermerhorn __hugetlb_register_node(node); 26039da08cbSLee Schermerhorn return true; 26139da08cbSLee Schermerhorn } 26239da08cbSLee Schermerhorn return false; 2639a305230SLee Schermerhorn } 2649a305230SLee Schermerhorn 2659a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) 2669a305230SLee Schermerhorn { 2679a305230SLee Schermerhorn if (__hugetlb_unregister_node) 2689a305230SLee Schermerhorn __hugetlb_unregister_node(node); 2699a305230SLee Schermerhorn } 2709a305230SLee Schermerhorn 2719a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister, 2729a305230SLee Schermerhorn node_registration_func_t unregister) 2739a305230SLee Schermerhorn { 2749a305230SLee Schermerhorn __hugetlb_register_node = doregister; 2759a305230SLee Schermerhorn __hugetlb_unregister_node = unregister; 2769a305230SLee Schermerhorn } 2779a305230SLee Schermerhorn #else 2789a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {} 2799a305230SLee Schermerhorn 2809a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {} 2819a305230SLee Schermerhorn #endif 2829a305230SLee Schermerhorn 2838c7b5b4eSYasuaki Ishimatsu static void node_device_release(struct device *dev) 2848c7b5b4eSYasuaki Ishimatsu { 2858c7b5b4eSYasuaki Ishimatsu struct node *node = to_node(dev); 2868c7b5b4eSYasuaki Ishimatsu 2878c7b5b4eSYasuaki Ishimatsu #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) 2888c7b5b4eSYasuaki Ishimatsu /* 2898c7b5b4eSYasuaki Ishimatsu * We schedule the work only when a memory section is 2908c7b5b4eSYasuaki Ishimatsu * onlined/offlined on this node. When we come here, 2918c7b5b4eSYasuaki Ishimatsu * all the memory on this node has been offlined, 2928c7b5b4eSYasuaki Ishimatsu * so we won't enqueue new work to this work. 2938c7b5b4eSYasuaki Ishimatsu * 2948c7b5b4eSYasuaki Ishimatsu * The work is using node->node_work, so we should 2958c7b5b4eSYasuaki Ishimatsu * flush work before freeing the memory. 2968c7b5b4eSYasuaki Ishimatsu */ 2978c7b5b4eSYasuaki Ishimatsu flush_work(&node->node_work); 2988c7b5b4eSYasuaki Ishimatsu #endif 2998c7b5b4eSYasuaki Ishimatsu kfree(node); 3008c7b5b4eSYasuaki Ishimatsu } 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds /* 303405ae7d3SRobert P. J. Day * register_node - Setup a sysfs device for a node. 3041da177e4SLinus Torvalds * @num - Node number to use when creating the device. 3051da177e4SLinus Torvalds * 3061da177e4SLinus Torvalds * Initialize and register the node device. 3071da177e4SLinus Torvalds */ 308a7be6e5aSDou Liyang static int register_node(struct node *node, int num) 3091da177e4SLinus Torvalds { 3101da177e4SLinus Torvalds int error; 3111da177e4SLinus Torvalds 31210fbcf4cSKay Sievers node->dev.id = num; 31310fbcf4cSKay Sievers node->dev.bus = &node_subsys; 3148c7b5b4eSYasuaki Ishimatsu node->dev.release = node_device_release; 3157ca7ec40SGreg Kroah-Hartman node->dev.groups = node_dev_groups; 31610fbcf4cSKay Sievers error = device_register(&node->dev); 3171da177e4SLinus Torvalds 318c1cc0d51SArvind Yadav if (error) 319c1cc0d51SArvind Yadav put_device(&node->dev); 320c1cc0d51SArvind Yadav else { 3219a305230SLee Schermerhorn hugetlb_register_node(node); 322ed4a6d7fSMel Gorman 323ed4a6d7fSMel Gorman compaction_register_node(node); 3241da177e4SLinus Torvalds } 3251da177e4SLinus Torvalds return error; 3261da177e4SLinus Torvalds } 3271da177e4SLinus Torvalds 3284b45099bSKeiichiro Tokunaga /** 3294b45099bSKeiichiro Tokunaga * unregister_node - unregister a node device 3304b45099bSKeiichiro Tokunaga * @node: node going away 3314b45099bSKeiichiro Tokunaga * 3324b45099bSKeiichiro Tokunaga * Unregisters a node device @node. All the devices on the node must be 3334b45099bSKeiichiro Tokunaga * unregistered before calling this function. 3344b45099bSKeiichiro Tokunaga */ 3354b45099bSKeiichiro Tokunaga void unregister_node(struct node *node) 3364b45099bSKeiichiro Tokunaga { 3374faf8d95SLee Schermerhorn hugetlb_unregister_node(node); /* no-op, if memoryless node */ 338af936a16SLee Schermerhorn 33910fbcf4cSKay Sievers device_unregister(&node->dev); 3404b45099bSKeiichiro Tokunaga } 3414b45099bSKeiichiro Tokunaga 3428732794bSWen Congyang struct node *node_devices[MAX_NUMNODES]; 3430fc44159SYasunori Goto 34476b67ed9SKAMEZAWA Hiroyuki /* 34576b67ed9SKAMEZAWA Hiroyuki * register cpu under node 34676b67ed9SKAMEZAWA Hiroyuki */ 34776b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid) 34876b67ed9SKAMEZAWA Hiroyuki { 3491830794aSAlex Chiang int ret; 3508a25a2fdSKay Sievers struct device *obj; 351f8246f31SAlex Chiang 352f8246f31SAlex Chiang if (!node_online(nid)) 353f8246f31SAlex Chiang return 0; 354f8246f31SAlex Chiang 3558a25a2fdSKay Sievers obj = get_cpu_device(cpu); 35676b67ed9SKAMEZAWA Hiroyuki if (!obj) 35776b67ed9SKAMEZAWA Hiroyuki return 0; 358f8246f31SAlex Chiang 3598732794bSWen Congyang ret = sysfs_create_link(&node_devices[nid]->dev.kobj, 36076b67ed9SKAMEZAWA Hiroyuki &obj->kobj, 36176b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 3621830794aSAlex Chiang if (ret) 3631830794aSAlex Chiang return ret; 3641830794aSAlex Chiang 3651830794aSAlex Chiang return sysfs_create_link(&obj->kobj, 3668732794bSWen Congyang &node_devices[nid]->dev.kobj, 3678732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 36876b67ed9SKAMEZAWA Hiroyuki } 36976b67ed9SKAMEZAWA Hiroyuki 37076b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 37176b67ed9SKAMEZAWA Hiroyuki { 3728a25a2fdSKay Sievers struct device *obj; 373b9d52dadSAlex Chiang 374b9d52dadSAlex Chiang if (!node_online(nid)) 375b9d52dadSAlex Chiang return 0; 376b9d52dadSAlex Chiang 3778a25a2fdSKay Sievers obj = get_cpu_device(cpu); 378b9d52dadSAlex Chiang if (!obj) 379b9d52dadSAlex Chiang return 0; 380b9d52dadSAlex Chiang 3818732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 38276b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 3831830794aSAlex Chiang sysfs_remove_link(&obj->kobj, 3848732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 385b9d52dadSAlex Chiang 38676b67ed9SKAMEZAWA Hiroyuki return 0; 38776b67ed9SKAMEZAWA Hiroyuki } 38876b67ed9SKAMEZAWA Hiroyuki 389c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 390bd721ea7SFabian Frederick static int __ref get_nid_for_pfn(unsigned long pfn) 391c04fc586SGary Hade { 392c04fc586SGary Hade if (!pfn_valid_within(pfn)) 393c04fc586SGary Hade return -1; 3943a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 3958cdde385SThomas Gleixner if (system_state < SYSTEM_RUNNING) 3963a80a7faSMel Gorman return early_pfn_to_nid(pfn); 3973a80a7faSMel Gorman #endif 398c04fc586SGary Hade return pfn_to_nid(pfn); 399c04fc586SGary Hade } 400c04fc586SGary Hade 401c04fc586SGary Hade /* register memory section under specified node if it spans that node */ 4024fbce633SOscar Salvador int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) 403c04fc586SGary Hade { 4044fbce633SOscar Salvador int ret, nid = *(int *)arg; 405c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 406c04fc586SGary Hade 407c04fc586SGary Hade if (!mem_blk) 408c04fc586SGary Hade return -EFAULT; 409d0dc12e8SPavel Tatashin 410d0dc12e8SPavel Tatashin mem_blk->nid = nid; 411c04fc586SGary Hade if (!node_online(nid)) 412c04fc586SGary Hade return 0; 413d3360164SNathan Fontenot 414d3360164SNathan Fontenot sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 415d3360164SNathan Fontenot sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); 416d3360164SNathan Fontenot sect_end_pfn += PAGES_PER_SECTION - 1; 417c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 418c04fc586SGary Hade int page_nid; 419c04fc586SGary Hade 42004697858SYinghai Lu /* 42104697858SYinghai Lu * memory block could have several absent sections from start. 42204697858SYinghai Lu * skip pfn range from absent section 42304697858SYinghai Lu */ 42404697858SYinghai Lu if (!pfn_present(pfn)) { 42504697858SYinghai Lu pfn = round_down(pfn + PAGES_PER_SECTION, 42604697858SYinghai Lu PAGES_PER_SECTION) - 1; 42704697858SYinghai Lu continue; 42804697858SYinghai Lu } 42904697858SYinghai Lu 430fc44f7f9SPavel Tatashin /* 431fc44f7f9SPavel Tatashin * We need to check if page belongs to nid only for the boot 432fc44f7f9SPavel Tatashin * case, during hotplug we know that all pages in the memory 433fc44f7f9SPavel Tatashin * block belong to the same node. 434fc44f7f9SPavel Tatashin */ 4354fbce633SOscar Salvador if (system_state == SYSTEM_BOOTING) { 436c04fc586SGary Hade page_nid = get_nid_for_pfn(pfn); 437c04fc586SGary Hade if (page_nid < 0) 438c04fc586SGary Hade continue; 439c04fc586SGary Hade if (page_nid != nid) 440c04fc586SGary Hade continue; 441fc44f7f9SPavel Tatashin } 4428732794bSWen Congyang ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, 44310fbcf4cSKay Sievers &mem_blk->dev.kobj, 44410fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 445dee5d0d5SAlex Chiang if (ret) 446dee5d0d5SAlex Chiang return ret; 447dee5d0d5SAlex Chiang 44810fbcf4cSKay Sievers return sysfs_create_link_nowarn(&mem_blk->dev.kobj, 4498732794bSWen Congyang &node_devices[nid]->dev.kobj, 4508732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 451c04fc586SGary Hade } 452c04fc586SGary Hade /* mem section does not span the specified node */ 453c04fc586SGary Hade return 0; 454c04fc586SGary Hade } 455c04fc586SGary Hade 456c04fc586SGary Hade /* unregister memory section under all nodes that it spans */ 457d3360164SNathan Fontenot int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, 458d3360164SNathan Fontenot unsigned long phys_index) 459c04fc586SGary Hade { 4609ae49fabSDavid Rientjes NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); 461c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 462c04fc586SGary Hade 4639ae49fabSDavid Rientjes if (!mem_blk) { 4649ae49fabSDavid Rientjes NODEMASK_FREE(unlinked_nodes); 465c04fc586SGary Hade return -EFAULT; 4669ae49fabSDavid Rientjes } 4679ae49fabSDavid Rientjes if (!unlinked_nodes) 4689ae49fabSDavid Rientjes return -ENOMEM; 4699ae49fabSDavid Rientjes nodes_clear(*unlinked_nodes); 470d3360164SNathan Fontenot 471d3360164SNathan Fontenot sect_start_pfn = section_nr_to_pfn(phys_index); 472c04fc586SGary Hade sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; 473c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 47447504980SRoel Kluin int nid; 475c04fc586SGary Hade 476c04fc586SGary Hade nid = get_nid_for_pfn(pfn); 477c04fc586SGary Hade if (nid < 0) 478c04fc586SGary Hade continue; 479c04fc586SGary Hade if (!node_online(nid)) 480c04fc586SGary Hade continue; 4819ae49fabSDavid Rientjes if (node_test_and_set(nid, *unlinked_nodes)) 482c04fc586SGary Hade continue; 4838732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 48410fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 48510fbcf4cSKay Sievers sysfs_remove_link(&mem_blk->dev.kobj, 4868732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 487c04fc586SGary Hade } 4889ae49fabSDavid Rientjes NODEMASK_FREE(unlinked_nodes); 489c04fc586SGary Hade return 0; 490c04fc586SGary Hade } 491c04fc586SGary Hade 4924fbce633SOscar Salvador int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) 493c04fc586SGary Hade { 4944fbce633SOscar Salvador return walk_memory_range(start_pfn, end_pfn, (void *)&nid, 4954fbce633SOscar Salvador register_mem_sect_under_node); 496c04fc586SGary Hade } 4974faf8d95SLee Schermerhorn 49839da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS 4994faf8d95SLee Schermerhorn /* 5004faf8d95SLee Schermerhorn * Handle per node hstate attribute [un]registration on transistions 5014faf8d95SLee Schermerhorn * to/from memoryless state. 5024faf8d95SLee Schermerhorn */ 50339da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work) 50439da08cbSLee Schermerhorn { 50539da08cbSLee Schermerhorn struct node *node = container_of(work, struct node, node_work); 50639da08cbSLee Schermerhorn 50739da08cbSLee Schermerhorn /* 50839da08cbSLee Schermerhorn * We only get here when a node transitions to/from memoryless state. 50939da08cbSLee Schermerhorn * We can detect which transition occurred by examining whether the 51039da08cbSLee Schermerhorn * node has memory now. hugetlb_register_node() already check this 51139da08cbSLee Schermerhorn * so we try to register the attributes. If that fails, then the 51239da08cbSLee Schermerhorn * node has transitioned to memoryless, try to unregister the 51339da08cbSLee Schermerhorn * attributes. 51439da08cbSLee Schermerhorn */ 51539da08cbSLee Schermerhorn if (!hugetlb_register_node(node)) 51639da08cbSLee Schermerhorn hugetlb_unregister_node(node); 51739da08cbSLee Schermerhorn } 51839da08cbSLee Schermerhorn 51939da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) 52039da08cbSLee Schermerhorn { 5218732794bSWen Congyang INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); 52239da08cbSLee Schermerhorn } 5234faf8d95SLee Schermerhorn 5244faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self, 5254faf8d95SLee Schermerhorn unsigned long action, void *arg) 5264faf8d95SLee Schermerhorn { 5274faf8d95SLee Schermerhorn struct memory_notify *mnb = arg; 5284faf8d95SLee Schermerhorn int nid = mnb->status_change_nid; 5294faf8d95SLee Schermerhorn 5304faf8d95SLee Schermerhorn switch (action) { 53139da08cbSLee Schermerhorn case MEM_ONLINE: 53239da08cbSLee Schermerhorn case MEM_OFFLINE: 53339da08cbSLee Schermerhorn /* 53439da08cbSLee Schermerhorn * offload per node hstate [un]registration to a work thread 53539da08cbSLee Schermerhorn * when transitioning to/from memoryless state. 53639da08cbSLee Schermerhorn */ 5374faf8d95SLee Schermerhorn if (nid != NUMA_NO_NODE) 5388732794bSWen Congyang schedule_work(&node_devices[nid]->node_work); 5394faf8d95SLee Schermerhorn break; 54039da08cbSLee Schermerhorn 5414faf8d95SLee Schermerhorn case MEM_GOING_ONLINE: 5424faf8d95SLee Schermerhorn case MEM_GOING_OFFLINE: 5434faf8d95SLee Schermerhorn case MEM_CANCEL_ONLINE: 5444faf8d95SLee Schermerhorn case MEM_CANCEL_OFFLINE: 5454faf8d95SLee Schermerhorn default: 5464faf8d95SLee Schermerhorn break; 5474faf8d95SLee Schermerhorn } 5484faf8d95SLee Schermerhorn 5494faf8d95SLee Schermerhorn return NOTIFY_OK; 5504faf8d95SLee Schermerhorn } 55139da08cbSLee Schermerhorn #endif /* CONFIG_HUGETLBFS */ 55239da08cbSLee Schermerhorn #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 55339da08cbSLee Schermerhorn 55439da08cbSLee Schermerhorn #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ 55539da08cbSLee Schermerhorn !defined(CONFIG_HUGETLBFS) 5564faf8d95SLee Schermerhorn static inline int node_memory_callback(struct notifier_block *self, 5574faf8d95SLee Schermerhorn unsigned long action, void *arg) 5584faf8d95SLee Schermerhorn { 5594faf8d95SLee Schermerhorn return NOTIFY_OK; 5604faf8d95SLee Schermerhorn } 56139da08cbSLee Schermerhorn 56239da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) { } 56339da08cbSLee Schermerhorn 56439da08cbSLee Schermerhorn #endif 565c04fc586SGary Hade 5669037a993SMichal Hocko int __register_one_node(int nid) 5670fc44159SYasunori Goto { 5689037a993SMichal Hocko int error; 5699037a993SMichal Hocko int cpu; 5700fc44159SYasunori Goto 5718732794bSWen Congyang node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); 5728732794bSWen Congyang if (!node_devices[nid]) 5738732794bSWen Congyang return -ENOMEM; 5748732794bSWen Congyang 575a7be6e5aSDou Liyang error = register_node(node_devices[nid], nid); 57676b67ed9SKAMEZAWA Hiroyuki 57776b67ed9SKAMEZAWA Hiroyuki /* link cpu under this node */ 57876b67ed9SKAMEZAWA Hiroyuki for_each_present_cpu(cpu) { 57976b67ed9SKAMEZAWA Hiroyuki if (cpu_to_node(cpu) == nid) 58076b67ed9SKAMEZAWA Hiroyuki register_cpu_under_node(cpu, nid); 58176b67ed9SKAMEZAWA Hiroyuki } 582c04fc586SGary Hade 58339da08cbSLee Schermerhorn /* initialize work queue for memory hot plug */ 58439da08cbSLee Schermerhorn init_node_hugetlb_work(nid); 5850fc44159SYasunori Goto 5860fc44159SYasunori Goto return error; 5870fc44159SYasunori Goto } 5880fc44159SYasunori Goto 5890fc44159SYasunori Goto void unregister_one_node(int nid) 5900fc44159SYasunori Goto { 59192d585efSXishi Qiu if (!node_devices[nid]) 59292d585efSXishi Qiu return; 59392d585efSXishi Qiu 5948732794bSWen Congyang unregister_node(node_devices[nid]); 5958732794bSWen Congyang node_devices[nid] = NULL; 5960fc44159SYasunori Goto } 5970fc44159SYasunori Goto 598bde631a5SLee Schermerhorn /* 599bde631a5SLee Schermerhorn * node states attributes 600bde631a5SLee Schermerhorn */ 601bde631a5SLee Schermerhorn 602bde631a5SLee Schermerhorn static ssize_t print_nodes_state(enum node_states state, char *buf) 603bde631a5SLee Schermerhorn { 604bde631a5SLee Schermerhorn int n; 605bde631a5SLee Schermerhorn 606f799b1a7STejun Heo n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", 607f799b1a7STejun Heo nodemask_pr_args(&node_states[state])); 608f6238818SRyota Ozaki buf[n++] = '\n'; 609f6238818SRyota Ozaki buf[n] = '\0'; 610bde631a5SLee Schermerhorn return n; 611bde631a5SLee Schermerhorn } 612bde631a5SLee Schermerhorn 613b15f562fSAndi Kleen struct node_attr { 61410fbcf4cSKay Sievers struct device_attribute attr; 615b15f562fSAndi Kleen enum node_states state; 616b15f562fSAndi Kleen }; 617b15f562fSAndi Kleen 61810fbcf4cSKay Sievers static ssize_t show_node_state(struct device *dev, 61910fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 620bde631a5SLee Schermerhorn { 621b15f562fSAndi Kleen struct node_attr *na = container_of(attr, struct node_attr, attr); 622b15f562fSAndi Kleen return print_nodes_state(na->state, buf); 623bde631a5SLee Schermerhorn } 624bde631a5SLee Schermerhorn 625b15f562fSAndi Kleen #define _NODE_ATTR(name, state) \ 62610fbcf4cSKay Sievers { __ATTR(name, 0444, show_node_state, NULL), state } 627bde631a5SLee Schermerhorn 628b15f562fSAndi Kleen static struct node_attr node_state_attr[] = { 629fcf07d22SLai Jiangshan [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE), 630fcf07d22SLai Jiangshan [N_ONLINE] = _NODE_ATTR(online, N_ONLINE), 631fcf07d22SLai Jiangshan [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY), 632bde631a5SLee Schermerhorn #ifdef CONFIG_HIGHMEM 633fcf07d22SLai Jiangshan [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY), 634bde631a5SLee Schermerhorn #endif 63520b2f52bSLai Jiangshan [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY), 636fcf07d22SLai Jiangshan [N_CPU] = _NODE_ATTR(has_cpu, N_CPU), 637bde631a5SLee Schermerhorn }; 638bde631a5SLee Schermerhorn 63910fbcf4cSKay Sievers static struct attribute *node_state_attrs[] = { 640fcf07d22SLai Jiangshan &node_state_attr[N_POSSIBLE].attr.attr, 641fcf07d22SLai Jiangshan &node_state_attr[N_ONLINE].attr.attr, 642fcf07d22SLai Jiangshan &node_state_attr[N_NORMAL_MEMORY].attr.attr, 6433701cde6SAndi Kleen #ifdef CONFIG_HIGHMEM 644fcf07d22SLai Jiangshan &node_state_attr[N_HIGH_MEMORY].attr.attr, 6453701cde6SAndi Kleen #endif 64620b2f52bSLai Jiangshan &node_state_attr[N_MEMORY].attr.attr, 647fcf07d22SLai Jiangshan &node_state_attr[N_CPU].attr.attr, 6483701cde6SAndi Kleen NULL 6493701cde6SAndi Kleen }; 650bde631a5SLee Schermerhorn 65110fbcf4cSKay Sievers static struct attribute_group memory_root_attr_group = { 65210fbcf4cSKay Sievers .attrs = node_state_attrs, 65310fbcf4cSKay Sievers }; 65410fbcf4cSKay Sievers 65510fbcf4cSKay Sievers static const struct attribute_group *cpu_root_attr_groups[] = { 65610fbcf4cSKay Sievers &memory_root_attr_group, 65710fbcf4cSKay Sievers NULL, 65810fbcf4cSKay Sievers }; 65910fbcf4cSKay Sievers 6604faf8d95SLee Schermerhorn #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ 6614b45099bSKeiichiro Tokunaga static int __init register_node_type(void) 6621da177e4SLinus Torvalds { 663bde631a5SLee Schermerhorn int ret; 664bde631a5SLee Schermerhorn 6653701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); 6663701cde6SAndi Kleen BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES); 6673701cde6SAndi Kleen 66810fbcf4cSKay Sievers ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); 6694faf8d95SLee Schermerhorn if (!ret) { 6706e259e7dSAndrew Morton static struct notifier_block node_memory_callback_nb = { 6716e259e7dSAndrew Morton .notifier_call = node_memory_callback, 6726e259e7dSAndrew Morton .priority = NODE_CALLBACK_PRI, 6736e259e7dSAndrew Morton }; 6746e259e7dSAndrew Morton register_hotmemory_notifier(&node_memory_callback_nb); 6754faf8d95SLee Schermerhorn } 676bde631a5SLee Schermerhorn 677bde631a5SLee Schermerhorn /* 678bde631a5SLee Schermerhorn * Note: we're not going to unregister the node class if we fail 679bde631a5SLee Schermerhorn * to register the node state class attribute files. 680bde631a5SLee Schermerhorn */ 681bde631a5SLee Schermerhorn return ret; 6821da177e4SLinus Torvalds } 6831da177e4SLinus Torvalds postcore_initcall(register_node_type); 684