11da177e4SLinus Torvalds /* 210fbcf4cSKay Sievers * Basic Node interface support 31da177e4SLinus Torvalds */ 41da177e4SLinus Torvalds 51da177e4SLinus Torvalds #include <linux/module.h> 61da177e4SLinus Torvalds #include <linux/init.h> 71da177e4SLinus Torvalds #include <linux/mm.h> 8c04fc586SGary Hade #include <linux/memory.h> 9fa25c503SKOSAKI Motohiro #include <linux/vmstat.h> 106e259e7dSAndrew Morton #include <linux/notifier.h> 111da177e4SLinus Torvalds #include <linux/node.h> 121da177e4SLinus Torvalds #include <linux/hugetlb.h> 13ed4a6d7fSMel Gorman #include <linux/compaction.h> 141da177e4SLinus Torvalds #include <linux/cpumask.h> 151da177e4SLinus Torvalds #include <linux/topology.h> 161da177e4SLinus Torvalds #include <linux/nodemask.h> 1776b67ed9SKAMEZAWA Hiroyuki #include <linux/cpu.h> 18bde631a5SLee Schermerhorn #include <linux/device.h> 19af936a16SLee Schermerhorn #include <linux/swap.h> 2018e5b539STejun Heo #include <linux/slab.h> 211da177e4SLinus Torvalds 2210fbcf4cSKay Sievers static struct bus_type node_subsys = { 23af5ca3f4SKay Sievers .name = "node", 2410fbcf4cSKay Sievers .dev_name = "node", 251da177e4SLinus Torvalds }; 261da177e4SLinus Torvalds 271da177e4SLinus Torvalds 285aaba363SSudeep Holla static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf) 291da177e4SLinus Torvalds { 301da177e4SLinus Torvalds struct node *node_dev = to_node(dev); 3110fbcf4cSKay Sievers const struct cpumask *mask = cpumask_of_node(node_dev->dev.id); 321da177e4SLinus Torvalds 3339106dcfSMike Travis /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. 
*/ 3439106dcfSMike Travis BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1)); 351da177e4SLinus Torvalds 365aaba363SSudeep Holla return cpumap_print_to_pagebuf(list, buf, mask); 371da177e4SLinus Torvalds } 381da177e4SLinus Torvalds 3910fbcf4cSKay Sievers static inline ssize_t node_read_cpumask(struct device *dev, 4010fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4139106dcfSMike Travis { 425aaba363SSudeep Holla return node_read_cpumap(dev, false, buf); 4339106dcfSMike Travis } 4410fbcf4cSKay Sievers static inline ssize_t node_read_cpulist(struct device *dev, 4510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 4639106dcfSMike Travis { 475aaba363SSudeep Holla return node_read_cpumap(dev, true, buf); 4839106dcfSMike Travis } 4939106dcfSMike Travis 5010fbcf4cSKay Sievers static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 5110fbcf4cSKay Sievers static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 521da177e4SLinus Torvalds 531da177e4SLinus Torvalds #define K(x) ((x) << (PAGE_SHIFT - 10)) 5410fbcf4cSKay Sievers static ssize_t node_read_meminfo(struct device *dev, 5510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 561da177e4SLinus Torvalds { 571da177e4SLinus Torvalds int n; 581da177e4SLinus Torvalds int nid = dev->id; 59599d0c95SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 601da177e4SLinus Torvalds struct sysinfo i; 611da177e4SLinus Torvalds 621da177e4SLinus Torvalds si_meminfo_node(&i, nid); 637ee92255SKOSAKI Motohiro n = sprintf(buf, 641da177e4SLinus Torvalds "Node %d MemTotal: %8lu kB\n" 651da177e4SLinus Torvalds "Node %d MemFree: %8lu kB\n" 661da177e4SLinus Torvalds "Node %d MemUsed: %8lu kB\n" 671da177e4SLinus Torvalds "Node %d Active: %8lu kB\n" 681da177e4SLinus Torvalds "Node %d Inactive: %8lu kB\n" 694f98a2feSRik van Riel "Node %d Active(anon): %8lu kB\n" 704f98a2feSRik van Riel "Node %d Inactive(anon): %8lu kB\n" 714f98a2feSRik van Riel "Node %d Active(file): %8lu kB\n" 724f98a2feSRik van Riel 
"Node %d Inactive(file): %8lu kB\n" 735344b7e6SNick Piggin "Node %d Unevictable: %8lu kB\n" 747ee92255SKOSAKI Motohiro "Node %d Mlocked: %8lu kB\n", 757ee92255SKOSAKI Motohiro nid, K(i.totalram), 767ee92255SKOSAKI Motohiro nid, K(i.freeram), 777ee92255SKOSAKI Motohiro nid, K(i.totalram - i.freeram), 78599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + 79599d0c95SMel Gorman node_page_state(pgdat, NR_ACTIVE_FILE)), 80599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + 81599d0c95SMel Gorman node_page_state(pgdat, NR_INACTIVE_FILE)), 82599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)), 83599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)), 84599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)), 85599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)), 86599d0c95SMel Gorman nid, K(node_page_state(pgdat, NR_UNEVICTABLE)), 8775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); 887ee92255SKOSAKI Motohiro 89182e8e23SChristoph Lameter #ifdef CONFIG_HIGHMEM 907ee92255SKOSAKI Motohiro n += sprintf(buf + n, 911da177e4SLinus Torvalds "Node %d HighTotal: %8lu kB\n" 921da177e4SLinus Torvalds "Node %d HighFree: %8lu kB\n" 931da177e4SLinus Torvalds "Node %d LowTotal: %8lu kB\n" 947ee92255SKOSAKI Motohiro "Node %d LowFree: %8lu kB\n", 957ee92255SKOSAKI Motohiro nid, K(i.totalhigh), 967ee92255SKOSAKI Motohiro nid, K(i.freehigh), 977ee92255SKOSAKI Motohiro nid, K(i.totalram - i.totalhigh), 987ee92255SKOSAKI Motohiro nid, K(i.freeram - i.freehigh)); 99182e8e23SChristoph Lameter #endif 1007ee92255SKOSAKI Motohiro n += sprintf(buf + n, 101c07e02dbSMartin Hicks "Node %d Dirty: %8lu kB\n" 102c07e02dbSMartin Hicks "Node %d Writeback: %8lu kB\n" 103347ce434SChristoph Lameter "Node %d FilePages: %8lu kB\n" 104c07e02dbSMartin Hicks "Node %d Mapped: %8lu kB\n" 105f3dbd344SChristoph Lameter "Node %d AnonPages: %8lu kB\n" 1064b02108aSKOSAKI Motohiro "Node %d Shmem: %8lu kB\n" 
107c6a7f572SKOSAKI Motohiro "Node %d KernelStack: %8lu kB\n" 108df849a15SChristoph Lameter "Node %d PageTables: %8lu kB\n" 109f5ef68daSAndrew Morton "Node %d NFS_Unstable: %8lu kB\n" 110d2c5e30cSChristoph Lameter "Node %d Bounce: %8lu kB\n" 111fc3ba692SMiklos Szeredi "Node %d WritebackTmp: %8lu kB\n" 112972d1a7bSChristoph Lameter "Node %d Slab: %8lu kB\n" 113972d1a7bSChristoph Lameter "Node %d SReclaimable: %8lu kB\n" 11405b258e9SDavid Rientjes "Node %d SUnreclaim: %8lu kB\n" 11505b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 11605b258e9SDavid Rientjes "Node %d AnonHugePages: %8lu kB\n" 11765c45377SKirill A. Shutemov "Node %d ShmemHugePages: %8lu kB\n" 11865c45377SKirill A. Shutemov "Node %d ShmemPmdMapped: %8lu kB\n" 11905b258e9SDavid Rientjes #endif 12005b258e9SDavid Rientjes , 12111fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_DIRTY)), 12211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK)), 12311fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_FILE_PAGES)), 12450658e2eSMel Gorman nid, K(node_page_state(pgdat, NR_FILE_MAPPED)), 1254b9d0fabSMel Gorman nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), 126cc7452b6SRafael Aquini nid, K(i.sharedram), 12775ef7184SMel Gorman nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK) * 128c6a7f572SKOSAKI Motohiro THREAD_SIZE / 1024, 12975ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), 13011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), 13175ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), 13211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), 13375ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) + 13475ef7184SMel Gorman sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), 13575ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)), 13605b258e9SDavid Rientjes #ifdef CONFIG_TRANSPARENT_HUGEPAGE 13775ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, 
NR_SLAB_UNRECLAIMABLE)), 13811fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_ANON_THPS) * 13965c45377SKirill A. Shutemov HPAGE_PMD_NR), 14011fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * 14165c45377SKirill A. Shutemov HPAGE_PMD_NR), 14211fb9989SMel Gorman nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * 14391a13c28SClaudio Scordino HPAGE_PMD_NR)); 14491a13c28SClaudio Scordino #else 14575ef7184SMel Gorman nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); 14605b258e9SDavid Rientjes #endif 1471da177e4SLinus Torvalds n += hugetlb_report_node_meminfo(nid, buf + n); 1481da177e4SLinus Torvalds return n; 1491da177e4SLinus Torvalds } 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds #undef K 15210fbcf4cSKay Sievers static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL); 1531da177e4SLinus Torvalds 15410fbcf4cSKay Sievers static ssize_t node_read_numastat(struct device *dev, 15510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 1561da177e4SLinus Torvalds { 1571da177e4SLinus Torvalds return sprintf(buf, 1581da177e4SLinus Torvalds "numa_hit %lu\n" 1591da177e4SLinus Torvalds "numa_miss %lu\n" 1601da177e4SLinus Torvalds "numa_foreign %lu\n" 1611da177e4SLinus Torvalds "interleave_hit %lu\n" 1621da177e4SLinus Torvalds "local_node %lu\n" 1631da177e4SLinus Torvalds "other_node %lu\n", 16475ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_HIT), 16575ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_MISS), 16675ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_FOREIGN), 16775ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_INTERLEAVE_HIT), 16875ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_LOCAL), 16975ef7184SMel Gorman sum_zone_node_page_state(dev->id, NUMA_OTHER)); 1701da177e4SLinus Torvalds } 17110fbcf4cSKay Sievers static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); 1721da177e4SLinus Torvalds 17310fbcf4cSKay Sievers static ssize_t node_read_vmstat(struct device *dev, 
17410fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 1752ac39037SMichael Rubin { 1762ac39037SMichael Rubin int nid = dev->id; 17775ef7184SMel Gorman struct pglist_data *pgdat = NODE_DATA(nid); 178fa25c503SKOSAKI Motohiro int i; 179fa25c503SKOSAKI Motohiro int n = 0; 180fa25c503SKOSAKI Motohiro 181fa25c503SKOSAKI Motohiro for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 182fa25c503SKOSAKI Motohiro n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], 18375ef7184SMel Gorman sum_zone_node_page_state(nid, i)); 18475ef7184SMel Gorman 18575ef7184SMel Gorman for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) 18675ef7184SMel Gorman n += sprintf(buf+n, "%s %lu\n", 18775ef7184SMel Gorman vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], 18875ef7184SMel Gorman node_page_state(pgdat, i)); 189fa25c503SKOSAKI Motohiro 190fa25c503SKOSAKI Motohiro return n; 1912ac39037SMichael Rubin } 19210fbcf4cSKay Sievers static DEVICE_ATTR(vmstat, S_IRUGO, node_read_vmstat, NULL); 1932ac39037SMichael Rubin 19410fbcf4cSKay Sievers static ssize_t node_read_distance(struct device *dev, 19510fbcf4cSKay Sievers struct device_attribute *attr, char *buf) 1961da177e4SLinus Torvalds { 1971da177e4SLinus Torvalds int nid = dev->id; 1981da177e4SLinus Torvalds int len = 0; 1991da177e4SLinus Torvalds int i; 2001da177e4SLinus Torvalds 20112ee3c0aSDavid Rientjes /* 20212ee3c0aSDavid Rientjes * buf is currently PAGE_SIZE in length and each node needs 4 chars 20312ee3c0aSDavid Rientjes * at the most (distance + space or newline). 20412ee3c0aSDavid Rientjes */ 20512ee3c0aSDavid Rientjes BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE); 2061da177e4SLinus Torvalds 2071da177e4SLinus Torvalds for_each_online_node(i) 2081da177e4SLinus Torvalds len += sprintf(buf + len, "%s%d", i ? 
" " : "", node_distance(nid, i)); 2091da177e4SLinus Torvalds 2101da177e4SLinus Torvalds len += sprintf(buf + len, "\n"); 2111da177e4SLinus Torvalds return len; 2121da177e4SLinus Torvalds } 21310fbcf4cSKay Sievers static DEVICE_ATTR(distance, S_IRUGO, node_read_distance, NULL); 2141da177e4SLinus Torvalds 2153c9b8aafSTakashi Iwai static struct attribute *node_dev_attrs[] = { 2163c9b8aafSTakashi Iwai &dev_attr_cpumap.attr, 2173c9b8aafSTakashi Iwai &dev_attr_cpulist.attr, 2183c9b8aafSTakashi Iwai &dev_attr_meminfo.attr, 2193c9b8aafSTakashi Iwai &dev_attr_numastat.attr, 2203c9b8aafSTakashi Iwai &dev_attr_distance.attr, 2213c9b8aafSTakashi Iwai &dev_attr_vmstat.attr, 2223c9b8aafSTakashi Iwai NULL 2233c9b8aafSTakashi Iwai }; 2247ca7ec40SGreg Kroah-Hartman ATTRIBUTE_GROUPS(node_dev); 2253c9b8aafSTakashi Iwai 2269a305230SLee Schermerhorn #ifdef CONFIG_HUGETLBFS 2279a305230SLee Schermerhorn /* 2289a305230SLee Schermerhorn * hugetlbfs per node attributes registration interface: 2299a305230SLee Schermerhorn * When/if hugetlb[fs] subsystem initializes [sometime after this module], 2304faf8d95SLee Schermerhorn * it will register its per node attributes for all online nodes with 2314faf8d95SLee Schermerhorn * memory. It will also call register_hugetlbfs_with_node(), below, to 2329a305230SLee Schermerhorn * register its attribute registration functions with this node driver. 2339a305230SLee Schermerhorn * Once these hooks have been initialized, the node driver will call into 2349a305230SLee Schermerhorn * the hugetlb module to [un]register attributes for hot-plugged nodes. 
2359a305230SLee Schermerhorn */ 2369a305230SLee Schermerhorn static node_registration_func_t __hugetlb_register_node; 2379a305230SLee Schermerhorn static node_registration_func_t __hugetlb_unregister_node; 2389a305230SLee Schermerhorn 23939da08cbSLee Schermerhorn static inline bool hugetlb_register_node(struct node *node) 2409a305230SLee Schermerhorn { 2414faf8d95SLee Schermerhorn if (__hugetlb_register_node && 2428cebfcd0SLai Jiangshan node_state(node->dev.id, N_MEMORY)) { 2439a305230SLee Schermerhorn __hugetlb_register_node(node); 24439da08cbSLee Schermerhorn return true; 24539da08cbSLee Schermerhorn } 24639da08cbSLee Schermerhorn return false; 2479a305230SLee Schermerhorn } 2489a305230SLee Schermerhorn 2499a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) 2509a305230SLee Schermerhorn { 2519a305230SLee Schermerhorn if (__hugetlb_unregister_node) 2529a305230SLee Schermerhorn __hugetlb_unregister_node(node); 2539a305230SLee Schermerhorn } 2549a305230SLee Schermerhorn 2559a305230SLee Schermerhorn void register_hugetlbfs_with_node(node_registration_func_t doregister, 2569a305230SLee Schermerhorn node_registration_func_t unregister) 2579a305230SLee Schermerhorn { 2589a305230SLee Schermerhorn __hugetlb_register_node = doregister; 2599a305230SLee Schermerhorn __hugetlb_unregister_node = unregister; 2609a305230SLee Schermerhorn } 2619a305230SLee Schermerhorn #else 2629a305230SLee Schermerhorn static inline void hugetlb_register_node(struct node *node) {} 2639a305230SLee Schermerhorn 2649a305230SLee Schermerhorn static inline void hugetlb_unregister_node(struct node *node) {} 2659a305230SLee Schermerhorn #endif 2669a305230SLee Schermerhorn 2678c7b5b4eSYasuaki Ishimatsu static void node_device_release(struct device *dev) 2688c7b5b4eSYasuaki Ishimatsu { 2698c7b5b4eSYasuaki Ishimatsu struct node *node = to_node(dev); 2708c7b5b4eSYasuaki Ishimatsu 2718c7b5b4eSYasuaki Ishimatsu #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && 
defined(CONFIG_HUGETLBFS) 2728c7b5b4eSYasuaki Ishimatsu /* 2738c7b5b4eSYasuaki Ishimatsu * We schedule the work only when a memory section is 2748c7b5b4eSYasuaki Ishimatsu * onlined/offlined on this node. When we come here, 2758c7b5b4eSYasuaki Ishimatsu * all the memory on this node has been offlined, 2768c7b5b4eSYasuaki Ishimatsu * so we won't enqueue new work to this work. 2778c7b5b4eSYasuaki Ishimatsu * 2788c7b5b4eSYasuaki Ishimatsu * The work is using node->node_work, so we should 2798c7b5b4eSYasuaki Ishimatsu * flush work before freeing the memory. 2808c7b5b4eSYasuaki Ishimatsu */ 2818c7b5b4eSYasuaki Ishimatsu flush_work(&node->node_work); 2828c7b5b4eSYasuaki Ishimatsu #endif 2838c7b5b4eSYasuaki Ishimatsu kfree(node); 2848c7b5b4eSYasuaki Ishimatsu } 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds /* 287405ae7d3SRobert P. J. Day * register_node - Setup a sysfs device for a node. 2881da177e4SLinus Torvalds * @num - Node number to use when creating the device. 2891da177e4SLinus Torvalds * 2901da177e4SLinus Torvalds * Initialize and register the node device. 
2911da177e4SLinus Torvalds */ 292fa264375SYasuaki Ishimatsu static int register_node(struct node *node, int num, struct node *parent) 2931da177e4SLinus Torvalds { 2941da177e4SLinus Torvalds int error; 2951da177e4SLinus Torvalds 29610fbcf4cSKay Sievers node->dev.id = num; 29710fbcf4cSKay Sievers node->dev.bus = &node_subsys; 2988c7b5b4eSYasuaki Ishimatsu node->dev.release = node_device_release; 2997ca7ec40SGreg Kroah-Hartman node->dev.groups = node_dev_groups; 30010fbcf4cSKay Sievers error = device_register(&node->dev); 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds if (!error){ 3039a305230SLee Schermerhorn hugetlb_register_node(node); 304ed4a6d7fSMel Gorman 305ed4a6d7fSMel Gorman compaction_register_node(node); 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds return error; 3081da177e4SLinus Torvalds } 3091da177e4SLinus Torvalds 3104b45099bSKeiichiro Tokunaga /** 3114b45099bSKeiichiro Tokunaga * unregister_node - unregister a node device 3124b45099bSKeiichiro Tokunaga * @node: node going away 3134b45099bSKeiichiro Tokunaga * 3144b45099bSKeiichiro Tokunaga * Unregisters a node device @node. All the devices on the node must be 3154b45099bSKeiichiro Tokunaga * unregistered before calling this function. 
3164b45099bSKeiichiro Tokunaga */ 3174b45099bSKeiichiro Tokunaga void unregister_node(struct node *node) 3184b45099bSKeiichiro Tokunaga { 3194faf8d95SLee Schermerhorn hugetlb_unregister_node(node); /* no-op, if memoryless node */ 320af936a16SLee Schermerhorn 32110fbcf4cSKay Sievers device_unregister(&node->dev); 3224b45099bSKeiichiro Tokunaga } 3234b45099bSKeiichiro Tokunaga 3248732794bSWen Congyang struct node *node_devices[MAX_NUMNODES]; 3250fc44159SYasunori Goto 32676b67ed9SKAMEZAWA Hiroyuki /* 32776b67ed9SKAMEZAWA Hiroyuki * register cpu under node 32876b67ed9SKAMEZAWA Hiroyuki */ 32976b67ed9SKAMEZAWA Hiroyuki int register_cpu_under_node(unsigned int cpu, unsigned int nid) 33076b67ed9SKAMEZAWA Hiroyuki { 3311830794aSAlex Chiang int ret; 3328a25a2fdSKay Sievers struct device *obj; 333f8246f31SAlex Chiang 334f8246f31SAlex Chiang if (!node_online(nid)) 335f8246f31SAlex Chiang return 0; 336f8246f31SAlex Chiang 3378a25a2fdSKay Sievers obj = get_cpu_device(cpu); 33876b67ed9SKAMEZAWA Hiroyuki if (!obj) 33976b67ed9SKAMEZAWA Hiroyuki return 0; 340f8246f31SAlex Chiang 3418732794bSWen Congyang ret = sysfs_create_link(&node_devices[nid]->dev.kobj, 34276b67ed9SKAMEZAWA Hiroyuki &obj->kobj, 34376b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 3441830794aSAlex Chiang if (ret) 3451830794aSAlex Chiang return ret; 3461830794aSAlex Chiang 3471830794aSAlex Chiang return sysfs_create_link(&obj->kobj, 3488732794bSWen Congyang &node_devices[nid]->dev.kobj, 3498732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 35076b67ed9SKAMEZAWA Hiroyuki } 35176b67ed9SKAMEZAWA Hiroyuki 35276b67ed9SKAMEZAWA Hiroyuki int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 35376b67ed9SKAMEZAWA Hiroyuki { 3548a25a2fdSKay Sievers struct device *obj; 355b9d52dadSAlex Chiang 356b9d52dadSAlex Chiang if (!node_online(nid)) 357b9d52dadSAlex Chiang return 0; 358b9d52dadSAlex Chiang 3598a25a2fdSKay Sievers obj = get_cpu_device(cpu); 360b9d52dadSAlex Chiang if (!obj) 
361b9d52dadSAlex Chiang return 0; 362b9d52dadSAlex Chiang 3638732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 36476b67ed9SKAMEZAWA Hiroyuki kobject_name(&obj->kobj)); 3651830794aSAlex Chiang sysfs_remove_link(&obj->kobj, 3668732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 367b9d52dadSAlex Chiang 36876b67ed9SKAMEZAWA Hiroyuki return 0; 36976b67ed9SKAMEZAWA Hiroyuki } 37076b67ed9SKAMEZAWA Hiroyuki 371c04fc586SGary Hade #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 372c04fc586SGary Hade #define page_initialized(page) (page->lru.next) 373c04fc586SGary Hade 3743a80a7faSMel Gorman static int __init_refok get_nid_for_pfn(unsigned long pfn) 375c04fc586SGary Hade { 376c04fc586SGary Hade struct page *page; 377c04fc586SGary Hade 378c04fc586SGary Hade if (!pfn_valid_within(pfn)) 379c04fc586SGary Hade return -1; 3803a80a7faSMel Gorman #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 3813a80a7faSMel Gorman if (system_state == SYSTEM_BOOTING) 3823a80a7faSMel Gorman return early_pfn_to_nid(pfn); 3833a80a7faSMel Gorman #endif 384c04fc586SGary Hade page = pfn_to_page(pfn); 385c04fc586SGary Hade if (!page_initialized(page)) 386c04fc586SGary Hade return -1; 387c04fc586SGary Hade return pfn_to_nid(pfn); 388c04fc586SGary Hade } 389c04fc586SGary Hade 390c04fc586SGary Hade /* register memory section under specified node if it spans that node */ 391c04fc586SGary Hade int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) 392c04fc586SGary Hade { 393dee5d0d5SAlex Chiang int ret; 394c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 395c04fc586SGary Hade 396c04fc586SGary Hade if (!mem_blk) 397c04fc586SGary Hade return -EFAULT; 398c04fc586SGary Hade if (!node_online(nid)) 399c04fc586SGary Hade return 0; 400d3360164SNathan Fontenot 401d3360164SNathan Fontenot sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); 402d3360164SNathan Fontenot sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); 403d3360164SNathan Fontenot 
sect_end_pfn += PAGES_PER_SECTION - 1; 404c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 405c04fc586SGary Hade int page_nid; 406c04fc586SGary Hade 40704697858SYinghai Lu /* 40804697858SYinghai Lu * memory block could have several absent sections from start. 40904697858SYinghai Lu * skip pfn range from absent section 41004697858SYinghai Lu */ 41104697858SYinghai Lu if (!pfn_present(pfn)) { 41204697858SYinghai Lu pfn = round_down(pfn + PAGES_PER_SECTION, 41304697858SYinghai Lu PAGES_PER_SECTION) - 1; 41404697858SYinghai Lu continue; 41504697858SYinghai Lu } 41604697858SYinghai Lu 417c04fc586SGary Hade page_nid = get_nid_for_pfn(pfn); 418c04fc586SGary Hade if (page_nid < 0) 419c04fc586SGary Hade continue; 420c04fc586SGary Hade if (page_nid != nid) 421c04fc586SGary Hade continue; 4228732794bSWen Congyang ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, 42310fbcf4cSKay Sievers &mem_blk->dev.kobj, 42410fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 425dee5d0d5SAlex Chiang if (ret) 426dee5d0d5SAlex Chiang return ret; 427dee5d0d5SAlex Chiang 42810fbcf4cSKay Sievers return sysfs_create_link_nowarn(&mem_blk->dev.kobj, 4298732794bSWen Congyang &node_devices[nid]->dev.kobj, 4308732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 431c04fc586SGary Hade } 432c04fc586SGary Hade /* mem section does not span the specified node */ 433c04fc586SGary Hade return 0; 434c04fc586SGary Hade } 435c04fc586SGary Hade 436c04fc586SGary Hade /* unregister memory section under all nodes that it spans */ 437d3360164SNathan Fontenot int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, 438d3360164SNathan Fontenot unsigned long phys_index) 439c04fc586SGary Hade { 4409ae49fabSDavid Rientjes NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); 441c04fc586SGary Hade unsigned long pfn, sect_start_pfn, sect_end_pfn; 442c04fc586SGary Hade 4439ae49fabSDavid Rientjes if (!mem_blk) { 4449ae49fabSDavid Rientjes 
NODEMASK_FREE(unlinked_nodes); 445c04fc586SGary Hade return -EFAULT; 4469ae49fabSDavid Rientjes } 4479ae49fabSDavid Rientjes if (!unlinked_nodes) 4489ae49fabSDavid Rientjes return -ENOMEM; 4499ae49fabSDavid Rientjes nodes_clear(*unlinked_nodes); 450d3360164SNathan Fontenot 451d3360164SNathan Fontenot sect_start_pfn = section_nr_to_pfn(phys_index); 452c04fc586SGary Hade sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; 453c04fc586SGary Hade for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { 45447504980SRoel Kluin int nid; 455c04fc586SGary Hade 456c04fc586SGary Hade nid = get_nid_for_pfn(pfn); 457c04fc586SGary Hade if (nid < 0) 458c04fc586SGary Hade continue; 459c04fc586SGary Hade if (!node_online(nid)) 460c04fc586SGary Hade continue; 4619ae49fabSDavid Rientjes if (node_test_and_set(nid, *unlinked_nodes)) 462c04fc586SGary Hade continue; 4638732794bSWen Congyang sysfs_remove_link(&node_devices[nid]->dev.kobj, 46410fbcf4cSKay Sievers kobject_name(&mem_blk->dev.kobj)); 46510fbcf4cSKay Sievers sysfs_remove_link(&mem_blk->dev.kobj, 4668732794bSWen Congyang kobject_name(&node_devices[nid]->dev.kobj)); 467c04fc586SGary Hade } 4689ae49fabSDavid Rientjes NODEMASK_FREE(unlinked_nodes); 469c04fc586SGary Hade return 0; 470c04fc586SGary Hade } 471c04fc586SGary Hade 472c04fc586SGary Hade static int link_mem_sections(int nid) 473c04fc586SGary Hade { 474c04fc586SGary Hade unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; 475c04fc586SGary Hade unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; 476c04fc586SGary Hade unsigned long pfn; 47763d027a6SRobin Holt struct memory_block *mem_blk = NULL; 478c04fc586SGary Hade int err = 0; 479c04fc586SGary Hade 480c04fc586SGary Hade for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 481c04fc586SGary Hade unsigned long section_nr = pfn_to_section_nr(pfn); 482c04fc586SGary Hade struct mem_section *mem_sect; 483c04fc586SGary Hade int ret; 484c04fc586SGary Hade 485c04fc586SGary Hade if 
(!present_section_nr(section_nr)) 486c04fc586SGary Hade continue; 487c04fc586SGary Hade mem_sect = __nr_to_section(section_nr); 488321bf4edSYinghai Lu 489321bf4edSYinghai Lu /* same memblock ? */ 490321bf4edSYinghai Lu if (mem_blk) 491321bf4edSYinghai Lu if ((section_nr >= mem_blk->start_section_nr) && 492321bf4edSYinghai Lu (section_nr <= mem_blk->end_section_nr)) 493321bf4edSYinghai Lu continue; 494321bf4edSYinghai Lu 49563d027a6SRobin Holt mem_blk = find_memory_block_hinted(mem_sect, mem_blk); 496321bf4edSYinghai Lu 497c04fc586SGary Hade ret = register_mem_sect_under_node(mem_blk, nid); 498c04fc586SGary Hade if (!err) 499c04fc586SGary Hade err = ret; 500c04fc586SGary Hade 501c04fc586SGary Hade /* discard ref obtained in find_memory_block() */ 502c04fc586SGary Hade } 50363d027a6SRobin Holt 50463d027a6SRobin Holt if (mem_blk) 50510fbcf4cSKay Sievers kobject_put(&mem_blk->dev.kobj); 506c04fc586SGary Hade return err; 507c04fc586SGary Hade } 5084faf8d95SLee Schermerhorn 50939da08cbSLee Schermerhorn #ifdef CONFIG_HUGETLBFS 5104faf8d95SLee Schermerhorn /* 5114faf8d95SLee Schermerhorn * Handle per node hstate attribute [un]registration on transistions 5124faf8d95SLee Schermerhorn * to/from memoryless state. 5134faf8d95SLee Schermerhorn */ 51439da08cbSLee Schermerhorn static void node_hugetlb_work(struct work_struct *work) 51539da08cbSLee Schermerhorn { 51639da08cbSLee Schermerhorn struct node *node = container_of(work, struct node, node_work); 51739da08cbSLee Schermerhorn 51839da08cbSLee Schermerhorn /* 51939da08cbSLee Schermerhorn * We only get here when a node transitions to/from memoryless state. 52039da08cbSLee Schermerhorn * We can detect which transition occurred by examining whether the 52139da08cbSLee Schermerhorn * node has memory now. hugetlb_register_node() already check this 52239da08cbSLee Schermerhorn * so we try to register the attributes. 
If that fails, then the 52339da08cbSLee Schermerhorn * node has transitioned to memoryless, try to unregister the 52439da08cbSLee Schermerhorn * attributes. 52539da08cbSLee Schermerhorn */ 52639da08cbSLee Schermerhorn if (!hugetlb_register_node(node)) 52739da08cbSLee Schermerhorn hugetlb_unregister_node(node); 52839da08cbSLee Schermerhorn } 52939da08cbSLee Schermerhorn 53039da08cbSLee Schermerhorn static void init_node_hugetlb_work(int nid) 53139da08cbSLee Schermerhorn { 5328732794bSWen Congyang INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); 53339da08cbSLee Schermerhorn } 5344faf8d95SLee Schermerhorn 5354faf8d95SLee Schermerhorn static int node_memory_callback(struct notifier_block *self, 5364faf8d95SLee Schermerhorn unsigned long action, void *arg) 5374faf8d95SLee Schermerhorn { 5384faf8d95SLee Schermerhorn struct memory_notify *mnb = arg; 5394faf8d95SLee Schermerhorn int nid = mnb->status_change_nid; 5404faf8d95SLee Schermerhorn 5414faf8d95SLee Schermerhorn switch (action) { 54239da08cbSLee Schermerhorn case MEM_ONLINE: 54339da08cbSLee Schermerhorn case MEM_OFFLINE: 54439da08cbSLee Schermerhorn /* 54539da08cbSLee Schermerhorn * offload per node hstate [un]registration to a work thread 54639da08cbSLee Schermerhorn * when transitioning to/from memoryless state. 
54739da08cbSLee Schermerhorn */ 5484faf8d95SLee Schermerhorn if (nid != NUMA_NO_NODE) 5498732794bSWen Congyang schedule_work(&node_devices[nid]->node_work); 5504faf8d95SLee Schermerhorn break; 55139da08cbSLee Schermerhorn 5524faf8d95SLee Schermerhorn case MEM_GOING_ONLINE: 5534faf8d95SLee Schermerhorn case MEM_GOING_OFFLINE: 5544faf8d95SLee Schermerhorn case MEM_CANCEL_ONLINE: 5554faf8d95SLee Schermerhorn case MEM_CANCEL_OFFLINE: 5564faf8d95SLee Schermerhorn default: 5574faf8d95SLee Schermerhorn break; 5584faf8d95SLee Schermerhorn } 5594faf8d95SLee Schermerhorn 5604faf8d95SLee Schermerhorn return NOTIFY_OK; 5614faf8d95SLee Schermerhorn }
#endif	/* CONFIG_HUGETLBFS */
#else	/* !CONFIG_MEMORY_HOTPLUG_SPARSE */

/* Stub used when CONFIG_MEMORY_HOTPLUG_SPARSE is off: nothing to link. */
static int link_mem_sections(int nid) { return 0; }
#endif	/* CONFIG_MEMORY_HOTPLUG_SPARSE */

#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
    !defined(CONFIG_HUGETLBFS)
/*
 * Stub memory-hotplug notifier used when either option above is disabled:
 * every action is acknowledged with NOTIFY_OK and otherwise ignored.
 */
static inline int node_memory_callback(struct notifier_block *self,
				       unsigned long action, void *arg)
{
	return NOTIFY_OK;
}

/* Stub: no per-node hugetlb work item to initialize in this configuration. */
static void init_node_hugetlb_work(int nid) { }

#endif

/*
 * register_one_node - create and register the node device for @nid
 *
 * Does nothing (and returns 0) when @nid is not online.  Otherwise a
 * zeroed struct node is allocated and stored in node_devices[nid],
 * registered via register_node(), every present CPU whose cpu_to_node()
 * is @nid is linked under it, the node's memory sections are linked, and
 * the per-node hugetlb work item is initialized.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the first
 * error reported by register_node() or link_mem_sections().
 */
int register_one_node(int nid)
{
	int error = 0;
	int cpu;

	if (node_online(nid)) {
		int p_node = parent_node(nid);
		struct node *parent = NULL;
		int link_error;

		/* A node that is its own parent has no parent device. */
		if (p_node != nid)
			parent = node_devices[p_node];

		node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
		if (!node_devices[nid])
			return -ENOMEM;

		error = register_node(node_devices[nid], nid, parent);

		/* link cpu under this node */
		for_each_present_cpu(cpu) {
			if (cpu_to_node(cpu) == nid)
				register_cpu_under_node(cpu, nid);
		}

		/*
		 * link memory sections under this node
		 *
		 * Do not let this result overwrite a register_node()
		 * failure: the caller must see the first error.
		 * NOTE(review): the remaining steps still run after a
		 * register_node() failure, as before; whether they should
		 * be skipped (and the node released) depends on
		 * register_node(), which is not visible here -- confirm.
		 */
		link_error = link_mem_sections(nid);
		if (!error)
			error = link_error;

		/* initialize work queue for memory hot plug */
		init_node_hugetlb_work(nid);
	}

	return error;
}

/*
 * unregister_one_node - unregister the node device for @nid, if any
 *
 * A NULL node_devices[nid] slot means there is nothing to do.  The slot
 * is cleared after unregister_node() so the node is not unregistered twice.
 */
void unregister_one_node(int nid)
{
	struct node *node = node_devices[nid];

	if (!node)
		return;

	unregister_node(node);
	node_devices[nid] = NULL;
}

/*
 * node states attributes
 */

/*
 * Format the node mask for @state as a list ("%*pbl") into @buf, followed
 * by a newline and a terminating NUL.  The scnprintf() limit is
 * PAGE_SIZE - 1 so that the appended newline and NUL still fit within
 * PAGE_SIZE bytes (presumably the size of the sysfs buffer -- confirm).
 *
 * Returns the number of bytes written, including the newline but not
 * the NUL.
 */
static ssize_t print_nodes_state(enum node_states state, char *buf)
{
	int len;

	len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
			nodemask_pr_args(&node_states[state]));
	buf[len] = '\n';
	len++;
	buf[len] = '\0';

	return len;
}

/* A device attribute paired with the node state it reports. */
struct node_attr {
	struct device_attribute attr;
	enum node_states state;
};

/*
 * sysfs show callback shared by all node-state attributes: recover the
 * enclosing node_attr from @attr and print the mask for its state.
 */
static ssize_t show_node_state(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct node_attr *na = container_of(attr, struct node_attr, attr);

	return print_nodes_state(na->state, buf);
}

/* Build a read-only (0444) node_attr initializer for @state. */
#define _NODE_ATTR(name, state) \
	{ __ATTR(name, 0444, show_node_state, NULL), state }

/* One attribute per node state, indexed by the state value itself. */
static struct node_attr node_state_attr[] = {
	[N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
	[N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
	[N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
#ifdef CONFIG_HIGHMEM
	[N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
#endif
#ifdef CONFIG_MOVABLE_NODE
	[N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
#endif
	[N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
};

/* NULL-terminated attribute list; must stay in step with node_state_attr[]. */
static struct attribute *node_state_attrs[] = {
	&node_state_attr[N_POSSIBLE].attr.attr,
	&node_state_attr[N_ONLINE].attr.attr,
	&node_state_attr[N_NORMAL_MEMORY].attr.attr,
#ifdef CONFIG_HIGHMEM
	&node_state_attr[N_HIGH_MEMORY].attr.attr,
#endif
#ifdef CONFIG_MOVABLE_NODE
	&node_state_attr[N_MEMORY].attr.attr,
#endif
	&node_state_attr[N_CPU].attr.attr,
	NULL
};

static struct attribute_group memory_root_attr_group = {
	.attrs = node_state_attrs,
};

static const struct attribute_group *cpu_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};

#define NODE_CALLBACK_PRI	2	/* lower than SLAB */

/*
 * Register the "node" subsystem together with the node-state attribute
 * groups and, if that succeeds, hook node_memory_callback() into the
 * memory-hotplug notifier chain.
 *
 * Returns the result of subsys_system_register().
 */
static int __init register_node_type(void)
{
	int ret;

	/*
	 * Both tables must have exactly one entry per node state
	 * (node_state_attrs[] has one extra slot for its NULL terminator).
	 */
	BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
	BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);

	ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
	if (!ret) {
		static struct notifier_block node_memory_callback_nb = {
			.notifier_call = node_memory_callback,
			.priority = NODE_CALLBACK_PRI,
		};
		register_hotmemory_notifier(&node_memory_callback_nb);
	}

	/*
	 * The node-state attribute files are passed to
	 * subsys_system_register() as groups, so there is no separate
	 * registration step here that would need to be unwound.
	 */
	return ret;
}
postcore_initcall(register_node_type);