12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later 29d5171a8SRashmica Gupta /* 39d5171a8SRashmica Gupta * Copyright (C) IBM Corporation, 2014, 2017 49d5171a8SRashmica Gupta * Anton Blanchard, Rashmica Gupta. 59d5171a8SRashmica Gupta */ 69d5171a8SRashmica Gupta 79d5171a8SRashmica Gupta #define pr_fmt(fmt) "memtrace: " fmt 89d5171a8SRashmica Gupta 99d5171a8SRashmica Gupta #include <linux/bitops.h> 109d5171a8SRashmica Gupta #include <linux/string.h> 119d5171a8SRashmica Gupta #include <linux/memblock.h> 129d5171a8SRashmica Gupta #include <linux/init.h> 139d5171a8SRashmica Gupta #include <linux/moduleparam.h> 149d5171a8SRashmica Gupta #include <linux/fs.h> 159d5171a8SRashmica Gupta #include <linux/debugfs.h> 169d5171a8SRashmica Gupta #include <linux/slab.h> 179d5171a8SRashmica Gupta #include <linux/memory.h> 189d5171a8SRashmica Gupta #include <linux/memory_hotplug.h> 1998fa15f3SAnshuman Khandual #include <linux/numa.h> 209d5171a8SRashmica Gupta #include <asm/machdep.h> 219d5171a8SRashmica Gupta #include <asm/debugfs.h> 222ac02e5eSAneesh Kumar K.V #include <asm/cacheflush.h> 239d5171a8SRashmica Gupta 249d5171a8SRashmica Gupta /* This enables us to keep track of the memory removed from each node. */ 259d5171a8SRashmica Gupta struct memtrace_entry { 269d5171a8SRashmica Gupta void *mem; 279d5171a8SRashmica Gupta u64 start; 289d5171a8SRashmica Gupta u64 size; 299d5171a8SRashmica Gupta u32 nid; 309d5171a8SRashmica Gupta struct dentry *dir; 319d5171a8SRashmica Gupta char name[16]; 329d5171a8SRashmica Gupta }; 339d5171a8SRashmica Gupta 34d6718941SDavid Hildenbrand static DEFINE_MUTEX(memtrace_mutex); 359d5171a8SRashmica Gupta static u64 memtrace_size; 369d5171a8SRashmica Gupta 379d5171a8SRashmica Gupta static struct memtrace_entry *memtrace_array; 389d5171a8SRashmica Gupta static unsigned int memtrace_array_nr; 399d5171a8SRashmica Gupta 409d5171a8SRashmica Gupta 419d5171a8SRashmica Gupta static ssize_t memtrace_read(struct file *filp, char __user *ubuf, 429d5171a8SRashmica Gupta size_t count, loff_t *ppos) 439d5171a8SRashmica Gupta { 449d5171a8SRashmica Gupta struct memtrace_entry *ent = filp->private_data; 459d5171a8SRashmica Gupta 469d5171a8SRashmica Gupta return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 479d5171a8SRashmica Gupta } 489d5171a8SRashmica Gupta 4908a022adSJordan Niethe static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma) 5008a022adSJordan Niethe { 5108a022adSJordan Niethe struct memtrace_entry *ent = filp->private_data; 5208a022adSJordan Niethe 5308a022adSJordan Niethe if (ent->size < vma->vm_end - vma->vm_start) 5408a022adSJordan Niethe return -EINVAL; 5508a022adSJordan Niethe 5608a022adSJordan Niethe if (vma->vm_pgoff << PAGE_SHIFT >= ent->size) 5708a022adSJordan Niethe return -EINVAL; 5808a022adSJordan Niethe 5908a022adSJordan Niethe vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 6008a022adSJordan Niethe return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff, 6108a022adSJordan Niethe vma->vm_end - vma->vm_start, vma->vm_page_prot); 6208a022adSJordan Niethe } 6308a022adSJordan Niethe 649d5171a8SRashmica Gupta static const struct file_operations memtrace_fops = { 659d5171a8SRashmica Gupta .llseek = default_llseek, 669d5171a8SRashmica Gupta .read = memtrace_read, 679d5171a8SRashmica Gupta .open = simple_open, 6808a022adSJordan Niethe .mmap = memtrace_mmap, 699d5171a8SRashmica Gupta }; 709d5171a8SRashmica Gupta 712ac02e5eSAneesh Kumar K.V #define FLUSH_CHUNK_SIZE SZ_1G 722ac02e5eSAneesh Kumar K.V /** 732ac02e5eSAneesh Kumar K.V * flush_dcache_range_chunked(): Write any modified data cache blocks out to 742ac02e5eSAneesh Kumar K.V * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE 752ac02e5eSAneesh Kumar K.V * Does not invalidate the corresponding instruction cache blocks. 762ac02e5eSAneesh Kumar K.V * 772ac02e5eSAneesh Kumar K.V * @start: the start address 782ac02e5eSAneesh Kumar K.V * @stop: the stop address (exclusive) 792ac02e5eSAneesh Kumar K.V * @chunk: the max size of the chunks 802ac02e5eSAneesh Kumar K.V */ 812ac02e5eSAneesh Kumar K.V static void flush_dcache_range_chunked(unsigned long start, unsigned long stop, 822ac02e5eSAneesh Kumar K.V unsigned long chunk) 832ac02e5eSAneesh Kumar K.V { 842ac02e5eSAneesh Kumar K.V unsigned long i; 852ac02e5eSAneesh Kumar K.V 862ac02e5eSAneesh Kumar K.V for (i = start; i < stop; i += chunk) { 872ac02e5eSAneesh Kumar K.V flush_dcache_range(i, min(stop, i + chunk)); 882ac02e5eSAneesh Kumar K.V cond_resched(); 892ac02e5eSAneesh Kumar K.V } 902ac02e5eSAneesh Kumar K.V } 912ac02e5eSAneesh Kumar K.V 92c74cf7a3SDavid Hildenbrand static void memtrace_clear_range(unsigned long start_pfn, 93c74cf7a3SDavid Hildenbrand unsigned long nr_pages) 94c74cf7a3SDavid Hildenbrand { 95c74cf7a3SDavid Hildenbrand unsigned long pfn; 96c74cf7a3SDavid Hildenbrand 970bd4b96dSDavid Hildenbrand /* As HIGHMEM does not apply, use clear_page() directly. */ 98c74cf7a3SDavid Hildenbrand for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { 99c74cf7a3SDavid Hildenbrand if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) 100c74cf7a3SDavid Hildenbrand cond_resched(); 101c74cf7a3SDavid Hildenbrand clear_page(__va(PFN_PHYS(pfn))); 102c74cf7a3SDavid Hildenbrand } 1032ac02e5eSAneesh Kumar K.V /* 1042ac02e5eSAneesh Kumar K.V * Before we go ahead and use this range as cache inhibited range 1052ac02e5eSAneesh Kumar K.V * flush the cache. 1062ac02e5eSAneesh Kumar K.V */ 107*b910fcbaSSandipan Das flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn), 108*b910fcbaSSandipan Das (unsigned long)pfn_to_kaddr(start_pfn + nr_pages), 1092ac02e5eSAneesh Kumar K.V FLUSH_CHUNK_SIZE); 110c74cf7a3SDavid Hildenbrand } 111c74cf7a3SDavid Hildenbrand 1129d5171a8SRashmica Gupta static u64 memtrace_alloc_node(u32 nid, u64 size) 1139d5171a8SRashmica Gupta { 1140bd4b96dSDavid Hildenbrand const unsigned long nr_pages = PHYS_PFN(size); 1150bd4b96dSDavid Hildenbrand unsigned long pfn, start_pfn; 1160bd4b96dSDavid Hildenbrand struct page *page; 1179d5171a8SRashmica Gupta 1183f7daf3dSRashmica Gupta /* 1190bd4b96dSDavid Hildenbrand * Trace memory needs to be aligned to the size, which is guaranteed 1200bd4b96dSDavid Hildenbrand * by alloc_contig_pages(). 121c74cf7a3SDavid Hildenbrand */ 1220bd4b96dSDavid Hildenbrand page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE | 1230bd4b96dSDavid Hildenbrand __GFP_NOWARN, nid, NULL); 1240bd4b96dSDavid Hildenbrand if (!page) 1259d5171a8SRashmica Gupta return 0; 1260bd4b96dSDavid Hildenbrand start_pfn = page_to_pfn(page); 1270bd4b96dSDavid Hildenbrand 1280bd4b96dSDavid Hildenbrand /* 1290bd4b96dSDavid Hildenbrand * Clear the range while we still have a linear mapping. 1300bd4b96dSDavid Hildenbrand * 1310bd4b96dSDavid Hildenbrand * TODO: use __GFP_ZERO with alloc_contig_pages() once supported. 1320bd4b96dSDavid Hildenbrand */ 1330bd4b96dSDavid Hildenbrand memtrace_clear_range(start_pfn, nr_pages); 1340bd4b96dSDavid Hildenbrand 1350bd4b96dSDavid Hildenbrand /* 1360bd4b96dSDavid Hildenbrand * Set pages PageOffline(), to indicate that nobody (e.g., hibernation, 1370bd4b96dSDavid Hildenbrand * dumping, ...) should be touching these pages. 1380bd4b96dSDavid Hildenbrand */ 1390bd4b96dSDavid Hildenbrand for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) 1400bd4b96dSDavid Hildenbrand __SetPageOffline(pfn_to_page(pfn)); 1410bd4b96dSDavid Hildenbrand 1420bd4b96dSDavid Hildenbrand arch_remove_linear_mapping(PFN_PHYS(start_pfn), size); 1430bd4b96dSDavid Hildenbrand 1440bd4b96dSDavid Hildenbrand return PFN_PHYS(start_pfn); 1459d5171a8SRashmica Gupta } 1469d5171a8SRashmica Gupta 1479d5171a8SRashmica Gupta static int memtrace_init_regions_runtime(u64 size) 1489d5171a8SRashmica Gupta { 1499d5171a8SRashmica Gupta u32 nid; 1509d5171a8SRashmica Gupta u64 m; 1519d5171a8SRashmica Gupta 1529d5171a8SRashmica Gupta memtrace_array = kcalloc(num_online_nodes(), 1539d5171a8SRashmica Gupta sizeof(struct memtrace_entry), GFP_KERNEL); 1549d5171a8SRashmica Gupta if (!memtrace_array) { 1559d5171a8SRashmica Gupta pr_err("Failed to allocate memtrace_array\n"); 1569d5171a8SRashmica Gupta return -EINVAL; 1579d5171a8SRashmica Gupta } 1589d5171a8SRashmica Gupta 1599d5171a8SRashmica Gupta for_each_online_node(nid) { 1609d5171a8SRashmica Gupta m = memtrace_alloc_node(nid, size); 1619d5171a8SRashmica Gupta 1629d5171a8SRashmica Gupta /* 1639d5171a8SRashmica Gupta * A node might not have any local memory, so warn but 1649d5171a8SRashmica Gupta * continue on. 1659d5171a8SRashmica Gupta */ 1669d5171a8SRashmica Gupta if (!m) { 1679d5171a8SRashmica Gupta pr_err("Failed to allocate trace memory on node %d\n", nid); 1689d5171a8SRashmica Gupta continue; 1699d5171a8SRashmica Gupta } 1709d5171a8SRashmica Gupta 1719d5171a8SRashmica Gupta pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 1729d5171a8SRashmica Gupta 1739d5171a8SRashmica Gupta memtrace_array[memtrace_array_nr].start = m; 1749d5171a8SRashmica Gupta memtrace_array[memtrace_array_nr].size = size; 1759d5171a8SRashmica Gupta memtrace_array[memtrace_array_nr].nid = nid; 1769d5171a8SRashmica Gupta memtrace_array_nr++; 1779d5171a8SRashmica Gupta } 1789d5171a8SRashmica Gupta 1799d5171a8SRashmica Gupta return 0; 1809d5171a8SRashmica Gupta } 1819d5171a8SRashmica Gupta 1829d5171a8SRashmica Gupta static struct dentry *memtrace_debugfs_dir; 1839d5171a8SRashmica Gupta 1849d5171a8SRashmica Gupta static int memtrace_init_debugfs(void) 1859d5171a8SRashmica Gupta { 1869d5171a8SRashmica Gupta int ret = 0; 1879d5171a8SRashmica Gupta int i; 1889d5171a8SRashmica Gupta 1899d5171a8SRashmica Gupta for (i = 0; i < memtrace_array_nr; i++) { 1909d5171a8SRashmica Gupta struct dentry *dir; 1919d5171a8SRashmica Gupta struct memtrace_entry *ent = &memtrace_array[i]; 1929d5171a8SRashmica Gupta 1939d5171a8SRashmica Gupta ent->mem = ioremap(ent->start, ent->size); 1949d5171a8SRashmica Gupta /* Warn but continue on */ 1959d5171a8SRashmica Gupta if (!ent->mem) { 1969d5171a8SRashmica Gupta pr_err("Failed to map trace memory at 0x%llx\n", 1979d5171a8SRashmica Gupta ent->start); 1989d5171a8SRashmica Gupta ret = -1; 1999d5171a8SRashmica Gupta continue; 2009d5171a8SRashmica Gupta } 2019d5171a8SRashmica Gupta 2029d5171a8SRashmica Gupta snprintf(ent->name, 16, "%08x", ent->nid); 2039d5171a8SRashmica Gupta dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 2049d5171a8SRashmica Gupta 2059d5171a8SRashmica Gupta ent->dir = dir; 20608a022adSJordan Niethe debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops); 2079d5171a8SRashmica Gupta debugfs_create_x64("start", 0400, dir, &ent->start); 2089d5171a8SRashmica Gupta debugfs_create_x64("size", 0400, dir, &ent->size); 2099d5171a8SRashmica Gupta } 2109d5171a8SRashmica Gupta 2119d5171a8SRashmica Gupta return ret; 2129d5171a8SRashmica Gupta } 2139d5171a8SRashmica Gupta 2140bd4b96dSDavid Hildenbrand static int memtrace_free(int nid, u64 start, u64 size) 215d3da701dSRashmica Gupta { 2160bd4b96dSDavid Hildenbrand struct mhp_params params = { .pgprot = PAGE_KERNEL }; 2170bd4b96dSDavid Hildenbrand const unsigned long nr_pages = PHYS_PFN(size); 2180bd4b96dSDavid Hildenbrand const unsigned long start_pfn = PHYS_PFN(start); 2190bd4b96dSDavid Hildenbrand unsigned long pfn; 2200bd4b96dSDavid Hildenbrand int ret; 2210bd4b96dSDavid Hildenbrand 2220bd4b96dSDavid Hildenbrand ret = arch_create_linear_mapping(nid, start, size, ¶ms); 2230bd4b96dSDavid Hildenbrand if (ret) 2240bd4b96dSDavid Hildenbrand return ret; 2250bd4b96dSDavid Hildenbrand 2260bd4b96dSDavid Hildenbrand for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) 2270bd4b96dSDavid Hildenbrand __ClearPageOffline(pfn_to_page(pfn)); 2280bd4b96dSDavid Hildenbrand 2290bd4b96dSDavid Hildenbrand free_contig_range(start_pfn, nr_pages); 2300bd4b96dSDavid Hildenbrand return 0; 231d3da701dSRashmica Gupta } 232d3da701dSRashmica Gupta 233d3da701dSRashmica Gupta /* 2340bd4b96dSDavid Hildenbrand * Iterate through the chunks of memory we allocated and attempt to expose 2350bd4b96dSDavid Hildenbrand * them back to the kernel. 236d3da701dSRashmica Gupta */ 2370bd4b96dSDavid Hildenbrand static int memtrace_free_regions(void) 238d3da701dSRashmica Gupta { 239d3da701dSRashmica Gupta int i, ret = 0; 240d3da701dSRashmica Gupta struct memtrace_entry *ent; 241d3da701dSRashmica Gupta 242d3da701dSRashmica Gupta for (i = memtrace_array_nr - 1; i >= 0; i--) { 243d3da701dSRashmica Gupta ent = &memtrace_array[i]; 244d3da701dSRashmica Gupta 2450bd4b96dSDavid Hildenbrand /* We have freed this chunk previously */ 24698fa15f3SAnshuman Khandual if (ent->nid == NUMA_NO_NODE) 247d3da701dSRashmica Gupta continue; 248d3da701dSRashmica Gupta 249d3da701dSRashmica Gupta /* Remove from io mappings */ 250d3da701dSRashmica Gupta if (ent->mem) { 251d3da701dSRashmica Gupta iounmap(ent->mem); 252d3da701dSRashmica Gupta ent->mem = 0; 253d3da701dSRashmica Gupta } 254d3da701dSRashmica Gupta 2550bd4b96dSDavid Hildenbrand if (memtrace_free(ent->nid, ent->start, ent->size)) { 2560bd4b96dSDavid Hildenbrand pr_err("Failed to free trace memory on node %d\n", 257d3da701dSRashmica Gupta ent->nid); 258d3da701dSRashmica Gupta ret += 1; 259d3da701dSRashmica Gupta continue; 260d3da701dSRashmica Gupta } 261d3da701dSRashmica Gupta 262d3da701dSRashmica Gupta /* 2630bd4b96dSDavid Hildenbrand * Memory was freed successfully so clean up references to it 2640bd4b96dSDavid Hildenbrand * so on reentry we can tell that this chunk was freed. 265d3da701dSRashmica Gupta */ 266d3da701dSRashmica Gupta debugfs_remove_recursive(ent->dir); 2670bd4b96dSDavid Hildenbrand pr_info("Freed trace memory back on node %d\n", ent->nid); 26898fa15f3SAnshuman Khandual ent->size = ent->start = ent->nid = NUMA_NO_NODE; 269d3da701dSRashmica Gupta } 270d3da701dSRashmica Gupta if (ret) 271d3da701dSRashmica Gupta return ret; 272d3da701dSRashmica Gupta 2730bd4b96dSDavid Hildenbrand /* If all chunks of memory were freed successfully, reset globals */ 274d3da701dSRashmica Gupta kfree(memtrace_array); 275d3da701dSRashmica Gupta memtrace_array = NULL; 276d3da701dSRashmica Gupta memtrace_size = 0; 277d3da701dSRashmica Gupta memtrace_array_nr = 0; 278d3da701dSRashmica Gupta return 0; 279d3da701dSRashmica Gupta } 280d3da701dSRashmica Gupta 2819d5171a8SRashmica Gupta static int memtrace_enable_set(void *data, u64 val) 2829d5171a8SRashmica Gupta { 283d6718941SDavid Hildenbrand int rc = -EAGAIN; 284d3da701dSRashmica Gupta u64 bytes; 285d3da701dSRashmica Gupta 286d3da701dSRashmica Gupta /* 287d3da701dSRashmica Gupta * Don't attempt to do anything if size isn't aligned to a memory 288d3da701dSRashmica Gupta * block or equal to zero. 289d3da701dSRashmica Gupta */ 290d3da701dSRashmica Gupta bytes = memory_block_size_bytes(); 291d3da701dSRashmica Gupta if (val & (bytes - 1)) { 292d3da701dSRashmica Gupta pr_err("Value must be aligned with 0x%llx\n", bytes); 2939d5171a8SRashmica Gupta return -EINVAL; 294d3da701dSRashmica Gupta } 295d3da701dSRashmica Gupta 296d6718941SDavid Hildenbrand mutex_lock(&memtrace_mutex); 297d6718941SDavid Hildenbrand 2980bd4b96dSDavid Hildenbrand /* Free all previously allocated memory. */ 2990bd4b96dSDavid Hildenbrand if (memtrace_size && memtrace_free_regions()) 300d6718941SDavid Hildenbrand goto out_unlock; 3019d5171a8SRashmica Gupta 302d6718941SDavid Hildenbrand if (!val) { 303d6718941SDavid Hildenbrand rc = 0; 304d6718941SDavid Hildenbrand goto out_unlock; 305d6718941SDavid Hildenbrand } 3069d5171a8SRashmica Gupta 3070bd4b96dSDavid Hildenbrand /* Allocate memory. */ 3089d5171a8SRashmica Gupta if (memtrace_init_regions_runtime(val)) 309d6718941SDavid Hildenbrand goto out_unlock; 3109d5171a8SRashmica Gupta 3119d5171a8SRashmica Gupta if (memtrace_init_debugfs()) 312d6718941SDavid Hildenbrand goto out_unlock; 3139d5171a8SRashmica Gupta 3149d5171a8SRashmica Gupta memtrace_size = val; 315d6718941SDavid Hildenbrand rc = 0; 316d6718941SDavid Hildenbrand out_unlock: 317d6718941SDavid Hildenbrand mutex_unlock(&memtrace_mutex); 318d6718941SDavid Hildenbrand return rc; 3199d5171a8SRashmica Gupta } 3209d5171a8SRashmica Gupta 3219d5171a8SRashmica Gupta static int memtrace_enable_get(void *data, u64 *val) 3229d5171a8SRashmica Gupta { 3239d5171a8SRashmica Gupta *val = memtrace_size; 3249d5171a8SRashmica Gupta return 0; 3259d5171a8SRashmica Gupta } 3269d5171a8SRashmica Gupta 3279d5171a8SRashmica Gupta DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 3289d5171a8SRashmica Gupta memtrace_enable_set, "0x%016llx\n"); 3299d5171a8SRashmica Gupta 3309d5171a8SRashmica Gupta static int memtrace_init(void) 3319d5171a8SRashmica Gupta { 3329d5171a8SRashmica Gupta memtrace_debugfs_dir = debugfs_create_dir("memtrace", 3339d5171a8SRashmica Gupta powerpc_debugfs_root); 3349d5171a8SRashmica Gupta 3359d5171a8SRashmica Gupta debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 3369d5171a8SRashmica Gupta NULL, &memtrace_init_fops); 3379d5171a8SRashmica Gupta 3389d5171a8SRashmica Gupta return 0; 3399d5171a8SRashmica Gupta } 3409d5171a8SRashmica Gupta machine_device_initcall(powernv, memtrace_init); 341