12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
29d5171a8SRashmica Gupta /*
39d5171a8SRashmica Gupta * Copyright (C) IBM Corporation, 2014, 2017
49d5171a8SRashmica Gupta * Anton Blanchard, Rashmica Gupta.
59d5171a8SRashmica Gupta */
69d5171a8SRashmica Gupta
79d5171a8SRashmica Gupta #define pr_fmt(fmt) "memtrace: " fmt
89d5171a8SRashmica Gupta
99d5171a8SRashmica Gupta #include <linux/bitops.h>
109d5171a8SRashmica Gupta #include <linux/string.h>
119d5171a8SRashmica Gupta #include <linux/memblock.h>
129d5171a8SRashmica Gupta #include <linux/init.h>
139d5171a8SRashmica Gupta #include <linux/moduleparam.h>
149d5171a8SRashmica Gupta #include <linux/fs.h>
159d5171a8SRashmica Gupta #include <linux/debugfs.h>
169d5171a8SRashmica Gupta #include <linux/slab.h>
179d5171a8SRashmica Gupta #include <linux/memory.h>
189d5171a8SRashmica Gupta #include <linux/memory_hotplug.h>
1998fa15f3SAnshuman Khandual #include <linux/numa.h>
209d5171a8SRashmica Gupta #include <asm/machdep.h>
212ac02e5eSAneesh Kumar K.V #include <asm/cacheflush.h>
229d5171a8SRashmica Gupta
239d5171a8SRashmica Gupta /* This enables us to keep track of the memory removed from each node. */
249d5171a8SRashmica Gupta struct memtrace_entry {
259d5171a8SRashmica Gupta void *mem;
269d5171a8SRashmica Gupta u64 start;
279d5171a8SRashmica Gupta u64 size;
289d5171a8SRashmica Gupta u32 nid;
299d5171a8SRashmica Gupta struct dentry *dir;
309d5171a8SRashmica Gupta char name[16];
319d5171a8SRashmica Gupta };
329d5171a8SRashmica Gupta
33d6718941SDavid Hildenbrand static DEFINE_MUTEX(memtrace_mutex);
349d5171a8SRashmica Gupta static u64 memtrace_size;
359d5171a8SRashmica Gupta
369d5171a8SRashmica Gupta static struct memtrace_entry *memtrace_array;
379d5171a8SRashmica Gupta static unsigned int memtrace_array_nr;
389d5171a8SRashmica Gupta
399d5171a8SRashmica Gupta
memtrace_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)409d5171a8SRashmica Gupta static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
419d5171a8SRashmica Gupta size_t count, loff_t *ppos)
429d5171a8SRashmica Gupta {
439d5171a8SRashmica Gupta struct memtrace_entry *ent = filp->private_data;
449d5171a8SRashmica Gupta
459d5171a8SRashmica Gupta return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
469d5171a8SRashmica Gupta }
479d5171a8SRashmica Gupta
memtrace_mmap(struct file * filp,struct vm_area_struct * vma)4808a022adSJordan Niethe static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
4908a022adSJordan Niethe {
5008a022adSJordan Niethe struct memtrace_entry *ent = filp->private_data;
5108a022adSJordan Niethe
5208a022adSJordan Niethe if (ent->size < vma->vm_end - vma->vm_start)
5308a022adSJordan Niethe return -EINVAL;
5408a022adSJordan Niethe
5508a022adSJordan Niethe if (vma->vm_pgoff << PAGE_SHIFT >= ent->size)
5608a022adSJordan Niethe return -EINVAL;
5708a022adSJordan Niethe
5808a022adSJordan Niethe vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
5908a022adSJordan Niethe return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
6008a022adSJordan Niethe vma->vm_end - vma->vm_start, vma->vm_page_prot);
6108a022adSJordan Niethe }
6208a022adSJordan Niethe
639d5171a8SRashmica Gupta static const struct file_operations memtrace_fops = {
649d5171a8SRashmica Gupta .llseek = default_llseek,
659d5171a8SRashmica Gupta .read = memtrace_read,
669d5171a8SRashmica Gupta .open = simple_open,
6708a022adSJordan Niethe .mmap = memtrace_mmap,
689d5171a8SRashmica Gupta };
699d5171a8SRashmica Gupta
#define FLUSH_CHUNK_SIZE SZ_1G
/**
 * flush_dcache_range_chunked(): Write any modified data cache blocks out to
 * memory and invalidate them, working through the range in pieces of at most
 * @chunk bytes and offering to reschedule after each piece.
 * Does not invalidate the corresponding instruction cache blocks.
 *
 * @start: the start address
 * @stop: the stop address (exclusive)
 * @chunk: the max size of the chunks
 */
static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
				       unsigned long chunk)
{
	unsigned long pos = start;

	while (pos < stop) {
		const unsigned long next = pos + chunk;

		/* The last piece is clamped to the end of the range. */
		flush_dcache_range(pos, min(stop, next));
		cond_resched();
		pos = next;
	}
}
902ac02e5eSAneesh Kumar K.V
memtrace_clear_range(unsigned long start_pfn,unsigned long nr_pages)91c74cf7a3SDavid Hildenbrand static void memtrace_clear_range(unsigned long start_pfn,
92c74cf7a3SDavid Hildenbrand unsigned long nr_pages)
93c74cf7a3SDavid Hildenbrand {
94c74cf7a3SDavid Hildenbrand unsigned long pfn;
95c74cf7a3SDavid Hildenbrand
960bd4b96dSDavid Hildenbrand /* As HIGHMEM does not apply, use clear_page() directly. */
97c74cf7a3SDavid Hildenbrand for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
98c74cf7a3SDavid Hildenbrand if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
99c74cf7a3SDavid Hildenbrand cond_resched();
100c74cf7a3SDavid Hildenbrand clear_page(__va(PFN_PHYS(pfn)));
101c74cf7a3SDavid Hildenbrand }
1022ac02e5eSAneesh Kumar K.V /*
1032ac02e5eSAneesh Kumar K.V * Before we go ahead and use this range as cache inhibited range
1042ac02e5eSAneesh Kumar K.V * flush the cache.
1052ac02e5eSAneesh Kumar K.V */
106b910fcbaSSandipan Das flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
107b910fcbaSSandipan Das (unsigned long)pfn_to_kaddr(start_pfn + nr_pages),
1082ac02e5eSAneesh Kumar K.V FLUSH_CHUNK_SIZE);
109c74cf7a3SDavid Hildenbrand }
110c74cf7a3SDavid Hildenbrand
/*
 * Allocate @size bytes of physically contiguous memory on node @nid, zero it,
 * mark its pages offline and remove it from the linear mapping.
 *
 * Returns the physical start address of the range, or 0 if the allocation
 * failed.
 */
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	const unsigned long nr_pages = PHYS_PFN(size);
	unsigned long first_pfn, end_pfn, cur;
	struct page *page;

	/*
	 * Trace memory needs to be aligned to the size, which is guaranteed
	 * by alloc_contig_pages().
	 */
	page = alloc_contig_pages(nr_pages,
				  GFP_KERNEL | __GFP_THISNODE | __GFP_NOWARN,
				  nid, NULL);
	if (!page)
		return 0;

	first_pfn = page_to_pfn(page);
	end_pfn = first_pfn + nr_pages;

	/*
	 * Clear the range while we still have a linear mapping.
	 *
	 * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
	 */
	memtrace_clear_range(first_pfn, nr_pages);

	/*
	 * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
	 * dumping, ...) should be touching these pages.
	 */
	for (cur = first_pfn; cur < end_pfn; cur++)
		__SetPageOffline(pfn_to_page(cur));

	arch_remove_linear_mapping(PFN_PHYS(first_pfn), size);

	return PFN_PHYS(first_pfn);
}
1459d5171a8SRashmica Gupta
/*
 * Allocate the chunk table and try to take @size bytes of trace memory from
 * every online node, recording each successful allocation in memtrace_array.
 *
 * Nodes on which the allocation fails are reported and skipped; they do not
 * make the function fail.
 *
 * Returns 0 on success (even if no node provided memory), -ENOMEM if the
 * chunk table could not be allocated, or -EBUSY if a chunk table from an
 * earlier attempt is still present.
 */
static int memtrace_init_regions_runtime(u64 size)
{
	u32 nid;
	u64 m;

	/*
	 * The loop below appends at memtrace_array_nr and relies on starting
	 * with an empty table. A table left over from an earlier, only partly
	 * completed setup must be freed first; otherwise the old table would
	 * leak and the stale count would index past the new allocation.
	 */
	if (memtrace_array)
		return -EBUSY;

	memtrace_array = kcalloc(num_online_nodes(),
				 sizeof(struct memtrace_entry), GFP_KERNEL);
	if (!memtrace_array) {
		pr_err("Failed to allocate memtrace_array\n");
		return -ENOMEM;
	}

	for_each_online_node(nid) {
		m = memtrace_alloc_node(nid, size);

		/*
		 * A node might not have any local memory, so warn but
		 * continue on.
		 */
		if (!m) {
			pr_err("Failed to allocate trace memory on node %u\n", nid);
			continue;
		}

		pr_info("Allocated trace memory on node %u at 0x%016llx\n", nid, m);

		memtrace_array[memtrace_array_nr].start = m;
		memtrace_array[memtrace_array_nr].size = size;
		memtrace_array[memtrace_array_nr].nid = nid;
		memtrace_array_nr++;
	}

	return 0;
}
1809d5171a8SRashmica Gupta
1819d5171a8SRashmica Gupta static struct dentry *memtrace_debugfs_dir;
1829d5171a8SRashmica Gupta
memtrace_init_debugfs(void)1839d5171a8SRashmica Gupta static int memtrace_init_debugfs(void)
1849d5171a8SRashmica Gupta {
1859d5171a8SRashmica Gupta int ret = 0;
1869d5171a8SRashmica Gupta int i;
1879d5171a8SRashmica Gupta
1889d5171a8SRashmica Gupta for (i = 0; i < memtrace_array_nr; i++) {
1899d5171a8SRashmica Gupta struct dentry *dir;
1909d5171a8SRashmica Gupta struct memtrace_entry *ent = &memtrace_array[i];
1919d5171a8SRashmica Gupta
1929d5171a8SRashmica Gupta ent->mem = ioremap(ent->start, ent->size);
1939d5171a8SRashmica Gupta /* Warn but continue on */
1949d5171a8SRashmica Gupta if (!ent->mem) {
1959d5171a8SRashmica Gupta pr_err("Failed to map trace memory at 0x%llx\n",
1969d5171a8SRashmica Gupta ent->start);
1979d5171a8SRashmica Gupta ret = -1;
1989d5171a8SRashmica Gupta continue;
1999d5171a8SRashmica Gupta }
2009d5171a8SRashmica Gupta
2019d5171a8SRashmica Gupta snprintf(ent->name, 16, "%08x", ent->nid);
2029d5171a8SRashmica Gupta dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
2039d5171a8SRashmica Gupta
2049d5171a8SRashmica Gupta ent->dir = dir;
20508a022adSJordan Niethe debugfs_create_file_unsafe("trace", 0600, dir, ent, &memtrace_fops);
2069d5171a8SRashmica Gupta debugfs_create_x64("start", 0400, dir, &ent->start);
2079d5171a8SRashmica Gupta debugfs_create_x64("size", 0400, dir, &ent->size);
2089d5171a8SRashmica Gupta }
2099d5171a8SRashmica Gupta
2109d5171a8SRashmica Gupta return ret;
2119d5171a8SRashmica Gupta }
2129d5171a8SRashmica Gupta
/*
 * Give one chunk of trace memory back to the kernel: re-create the linear
 * mapping for [@start, @start + @size) on node @nid, clear PageOffline on its
 * pages and free the contiguous range.
 *
 * Returns 0 on success or the error from arch_create_linear_mapping(), in
 * which case the pages are left untouched.
 */
static int memtrace_free(int nid, u64 start, u64 size)
{
	struct mhp_params params = { .pgprot = PAGE_KERNEL };
	const unsigned long nr_pages = PHYS_PFN(size);
	const unsigned long start_pfn = PHYS_PFN(start);
	unsigned long pfn;
	int ret;

	ret = arch_create_linear_mapping(nid, start, size, &params);
	if (ret)
		return ret;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		__ClearPageOffline(pfn_to_page(pfn));

	free_contig_range(start_pfn, nr_pages);
	return 0;
}
231d3da701dSRashmica Gupta
232d3da701dSRashmica Gupta /*
2330bd4b96dSDavid Hildenbrand * Iterate through the chunks of memory we allocated and attempt to expose
2340bd4b96dSDavid Hildenbrand * them back to the kernel.
235d3da701dSRashmica Gupta */
memtrace_free_regions(void)2360bd4b96dSDavid Hildenbrand static int memtrace_free_regions(void)
237d3da701dSRashmica Gupta {
238d3da701dSRashmica Gupta int i, ret = 0;
239d3da701dSRashmica Gupta struct memtrace_entry *ent;
240d3da701dSRashmica Gupta
241d3da701dSRashmica Gupta for (i = memtrace_array_nr - 1; i >= 0; i--) {
242d3da701dSRashmica Gupta ent = &memtrace_array[i];
243d3da701dSRashmica Gupta
2440bd4b96dSDavid Hildenbrand /* We have freed this chunk previously */
24598fa15f3SAnshuman Khandual if (ent->nid == NUMA_NO_NODE)
246d3da701dSRashmica Gupta continue;
247d3da701dSRashmica Gupta
248d3da701dSRashmica Gupta /* Remove from io mappings */
249d3da701dSRashmica Gupta if (ent->mem) {
250d3da701dSRashmica Gupta iounmap(ent->mem);
251d3da701dSRashmica Gupta ent->mem = 0;
252d3da701dSRashmica Gupta }
253d3da701dSRashmica Gupta
2540bd4b96dSDavid Hildenbrand if (memtrace_free(ent->nid, ent->start, ent->size)) {
2550bd4b96dSDavid Hildenbrand pr_err("Failed to free trace memory on node %d\n",
256d3da701dSRashmica Gupta ent->nid);
257d3da701dSRashmica Gupta ret += 1;
258d3da701dSRashmica Gupta continue;
259d3da701dSRashmica Gupta }
260d3da701dSRashmica Gupta
261d3da701dSRashmica Gupta /*
2620bd4b96dSDavid Hildenbrand * Memory was freed successfully so clean up references to it
2630bd4b96dSDavid Hildenbrand * so on reentry we can tell that this chunk was freed.
264d3da701dSRashmica Gupta */
265d3da701dSRashmica Gupta debugfs_remove_recursive(ent->dir);
2660bd4b96dSDavid Hildenbrand pr_info("Freed trace memory back on node %d\n", ent->nid);
26798fa15f3SAnshuman Khandual ent->size = ent->start = ent->nid = NUMA_NO_NODE;
268d3da701dSRashmica Gupta }
269d3da701dSRashmica Gupta if (ret)
270d3da701dSRashmica Gupta return ret;
271d3da701dSRashmica Gupta
2720bd4b96dSDavid Hildenbrand /* If all chunks of memory were freed successfully, reset globals */
273d3da701dSRashmica Gupta kfree(memtrace_array);
274d3da701dSRashmica Gupta memtrace_array = NULL;
275d3da701dSRashmica Gupta memtrace_size = 0;
276d3da701dSRashmica Gupta memtrace_array_nr = 0;
277d3da701dSRashmica Gupta return 0;
278d3da701dSRashmica Gupta }
279d3da701dSRashmica Gupta
/*
 * Write handler of the "enable" debugfs file: free any trace memory that is
 * currently set up and, if @val is non-zero, take @val bytes of trace memory
 * from every online node and expose it in debugfs.
 *
 * @data: unused
 * @val:  requested size per node in bytes; must be a multiple of the memory
 *        block size, 0 only frees
 *
 * Returns 0 on success, -EINVAL if @val is not aligned, or -EAGAIN if freeing
 * the old memory or setting up the new memory failed.
 */
static int memtrace_enable_set(void *data, u64 val)
{
	int rc = -EAGAIN;
	u64 bytes;

	/*
	 * Don't attempt to do anything if size isn't aligned to a memory
	 * block or equal to zero.
	 */
	bytes = memory_block_size_bytes();
	if (val & (bytes - 1)) {
		pr_err("Value must be aligned with 0x%llx\n", bytes);
		return -EINVAL;
	}

	mutex_lock(&memtrace_mutex);

	/*
	 * Free all previously allocated memory.
	 *
	 * Key this off the chunk table rather than memtrace_size: if an
	 * earlier attempt allocated memory but memtrace_init_debugfs() failed,
	 * memtrace_size is still 0 while the table and its memory exist.
	 * Skipping the free in that state would leak them and let the next
	 * allocation append past the end of a freshly allocated table.
	 */
	if (memtrace_array && memtrace_free_regions())
		goto out_unlock;

	if (!val) {
		rc = 0;
		goto out_unlock;
	}

	/* Allocate memory. */
	if (memtrace_init_regions_runtime(val))
		goto out_unlock;

	if (memtrace_init_debugfs())
		goto out_unlock;

	memtrace_size = val;
	rc = 0;
out_unlock:
	mutex_unlock(&memtrace_mutex);
	return rc;
}
3199d5171a8SRashmica Gupta
memtrace_enable_get(void * data,u64 * val)3209d5171a8SRashmica Gupta static int memtrace_enable_get(void *data, u64 *val)
3219d5171a8SRashmica Gupta {
3229d5171a8SRashmica Gupta *val = memtrace_size;
3239d5171a8SRashmica Gupta return 0;
3249d5171a8SRashmica Gupta }
3259d5171a8SRashmica Gupta
3269d5171a8SRashmica Gupta DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
3279d5171a8SRashmica Gupta memtrace_enable_set, "0x%016llx\n");
3289d5171a8SRashmica Gupta
memtrace_init(void)3299d5171a8SRashmica Gupta static int memtrace_init(void)
3309d5171a8SRashmica Gupta {
3319d5171a8SRashmica Gupta memtrace_debugfs_dir = debugfs_create_dir("memtrace",
332*dbf77fedSAneesh Kumar K.V arch_debugfs_dir);
3339d5171a8SRashmica Gupta
3349d5171a8SRashmica Gupta debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
3359d5171a8SRashmica Gupta NULL, &memtrace_init_fops);
3369d5171a8SRashmica Gupta
3379d5171a8SRashmica Gupta return 0;
3389d5171a8SRashmica Gupta }
3399d5171a8SRashmica Gupta machine_device_initcall(powernv, memtrace_init);
340