// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>

/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
	void *mem;
	u64 start;
	u64 size;
	u32 nid;
	struct dentry *dir;
	char name[16];
};

static DEFINE_MUTEX(memtrace_mutex);
static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;
static unsigned int memtrace_array_nr;

static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
			     size_t count, loff_t *ppos)
{
	struct memtrace_entry *ent = filp->private_data;

	return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size);
}

static const struct file_operations memtrace_fops = {
	.llseek = default_llseek,
	.read	= memtrace_read,
	.open	= simple_open,
};

static void memtrace_clear_range(unsigned long start_pfn,
				 unsigned long nr_pages)
{
	unsigned long pfn;

	/* As HIGHMEM does not apply, use clear_page() directly. */
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
			cond_resched();
		clear_page(__va(PFN_PHYS(pfn)));
	}
}

static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	const unsigned long nr_pages = PHYS_PFN(size);
	unsigned long pfn, start_pfn;
	struct page *page;

	/*
	 * Trace memory needs to be aligned to the size, which is guaranteed
	 * by alloc_contig_pages().
	 */
	page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
				  __GFP_NOWARN, nid, NULL);
	if (!page)
		return 0;
	start_pfn = page_to_pfn(page);

	/*
	 * Clear the range while we still have a linear mapping.
	 *
	 * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
	 */
	memtrace_clear_range(start_pfn, nr_pages);

	/*
	 * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
	 * dumping, ...) should be touching these pages.
	 */
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		__SetPageOffline(pfn_to_page(pfn));

	arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);

	return PFN_PHYS(start_pfn);
}

static int memtrace_init_regions_runtime(u64 size)
{
	u32 nid;
	u64 m;

	memtrace_array = kcalloc(num_online_nodes(),
				 sizeof(struct memtrace_entry), GFP_KERNEL);
	if (!memtrace_array) {
		pr_err("Failed to allocate memtrace_array\n");
		return -EINVAL;
	}

	for_each_online_node(nid) {
		m = memtrace_alloc_node(nid, size);

		/*
		 * A node might not have any local memory, so warn but
		 * continue on.
		 */
		if (!m) {
			pr_err("Failed to allocate trace memory on node %d\n", nid);
			continue;
		}

		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);

		memtrace_array[memtrace_array_nr].start = m;
		memtrace_array[memtrace_array_nr].size = size;
		memtrace_array[memtrace_array_nr].nid = nid;
		memtrace_array_nr++;
	}

	return 0;
}

static struct dentry *memtrace_debugfs_dir;

static int memtrace_init_debugfs(void)
{
	int ret = 0;
	int i;

	for (i = 0; i < memtrace_array_nr; i++) {
		struct dentry *dir;
		struct memtrace_entry *ent = &memtrace_array[i];

		ent->mem = ioremap(ent->start, ent->size);
		/* Warn but continue on */
		if (!ent->mem) {
			pr_err("Failed to map trace memory at 0x%llx\n",
			       ent->start);
			ret = -1;
			continue;
		}

		snprintf(ent->name, 16, "%08x", ent->nid);
		dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);

		ent->dir = dir;
		debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops);
		debugfs_create_x64("start", 0400, dir, &ent->start);
		debugfs_create_x64("size", 0400, dir, &ent->size);
	}

	return ret;
}

static int memtrace_free(int nid, u64 start, u64 size)
{
	struct mhp_params params = { .pgprot = PAGE_KERNEL };
	const unsigned long nr_pages = PHYS_PFN(size);
	const unsigned long start_pfn = PHYS_PFN(start);
	unsigned long pfn;
	int ret;

	ret = arch_create_linear_mapping(nid, start, size, &params);
	if (ret)
		return ret;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		__ClearPageOffline(pfn_to_page(pfn));

	free_contig_range(start_pfn, nr_pages);
	return 0;
}

/*
 * Iterate through the chunks of memory we allocated and attempt to expose
 * them back to the kernel.
 */
static int memtrace_free_regions(void)
{
	int i, ret = 0;
	struct memtrace_entry *ent;

	for (i = memtrace_array_nr - 1; i >= 0; i--) {
		ent = &memtrace_array[i];

		/* We have freed this chunk previously */
		if (ent->nid == NUMA_NO_NODE)
			continue;

		/* Remove from io mappings */
		if (ent->mem) {
			iounmap(ent->mem);
			ent->mem = NULL;
		}

		if (memtrace_free(ent->nid, ent->start, ent->size)) {
			pr_err("Failed to free trace memory on node %d\n",
			       ent->nid);
			ret += 1;
			continue;
		}

		/*
		 * Memory was freed successfully so clean up references to it
		 * so on reentry we can tell that this chunk was freed.
		 */
		debugfs_remove_recursive(ent->dir);
		pr_info("Freed trace memory back on node %d\n", ent->nid);
		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
	}
	if (ret)
		return ret;

	/* If all chunks of memory were freed successfully, reset globals */
	kfree(memtrace_array);
	memtrace_array = NULL;
	memtrace_size = 0;
	memtrace_array_nr = 0;
	return 0;
}

static int memtrace_enable_set(void *data, u64 val)
{
	int rc = -EAGAIN;
	u64 bytes;

	/*
	 * Don't attempt to do anything if size isn't aligned to a memory
	 * block or equal to zero.
	 */
	bytes = memory_block_size_bytes();
	if (val & (bytes - 1)) {
		pr_err("Value must be aligned with 0x%llx\n", bytes);
		return -EINVAL;
	}

	mutex_lock(&memtrace_mutex);

	/* Free all previously allocated memory. */
	if (memtrace_size && memtrace_free_regions())
		goto out_unlock;

	if (!val) {
		rc = 0;
		goto out_unlock;
	}

	/* Allocate memory. */
	if (memtrace_init_regions_runtime(val))
		goto out_unlock;

	if (memtrace_init_debugfs())
		goto out_unlock;

	memtrace_size = val;
	rc = 0;
out_unlock:
	mutex_unlock(&memtrace_mutex);
	return rc;
}

static int memtrace_enable_get(void *data, u64 *val)
{
	*val = memtrace_size;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
			memtrace_enable_set, "0x%016llx\n");

static int memtrace_init(void)
{
	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
						  powerpc_debugfs_root);

	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
			    NULL, &memtrace_init_fops);

	return 0;
}
machine_device_initcall(powernv, memtrace_init);