1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) IBM Corporation, 2014, 2017 4 * Anton Blanchard, Rashmica Gupta. 5 */ 6 7 #define pr_fmt(fmt) "memtrace: " fmt 8 9 #include <linux/bitops.h> 10 #include <linux/string.h> 11 #include <linux/memblock.h> 12 #include <linux/init.h> 13 #include <linux/moduleparam.h> 14 #include <linux/fs.h> 15 #include <linux/debugfs.h> 16 #include <linux/slab.h> 17 #include <linux/memory.h> 18 #include <linux/memory_hotplug.h> 19 #include <linux/numa.h> 20 #include <asm/machdep.h> 21 #include <asm/debugfs.h> 22 23 /* This enables us to keep track of the memory removed from each node. */ 24 struct memtrace_entry { 25 void *mem; 26 u64 start; 27 u64 size; 28 u32 nid; 29 struct dentry *dir; 30 char name[16]; 31 }; 32 33 static u64 memtrace_size; 34 35 static struct memtrace_entry *memtrace_array; 36 static unsigned int memtrace_array_nr; 37 38 39 static ssize_t memtrace_read(struct file *filp, char __user *ubuf, 40 size_t count, loff_t *ppos) 41 { 42 struct memtrace_entry *ent = filp->private_data; 43 44 return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 45 } 46 47 static const struct file_operations memtrace_fops = { 48 .llseek = default_llseek, 49 .read = memtrace_read, 50 .open = simple_open, 51 }; 52 53 static int check_memblock_online(struct memory_block *mem, void *arg) 54 { 55 if (mem->state != MEM_ONLINE) 56 return -1; 57 58 return 0; 59 } 60 61 static int change_memblock_state(struct memory_block *mem, void *arg) 62 { 63 unsigned long state = (unsigned long)arg; 64 65 mem->state = state; 66 67 return 0; 68 } 69 70 static void memtrace_clear_range(unsigned long start_pfn, 71 unsigned long nr_pages) 72 { 73 unsigned long pfn; 74 75 /* 76 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM 77 * does not apply, avoid passing around "struct page" and use 78 * clear_page() instead directly. 79 */ 80 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { 81 if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) 82 cond_resched(); 83 clear_page(__va(PFN_PHYS(pfn))); 84 } 85 } 86 87 /* called with device_hotplug_lock held */ 88 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 89 { 90 const unsigned long start = PFN_PHYS(start_pfn); 91 const unsigned long size = PFN_PHYS(nr_pages); 92 93 if (walk_memory_blocks(start, size, NULL, check_memblock_online)) 94 return false; 95 96 walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, 97 change_memblock_state); 98 99 if (offline_pages(start_pfn, nr_pages)) { 100 walk_memory_blocks(start, size, (void *)MEM_ONLINE, 101 change_memblock_state); 102 return false; 103 } 104 105 walk_memory_blocks(start, size, (void *)MEM_OFFLINE, 106 change_memblock_state); 107 108 109 return true; 110 } 111 112 static u64 memtrace_alloc_node(u32 nid, u64 size) 113 { 114 u64 start_pfn, end_pfn, nr_pages, pfn; 115 u64 base_pfn; 116 u64 bytes = memory_block_size_bytes(); 117 118 if (!node_spanned_pages(nid)) 119 return 0; 120 121 start_pfn = node_start_pfn(nid); 122 end_pfn = node_end_pfn(nid); 123 nr_pages = size >> PAGE_SHIFT; 124 125 /* Trace memory needs to be aligned to the size */ 126 end_pfn = round_down(end_pfn - nr_pages, nr_pages); 127 128 lock_device_hotplug(); 129 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 130 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { 131 /* 132 * Clear the range while we still have a linear 133 * mapping. 134 */ 135 memtrace_clear_range(base_pfn, nr_pages); 136 /* 137 * Remove memory in memory block size chunks so that 138 * iomem resources are always split to the same size and 139 * we never try to remove memory that spans two iomem 140 * resources. 141 */ 142 end_pfn = base_pfn + nr_pages; 143 for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { 144 __remove_memory(nid, pfn << PAGE_SHIFT, bytes); 145 } 146 unlock_device_hotplug(); 147 return base_pfn << PAGE_SHIFT; 148 } 149 } 150 unlock_device_hotplug(); 151 152 return 0; 153 } 154 155 static int memtrace_init_regions_runtime(u64 size) 156 { 157 u32 nid; 158 u64 m; 159 160 memtrace_array = kcalloc(num_online_nodes(), 161 sizeof(struct memtrace_entry), GFP_KERNEL); 162 if (!memtrace_array) { 163 pr_err("Failed to allocate memtrace_array\n"); 164 return -EINVAL; 165 } 166 167 for_each_online_node(nid) { 168 m = memtrace_alloc_node(nid, size); 169 170 /* 171 * A node might not have any local memory, so warn but 172 * continue on. 173 */ 174 if (!m) { 175 pr_err("Failed to allocate trace memory on node %d\n", nid); 176 continue; 177 } 178 179 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 180 181 memtrace_array[memtrace_array_nr].start = m; 182 memtrace_array[memtrace_array_nr].size = size; 183 memtrace_array[memtrace_array_nr].nid = nid; 184 memtrace_array_nr++; 185 } 186 187 return 0; 188 } 189 190 static struct dentry *memtrace_debugfs_dir; 191 192 static int memtrace_init_debugfs(void) 193 { 194 int ret = 0; 195 int i; 196 197 for (i = 0; i < memtrace_array_nr; i++) { 198 struct dentry *dir; 199 struct memtrace_entry *ent = &memtrace_array[i]; 200 201 ent->mem = ioremap(ent->start, ent->size); 202 /* Warn but continue on */ 203 if (!ent->mem) { 204 pr_err("Failed to map trace memory at 0x%llx\n", 205 ent->start); 206 ret = -1; 207 continue; 208 } 209 210 snprintf(ent->name, 16, "%08x", ent->nid); 211 dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 212 213 ent->dir = dir; 214 debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); 215 debugfs_create_x64("start", 0400, dir, &ent->start); 216 debugfs_create_x64("size", 0400, dir, &ent->size); 217 } 218 219 return ret; 220 } 221 222 static int online_mem_block(struct memory_block *mem, void *arg) 223 { 224 return device_online(&mem->dev); 225 } 226 227 /* 228 * Iterate through the chunks of memory we have removed from the kernel 229 * and attempt to add them back to the kernel. 230 */ 231 static int memtrace_online(void) 232 { 233 int i, ret = 0; 234 struct memtrace_entry *ent; 235 236 for (i = memtrace_array_nr - 1; i >= 0; i--) { 237 ent = &memtrace_array[i]; 238 239 /* We have onlined this chunk previously */ 240 if (ent->nid == NUMA_NO_NODE) 241 continue; 242 243 /* Remove from io mappings */ 244 if (ent->mem) { 245 iounmap(ent->mem); 246 ent->mem = 0; 247 } 248 249 if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { 250 pr_err("Failed to add trace memory to node %d\n", 251 ent->nid); 252 ret += 1; 253 continue; 254 } 255 256 lock_device_hotplug(); 257 walk_memory_blocks(ent->start, ent->size, NULL, 258 online_mem_block); 259 unlock_device_hotplug(); 260 261 /* 262 * Memory was added successfully so clean up references to it 263 * so on reentry we can tell that this chunk was added. 264 */ 265 debugfs_remove_recursive(ent->dir); 266 pr_info("Added trace memory back to node %d\n", ent->nid); 267 ent->size = ent->start = ent->nid = NUMA_NO_NODE; 268 } 269 if (ret) 270 return ret; 271 272 /* If all chunks of memory were added successfully, reset globals */ 273 kfree(memtrace_array); 274 memtrace_array = NULL; 275 memtrace_size = 0; 276 memtrace_array_nr = 0; 277 return 0; 278 } 279 280 static int memtrace_enable_set(void *data, u64 val) 281 { 282 u64 bytes; 283 284 /* 285 * Don't attempt to do anything if size isn't aligned to a memory 286 * block or equal to zero. 287 */ 288 bytes = memory_block_size_bytes(); 289 if (val & (bytes - 1)) { 290 pr_err("Value must be aligned with 0x%llx\n", bytes); 291 return -EINVAL; 292 } 293 294 /* Re-add/online previously removed/offlined memory */ 295 if (memtrace_size) { 296 if (memtrace_online()) 297 return -EAGAIN; 298 } 299 300 if (!val) 301 return 0; 302 303 /* Offline and remove memory */ 304 if (memtrace_init_regions_runtime(val)) 305 return -EINVAL; 306 307 if (memtrace_init_debugfs()) 308 return -EINVAL; 309 310 memtrace_size = val; 311 312 return 0; 313 } 314 315 static int memtrace_enable_get(void *data, u64 *val) 316 { 317 *val = memtrace_size; 318 return 0; 319 } 320 321 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 322 memtrace_enable_set, "0x%016llx\n"); 323 324 static int memtrace_init(void) 325 { 326 memtrace_debugfs_dir = debugfs_create_dir("memtrace", 327 powerpc_debugfs_root); 328 329 debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 330 NULL, &memtrace_init_fops); 331 332 return 0; 333 } 334 machine_device_initcall(powernv, memtrace_init); 335