1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) IBM Corporation, 2014, 2017 4 * Anton Blanchard, Rashmica Gupta. 5 */ 6 7 #define pr_fmt(fmt) "memtrace: " fmt 8 9 #include <linux/bitops.h> 10 #include <linux/string.h> 11 #include <linux/memblock.h> 12 #include <linux/init.h> 13 #include <linux/moduleparam.h> 14 #include <linux/fs.h> 15 #include <linux/debugfs.h> 16 #include <linux/slab.h> 17 #include <linux/memory.h> 18 #include <linux/memory_hotplug.h> 19 #include <linux/numa.h> 20 #include <asm/machdep.h> 21 #include <asm/debugfs.h> 22 23 /* This enables us to keep track of the memory removed from each node. */ 24 struct memtrace_entry { 25 void *mem; 26 u64 start; 27 u64 size; 28 u32 nid; 29 struct dentry *dir; 30 char name[16]; 31 }; 32 33 static DEFINE_MUTEX(memtrace_mutex); 34 static u64 memtrace_size; 35 36 static struct memtrace_entry *memtrace_array; 37 static unsigned int memtrace_array_nr; 38 39 40 static ssize_t memtrace_read(struct file *filp, char __user *ubuf, 41 size_t count, loff_t *ppos) 42 { 43 struct memtrace_entry *ent = filp->private_data; 44 45 return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 46 } 47 48 static const struct file_operations memtrace_fops = { 49 .llseek = default_llseek, 50 .read = memtrace_read, 51 .open = simple_open, 52 }; 53 54 static int check_memblock_online(struct memory_block *mem, void *arg) 55 { 56 if (mem->state != MEM_ONLINE) 57 return -1; 58 59 return 0; 60 } 61 62 static int change_memblock_state(struct memory_block *mem, void *arg) 63 { 64 unsigned long state = (unsigned long)arg; 65 66 mem->state = state; 67 68 return 0; 69 } 70 71 static void memtrace_clear_range(unsigned long start_pfn, 72 unsigned long nr_pages) 73 { 74 unsigned long pfn; 75 76 /* 77 * As pages are offline, we cannot trust the memmap anymore. As HIGHMEM 78 * does not apply, avoid passing around "struct page" and use 79 * clear_page() instead directly. 80 */ 81 for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { 82 if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) 83 cond_resched(); 84 clear_page(__va(PFN_PHYS(pfn))); 85 } 86 } 87 88 /* called with device_hotplug_lock held */ 89 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 90 { 91 const unsigned long start = PFN_PHYS(start_pfn); 92 const unsigned long size = PFN_PHYS(nr_pages); 93 94 if (walk_memory_blocks(start, size, NULL, check_memblock_online)) 95 return false; 96 97 walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE, 98 change_memblock_state); 99 100 if (offline_pages(start_pfn, nr_pages)) { 101 walk_memory_blocks(start, size, (void *)MEM_ONLINE, 102 change_memblock_state); 103 return false; 104 } 105 106 walk_memory_blocks(start, size, (void *)MEM_OFFLINE, 107 change_memblock_state); 108 109 110 return true; 111 } 112 113 static u64 memtrace_alloc_node(u32 nid, u64 size) 114 { 115 u64 start_pfn, end_pfn, nr_pages, pfn; 116 u64 base_pfn; 117 u64 bytes = memory_block_size_bytes(); 118 119 if (!node_spanned_pages(nid)) 120 return 0; 121 122 start_pfn = node_start_pfn(nid); 123 end_pfn = node_end_pfn(nid); 124 nr_pages = size >> PAGE_SHIFT; 125 126 /* Trace memory needs to be aligned to the size */ 127 end_pfn = round_down(end_pfn - nr_pages, nr_pages); 128 129 lock_device_hotplug(); 130 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 131 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { 132 /* 133 * Clear the range while we still have a linear 134 * mapping. 135 */ 136 memtrace_clear_range(base_pfn, nr_pages); 137 /* 138 * Remove memory in memory block size chunks so that 139 * iomem resources are always split to the same size and 140 * we never try to remove memory that spans two iomem 141 * resources. 142 */ 143 end_pfn = base_pfn + nr_pages; 144 for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { 145 __remove_memory(nid, pfn << PAGE_SHIFT, bytes); 146 } 147 unlock_device_hotplug(); 148 return base_pfn << PAGE_SHIFT; 149 } 150 } 151 unlock_device_hotplug(); 152 153 return 0; 154 } 155 156 static int memtrace_init_regions_runtime(u64 size) 157 { 158 u32 nid; 159 u64 m; 160 161 memtrace_array = kcalloc(num_online_nodes(), 162 sizeof(struct memtrace_entry), GFP_KERNEL); 163 if (!memtrace_array) { 164 pr_err("Failed to allocate memtrace_array\n"); 165 return -EINVAL; 166 } 167 168 for_each_online_node(nid) { 169 m = memtrace_alloc_node(nid, size); 170 171 /* 172 * A node might not have any local memory, so warn but 173 * continue on. 174 */ 175 if (!m) { 176 pr_err("Failed to allocate trace memory on node %d\n", nid); 177 continue; 178 } 179 180 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 181 182 memtrace_array[memtrace_array_nr].start = m; 183 memtrace_array[memtrace_array_nr].size = size; 184 memtrace_array[memtrace_array_nr].nid = nid; 185 memtrace_array_nr++; 186 } 187 188 return 0; 189 } 190 191 static struct dentry *memtrace_debugfs_dir; 192 193 static int memtrace_init_debugfs(void) 194 { 195 int ret = 0; 196 int i; 197 198 for (i = 0; i < memtrace_array_nr; i++) { 199 struct dentry *dir; 200 struct memtrace_entry *ent = &memtrace_array[i]; 201 202 ent->mem = ioremap(ent->start, ent->size); 203 /* Warn but continue on */ 204 if (!ent->mem) { 205 pr_err("Failed to map trace memory at 0x%llx\n", 206 ent->start); 207 ret = -1; 208 continue; 209 } 210 211 snprintf(ent->name, 16, "%08x", ent->nid); 212 dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 213 214 ent->dir = dir; 215 debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); 216 debugfs_create_x64("start", 0400, dir, &ent->start); 217 debugfs_create_x64("size", 0400, dir, &ent->size); 218 } 219 220 return ret; 221 } 222 223 static int online_mem_block(struct memory_block *mem, void *arg) 224 { 225 return device_online(&mem->dev); 226 } 227 228 /* 229 * Iterate through the chunks of memory we have removed from the kernel 230 * and attempt to add them back to the kernel. 231 */ 232 static int memtrace_online(void) 233 { 234 int i, ret = 0; 235 struct memtrace_entry *ent; 236 237 for (i = memtrace_array_nr - 1; i >= 0; i--) { 238 ent = &memtrace_array[i]; 239 240 /* We have onlined this chunk previously */ 241 if (ent->nid == NUMA_NO_NODE) 242 continue; 243 244 /* Remove from io mappings */ 245 if (ent->mem) { 246 iounmap(ent->mem); 247 ent->mem = 0; 248 } 249 250 if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) { 251 pr_err("Failed to add trace memory to node %d\n", 252 ent->nid); 253 ret += 1; 254 continue; 255 } 256 257 lock_device_hotplug(); 258 walk_memory_blocks(ent->start, ent->size, NULL, 259 online_mem_block); 260 unlock_device_hotplug(); 261 262 /* 263 * Memory was added successfully so clean up references to it 264 * so on reentry we can tell that this chunk was added. 265 */ 266 debugfs_remove_recursive(ent->dir); 267 pr_info("Added trace memory back to node %d\n", ent->nid); 268 ent->size = ent->start = ent->nid = NUMA_NO_NODE; 269 } 270 if (ret) 271 return ret; 272 273 /* If all chunks of memory were added successfully, reset globals */ 274 kfree(memtrace_array); 275 memtrace_array = NULL; 276 memtrace_size = 0; 277 memtrace_array_nr = 0; 278 return 0; 279 } 280 281 static int memtrace_enable_set(void *data, u64 val) 282 { 283 int rc = -EAGAIN; 284 u64 bytes; 285 286 /* 287 * Don't attempt to do anything if size isn't aligned to a memory 288 * block or equal to zero. 289 */ 290 bytes = memory_block_size_bytes(); 291 if (val & (bytes - 1)) { 292 pr_err("Value must be aligned with 0x%llx\n", bytes); 293 return -EINVAL; 294 } 295 296 mutex_lock(&memtrace_mutex); 297 298 /* Re-add/online previously removed/offlined memory */ 299 if (memtrace_size) { 300 if (memtrace_online()) 301 goto out_unlock; 302 } 303 304 if (!val) { 305 rc = 0; 306 goto out_unlock; 307 } 308 309 /* Offline and remove memory */ 310 if (memtrace_init_regions_runtime(val)) 311 goto out_unlock; 312 313 if (memtrace_init_debugfs()) 314 goto out_unlock; 315 316 memtrace_size = val; 317 rc = 0; 318 out_unlock: 319 mutex_unlock(&memtrace_mutex); 320 return rc; 321 } 322 323 static int memtrace_enable_get(void *data, u64 *val) 324 { 325 *val = memtrace_size; 326 return 0; 327 } 328 329 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 330 memtrace_enable_set, "0x%016llx\n"); 331 332 static int memtrace_init(void) 333 { 334 memtrace_debugfs_dir = debugfs_create_dir("memtrace", 335 powerpc_debugfs_root); 336 337 debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 338 NULL, &memtrace_init_fops); 339 340 return 0; 341 } 342 machine_device_initcall(powernv, memtrace_init); 343