1 /* 2 * Copyright (C) IBM Corporation, 2014, 2017 3 * Anton Blanchard, Rashmica Gupta. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 */ 10 11 #define pr_fmt(fmt) "memtrace: " fmt 12 13 #include <linux/bitops.h> 14 #include <linux/string.h> 15 #include <linux/memblock.h> 16 #include <linux/init.h> 17 #include <linux/moduleparam.h> 18 #include <linux/fs.h> 19 #include <linux/debugfs.h> 20 #include <linux/slab.h> 21 #include <linux/memory.h> 22 #include <linux/memory_hotplug.h> 23 #include <linux/numa.h> 24 #include <asm/machdep.h> 25 #include <asm/debugfs.h> 26 27 /* This enables us to keep track of the memory removed from each node. */ 28 struct memtrace_entry { 29 void *mem; 30 u64 start; 31 u64 size; 32 u32 nid; 33 struct dentry *dir; 34 char name[16]; 35 }; 36 37 static u64 memtrace_size; 38 39 static struct memtrace_entry *memtrace_array; 40 static unsigned int memtrace_array_nr; 41 42 43 static ssize_t memtrace_read(struct file *filp, char __user *ubuf, 44 size_t count, loff_t *ppos) 45 { 46 struct memtrace_entry *ent = filp->private_data; 47 48 return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 49 } 50 51 static const struct file_operations memtrace_fops = { 52 .llseek = default_llseek, 53 .read = memtrace_read, 54 .open = simple_open, 55 }; 56 57 static int check_memblock_online(struct memory_block *mem, void *arg) 58 { 59 if (mem->state != MEM_ONLINE) 60 return -1; 61 62 return 0; 63 } 64 65 static int change_memblock_state(struct memory_block *mem, void *arg) 66 { 67 unsigned long state = (unsigned long)arg; 68 69 mem->state = state; 70 71 return 0; 72 } 73 74 /* called with device_hotplug_lock held */ 75 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 76 { 77 u64 end_pfn = start_pfn + nr_pages - 1; 78 79 if (walk_memory_range(start_pfn, end_pfn, NULL, 80 check_memblock_online)) 81 return false; 82 83 walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, 84 change_memblock_state); 85 86 if (offline_pages(start_pfn, nr_pages)) { 87 walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, 88 change_memblock_state); 89 return false; 90 } 91 92 walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, 93 change_memblock_state); 94 95 96 return true; 97 } 98 99 static u64 memtrace_alloc_node(u32 nid, u64 size) 100 { 101 u64 start_pfn, end_pfn, nr_pages, pfn; 102 u64 base_pfn; 103 u64 bytes = memory_block_size_bytes(); 104 105 if (!node_spanned_pages(nid)) 106 return 0; 107 108 start_pfn = node_start_pfn(nid); 109 end_pfn = node_end_pfn(nid); 110 nr_pages = size >> PAGE_SHIFT; 111 112 /* Trace memory needs to be aligned to the size */ 113 end_pfn = round_down(end_pfn - nr_pages, nr_pages); 114 115 lock_device_hotplug(); 116 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 117 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { 118 /* 119 * Remove memory in memory block size chunks so that 120 * iomem resources are always split to the same size and 121 * we never try to remove memory that spans two iomem 122 * resources. 123 */ 124 end_pfn = base_pfn + nr_pages; 125 for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { 126 __remove_memory(nid, pfn << PAGE_SHIFT, bytes); 127 } 128 unlock_device_hotplug(); 129 return base_pfn << PAGE_SHIFT; 130 } 131 } 132 unlock_device_hotplug(); 133 134 return 0; 135 } 136 137 static int memtrace_init_regions_runtime(u64 size) 138 { 139 u32 nid; 140 u64 m; 141 142 memtrace_array = kcalloc(num_online_nodes(), 143 sizeof(struct memtrace_entry), GFP_KERNEL); 144 if (!memtrace_array) { 145 pr_err("Failed to allocate memtrace_array\n"); 146 return -EINVAL; 147 } 148 149 for_each_online_node(nid) { 150 m = memtrace_alloc_node(nid, size); 151 152 /* 153 * A node might not have any local memory, so warn but 154 * continue on. 155 */ 156 if (!m) { 157 pr_err("Failed to allocate trace memory on node %d\n", nid); 158 continue; 159 } 160 161 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 162 163 memtrace_array[memtrace_array_nr].start = m; 164 memtrace_array[memtrace_array_nr].size = size; 165 memtrace_array[memtrace_array_nr].nid = nid; 166 memtrace_array_nr++; 167 } 168 169 return 0; 170 } 171 172 static struct dentry *memtrace_debugfs_dir; 173 174 static int memtrace_init_debugfs(void) 175 { 176 int ret = 0; 177 int i; 178 179 for (i = 0; i < memtrace_array_nr; i++) { 180 struct dentry *dir; 181 struct memtrace_entry *ent = &memtrace_array[i]; 182 183 ent->mem = ioremap(ent->start, ent->size); 184 /* Warn but continue on */ 185 if (!ent->mem) { 186 pr_err("Failed to map trace memory at 0x%llx\n", 187 ent->start); 188 ret = -1; 189 continue; 190 } 191 192 snprintf(ent->name, 16, "%08x", ent->nid); 193 dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 194 if (!dir) { 195 pr_err("Failed to create debugfs directory for node %d\n", 196 ent->nid); 197 return -1; 198 } 199 200 ent->dir = dir; 201 debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); 202 debugfs_create_x64("start", 0400, dir, &ent->start); 203 debugfs_create_x64("size", 0400, dir, &ent->size); 204 } 205 206 return ret; 207 } 208 209 static int online_mem_block(struct memory_block *mem, void *arg) 210 { 211 return device_online(&mem->dev); 212 } 213 214 /* 215 * Iterate through the chunks of memory we have removed from the kernel 216 * and attempt to add them back to the kernel. 217 */ 218 static int memtrace_online(void) 219 { 220 int i, ret = 0; 221 struct memtrace_entry *ent; 222 223 for (i = memtrace_array_nr - 1; i >= 0; i--) { 224 ent = &memtrace_array[i]; 225 226 /* We have onlined this chunk previously */ 227 if (ent->nid == NUMA_NO_NODE) 228 continue; 229 230 /* Remove from io mappings */ 231 if (ent->mem) { 232 iounmap(ent->mem); 233 ent->mem = 0; 234 } 235 236 if (add_memory(ent->nid, ent->start, ent->size)) { 237 pr_err("Failed to add trace memory to node %d\n", 238 ent->nid); 239 ret += 1; 240 continue; 241 } 242 243 /* 244 * If kernel isn't compiled with the auto online option 245 * we need to online the memory ourselves. 246 */ 247 if (!memhp_auto_online) { 248 lock_device_hotplug(); 249 walk_memory_range(PFN_DOWN(ent->start), 250 PFN_UP(ent->start + ent->size - 1), 251 NULL, online_mem_block); 252 unlock_device_hotplug(); 253 } 254 255 /* 256 * Memory was added successfully so clean up references to it 257 * so on reentry we can tell that this chunk was added. 258 */ 259 debugfs_remove_recursive(ent->dir); 260 pr_info("Added trace memory back to node %d\n", ent->nid); 261 ent->size = ent->start = ent->nid = NUMA_NO_NODE; 262 } 263 if (ret) 264 return ret; 265 266 /* If all chunks of memory were added successfully, reset globals */ 267 kfree(memtrace_array); 268 memtrace_array = NULL; 269 memtrace_size = 0; 270 memtrace_array_nr = 0; 271 return 0; 272 } 273 274 static int memtrace_enable_set(void *data, u64 val) 275 { 276 u64 bytes; 277 278 /* 279 * Don't attempt to do anything if size isn't aligned to a memory 280 * block or equal to zero. 281 */ 282 bytes = memory_block_size_bytes(); 283 if (val & (bytes - 1)) { 284 pr_err("Value must be aligned with 0x%llx\n", bytes); 285 return -EINVAL; 286 } 287 288 /* Re-add/online previously removed/offlined memory */ 289 if (memtrace_size) { 290 if (memtrace_online()) 291 return -EAGAIN; 292 } 293 294 if (!val) 295 return 0; 296 297 /* Offline and remove memory */ 298 if (memtrace_init_regions_runtime(val)) 299 return -EINVAL; 300 301 if (memtrace_init_debugfs()) 302 return -EINVAL; 303 304 memtrace_size = val; 305 306 return 0; 307 } 308 309 static int memtrace_enable_get(void *data, u64 *val) 310 { 311 *val = memtrace_size; 312 return 0; 313 } 314 315 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 316 memtrace_enable_set, "0x%016llx\n"); 317 318 static int memtrace_init(void) 319 { 320 memtrace_debugfs_dir = debugfs_create_dir("memtrace", 321 powerpc_debugfs_root); 322 if (!memtrace_debugfs_dir) 323 return -1; 324 325 debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 326 NULL, &memtrace_init_fops); 327 328 return 0; 329 } 330 machine_device_initcall(powernv, memtrace_init); 331