1 /* 2 * Copyright (C) IBM Corporation, 2014, 2017 3 * Anton Blanchard, Rashmica Gupta. 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 */ 10 11 #define pr_fmt(fmt) "memtrace: " fmt 12 13 #include <linux/bitops.h> 14 #include <linux/string.h> 15 #include <linux/memblock.h> 16 #include <linux/init.h> 17 #include <linux/moduleparam.h> 18 #include <linux/fs.h> 19 #include <linux/debugfs.h> 20 #include <linux/slab.h> 21 #include <linux/memory.h> 22 #include <linux/memory_hotplug.h> 23 #include <asm/machdep.h> 24 #include <asm/debugfs.h> 25 26 /* This enables us to keep track of the memory removed from each node. */ 27 struct memtrace_entry { 28 void *mem; 29 u64 start; 30 u64 size; 31 u32 nid; 32 struct dentry *dir; 33 char name[16]; 34 }; 35 36 static u64 memtrace_size; 37 38 static struct memtrace_entry *memtrace_array; 39 static unsigned int memtrace_array_nr; 40 41 42 static ssize_t memtrace_read(struct file *filp, char __user *ubuf, 43 size_t count, loff_t *ppos) 44 { 45 struct memtrace_entry *ent = filp->private_data; 46 47 return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); 48 } 49 50 static const struct file_operations memtrace_fops = { 51 .llseek = default_llseek, 52 .read = memtrace_read, 53 .open = simple_open, 54 }; 55 56 static int check_memblock_online(struct memory_block *mem, void *arg) 57 { 58 if (mem->state != MEM_ONLINE) 59 return -1; 60 61 return 0; 62 } 63 64 static int change_memblock_state(struct memory_block *mem, void *arg) 65 { 66 unsigned long state = (unsigned long)arg; 67 68 mem->state = state; 69 70 return 0; 71 } 72 73 /* called with device_hotplug_lock held */ 74 static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) 75 { 76 u64 end_pfn = start_pfn + nr_pages - 1; 77 78 if (walk_memory_range(start_pfn, end_pfn, NULL, 79 check_memblock_online)) 80 return false; 81 82 walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, 83 change_memblock_state); 84 85 if (offline_pages(start_pfn, nr_pages)) { 86 walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, 87 change_memblock_state); 88 return false; 89 } 90 91 walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, 92 change_memblock_state); 93 94 95 return true; 96 } 97 98 static u64 memtrace_alloc_node(u32 nid, u64 size) 99 { 100 u64 start_pfn, end_pfn, nr_pages, pfn; 101 u64 base_pfn; 102 u64 bytes = memory_block_size_bytes(); 103 104 if (!node_spanned_pages(nid)) 105 return 0; 106 107 start_pfn = node_start_pfn(nid); 108 end_pfn = node_end_pfn(nid); 109 nr_pages = size >> PAGE_SHIFT; 110 111 /* Trace memory needs to be aligned to the size */ 112 end_pfn = round_down(end_pfn - nr_pages, nr_pages); 113 114 lock_device_hotplug(); 115 for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { 116 if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { 117 /* 118 * Remove memory in memory block size chunks so that 119 * iomem resources are always split to the same size and 120 * we never try to remove memory that spans two iomem 121 * resources. 122 */ 123 end_pfn = base_pfn + nr_pages; 124 for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { 125 __remove_memory(nid, pfn << PAGE_SHIFT, bytes); 126 } 127 unlock_device_hotplug(); 128 return base_pfn << PAGE_SHIFT; 129 } 130 } 131 unlock_device_hotplug(); 132 133 return 0; 134 } 135 136 static int memtrace_init_regions_runtime(u64 size) 137 { 138 u32 nid; 139 u64 m; 140 141 memtrace_array = kcalloc(num_online_nodes(), 142 sizeof(struct memtrace_entry), GFP_KERNEL); 143 if (!memtrace_array) { 144 pr_err("Failed to allocate memtrace_array\n"); 145 return -EINVAL; 146 } 147 148 for_each_online_node(nid) { 149 m = memtrace_alloc_node(nid, size); 150 151 /* 152 * A node might not have any local memory, so warn but 153 * continue on. 154 */ 155 if (!m) { 156 pr_err("Failed to allocate trace memory on node %d\n", nid); 157 continue; 158 } 159 160 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); 161 162 memtrace_array[memtrace_array_nr].start = m; 163 memtrace_array[memtrace_array_nr].size = size; 164 memtrace_array[memtrace_array_nr].nid = nid; 165 memtrace_array_nr++; 166 } 167 168 return 0; 169 } 170 171 static struct dentry *memtrace_debugfs_dir; 172 173 static int memtrace_init_debugfs(void) 174 { 175 int ret = 0; 176 int i; 177 178 for (i = 0; i < memtrace_array_nr; i++) { 179 struct dentry *dir; 180 struct memtrace_entry *ent = &memtrace_array[i]; 181 182 ent->mem = ioremap(ent->start, ent->size); 183 /* Warn but continue on */ 184 if (!ent->mem) { 185 pr_err("Failed to map trace memory at 0x%llx\n", 186 ent->start); 187 ret = -1; 188 continue; 189 } 190 191 snprintf(ent->name, 16, "%08x", ent->nid); 192 dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); 193 if (!dir) { 194 pr_err("Failed to create debugfs directory for node %d\n", 195 ent->nid); 196 return -1; 197 } 198 199 ent->dir = dir; 200 debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); 201 debugfs_create_x64("start", 0400, dir, &ent->start); 202 debugfs_create_x64("size", 0400, dir, &ent->size); 203 } 204 205 return ret; 206 } 207 208 static int online_mem_block(struct memory_block *mem, void *arg) 209 { 210 return device_online(&mem->dev); 211 } 212 213 /* 214 * Iterate through the chunks of memory we have removed from the kernel 215 * and attempt to add them back to the kernel. 216 */ 217 static int memtrace_online(void) 218 { 219 int i, ret = 0; 220 struct memtrace_entry *ent; 221 222 for (i = memtrace_array_nr - 1; i >= 0; i--) { 223 ent = &memtrace_array[i]; 224 225 /* We have onlined this chunk previously */ 226 if (ent->nid == -1) 227 continue; 228 229 /* Remove from io mappings */ 230 if (ent->mem) { 231 iounmap(ent->mem); 232 ent->mem = 0; 233 } 234 235 if (add_memory(ent->nid, ent->start, ent->size)) { 236 pr_err("Failed to add trace memory to node %d\n", 237 ent->nid); 238 ret += 1; 239 continue; 240 } 241 242 /* 243 * If kernel isn't compiled with the auto online option 244 * we need to online the memory ourselves. 245 */ 246 if (!memhp_auto_online) { 247 lock_device_hotplug(); 248 walk_memory_range(PFN_DOWN(ent->start), 249 PFN_UP(ent->start + ent->size - 1), 250 NULL, online_mem_block); 251 unlock_device_hotplug(); 252 } 253 254 /* 255 * Memory was added successfully so clean up references to it 256 * so on reentry we can tell that this chunk was added. 257 */ 258 debugfs_remove_recursive(ent->dir); 259 pr_info("Added trace memory back to node %d\n", ent->nid); 260 ent->size = ent->start = ent->nid = -1; 261 } 262 if (ret) 263 return ret; 264 265 /* If all chunks of memory were added successfully, reset globals */ 266 kfree(memtrace_array); 267 memtrace_array = NULL; 268 memtrace_size = 0; 269 memtrace_array_nr = 0; 270 return 0; 271 } 272 273 static int memtrace_enable_set(void *data, u64 val) 274 { 275 u64 bytes; 276 277 /* 278 * Don't attempt to do anything if size isn't aligned to a memory 279 * block or equal to zero. 280 */ 281 bytes = memory_block_size_bytes(); 282 if (val & (bytes - 1)) { 283 pr_err("Value must be aligned with 0x%llx\n", bytes); 284 return -EINVAL; 285 } 286 287 /* Re-add/online previously removed/offlined memory */ 288 if (memtrace_size) { 289 if (memtrace_online()) 290 return -EAGAIN; 291 } 292 293 if (!val) 294 return 0; 295 296 /* Offline and remove memory */ 297 if (memtrace_init_regions_runtime(val)) 298 return -EINVAL; 299 300 if (memtrace_init_debugfs()) 301 return -EINVAL; 302 303 memtrace_size = val; 304 305 return 0; 306 } 307 308 static int memtrace_enable_get(void *data, u64 *val) 309 { 310 *val = memtrace_size; 311 return 0; 312 } 313 314 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get, 315 memtrace_enable_set, "0x%016llx\n"); 316 317 static int memtrace_init(void) 318 { 319 memtrace_debugfs_dir = debugfs_create_dir("memtrace", 320 powerpc_debugfs_root); 321 if (!memtrace_debugfs_dir) 322 return -1; 323 324 debugfs_create_file("enable", 0600, memtrace_debugfs_dir, 325 NULL, &memtrace_init_fops); 326 327 return 0; 328 } 329 machine_device_initcall(powernv, memtrace_init); 330