/*
 * Debug helper to dump the current kernel pagetables of the system
 * so that we can see what the various memory ranges are set to.
 *
 * (C) Copyright 2008 Intel Corporation
 *
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/seq_file.h>

#include <asm/pgtable.h>

/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
        int level;
        pgprot_t current_prot;
        unsigned long start_address;
        unsigned long current_address;
        const struct addr_marker *marker;
        unsigned long lines;
        bool to_dmesg;
};

struct addr_marker {
        unsigned long start_address;
        const char *name;
        unsigned long max_lines;
};

/* Indices for address_markers[]; keep in sync with the table below. */
enum address_markers_idx {
        USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
        ESPFIX_START_NR,
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
#else
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
# endif
        FIXADDR_START_NR,
#endif
};

/* Address space marker hints */
static struct addr_marker address_markers[] = {
        { 0, "User Space" },
#ifdef CONFIG_X86_64
        { 0x8000000000000000UL, "Kernel Space" },
        { PAGE_OFFSET,          "Low Kernel Mapping" },
        { VMALLOC_START,        "vmalloc() Area" },
        { VMEMMAP_START,        "Vmemmap" },
        { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
        { __START_KERNEL_map,   "High Kernel Mapping" },
        { MODULES_VADDR,        "Modules" },
        { MODULES_END,          "End Modules" },
#else
        { PAGE_OFFSET,          "Kernel Mapping" },
        { 0/* VMALLOC_START */, "vmalloc() Area" },
        { 0/* VMALLOC_END */,   "vmalloc() End" },
# ifdef CONFIG_HIGHMEM
        { 0/* PKMAP_BASE */,    "Persistent kmap() Area" },
# endif
        { 0/* FIXADDR_START */, "Fixmap Area" },
#endif
        { -1, NULL }            /* End of list */
};

/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
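
/*
 * Worked example of the multipliers above (a sketch, assuming x86-64
 * 4-level paging with 4 KiB pages and 512 entries per table level):
 *
 *   PTE_LEVEL_MULT = 4 KiB               (one pte covers one page)
 *   PMD_LEVEL_MULT = 512 * 4 KiB = 2 MiB
 *   PUD_LEVEL_MULT = 512 * 2 MiB = 1 GiB
 *   PGD_LEVEL_MULT = 512 * 1 GiB = 512 GiB
 */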

#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)           \
({                                                              \
        if (to_dmesg)                                           \
                printk(KERN_INFO fmt, ##args);                  \
        else                                                    \
                if (m)                                          \
                        seq_printf(m, fmt, ##args);             \
})

#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)          \
({                                                              \
        if (to_dmesg)                                           \
                printk(KERN_CONT fmt, ##args);                  \
        else                                                    \
                if (m)                                          \
                        seq_printf(m, fmt, ##args);             \
})

/*
 * Print a readable form of a pgprot_t to the seq_file or the kernel log
 */
static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
{
        pgprotval_t pr = pgprot_val(prot);
        static const char * const level_name[] =
                { "cr3", "pgd", "pud", "pmd", "pte" };

        if (!pgprot_val(prot)) {
                /* Not present */
                pt_dump_cont_printf(m, dmsg, "                          ");
        } else {
                if (pr & _PAGE_USER)
                        pt_dump_cont_printf(m, dmsg, "USR ");
                else
                        pt_dump_cont_printf(m, dmsg, "    ");
                if (pr & _PAGE_RW)
                        pt_dump_cont_printf(m, dmsg, "RW ");
                else
                        pt_dump_cont_printf(m, dmsg, "ro ");
                if (pr & _PAGE_PWT)
                        pt_dump_cont_printf(m, dmsg, "PWT ");
                else
                        pt_dump_cont_printf(m, dmsg, "    ");
                if (pr & _PAGE_PCD)
                        pt_dump_cont_printf(m, dmsg, "PCD ");
                else
                        pt_dump_cont_printf(m, dmsg, "    ");

                /* Bit 7 has a different meaning on level 3 vs 4 */
                if (level <= 3) {
                        if (pr & _PAGE_PSE)
                                pt_dump_cont_printf(m, dmsg, "PSE ");
                        else
                                pt_dump_cont_printf(m, dmsg, "    ");
                } else {
                        if (pr & _PAGE_PAT)
                                pt_dump_cont_printf(m, dmsg, "pat ");
                        else
                                pt_dump_cont_printf(m, dmsg, "    ");
                }
                if (pr & _PAGE_GLOBAL)
                        pt_dump_cont_printf(m, dmsg, "GLB ");
                else
                        pt_dump_cont_printf(m, dmsg, "    ");
                if (pr & _PAGE_NX)
                        pt_dump_cont_printf(m, dmsg, "NX ");
                else
                        pt_dump_cont_printf(m, dmsg, "x  ");
        }
        pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
}

/*
 * On 64 bits, sign-extend the 48-bit address to 64 bits
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
        return (signed long)(u << 16) >> 16;
#else
        return u;
#endif
}
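
/*
 * Illustrative example of the sign extension above (x86-64, 48-bit
 * virtual addresses): PGD slot 256 corresponds to the raw offset
 * 256 * PGD_LEVEL_MULT = 0x0000800000000000; shifting left by 16 and
 * then arithmetically right by 16 turns it into the canonical kernel
 * address 0xffff800000000000.
 */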

/*
 * This function gets called on a break in a continuous series
 * of PTE entries; the next one is different so we need to
 * print what we collected so far.
 */
static void note_page(struct seq_file *m, struct pg_state *st,
                      pgprot_t new_prot, int level)
{
        pgprotval_t prot, cur;
        static const char units[] = "BKMGTPE";

        /*
         * If we have a "break" in the series, we need to flush the state
         * that we have now. "break" is either changing perms, levels or
         * address space marker.
         */
        prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
        cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;

        if (!st->level) {
                /* First entry */
                st->current_prot = new_prot;
                st->level = level;
                st->marker = address_markers;
                st->lines = 0;
                pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
                                   st->marker->name);
        } else if (prot != cur || level != st->level ||
                   st->current_address >= st->marker[1].start_address) {
                const char *unit = units;
                unsigned long delta;
                int width = sizeof(unsigned long) * 2;

                /*
                 * Now print the actual finished series
                 */
                if (!st->marker->max_lines ||
                    st->lines < st->marker->max_lines) {
                        pt_dump_seq_printf(m, st->to_dmesg,
                                           "0x%0*lx-0x%0*lx   ",
                                           width, st->start_address,
                                           width, st->current_address);

                        delta = st->current_address - st->start_address;
                        while (!(delta & 1023) && unit[1]) {
                                delta >>= 10;
                                unit++;
                        }
                        pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
                                            delta, *unit);
                        printk_prot(m, st->current_prot, st->level,
                                    st->to_dmesg);
                }
                st->lines++;

                /*
                 * We print markers for special areas of address space,
                 * such as the start of vmalloc space etc.
                 * This helps in the interpretation.
                 */
                if (st->current_address >= st->marker[1].start_address) {
                        if (st->marker->max_lines &&
                            st->lines > st->marker->max_lines) {
                                unsigned long nskip =
                                        st->lines - st->marker->max_lines;
                                pt_dump_seq_printf(m, st->to_dmesg,
                                                   "... %lu entr%s skipped ... \n",
                                                   nskip,
                                                   nskip == 1 ? "y" : "ies");
                        }
                        st->marker++;
                        st->lines = 0;
                        pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
                                           st->marker->name);
                }

                st->start_address = st->current_address;
                st->current_prot = new_prot;
                st->level = level;
        }
}
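
/*
 * The lines flushed above come out roughly like this (illustrative
 * values only; real addresses, sizes and attributes depend on the
 * machine and kernel configuration):
 *
 *   ---[ High Kernel Mapping ]---
 *   0xffffffff81000000-0xffffffff81a00000    10M  ro  PSE GLB x  pmd
 *
 * i.e. a virtual address range, its size, the decoded protection bits
 * from printk_prot() and the level at which the mapping was found.
 */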

static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
                           unsigned long P)
{
        int i;
        pte_t *start;

        start = (pte_t *) pmd_page_vaddr(addr);
        for (i = 0; i < PTRS_PER_PTE; i++) {
                pgprot_t prot = pte_pgprot(*start);

                st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
                note_page(m, st, prot, 4);
                start++;
        }
}

#if PTRS_PER_PMD > 1

static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
                           unsigned long P)
{
        int i;
        pmd_t *start;

        start = (pmd_t *) pud_page_vaddr(addr);
        for (i = 0; i < PTRS_PER_PMD; i++) {
                st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
                if (!pmd_none(*start)) {
                        pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK;

                        if (pmd_large(*start) || !pmd_present(*start))
                                note_page(m, st, __pgprot(prot), 3);
                        else
                                walk_pte_level(m, st, *start,
                                               P + i * PMD_LEVEL_MULT);
                } else
                        note_page(m, st, __pgprot(0), 3);
                start++;
        }
}

#else
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif

#if PTRS_PER_PUD > 1

static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
                           unsigned long P)
{
        int i;
        pud_t *start;

        start = (pud_t *) pgd_page_vaddr(addr);

        for (i = 0; i < PTRS_PER_PUD; i++) {
                st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
                if (!pud_none(*start)) {
                        pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK;

                        if (pud_large(*start) || !pud_present(*start))
                                note_page(m, st, __pgprot(prot), 2);
                        else
                                walk_pmd_level(m, st, *start,
                                               P + i * PUD_LEVEL_MULT);
                } else
                        note_page(m, st, __pgprot(0), 2);

                start++;
        }
}

#else
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif

void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
#ifdef CONFIG_X86_64
        pgd_t *start = (pgd_t *) &init_level4_pgt;
#else
        pgd_t *start = swapper_pg_dir;
#endif
        int i;
        struct pg_state st = {};

        if (pgd) {
                start = pgd;
                st.to_dmesg = true;
        }

        for (i = 0; i < PTRS_PER_PGD; i++) {
                st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
                if (!pgd_none(*start)) {
                        pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK;

                        if (pgd_large(*start) || !pgd_present(*start))
                                note_page(m, &st, __pgprot(prot), 1);
                        else
                                walk_pud_level(m, &st, *start,
                                               i * PGD_LEVEL_MULT);
                } else
                        note_page(m, &st, __pgprot(0), 1);

                start++;
        }

        /* Flush out the last page */
        st.current_address = normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT);
        note_page(m, &st, __pgprot(0), 0);
}

static int ptdump_show(struct seq_file *m, void *v)
{
        ptdump_walk_pgd_level(m, NULL);
        return 0;
}

static int ptdump_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
        .open           = ptdump_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int pt_dump_init(void)
{
        struct dentry *pe;

#ifdef CONFIG_X86_32
        /* Not a compile-time constant on x86-32 */
        address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
        address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
#endif

        pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
                                 &ptdump_fops);
        if (!pe)
                return -ENOMEM;

        return 0;
}

__initcall(pt_dump_init);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
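
/*
 * Typical usage, as a sketch (assuming debugfs is mounted at the
 * conventional location and this dumper is built into the kernel):
 *
 *   # mount -t debugfs none /sys/kernel/debug      (if not yet mounted)
 *   # cat /sys/kernel/debug/kernel_page_tables
 *
 * ptdump_walk_pgd_level() may also be called directly with a non-NULL
 * pgd, in which case the dump is sent to the kernel log instead of the
 * seq_file.
 */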