/*
 * Debug helper to dump the current kernel pagetables of the system
 * so that we can see what the various memory ranges are set to.
 *
 * (C) Copyright 2008 Intel Corporation
 *
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#include <linux/debugfs.h>
#include <linux/kasan.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/seq_file.h>

#include <asm/pgtable.h>

/*
 * The dumper groups pagetable entries of the same type into one, and for
 * that it needs to keep some state when walking, and flush this state
 * when a "break" in the continuity is found.
 */
struct pg_state {
	int level;
	pgprot_t current_prot;
	unsigned long start_address;
	unsigned long current_address;
	const struct addr_marker *marker;
	unsigned long lines;
	bool to_dmesg;
	bool check_wx;
	unsigned long wx_pages;
};

struct addr_marker {
	unsigned long start_address;
	const char *name;
	unsigned long max_lines;
};

/* Address space marker hints */

#ifdef CONFIG_X86_64

enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	LOW_KERNEL_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
	LDT_NR,
#endif
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
	KASAN_SHADOW_START_NR,
	KASAN_SHADOW_END_NR,
#endif
	CPU_ENTRY_AREA_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
	LDT_NR,
#endif
#ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
#endif
#ifdef CONFIG_EFI
	EFI_END_NR,
#endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};

static struct addr_marker address_markers[] = {
	[USER_SPACE_NR]		= { 0,			"User Space" },
	[KERNEL_SPACE_NR]	= { (1UL << 63),	"Kernel Space" },
	[LOW_KERNEL_NR]		= { 0UL,		"Low Kernel Mapping" },
	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
	[VMEMMAP_START_NR]	= { 0UL,		"Vmemmap" },
#ifdef CONFIG_KASAN
	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START,	"KASAN shadow" },
	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END,	"KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
	[LDT_NR]		= { LDT_BASE_ADDR,	"LDT remap" },
#endif
	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE, "CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
	[ESPFIX_START_NR]	= { ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
#endif
#ifdef CONFIG_EFI
	[EFI_END_NR]		= { EFI_VA_END,		"EFI Runtime Services" },
#endif
	[HIGH_KERNEL_NR]	= { __START_KERNEL_map,	"High Kernel Mapping" },
	[MODULES_VADDR_NR]	= { MODULES_VADDR,	"Modules" },
	[MODULES_END_NR]	= { MODULES_END,	"End Modules" },
	[FIXADDR_START_NR]	= { FIXADDR_START,	"Fixmap Area" },
	[END_OF_SPACE_NR]	= { -1,			NULL }
};

#else /* CONFIG_X86_64 */

enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
#ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
#endif
	CPU_ENTRY_AREA_NR,
	FIXADDR_START_NR,
	END_OF_SPACE_NR,
};

static struct addr_marker address_markers[] = {
	[USER_SPACE_NR]		= { 0,			"User Space" },
	[KERNEL_SPACE_NR]	= { PAGE_OFFSET,	"Kernel Mapping" },
	[VMALLOC_START_NR]	= { 0UL,		"vmalloc() Area" },
	[VMALLOC_END_NR]	= { 0UL,		"vmalloc() End" },
#ifdef CONFIG_HIGHMEM
	[PKMAP_BASE_NR]		= { 0UL,		"Persistent kmap() Area" },
#endif
	[CPU_ENTRY_AREA_NR]	= { 0UL,		"CPU entry area" },
	[FIXADDR_START_NR]	= { 0UL,		"Fixmap area" },
	[END_OF_SPACE_NR]	= { -1,			NULL }
};

#endif /* !CONFIG_X86_64 */
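
/*
 * How the marker tables above are consumed (see note_page() below): the
 * walker carries st->marker through the array and, each time
 * st->current_address crosses marker[1].start_address, it prints a
 * "---[ name ]---" header and advances to the next marker.  A non-zero
 * max_lines (e.g. 16 for the ESPfix area) caps how many ranges get
 * printed for a region; the surplus is summarized as
 * "... N entries skipped ...".
 */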

/* Multipliers for offsets within the PTEs */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT)

#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg)						\
		printk(KERN_INFO fmt, ##args);			\
	else							\
		if (m)						\
			seq_printf(m, fmt, ##args);		\
})

#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg)						\
		printk(KERN_CONT fmt, ##args);			\
	else							\
		if (m)						\
			seq_printf(m, fmt, ##args);		\
})

/*
 * Print a readable form of a pgprot_t to the seq_file
 */
static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
{
	pgprotval_t pr = pgprot_val(prot);
	static const char * const level_name[] =
		{ "cr3", "pgd", "p4d", "pud", "pmd", "pte" };

	if (!(pr & _PAGE_PRESENT)) {
		/* Not present */
		pt_dump_cont_printf(m, dmsg, "                              ");
	} else {
		if (pr & _PAGE_USER)
			pt_dump_cont_printf(m, dmsg, "USR ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_RW)
			pt_dump_cont_printf(m, dmsg, "RW ");
		else
			pt_dump_cont_printf(m, dmsg, "ro ");
		if (pr & _PAGE_PWT)
			pt_dump_cont_printf(m, dmsg, "PWT ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_PCD)
			pt_dump_cont_printf(m, dmsg, "PCD ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");

		/* Bit 7 is PSE on the pmd/pud levels, but PAT on the pte level */
		if (level <= 4 && pr & _PAGE_PSE)
			pt_dump_cont_printf(m, dmsg, "PSE ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if ((level == 5 && pr & _PAGE_PAT) ||
		    ((level == 4 || level == 3) && pr & _PAGE_PAT_LARGE))
			pt_dump_cont_printf(m, dmsg, "PAT ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_GLOBAL)
			pt_dump_cont_printf(m, dmsg, "GLB ");
		else
			pt_dump_cont_printf(m, dmsg, "    ");
		if (pr & _PAGE_NX)
			pt_dump_cont_printf(m, dmsg, "NX ");
		else
			pt_dump_cont_printf(m, dmsg, "x  ");
	}
	pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]);
}

/*
 * On 64 bits, sign-extend the virtual address to 64 bit; the address is
 * __VIRTUAL_MASK_SHIFT + 1 bits wide (48 with 4-level paging, 57 with
 * 5-level paging).
 */
static unsigned long normalize_addr(unsigned long u)
{
	int shift;

	if (!IS_ENABLED(CONFIG_X86_64))
		return u;

	shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	return (signed long)(u << shift) >> shift;
}
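
/*
 * A worked example of the sign extension above, assuming 4-level paging
 * (__VIRTUAL_MASK_SHIFT == 47, so shift == 16):
 *
 *	normalize_addr(0x0000800000000000)
 *		== (signed long)0x8000000000000000 >> 16
 *		== 0xffff800000000000
 *
 * i.e. the first index past the user half of the address space folds onto
 * the bottom of the canonical kernel half, which is exactly what the
 * PGD-level walk below relies on.
 */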

/*
 * This function gets called on a break in a continuous series
 * of PTE entries; the next one is different so we need to
 * print what we collected so far.
 */
static void note_page(struct seq_file *m, struct pg_state *st,
		      pgprot_t new_prot, int level)
{
	pgprotval_t prot, cur;
	static const char units[] = "BKMGTPE";

	/*
	 * If we have a "break" in the series, we need to flush the state that
	 * we have now. "break" is either changing perms, levels or
	 * address space marker.
	 */
	prot = pgprot_val(new_prot);
	cur = pgprot_val(st->current_prot);

	if (!st->level) {
		/* First entry */
		st->current_prot = new_prot;
		st->level = level;
		st->marker = address_markers;
		st->lines = 0;
		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
				   st->marker->name);
	} else if (prot != cur || level != st->level ||
		   st->current_address >= st->marker[1].start_address) {
		const char *unit = units;
		unsigned long delta;
		int width = sizeof(unsigned long) * 2;
		pgprotval_t pr = pgprot_val(st->current_prot);

		if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
			WARN_ONCE(1,
				  "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
				  (void *)st->start_address,
				  (void *)st->start_address);
			st->wx_pages += (st->current_address -
					 st->start_address) / PAGE_SIZE;
		}

		/*
		 * Now print the actual finished series
		 */
		if (!st->marker->max_lines ||
		    st->lines < st->marker->max_lines) {
			pt_dump_seq_printf(m, st->to_dmesg,
					   "0x%0*lx-0x%0*lx ",
					   width, st->start_address,
					   width, st->current_address);

			delta = st->current_address - st->start_address;
			while (!(delta & 1023) && unit[1]) {
				delta >>= 10;
				unit++;
			}
			pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
					    delta, *unit);
			printk_prot(m, st->current_prot, st->level,
				    st->to_dmesg);
		}
		st->lines++;

		/*
		 * We print markers for special areas of address space,
		 * such as the start of vmalloc space etc.
		 * This helps in the interpretation.
		 */
		if (st->current_address >= st->marker[1].start_address) {
			if (st->marker->max_lines &&
			    st->lines > st->marker->max_lines) {
				unsigned long nskip =
					st->lines - st->marker->max_lines;
				pt_dump_seq_printf(m, st->to_dmesg,
						   "... %lu entr%s skipped ...\n",
						   nskip,
						   nskip == 1 ? "y" : "ies");
			}
			st->marker++;
			st->lines = 0;
			pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
					   st->marker->name);
		}

		st->start_address = st->current_address;
		st->current_prot = new_prot;
		st->level = level;
	}
}

static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
			   unsigned long P)
{
	int i;
	pte_t *start;
	pgprotval_t prot;

	start = (pte_t *)pmd_page_vaddr(addr);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		prot = pte_flags(*start);
		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
		note_page(m, st, __pgprot(prot), 5);
		start++;
	}
}
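
/*
 * Illustrative (not byte-exact) output for one flushed series, following
 * the format strings used above: address range, human-readable size,
 * decoded protection bits and the level name from printk_prot():
 *
 *	---[ High Kernel Mapping ]---
 *	0xffffffff81000000-0xffffffff81e00000    14M ro PSE GLB x pmd
 *
 * The size reads 14M because the 0xe00000-byte delta keeps dividing by
 * 1024 while it remains a whole multiple of it (14680064B -> 14336K -> 14M).
 */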

#ifdef CONFIG_KASAN

/*
 * This is an optimization for the KASAN=y case. Since all KASAN page
 * tables eventually point to the kasan_zero_page, we can call note_page()
 * right away without walking through the lower-level page tables. This
 * saves us dozens of seconds (minutes for a 5-level config) while checking
 * for W+X mappings or reading the kernel_page_tables debugfs file.
 */
static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
				    void *pt)
{
	if (__pa(pt) == __pa(kasan_zero_pmd) ||
#ifdef CONFIG_X86_5LEVEL
	    __pa(pt) == __pa(kasan_zero_p4d) ||
#endif
	    __pa(pt) == __pa(kasan_zero_pud)) {
		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
		note_page(m, st, __pgprot(prot), 5);
		return true;
	}
	return false;
}
#else
static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
				    void *pt)
{
	return false;
}
#endif

#if PTRS_PER_PMD > 1

static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	int i;
	pmd_t *start, *pmd_start;
	pgprotval_t prot;

	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
	for (i = 0; i < PTRS_PER_PMD; i++) {
		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
		if (!pmd_none(*start)) {
			if (pmd_large(*start) || !pmd_present(*start)) {
				prot = pmd_flags(*start);
				note_page(m, st, __pgprot(prot), 4);
			} else if (!kasan_page_table(m, st, pmd_start)) {
				walk_pte_level(m, st, *start,
					       P + i * PMD_LEVEL_MULT);
			}
		} else
			note_page(m, st, __pgprot(0), 4);
		start++;
	}
}

#else
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif

#if PTRS_PER_PUD > 1

static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
			   unsigned long P)
{
	int i;
	pud_t *start, *pud_start;
	pgprotval_t prot;

	pud_start = start = (pud_t *)p4d_page_vaddr(addr);

	for (i = 0; i < PTRS_PER_PUD; i++) {
		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
		if (!pud_none(*start)) {
			if (pud_large(*start) || !pud_present(*start)) {
				prot = pud_flags(*start);
				note_page(m, st, __pgprot(prot), 3);
			} else if (!kasan_page_table(m, st, pud_start)) {
				walk_pmd_level(m, st, *start,
					       P + i * PUD_LEVEL_MULT);
			}
		} else
			note_page(m, st, __pgprot(0), 3);

		start++;
	}
}

#else
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
#define p4d_none(a)  pud_none(__pud(p4d_val(a)))
#endif

#if PTRS_PER_P4D > 1

static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	int i;
	p4d_t *start, *p4d_start;
	pgprotval_t prot;

	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);

	for (i = 0; i < PTRS_PER_P4D; i++) {
		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
		if (!p4d_none(*start)) {
			if (p4d_large(*start) || !p4d_present(*start)) {
				prot = p4d_flags(*start);
				note_page(m, st, __pgprot(prot), 2);
			} else if (!kasan_page_table(m, st, p4d_start)) {
				walk_pud_level(m, st, *start,
					       P + i * P4D_LEVEL_MULT);
			}
		} else
			note_page(m, st, __pgprot(0), 2);

		start++;
	}
}

#else
#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
#define pgd_none(a)  p4d_none(__p4d(pgd_val(a)))
#endif
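
/*
 * Note on the fallback macros above: when a paging level is folded away
 * at compile time (PTRS_PER_P4D == 1 on 4-level kernels, PTRS_PER_PUD ==
 * PTRS_PER_PMD == 1 on 2-level 32-bit non-PAE), the corresponding
 * walk_*_level() collapses into a cast-and-forward to the next level
 * down, so the dump only ever shows levels the hardware actually walks.
 */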

static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
	/*
	 * ffff800000000000 - ffff87ffffffffff is reserved for
	 * the hypervisor.
	 */
	return	(idx >= pgd_index(__PAGE_OFFSET) - 16) &&
		(idx <  pgd_index(__PAGE_OFFSET));
#else
	return false;
#endif
}

static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
				       bool checkwx, bool dmesg)
{
#ifdef CONFIG_X86_64
	pgd_t *start = (pgd_t *) &init_top_pgt;
#else
	pgd_t *start = swapper_pg_dir;
#endif
	pgprotval_t prot;
	int i;
	struct pg_state st = {};

	if (pgd) {
		start = pgd;
		st.to_dmesg = dmesg;
	}

	st.check_wx = checkwx;
	if (checkwx)
		st.wx_pages = 0;

	for (i = 0; i < PTRS_PER_PGD; i++) {
		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
		if (!pgd_none(*start) && !is_hypervisor_range(i)) {
			if (pgd_large(*start) || !pgd_present(*start)) {
				prot = pgd_flags(*start);
				note_page(m, &st, __pgprot(prot), 1);
			} else {
				walk_p4d_level(m, &st, *start,
					       i * PGD_LEVEL_MULT);
			}
		} else
			note_page(m, &st, __pgprot(0), 1);

		cond_resched();
		start++;
	}

	/* Flush out the last page */
	st.current_address = normalize_addr(PTRS_PER_PGD * PGD_LEVEL_MULT);
	note_page(m, &st, __pgprot(0), 0);
	if (!checkwx)
		return;
	if (st.wx_pages)
		pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n",
			st.wx_pages);
	else
		pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n");
}

void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
	ptdump_walk_pgd_level_core(m, pgd, false, true);
}

void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	if (user && static_cpu_has(X86_FEATURE_PTI))
		pgd = kernel_to_user_pgdp(pgd);
#endif
	ptdump_walk_pgd_level_core(m, pgd, false, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);

static void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
	pgd_t *pgd = (pgd_t *) &init_top_pgt;

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	pr_info("x86/mm: Checking user space page tables\n");
	pgd = kernel_to_user_pgdp(pgd);
	ptdump_walk_pgd_level_core(NULL, pgd, true, false);
#endif
}

void ptdump_walk_pgd_level_checkwx(void)
{
	ptdump_walk_pgd_level_core(NULL, NULL, true, false);
	ptdump_walk_user_pgd_level_checkwx();
}

static int __init pt_dump_init(void)
{
	/*
	 * Various markers are not compile-time constants, so assign them
	 * here.
	 */
#ifdef CONFIG_X86_64
	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
#endif
#ifdef CONFIG_X86_32
	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
	address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
	address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
	address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
#endif
	return 0;
}
__initcall(pt_dump_init);
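
/*
 * Usage sketch (the plumbing lives outside this file, so the details here
 * are indicative rather than authoritative): with CONFIG_X86_PTDUMP, the
 * companion debugfs code (e.g. arch/x86/mm/debug_pagetables.c) exposes the
 * kernel_page_tables entry, whose read handler ends up in
 * ptdump_walk_pgd_level_debugfs() above, while CONFIG_DEBUG_WX has the
 * kernel call ptdump_walk_pgd_level_checkwx() once late in boot to warn
 * about any W+X mappings left in the kernel page tables.
 */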