1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Debug helper to dump the current kernel pagetables of the system 4 * so that we can see what the various memory ranges are set to. 5 * 6 * (C) Copyright 2008 Intel Corporation 7 * 8 * Author: Arjan van de Ven <arjan@linux.intel.com> 9 */ 10 11 #include <linux/debugfs.h> 12 #include <linux/kasan.h> 13 #include <linux/mm.h> 14 #include <linux/init.h> 15 #include <linux/sched.h> 16 #include <linux/seq_file.h> 17 #include <linux/highmem.h> 18 #include <linux/pci.h> 19 #include <linux/ptdump.h> 20 21 #include <asm/e820/types.h> 22 #include <asm/pgtable.h> 23 24 /* 25 * The dumper groups pagetable entries of the same type into one, and for 26 * that it needs to keep some state when walking, and flush this state 27 * when a "break" in the continuity is found. 28 */ 29 struct pg_state { 30 struct ptdump_state ptdump; 31 int level; 32 pgprotval_t current_prot; 33 pgprotval_t effective_prot; 34 pgprotval_t prot_levels[5]; 35 unsigned long start_address; 36 const struct addr_marker *marker; 37 unsigned long lines; 38 bool to_dmesg; 39 bool check_wx; 40 unsigned long wx_pages; 41 struct seq_file *seq; 42 }; 43 44 struct addr_marker { 45 unsigned long start_address; 46 const char *name; 47 unsigned long max_lines; 48 }; 49 50 /* Address space markers hints */ 51 52 #ifdef CONFIG_X86_64 53 54 enum address_markers_idx { 55 USER_SPACE_NR = 0, 56 KERNEL_SPACE_NR, 57 #ifdef CONFIG_MODIFY_LDT_SYSCALL 58 LDT_NR, 59 #endif 60 LOW_KERNEL_NR, 61 VMALLOC_START_NR, 62 VMEMMAP_START_NR, 63 #ifdef CONFIG_KASAN 64 KASAN_SHADOW_START_NR, 65 KASAN_SHADOW_END_NR, 66 #endif 67 CPU_ENTRY_AREA_NR, 68 #ifdef CONFIG_X86_ESPFIX64 69 ESPFIX_START_NR, 70 #endif 71 #ifdef CONFIG_EFI 72 EFI_END_NR, 73 #endif 74 HIGH_KERNEL_NR, 75 MODULES_VADDR_NR, 76 MODULES_END_NR, 77 FIXADDR_START_NR, 78 END_OF_SPACE_NR, 79 }; 80 81 static struct addr_marker address_markers[] = { 82 [USER_SPACE_NR] = { 0, "User Space" }, 83 [KERNEL_SPACE_NR] = { (1UL << 63), "Kernel Space" }, 84 [LOW_KERNEL_NR] = { 0UL, "Low Kernel Mapping" }, 85 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, 86 [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, 87 #ifdef CONFIG_KASAN 88 /* 89 * These fields get initialized with the (dynamic) 90 * KASAN_SHADOW_{START,END} values in pt_dump_init(). 91 */ 92 [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" }, 93 [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" }, 94 #endif 95 #ifdef CONFIG_MODIFY_LDT_SYSCALL 96 [LDT_NR] = { 0UL, "LDT remap" }, 97 #endif 98 [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, 99 #ifdef CONFIG_X86_ESPFIX64 100 [ESPFIX_START_NR] = { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, 101 #endif 102 #ifdef CONFIG_EFI 103 [EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" }, 104 #endif 105 [HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" }, 106 [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" }, 107 [MODULES_END_NR] = { MODULES_END, "End Modules" }, 108 [FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" }, 109 [END_OF_SPACE_NR] = { -1, NULL } 110 }; 111 112 #define INIT_PGD ((pgd_t *) &init_top_pgt) 113 114 #else /* CONFIG_X86_64 */ 115 116 enum address_markers_idx { 117 USER_SPACE_NR = 0, 118 KERNEL_SPACE_NR, 119 VMALLOC_START_NR, 120 VMALLOC_END_NR, 121 #ifdef CONFIG_HIGHMEM 122 PKMAP_BASE_NR, 123 #endif 124 #ifdef CONFIG_MODIFY_LDT_SYSCALL 125 LDT_NR, 126 #endif 127 CPU_ENTRY_AREA_NR, 128 FIXADDR_START_NR, 129 END_OF_SPACE_NR, 130 }; 131 132 static struct addr_marker address_markers[] = { 133 [USER_SPACE_NR] = { 0, "User Space" }, 134 [KERNEL_SPACE_NR] = { PAGE_OFFSET, "Kernel Mapping" }, 135 [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, 136 [VMALLOC_END_NR] = { 0UL, "vmalloc() End" }, 137 #ifdef CONFIG_HIGHMEM 138 [PKMAP_BASE_NR] = { 0UL, "Persistent kmap() Area" }, 139 #endif 140 #ifdef CONFIG_MODIFY_LDT_SYSCALL 141 [LDT_NR] = { 0UL, "LDT remap" }, 142 #endif 143 [CPU_ENTRY_AREA_NR] = { 0UL, "CPU entry area" }, 144 [FIXADDR_START_NR] = { 0UL, "Fixmap area" }, 145 [END_OF_SPACE_NR] = { -1, NULL } 146 }; 147 148 #define INIT_PGD (swapper_pg_dir) 149 150 #endif /* !CONFIG_X86_64 */ 151 152 /* Multipliers for offsets within the PTEs */ 153 #define PTE_LEVEL_MULT (PAGE_SIZE) 154 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT) 155 #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) 156 #define P4D_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) 157 #define PGD_LEVEL_MULT (PTRS_PER_P4D * P4D_LEVEL_MULT) 158 159 #define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \ 160 ({ \ 161 if (to_dmesg) \ 162 printk(KERN_INFO fmt, ##args); \ 163 else \ 164 if (m) \ 165 seq_printf(m, fmt, ##args); \ 166 }) 167 168 #define pt_dump_cont_printf(m, to_dmesg, fmt, args...) \ 169 ({ \ 170 if (to_dmesg) \ 171 printk(KERN_CONT fmt, ##args); \ 172 else \ 173 if (m) \ 174 seq_printf(m, fmt, ##args); \ 175 }) 176 177 /* 178 * Print a readable form of a pgprot_t to the seq_file 179 */ 180 static void printk_prot(struct seq_file *m, pgprotval_t pr, int level, bool dmsg) 181 { 182 static const char * const level_name[] = 183 { "pgd", "p4d", "pud", "pmd", "pte" }; 184 185 if (!(pr & _PAGE_PRESENT)) { 186 /* Not present */ 187 pt_dump_cont_printf(m, dmsg, " "); 188 } else { 189 if (pr & _PAGE_USER) 190 pt_dump_cont_printf(m, dmsg, "USR "); 191 else 192 pt_dump_cont_printf(m, dmsg, " "); 193 if (pr & _PAGE_RW) 194 pt_dump_cont_printf(m, dmsg, "RW "); 195 else 196 pt_dump_cont_printf(m, dmsg, "ro "); 197 if (pr & _PAGE_PWT) 198 pt_dump_cont_printf(m, dmsg, "PWT "); 199 else 200 pt_dump_cont_printf(m, dmsg, " "); 201 if (pr & _PAGE_PCD) 202 pt_dump_cont_printf(m, dmsg, "PCD "); 203 else 204 pt_dump_cont_printf(m, dmsg, " "); 205 206 /* Bit 7 has a different meaning on level 3 vs 4 */ 207 if (level <= 3 && pr & _PAGE_PSE) 208 pt_dump_cont_printf(m, dmsg, "PSE "); 209 else 210 pt_dump_cont_printf(m, dmsg, " "); 211 if ((level == 4 && pr & _PAGE_PAT) || 212 ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) 213 pt_dump_cont_printf(m, dmsg, "PAT "); 214 else 215 pt_dump_cont_printf(m, dmsg, " "); 216 if (pr & _PAGE_GLOBAL) 217 pt_dump_cont_printf(m, dmsg, "GLB "); 218 else 219 pt_dump_cont_printf(m, dmsg, " "); 220 if (pr & _PAGE_NX) 221 pt_dump_cont_printf(m, dmsg, "NX "); 222 else 223 pt_dump_cont_printf(m, dmsg, "x "); 224 } 225 pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); 226 } 227 228 static void note_wx(struct pg_state *st, unsigned long addr) 229 { 230 unsigned long npages; 231 232 npages = (addr - st->start_address) / PAGE_SIZE; 233 234 #ifdef CONFIG_PCI_BIOS 235 /* 236 * If PCI BIOS is enabled, the PCI BIOS area is forced to WX. 237 * Inform about it, but avoid the warning. 238 */ 239 if (pcibios_enabled && st->start_address >= PAGE_OFFSET + BIOS_BEGIN && 240 addr <= PAGE_OFFSET + BIOS_END) { 241 pr_warn_once("x86/mm: PCI BIOS W+X mapping %lu pages\n", npages); 242 return; 243 } 244 #endif 245 /* Account the WX pages */ 246 st->wx_pages += npages; 247 WARN_ONCE(__supported_pte_mask & _PAGE_NX, 248 "x86/mm: Found insecure W+X mapping at address %pS\n", 249 (void *)st->start_address); 250 } 251 252 static void effective_prot(struct ptdump_state *pt_st, int level, u64 val) 253 { 254 struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); 255 pgprotval_t prot = val & PTE_FLAGS_MASK; 256 pgprotval_t effective; 257 258 if (level > 0) { 259 pgprotval_t higher_prot = st->prot_levels[level - 1]; 260 261 effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) | 262 ((higher_prot | prot) & _PAGE_NX); 263 } else { 264 effective = prot; 265 } 266 267 st->prot_levels[level] = effective; 268 } 269 270 /* 271 * This function gets called on a break in a continuous series 272 * of PTE entries; the next one is different so we need to 273 * print what we collected so far. 274 */ 275 static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, 276 u64 val) 277 { 278 struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); 279 pgprotval_t new_prot, new_eff; 280 pgprotval_t cur, eff; 281 static const char units[] = "BKMGTPE"; 282 struct seq_file *m = st->seq; 283 284 new_prot = val & PTE_FLAGS_MASK; 285 if (!val) 286 new_eff = 0; 287 else 288 new_eff = st->prot_levels[level]; 289 290 /* 291 * If we have a "break" in the series, we need to flush the state that 292 * we have now. "break" is either changing perms, levels or 293 * address space marker. 294 */ 295 cur = st->current_prot; 296 eff = st->effective_prot; 297 298 if (st->level == -1) { 299 /* First entry */ 300 st->current_prot = new_prot; 301 st->effective_prot = new_eff; 302 st->level = level; 303 st->marker = address_markers; 304 st->lines = 0; 305 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", 306 st->marker->name); 307 } else if (new_prot != cur || new_eff != eff || level != st->level || 308 addr >= st->marker[1].start_address) { 309 const char *unit = units; 310 unsigned long delta; 311 int width = sizeof(unsigned long) * 2; 312 313 if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) 314 note_wx(st, addr); 315 316 /* 317 * Now print the actual finished series 318 */ 319 if (!st->marker->max_lines || 320 st->lines < st->marker->max_lines) { 321 pt_dump_seq_printf(m, st->to_dmesg, 322 "0x%0*lx-0x%0*lx ", 323 width, st->start_address, 324 width, addr); 325 326 delta = addr - st->start_address; 327 while (!(delta & 1023) && unit[1]) { 328 delta >>= 10; 329 unit++; 330 } 331 pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", 332 delta, *unit); 333 printk_prot(m, st->current_prot, st->level, 334 st->to_dmesg); 335 } 336 st->lines++; 337 338 /* 339 * We print markers for special areas of address space, 340 * such as the start of vmalloc space etc. 341 * This helps in the interpretation. 342 */ 343 if (addr >= st->marker[1].start_address) { 344 if (st->marker->max_lines && 345 st->lines > st->marker->max_lines) { 346 unsigned long nskip = 347 st->lines - st->marker->max_lines; 348 pt_dump_seq_printf(m, st->to_dmesg, 349 "... %lu entr%s skipped ... \n", 350 nskip, 351 nskip == 1 ? "y" : "ies"); 352 } 353 st->marker++; 354 st->lines = 0; 355 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", 356 st->marker->name); 357 } 358 359 st->start_address = addr; 360 st->current_prot = new_prot; 361 st->effective_prot = new_eff; 362 st->level = level; 363 } 364 } 365 366 static void ptdump_walk_pgd_level_core(struct seq_file *m, 367 struct mm_struct *mm, pgd_t *pgd, 368 bool checkwx, bool dmesg) 369 { 370 const struct ptdump_range ptdump_ranges[] = { 371 #ifdef CONFIG_X86_64 372 {0, PTRS_PER_PGD * PGD_LEVEL_MULT / 2}, 373 {GUARD_HOLE_END_ADDR, ~0UL}, 374 #else 375 {0, ~0UL}, 376 #endif 377 {0, 0} 378 }; 379 380 struct pg_state st = { 381 .ptdump = { 382 .note_page = note_page, 383 .effective_prot = effective_prot, 384 .range = ptdump_ranges 385 }, 386 .level = -1, 387 .to_dmesg = dmesg, 388 .check_wx = checkwx, 389 .seq = m 390 }; 391 392 ptdump_walk_pgd(&st.ptdump, mm, pgd); 393 394 if (!checkwx) 395 return; 396 if (st.wx_pages) 397 pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", 398 st.wx_pages); 399 else 400 pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); 401 } 402 403 void ptdump_walk_pgd_level(struct seq_file *m, struct mm_struct *mm) 404 { 405 ptdump_walk_pgd_level_core(m, mm, mm->pgd, false, true); 406 } 407 408 void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, 409 bool user) 410 { 411 pgd_t *pgd = mm->pgd; 412 #ifdef CONFIG_PAGE_TABLE_ISOLATION 413 if (user && boot_cpu_has(X86_FEATURE_PTI)) 414 pgd = kernel_to_user_pgdp(pgd); 415 #endif 416 ptdump_walk_pgd_level_core(m, mm, pgd, false, false); 417 } 418 EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs); 419 420 void ptdump_walk_user_pgd_level_checkwx(void) 421 { 422 #ifdef CONFIG_PAGE_TABLE_ISOLATION 423 pgd_t *pgd = INIT_PGD; 424 425 if (!(__supported_pte_mask & _PAGE_NX) || 426 !boot_cpu_has(X86_FEATURE_PTI)) 427 return; 428 429 pr_info("x86/mm: Checking user space page tables\n"); 430 pgd = kernel_to_user_pgdp(pgd); 431 ptdump_walk_pgd_level_core(NULL, &init_mm, pgd, true, false); 432 #endif 433 } 434 435 void ptdump_walk_pgd_level_checkwx(void) 436 { 437 ptdump_walk_pgd_level_core(NULL, &init_mm, INIT_PGD, true, false); 438 } 439 440 static int __init pt_dump_init(void) 441 { 442 /* 443 * Various markers are not compile-time constants, so assign them 444 * here. 445 */ 446 #ifdef CONFIG_X86_64 447 address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; 448 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; 449 address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; 450 #ifdef CONFIG_MODIFY_LDT_SYSCALL 451 address_markers[LDT_NR].start_address = LDT_BASE_ADDR; 452 #endif 453 #ifdef CONFIG_KASAN 454 address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; 455 address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; 456 #endif 457 #endif 458 #ifdef CONFIG_X86_32 459 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; 460 address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; 461 # ifdef CONFIG_HIGHMEM 462 address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; 463 # endif 464 address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; 465 address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE; 466 # ifdef CONFIG_MODIFY_LDT_SYSCALL 467 address_markers[LDT_NR].start_address = LDT_BASE_ADDR; 468 # endif 469 #endif 470 return 0; 471 } 472 __initcall(pt_dump_init); 473