1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright 2016, Rashmica Gupta, IBM Corp. 4 * 5 * This traverses the kernel virtual memory and dumps the pages that are in 6 * the hash pagetable, along with their flags to 7 * /sys/kernel/debug/kernel_hash_pagetable. 8 * 9 * If radix is enabled then there is no hash page table and so no debugfs file 10 * is generated. 11 */ 12 #include <linux/debugfs.h> 13 #include <linux/fs.h> 14 #include <linux/io.h> 15 #include <linux/mm.h> 16 #include <linux/sched.h> 17 #include <linux/seq_file.h> 18 #include <asm/pgtable.h> 19 #include <linux/const.h> 20 #include <asm/page.h> 21 #include <asm/pgalloc.h> 22 #include <asm/plpar_wrappers.h> 23 #include <linux/memblock.h> 24 #include <asm/firmware.h> 25 26 struct pg_state { 27 struct seq_file *seq; 28 const struct addr_marker *marker; 29 unsigned long start_address; 30 unsigned int level; 31 u64 current_flags; 32 }; 33 34 struct addr_marker { 35 unsigned long start_address; 36 const char *name; 37 }; 38 39 static struct addr_marker address_markers[] = { 40 { 0, "Start of kernel VM" }, 41 { 0, "vmalloc() Area" }, 42 { 0, "vmalloc() End" }, 43 { 0, "isa I/O start" }, 44 { 0, "isa I/O end" }, 45 { 0, "phb I/O start" }, 46 { 0, "phb I/O end" }, 47 { 0, "I/O remap start" }, 48 { 0, "I/O remap end" }, 49 { 0, "vmemmap start" }, 50 { -1, NULL }, 51 }; 52 53 struct flag_info { 54 u64 mask; 55 u64 val; 56 const char *set; 57 const char *clear; 58 bool is_val; 59 int shift; 60 }; 61 62 static const struct flag_info v_flag_array[] = { 63 { 64 .mask = SLB_VSID_B, 65 .val = SLB_VSID_B_256M, 66 .set = "ssize: 256M", 67 .clear = "ssize: 1T ", 68 }, { 69 .mask = HPTE_V_SECONDARY, 70 .val = HPTE_V_SECONDARY, 71 .set = "secondary", 72 .clear = "primary ", 73 }, { 74 .mask = HPTE_V_VALID, 75 .val = HPTE_V_VALID, 76 .set = "valid ", 77 .clear = "invalid", 78 }, { 79 .mask = HPTE_V_BOLTED, 80 .val = HPTE_V_BOLTED, 81 .set = "bolted", 82 .clear = "", 83 } 84 }; 85 86 static const struct flag_info r_flag_array[] = { 87 { 88 .mask = HPTE_R_PP0 | HPTE_R_PP, 89 .val = PP_RWXX, 90 .set = "prot:RW--", 91 }, { 92 .mask = HPTE_R_PP0 | HPTE_R_PP, 93 .val = PP_RWRX, 94 .set = "prot:RWR-", 95 }, { 96 .mask = HPTE_R_PP0 | HPTE_R_PP, 97 .val = PP_RWRW, 98 .set = "prot:RWRW", 99 }, { 100 .mask = HPTE_R_PP0 | HPTE_R_PP, 101 .val = PP_RXRX, 102 .set = "prot:R-R-", 103 }, { 104 .mask = HPTE_R_PP0 | HPTE_R_PP, 105 .val = PP_RXXX, 106 .set = "prot:R---", 107 }, { 108 .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 109 .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 110 .set = "key", 111 .clear = "", 112 .is_val = true, 113 }, { 114 .mask = HPTE_R_R, 115 .val = HPTE_R_R, 116 .set = "ref", 117 .clear = " ", 118 }, { 119 .mask = HPTE_R_C, 120 .val = HPTE_R_C, 121 .set = "changed", 122 .clear = " ", 123 }, { 124 .mask = HPTE_R_N, 125 .val = HPTE_R_N, 126 .set = "no execute", 127 }, { 128 .mask = HPTE_R_WIMG, 129 .val = HPTE_R_W, 130 .set = "writethru", 131 }, { 132 .mask = HPTE_R_WIMG, 133 .val = HPTE_R_I, 134 .set = "no cache", 135 }, { 136 .mask = HPTE_R_WIMG, 137 .val = HPTE_R_G, 138 .set = "guarded", 139 } 140 }; 141 142 static int calculate_pagesize(struct pg_state *st, int ps, char s[]) 143 { 144 static const char units[] = "BKMGTPE"; 145 const char *unit = units; 146 147 while (ps > 9 && unit[1]) { 148 ps -= 10; 149 unit++; 150 } 151 seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); 152 return ps; 153 } 154 155 static void dump_flag_info(struct pg_state *st, const struct flag_info 156 *flag, u64 pte, int num) 157 { 158 unsigned int i; 159 160 for (i = 0; i < num; i++, flag++) { 161 const char *s = NULL; 162 u64 val; 163 164 /* flag not defined so don't check it */ 165 if (flag->mask == 0) 166 continue; 167 /* Some 'flags' are actually values */ 168 if (flag->is_val) { 169 val = pte & flag->val; 170 if (flag->shift) 171 val = val >> flag->shift; 172 seq_printf(st->seq, " %s:%llx", flag->set, val); 173 } else { 174 if ((pte & flag->mask) == flag->val) 175 s = flag->set; 176 else 177 s = flag->clear; 178 if (s) 179 seq_printf(st->seq, " %s", s); 180 } 181 } 182 } 183 184 static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, 185 unsigned long rpn, int bps, int aps, unsigned long lp) 186 { 187 int aps_index; 188 189 while (ea >= st->marker[1].start_address) { 190 st->marker++; 191 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 192 } 193 seq_printf(st->seq, "0x%lx:\t", ea); 194 seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); 195 dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); 196 seq_printf(st->seq, " rpn: %lx\t", rpn); 197 dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); 198 199 calculate_pagesize(st, bps, "base"); 200 aps_index = calculate_pagesize(st, aps, "actual"); 201 if (aps_index != 2) 202 seq_printf(st->seq, "LP enc: %lx", lp); 203 seq_putc(st->seq, '\n'); 204 } 205 206 207 static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 208 *r) 209 { 210 struct hash_pte *hptep; 211 unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; 212 int i, ssize = mmu_kernel_ssize; 213 unsigned long shift = mmu_psize_defs[psize].shift; 214 215 /* calculate hash */ 216 vsid = get_kernel_vsid(ea, ssize); 217 vpn = hpt_vpn(ea, vsid, ssize); 218 hash = hpt_hash(vpn, shift, ssize); 219 want_v = hpte_encode_avpn(vpn, psize, ssize); 220 221 /* to check in the secondary hash table, we invert the hash */ 222 if (!primary) 223 hash = ~hash; 224 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 225 for (i = 0; i < HPTES_PER_GROUP; i++) { 226 hptep = htab_address + hpte_group; 227 hpte_v = be64_to_cpu(hptep->v); 228 229 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { 230 /* HPTE matches */ 231 *v = be64_to_cpu(hptep->v); 232 *r = be64_to_cpu(hptep->r); 233 return 0; 234 } 235 ++hpte_group; 236 } 237 return -1; 238 } 239 240 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) 241 { 242 struct hash_pte ptes[4]; 243 unsigned long vsid, vpn, hash, hpte_group, want_v; 244 int i, j, ssize = mmu_kernel_ssize; 245 long lpar_rc = 0; 246 unsigned long shift = mmu_psize_defs[psize].shift; 247 248 /* calculate hash */ 249 vsid = get_kernel_vsid(ea, ssize); 250 vpn = hpt_vpn(ea, vsid, ssize); 251 hash = hpt_hash(vpn, shift, ssize); 252 want_v = hpte_encode_avpn(vpn, psize, ssize); 253 254 /* to check in the secondary hash table, we invert the hash */ 255 if (!primary) 256 hash = ~hash; 257 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 258 /* see if we can find an entry in the hpte with this hash */ 259 for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { 260 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); 261 262 if (lpar_rc != H_SUCCESS) 263 continue; 264 for (j = 0; j < 4; j++) { 265 if (HPTE_V_COMPARE(ptes[j].v, want_v) && 266 (ptes[j].v & HPTE_V_VALID)) { 267 /* HPTE matches */ 268 *v = ptes[j].v; 269 *r = ptes[j].r; 270 return 0; 271 } 272 } 273 } 274 return -1; 275 } 276 277 static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, 278 unsigned long *lp_bits) 279 { 280 struct mmu_psize_def entry; 281 unsigned long arpn, mask, lp; 282 int penc = -2, idx = 0, shift; 283 284 /*. 285 * The LP field has 8 bits. Depending on the actual page size, some of 286 * these bits are concatenated with the APRN to get the RPN. The rest 287 * of the bits in the LP field is the LP value and is an encoding for 288 * the base page size and the actual page size. 289 * 290 * - find the mmu entry for our base page size 291 * - go through all page encodings and use the associated mask to 292 * find an encoding that matches our encoding in the LP field. 293 */ 294 arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 295 lp = arpn & 0xff; 296 297 entry = mmu_psize_defs[bps]; 298 while (idx < MMU_PAGE_COUNT) { 299 penc = entry.penc[idx]; 300 if ((penc != -1) && (mmu_psize_defs[idx].shift)) { 301 shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; 302 mask = (0x1 << (shift)) - 1; 303 if ((lp & mask) == penc) { 304 *aps = mmu_psize_to_shift(idx); 305 *lp_bits = lp & mask; 306 *rpn = arpn >> shift; 307 return; 308 } 309 } 310 idx++; 311 } 312 } 313 314 static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, 315 u64 *r) 316 { 317 if (IS_ENABLED(CONFIG_PPC_PSERIES) && firmware_has_feature(FW_FEATURE_LPAR)) 318 return pseries_find(ea, psize, primary, v, r); 319 320 return native_find(ea, psize, primary, v, r); 321 } 322 323 static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) 324 { 325 unsigned long slot; 326 u64 v = 0, r = 0; 327 unsigned long rpn, lp_bits; 328 int base_psize = 0, actual_psize = 0; 329 330 if (ea < PAGE_OFFSET) 331 return -1; 332 333 /* Look in primary table */ 334 slot = base_hpte_find(ea, psize, true, &v, &r); 335 336 /* Look in secondary table */ 337 if (slot == -1) 338 slot = base_hpte_find(ea, psize, false, &v, &r); 339 340 /* No entry found */ 341 if (slot == -1) 342 return -1; 343 344 /* 345 * We found an entry in the hash page table: 346 * - check that this has the same base page 347 * - find the actual page size 348 * - find the RPN 349 */ 350 base_psize = mmu_psize_to_shift(psize); 351 352 if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { 353 decode_r(psize, r, &rpn, &actual_psize, &lp_bits); 354 } else { 355 /* 4K actual page size */ 356 actual_psize = 12; 357 rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 358 /* In this case there are no LP bits */ 359 lp_bits = -1; 360 } 361 /* 362 * We didn't find a matching encoding, so the PTE we found isn't for 363 * this address. 364 */ 365 if (actual_psize == -1) 366 return -1; 367 368 dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); 369 return 0; 370 } 371 372 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 373 { 374 pte_t *pte = pte_offset_kernel(pmd, 0); 375 unsigned long addr, pteval, psize; 376 int i, status; 377 378 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 379 addr = start + i * PAGE_SIZE; 380 pteval = pte_val(*pte); 381 382 if (addr < VMALLOC_END) 383 psize = mmu_vmalloc_psize; 384 else 385 psize = mmu_io_psize; 386 387 /* check for secret 4K mappings */ 388 if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && 389 ((pteval & H_PAGE_COMBO) == H_PAGE_COMBO || 390 (pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) 391 psize = mmu_io_psize; 392 393 /* check for hashpte */ 394 status = hpte_find(st, addr, psize); 395 396 if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) 397 && (status != -1)) { 398 /* found a hpte that is not in the linux page tables */ 399 seq_printf(st->seq, "page probably bolted before linux" 400 " pagetables were set: addr:%lx, pteval:%lx\n", 401 addr, pteval); 402 } 403 } 404 } 405 406 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 407 { 408 pmd_t *pmd = pmd_offset(pud, 0); 409 unsigned long addr; 410 unsigned int i; 411 412 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 413 addr = start + i * PMD_SIZE; 414 if (!pmd_none(*pmd)) 415 /* pmd exists */ 416 walk_pte(st, pmd, addr); 417 } 418 } 419 420 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) 421 { 422 pud_t *pud = pud_offset(pgd, 0); 423 unsigned long addr; 424 unsigned int i; 425 426 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 427 addr = start + i * PUD_SIZE; 428 if (!pud_none(*pud)) 429 /* pud exists */ 430 walk_pmd(st, pud, addr); 431 } 432 } 433 434 static void walk_pagetables(struct pg_state *st) 435 { 436 pgd_t *pgd = pgd_offset_k(0UL); 437 unsigned int i; 438 unsigned long addr; 439 440 /* 441 * Traverse the linux pagetable structure and dump pages that are in 442 * the hash pagetable. 443 */ 444 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 445 addr = KERN_VIRT_START + i * PGDIR_SIZE; 446 if (!pgd_none(*pgd)) 447 /* pgd exists */ 448 walk_pud(st, pgd, addr); 449 } 450 } 451 452 453 static void walk_linearmapping(struct pg_state *st) 454 { 455 unsigned long addr; 456 457 /* 458 * Traverse the linear mapping section of virtual memory and dump pages 459 * that are in the hash pagetable. 460 */ 461 unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; 462 463 for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + 464 memblock_end_of_DRAM(); addr += psize) 465 hpte_find(st, addr, mmu_linear_psize); 466 } 467 468 static void walk_vmemmap(struct pg_state *st) 469 { 470 struct vmemmap_backing *ptr = vmemmap_list; 471 472 if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) 473 return; 474 /* 475 * Traverse the vmemmaped memory and dump pages that are in the hash 476 * pagetable. 477 */ 478 while (ptr->list) { 479 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); 480 ptr = ptr->list; 481 } 482 seq_puts(st->seq, "---[ vmemmap end ]---\n"); 483 } 484 485 static void populate_markers(void) 486 { 487 address_markers[0].start_address = PAGE_OFFSET; 488 address_markers[1].start_address = VMALLOC_START; 489 address_markers[2].start_address = VMALLOC_END; 490 address_markers[3].start_address = ISA_IO_BASE; 491 address_markers[4].start_address = ISA_IO_END; 492 address_markers[5].start_address = PHB_IO_BASE; 493 address_markers[6].start_address = PHB_IO_END; 494 address_markers[7].start_address = IOREMAP_BASE; 495 address_markers[8].start_address = IOREMAP_END; 496 address_markers[9].start_address = H_VMEMMAP_START; 497 } 498 499 static int ptdump_show(struct seq_file *m, void *v) 500 { 501 struct pg_state st = { 502 .seq = m, 503 .start_address = PAGE_OFFSET, 504 .marker = address_markers, 505 }; 506 /* 507 * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and 508 * dump pages that are in the hash pagetable. 509 */ 510 walk_linearmapping(&st); 511 walk_pagetables(&st); 512 walk_vmemmap(&st); 513 return 0; 514 } 515 516 static int ptdump_open(struct inode *inode, struct file *file) 517 { 518 return single_open(file, ptdump_show, NULL); 519 } 520 521 static const struct file_operations ptdump_fops = { 522 .open = ptdump_open, 523 .read = seq_read, 524 .llseek = seq_lseek, 525 .release = single_release, 526 }; 527 528 static int ptdump_init(void) 529 { 530 struct dentry *debugfs_file; 531 532 if (!radix_enabled()) { 533 populate_markers(); 534 debugfs_file = debugfs_create_file("kernel_hash_pagetable", 535 0400, NULL, NULL, &ptdump_fops); 536 return debugfs_file ? 0 : -ENOMEM; 537 } 538 return 0; 539 } 540 device_initcall(ptdump_init); 541