1 /* 2 * Copyright 2016, Rashmica Gupta, IBM Corp. 3 * 4 * This traverses the kernel virtual memory and dumps the pages that are in 5 * the hash pagetable, along with their flags to 6 * /sys/kernel/debug/kernel_hash_pagetable. 7 * 8 * If radix is enabled then there is no hash page table and so no debugfs file 9 * is generated. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License 13 * as published by the Free Software Foundation; version 2 14 * of the License. 15 */ 16 #include <linux/debugfs.h> 17 #include <linux/fs.h> 18 #include <linux/io.h> 19 #include <linux/mm.h> 20 #include <linux/sched.h> 21 #include <linux/seq_file.h> 22 #include <asm/pgtable.h> 23 #include <linux/const.h> 24 #include <asm/page.h> 25 #include <asm/pgalloc.h> 26 #include <asm/plpar_wrappers.h> 27 #include <linux/memblock.h> 28 #include <asm/firmware.h> 29 30 struct pg_state { 31 struct seq_file *seq; 32 const struct addr_marker *marker; 33 unsigned long start_address; 34 unsigned int level; 35 u64 current_flags; 36 }; 37 38 struct addr_marker { 39 unsigned long start_address; 40 const char *name; 41 }; 42 43 static struct addr_marker address_markers[] = { 44 { 0, "Start of kernel VM" }, 45 { 0, "vmalloc() Area" }, 46 { 0, "vmalloc() End" }, 47 { 0, "isa I/O start" }, 48 { 0, "isa I/O end" }, 49 { 0, "phb I/O start" }, 50 { 0, "phb I/O end" }, 51 { 0, "I/O remap start" }, 52 { 0, "I/O remap end" }, 53 { 0, "vmemmap start" }, 54 { -1, NULL }, 55 }; 56 57 struct flag_info { 58 u64 mask; 59 u64 val; 60 const char *set; 61 const char *clear; 62 bool is_val; 63 int shift; 64 }; 65 66 static const struct flag_info v_flag_array[] = { 67 { 68 .mask = SLB_VSID_B, 69 .val = SLB_VSID_B_256M, 70 .set = "ssize: 256M", 71 .clear = "ssize: 1T ", 72 }, { 73 .mask = HPTE_V_SECONDARY, 74 .val = HPTE_V_SECONDARY, 75 .set = "secondary", 76 .clear = "primary ", 77 }, { 78 .mask = HPTE_V_VALID, 79 .val = HPTE_V_VALID, 80 .set = "valid ", 81 .clear = "invalid", 82 }, { 83 .mask = HPTE_V_BOLTED, 84 .val = HPTE_V_BOLTED, 85 .set = "bolted", 86 .clear = "", 87 } 88 }; 89 90 static const struct flag_info r_flag_array[] = { 91 { 92 .mask = HPTE_R_PP0 | HPTE_R_PP, 93 .val = PP_RWXX, 94 .set = "prot:RW--", 95 }, { 96 .mask = HPTE_R_PP0 | HPTE_R_PP, 97 .val = PP_RWRX, 98 .set = "prot:RWR-", 99 }, { 100 .mask = HPTE_R_PP0 | HPTE_R_PP, 101 .val = PP_RWRW, 102 .set = "prot:RWRW", 103 }, { 104 .mask = HPTE_R_PP0 | HPTE_R_PP, 105 .val = PP_RXRX, 106 .set = "prot:R-R-", 107 }, { 108 .mask = HPTE_R_PP0 | HPTE_R_PP, 109 .val = PP_RXXX, 110 .set = "prot:R---", 111 }, { 112 .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 113 .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO, 114 .set = "key", 115 .clear = "", 116 .is_val = true, 117 }, { 118 .mask = HPTE_R_R, 119 .val = HPTE_R_R, 120 .set = "ref", 121 .clear = " ", 122 }, { 123 .mask = HPTE_R_C, 124 .val = HPTE_R_C, 125 .set = "changed", 126 .clear = " ", 127 }, { 128 .mask = HPTE_R_N, 129 .val = HPTE_R_N, 130 .set = "no execute", 131 }, { 132 .mask = HPTE_R_WIMG, 133 .val = HPTE_R_W, 134 .set = "writethru", 135 }, { 136 .mask = HPTE_R_WIMG, 137 .val = HPTE_R_I, 138 .set = "no cache", 139 }, { 140 .mask = HPTE_R_WIMG, 141 .val = HPTE_R_G, 142 .set = "guarded", 143 } 144 }; 145 146 static int calculate_pagesize(struct pg_state *st, int ps, char s[]) 147 { 148 static const char units[] = "BKMGTPE"; 149 const char *unit = units; 150 151 while (ps > 9 && unit[1]) { 152 ps -= 10; 153 unit++; 154 } 155 seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, *unit); 156 return ps; 157 } 158 159 static void dump_flag_info(struct pg_state *st, const struct flag_info 160 *flag, u64 pte, int num) 161 { 162 unsigned int i; 163 164 for (i = 0; i < num; i++, flag++) { 165 const char *s = NULL; 166 u64 val; 167 168 /* flag not defined so don't check it */ 169 if (flag->mask == 0) 170 continue; 171 /* Some 'flags' are actually values */ 172 if (flag->is_val) { 173 val = pte & flag->val; 174 if (flag->shift) 175 val = val >> flag->shift; 176 seq_printf(st->seq, " %s:%llx", flag->set, val); 177 } else { 178 if ((pte & flag->mask) == flag->val) 179 s = flag->set; 180 else 181 s = flag->clear; 182 if (s) 183 seq_printf(st->seq, " %s", s); 184 } 185 } 186 } 187 188 static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r, 189 unsigned long rpn, int bps, int aps, unsigned long lp) 190 { 191 int aps_index; 192 193 while (ea >= st->marker[1].start_address) { 194 st->marker++; 195 seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); 196 } 197 seq_printf(st->seq, "0x%lx:\t", ea); 198 seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v)); 199 dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array)); 200 seq_printf(st->seq, " rpn: %lx\t", rpn); 201 dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array)); 202 203 calculate_pagesize(st, bps, "base"); 204 aps_index = calculate_pagesize(st, aps, "actual"); 205 if (aps_index != 2) 206 seq_printf(st->seq, "LP enc: %lx", lp); 207 seq_putc(st->seq, '\n'); 208 } 209 210 211 static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64 212 *r) 213 { 214 struct hash_pte *hptep; 215 unsigned long hash, vsid, vpn, hpte_group, want_v, hpte_v; 216 int i, ssize = mmu_kernel_ssize; 217 unsigned long shift = mmu_psize_defs[psize].shift; 218 219 /* calculate hash */ 220 vsid = get_kernel_vsid(ea, ssize); 221 vpn = hpt_vpn(ea, vsid, ssize); 222 hash = hpt_hash(vpn, shift, ssize); 223 want_v = hpte_encode_avpn(vpn, psize, ssize); 224 225 /* to check in the secondary hash table, we invert the hash */ 226 if (!primary) 227 hash = ~hash; 228 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 229 for (i = 0; i < HPTES_PER_GROUP; i++) { 230 hptep = htab_address + hpte_group; 231 hpte_v = be64_to_cpu(hptep->v); 232 233 if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) { 234 /* HPTE matches */ 235 *v = be64_to_cpu(hptep->v); 236 *r = be64_to_cpu(hptep->r); 237 return 0; 238 } 239 ++hpte_group; 240 } 241 return -1; 242 } 243 244 #ifdef CONFIG_PPC_PSERIES 245 static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r) 246 { 247 struct hash_pte ptes[4]; 248 unsigned long vsid, vpn, hash, hpte_group, want_v; 249 int i, j, ssize = mmu_kernel_ssize; 250 long lpar_rc = 0; 251 unsigned long shift = mmu_psize_defs[psize].shift; 252 253 /* calculate hash */ 254 vsid = get_kernel_vsid(ea, ssize); 255 vpn = hpt_vpn(ea, vsid, ssize); 256 hash = hpt_hash(vpn, shift, ssize); 257 want_v = hpte_encode_avpn(vpn, psize, ssize); 258 259 /* to check in the secondary hash table, we invert the hash */ 260 if (!primary) 261 hash = ~hash; 262 hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; 263 /* see if we can find an entry in the hpte with this hash */ 264 for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { 265 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); 266 267 if (lpar_rc != H_SUCCESS) 268 continue; 269 for (j = 0; j < 4; j++) { 270 if (HPTE_V_COMPARE(ptes[j].v, want_v) && 271 (ptes[j].v & HPTE_V_VALID)) { 272 /* HPTE matches */ 273 *v = ptes[j].v; 274 *r = ptes[j].r; 275 return 0; 276 } 277 } 278 } 279 return -1; 280 } 281 #endif 282 283 static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps, 284 unsigned long *lp_bits) 285 { 286 struct mmu_psize_def entry; 287 unsigned long arpn, mask, lp; 288 int penc = -2, idx = 0, shift; 289 290 /*. 291 * The LP field has 8 bits. Depending on the actual page size, some of 292 * these bits are concatenated with the APRN to get the RPN. The rest 293 * of the bits in the LP field is the LP value and is an encoding for 294 * the base page size and the actual page size. 295 * 296 * - find the mmu entry for our base page size 297 * - go through all page encodings and use the associated mask to 298 * find an encoding that matches our encoding in the LP field. 299 */ 300 arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 301 lp = arpn & 0xff; 302 303 entry = mmu_psize_defs[bps]; 304 while (idx < MMU_PAGE_COUNT) { 305 penc = entry.penc[idx]; 306 if ((penc != -1) && (mmu_psize_defs[idx].shift)) { 307 shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT; 308 mask = (0x1 << (shift)) - 1; 309 if ((lp & mask) == penc) { 310 *aps = mmu_psize_to_shift(idx); 311 *lp_bits = lp & mask; 312 *rpn = arpn >> shift; 313 return; 314 } 315 } 316 idx++; 317 } 318 } 319 320 static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v, 321 u64 *r) 322 { 323 #ifdef CONFIG_PPC_PSERIES 324 if (firmware_has_feature(FW_FEATURE_LPAR)) 325 return pseries_find(ea, psize, primary, v, r); 326 #endif 327 return native_find(ea, psize, primary, v, r); 328 } 329 330 static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) 331 { 332 unsigned long slot; 333 u64 v = 0, r = 0; 334 unsigned long rpn, lp_bits; 335 int base_psize = 0, actual_psize = 0; 336 337 if (ea < PAGE_OFFSET) 338 return -1; 339 340 /* Look in primary table */ 341 slot = base_hpte_find(ea, psize, true, &v, &r); 342 343 /* Look in secondary table */ 344 if (slot == -1) 345 slot = base_hpte_find(ea, psize, false, &v, &r); 346 347 /* No entry found */ 348 if (slot == -1) 349 return -1; 350 351 /* 352 * We found an entry in the hash page table: 353 * - check that this has the same base page 354 * - find the actual page size 355 * - find the RPN 356 */ 357 base_psize = mmu_psize_to_shift(psize); 358 359 if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) { 360 decode_r(psize, r, &rpn, &actual_psize, &lp_bits); 361 } else { 362 /* 4K actual page size */ 363 actual_psize = 12; 364 rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT; 365 /* In this case there are no LP bits */ 366 lp_bits = -1; 367 } 368 /* 369 * We didn't find a matching encoding, so the PTE we found isn't for 370 * this address. 371 */ 372 if (actual_psize == -1) 373 return -1; 374 375 dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits); 376 return 0; 377 } 378 379 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) 380 { 381 pte_t *pte = pte_offset_kernel(pmd, 0); 382 unsigned long addr, pteval, psize; 383 int i, status; 384 385 for (i = 0; i < PTRS_PER_PTE; i++, pte++) { 386 addr = start + i * PAGE_SIZE; 387 pteval = pte_val(*pte); 388 389 if (addr < VMALLOC_END) 390 psize = mmu_vmalloc_psize; 391 else 392 psize = mmu_io_psize; 393 #ifdef CONFIG_PPC_64K_PAGES 394 /* check for secret 4K mappings */ 395 if (((pteval & H_PAGE_COMBO) == H_PAGE_COMBO) || 396 ((pteval & H_PAGE_4K_PFN) == H_PAGE_4K_PFN)) 397 psize = mmu_io_psize; 398 #endif 399 /* check for hashpte */ 400 status = hpte_find(st, addr, psize); 401 402 if (((pteval & H_PAGE_HASHPTE) != H_PAGE_HASHPTE) 403 && (status != -1)) { 404 /* found a hpte that is not in the linux page tables */ 405 seq_printf(st->seq, "page probably bolted before linux" 406 " pagetables were set: addr:%lx, pteval:%lx\n", 407 addr, pteval); 408 } 409 } 410 } 411 412 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) 413 { 414 pmd_t *pmd = pmd_offset(pud, 0); 415 unsigned long addr; 416 unsigned int i; 417 418 for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { 419 addr = start + i * PMD_SIZE; 420 if (!pmd_none(*pmd)) 421 /* pmd exists */ 422 walk_pte(st, pmd, addr); 423 } 424 } 425 426 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) 427 { 428 pud_t *pud = pud_offset(pgd, 0); 429 unsigned long addr; 430 unsigned int i; 431 432 for (i = 0; i < PTRS_PER_PUD; i++, pud++) { 433 addr = start + i * PUD_SIZE; 434 if (!pud_none(*pud)) 435 /* pud exists */ 436 walk_pmd(st, pud, addr); 437 } 438 } 439 440 static void walk_pagetables(struct pg_state *st) 441 { 442 pgd_t *pgd = pgd_offset_k(0UL); 443 unsigned int i; 444 unsigned long addr; 445 446 /* 447 * Traverse the linux pagetable structure and dump pages that are in 448 * the hash pagetable. 449 */ 450 for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { 451 addr = KERN_VIRT_START + i * PGDIR_SIZE; 452 if (!pgd_none(*pgd)) 453 /* pgd exists */ 454 walk_pud(st, pgd, addr); 455 } 456 } 457 458 459 static void walk_linearmapping(struct pg_state *st) 460 { 461 unsigned long addr; 462 463 /* 464 * Traverse the linear mapping section of virtual memory and dump pages 465 * that are in the hash pagetable. 466 */ 467 unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; 468 469 for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + 470 memblock_end_of_DRAM(); addr += psize) 471 hpte_find(st, addr, mmu_linear_psize); 472 } 473 474 static void walk_vmemmap(struct pg_state *st) 475 { 476 #ifdef CONFIG_SPARSEMEM_VMEMMAP 477 struct vmemmap_backing *ptr = vmemmap_list; 478 479 /* 480 * Traverse the vmemmaped memory and dump pages that are in the hash 481 * pagetable. 482 */ 483 while (ptr->list) { 484 hpte_find(st, ptr->virt_addr, mmu_vmemmap_psize); 485 ptr = ptr->list; 486 } 487 seq_puts(st->seq, "---[ vmemmap end ]---\n"); 488 #endif 489 } 490 491 static void populate_markers(void) 492 { 493 address_markers[0].start_address = PAGE_OFFSET; 494 address_markers[1].start_address = VMALLOC_START; 495 address_markers[2].start_address = VMALLOC_END; 496 address_markers[3].start_address = ISA_IO_BASE; 497 address_markers[4].start_address = ISA_IO_END; 498 address_markers[5].start_address = PHB_IO_BASE; 499 address_markers[6].start_address = PHB_IO_END; 500 address_markers[7].start_address = IOREMAP_BASE; 501 address_markers[8].start_address = IOREMAP_END; 502 #ifdef CONFIG_PPC_BOOK3S_64 503 address_markers[9].start_address = H_VMEMMAP_BASE; 504 #else 505 address_markers[9].start_address = VMEMMAP_BASE; 506 #endif 507 } 508 509 static int ptdump_show(struct seq_file *m, void *v) 510 { 511 struct pg_state st = { 512 .seq = m, 513 .start_address = PAGE_OFFSET, 514 .marker = address_markers, 515 }; 516 /* 517 * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and 518 * dump pages that are in the hash pagetable. 519 */ 520 walk_linearmapping(&st); 521 walk_pagetables(&st); 522 walk_vmemmap(&st); 523 return 0; 524 } 525 526 static int ptdump_open(struct inode *inode, struct file *file) 527 { 528 return single_open(file, ptdump_show, NULL); 529 } 530 531 static const struct file_operations ptdump_fops = { 532 .open = ptdump_open, 533 .read = seq_read, 534 .llseek = seq_lseek, 535 .release = single_release, 536 }; 537 538 static int ptdump_init(void) 539 { 540 struct dentry *debugfs_file; 541 542 if (!radix_enabled()) { 543 populate_markers(); 544 debugfs_file = debugfs_create_file("kernel_hash_pagetable", 545 0400, NULL, NULL, &ptdump_fops); 546 return debugfs_file ? 0 : -ENOMEM; 547 } 548 return 0; 549 } 550 device_initcall(ptdump_init); 551