/*
 * PowerPC Radix MMU emulation helpers for QEMU.
 *
 * Copyright (c) 2016 Suraj Jitindar Singh, IBM Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "qemu/error-report.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "exec/log.h"
#include "internal.h"
#include "mmu-radix64.h"
#include "mmu-book3s-v3.h"

/*
 * Derive the fully qualified address (LPID, PID) for an effective address
 * from the quadrant in EA bits 0:1 and the current MSR[HV] state.
 *
 * Returns false when the quadrant is illegal for a guest (quadrants 1 and 2
 * with MSR[HV]=0); the caller then raises a segment interrupt.
 */
static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
                                                 vaddr eaddr,
                                                 uint64_t *lpid, uint64_t *pid)
{
    if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */
        switch (eaddr & R_EADDR_QUADRANT) {
        case R_EADDR_QUADRANT0: /* Hypervisor application */
            *lpid = 0;
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT1: /* HV access to guest application space */
            *lpid = env->spr[SPR_LPIDR];
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT2: /* HV access to guest OS space */
            *lpid = env->spr[SPR_LPIDR];
            *pid = 0;
            break;
        case R_EADDR_QUADRANT3: /* Hypervisor OS */
            *lpid = 0;
            *pid = 0;
            break;
        default:
            g_assert_not_reached();
        }
    } else {  /* !MSR[HV] -> Guest */
        switch (eaddr & R_EADDR_QUADRANT) {
        case R_EADDR_QUADRANT0: /* Guest application */
            *lpid = env->spr[SPR_LPIDR];
            *pid = env->spr[SPR_BOOKS_PID];
            break;
        case R_EADDR_QUADRANT1: /* Illegal */
        case R_EADDR_QUADRANT2:
            return false;
        case R_EADDR_QUADRANT3: /* Guest OS */
            *lpid = env->spr[SPR_LPIDR];
            *pid = 0; /* pid set to 0 -> addresses guest operating system */
            break;
        default:
            g_assert_not_reached();
        }
    }

    return true;
}

/*
 * Raise a segment interrupt (ISEG for fetches, DSEG for data accesses)
 * for an access to an illegal quadrant. DAR is set for data accesses only.
 */
static void ppc_radix64_raise_segi(PowerPCCPU *cpu, MMUAccessType access_type,
                                   vaddr eaddr)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    switch (access_type) {
    case MMU_INST_FETCH:
        /* Instruction Segment Interrupt */
        cs->exception_index = POWERPC_EXCP_ISEG;
        break;
    case MMU_DATA_STORE:
    case MMU_DATA_LOAD:
        /* Data Segment Interrupt */
        cs->exception_index = POWERPC_EXCP_DSEG;
        env->spr[SPR_DAR] = eaddr;
        break;
    default:
        g_assert_not_reached();
    }
    env->error_code = 0;
}

/* Human-readable access direction, used only for MMU debug logging. */
static inline const char *access_str(MMUAccessType access_type)
{
    return access_type == MMU_DATA_LOAD ? "reading" :
        (access_type == MMU_DATA_STORE ? "writing" : "execute");
}

/*
 * Raise a (process-scoped) storage interrupt: ISI for fetches (cause goes
 * in error_code, later copied to SRR1), DSI for data accesses (cause goes
 * in DSISR, faulting address in DAR).
 */
static void ppc_radix64_raise_si(PowerPCCPU *cpu, MMUAccessType access_type,
                                 vaddr eaddr, uint32_t cause)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" cause %08x\n",
                  __func__, access_str(access_type),
                  eaddr, cause);

    switch (access_type) {
    case MMU_INST_FETCH:
        /* Instruction Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_ISI;
        env->error_code = cause;
        break;
    case MMU_DATA_STORE:
        cause |= DSISR_ISSTORE;
        /* fall through */
    case MMU_DATA_LOAD:
        /* Data Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_DSI;
        env->spr[SPR_DSISR] = cause;
        env->spr[SPR_DAR] = eaddr;
        env->error_code = 0;
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Raise a hypervisor storage interrupt (HISI/HDSI) for a fault during
 * partition-scoped translation. The guest real address that failed to
 * translate is recorded in ASDR; data faults also set HDSISR/HDAR.
 */
static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, MMUAccessType access_type,
                                  vaddr eaddr, hwaddr g_raddr, uint32_t cause)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx" 0x%"
                  HWADDR_PRIx" cause %08x\n",
                  __func__, access_str(access_type),
                  eaddr, g_raddr, cause);

    switch (access_type) {
    case MMU_INST_FETCH:
        /* H Instruction Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_HISI;
        env->spr[SPR_ASDR] = g_raddr;
        env->error_code = cause;
        break;
    case MMU_DATA_STORE:
        cause |= DSISR_ISSTORE;
        /* fall through */
    case MMU_DATA_LOAD:
        /* H Data Storage Interrupt */
        cs->exception_index = POWERPC_EXCP_HDSI;
        env->spr[SPR_HDSISR] = cause;
        env->spr[SPR_HDAR] = eaddr;
        env->spr[SPR_ASDR] = g_raddr;
        env->error_code = 0;
        break;
    default:
        g_assert_not_reached();
    }
}

/*
 * Check whether the access is permitted by the leaf PTE's attributes and
 * Encoded Access Authority (and, for process-scoped translation, the AMR).
 *
 * On denial, returns true and ORs the reason into *fault_cause; on success,
 * returns false. *prot is always set to the permissions the PTE grants, so
 * the TLB entry can be filled accordingly.
 *
 * NOTE(review): the privileged-page check below tests msr_pr (current MSR)
 * while the next branch tests mmuidx_pr(mmu_idx); for translations done on
 * behalf of another context (e.g. the mmu_idx==5 hypervisor-scope walks
 * below) these can differ — confirm this mix is intentional.
 */
static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type,
                                   uint64_t pte, int *fault_cause, int *prot,
                                   int mmu_idx, bool partition_scoped)
{
    CPUPPCState *env = &cpu->env;
    int need_prot;

    /* Check Page Attributes (pte58:59) */
    if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) {
        /*
         * Radix PTE entries with the non-idempotent I/O attribute are treated
         * as guarded storage
         */
        *fault_cause |= SRR1_NOEXEC_GUARD;
        return true;
    }

    /* Determine permissions allowed by Encoded Access Authority */
    if (!partition_scoped && (pte & R_PTE_EAA_PRIV) && msr_pr) {
        *prot = 0;
    } else if (mmuidx_pr(mmu_idx) || (pte & R_PTE_EAA_PRIV) ||
               partition_scoped) {
        *prot = ppc_radix64_get_prot_eaa(pte);
    } else { /* !msr_pr && !(pte & R_PTE_EAA_PRIV) && !partition_scoped */
        *prot = ppc_radix64_get_prot_eaa(pte);
        *prot &= ppc_radix64_get_prot_amr(cpu); /* Least combined permissions */
    }

    /* Check if requested access type is allowed */
    need_prot = prot_for_access_type(access_type);
    if (need_prot & ~*prot) { /* Page Protected for that Access */
        *fault_cause |= DSISR_PROTFAULT;
        return true;
    }

    return false;
}

/*
 * Update the Reference and Change bits in the PTE: R is set on every access,
 * C only on stores. Non-store accesses drop PAGE_WRITE from *prot so a later
 * store re-enters translation and sets C then. The PTE is written back to
 * memory only if it actually changed.
 */
static void ppc_radix64_set_rc(PowerPCCPU *cpu, MMUAccessType access_type,
                               uint64_t pte, hwaddr pte_addr, int *prot)
{
    CPUState *cs = CPU(cpu);
    uint64_t npte;

    npte = pte | R_PTE_R; /* Always set reference bit */

    if (access_type == MMU_DATA_STORE) { /* Store/Write */
        npte |= R_PTE_C; /* Set change bit */
    } else {
        /*
         * Treat the page as read-only for now, so that a later write
         * will pass through this function again to set the C bit.
         */
        *prot &= ~PAGE_WRITE;
    }

    if (pte ^ npte) { /* If pte has changed then write it back */
        stq_phys(cs->as, pte_addr, npte);
    }
}

/*
 * Perform one step of a radix tree walk: read the PDE/PTE at *pte_addr and,
 * if it is a valid non-leaf entry, compute the address of the next-level
 * entry into *pte_addr (updating *nls and consuming *nls bits of *psize).
 *
 * Returns 0 on success (caller checks R_PTE_LEAF in *pte to know whether to
 * iterate again), 1 on bad config or invalid entry with *fault_cause set.
 */
static int ppc_radix64_next_level(AddressSpace *as, vaddr eaddr,
                                  uint64_t *pte_addr, uint64_t *nls,
                                  int *psize, uint64_t *pte, int *fault_cause)
{
    uint64_t index, pde;

    if (*nls < 5) { /* Directory maps less than 2**5 entries */
        *fault_cause |= DSISR_R_BADCONFIG;
        return 1;
    }

    /* Read page <directory/table> entry from guest address space */
    pde = ldq_phys(as, *pte_addr);
    if (!(pde & R_PTE_VALID)) { /* Invalid Entry */
        *fault_cause |= DSISR_NOPTE;
        return 1;
    }

    *pte = pde;
    *psize -= *nls;
    if (!(pde & R_PTE_LEAF)) { /* Prepare for next iteration */
        *nls = pde & R_PDE_NLS;
        index = eaddr >> (*psize - *nls); /* Shift */
        index &= ((1UL << *nls) - 1); /* Mask */
        *pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));
    }
    return 0;
}

/*
 * Walk the radix page table rooted at base_addr until a leaf PTE is found,
 * translating eaddr. On success returns 0 with the leaf in *pte, its
 * location in *pte_addr, the page size (log2) left in *psize and the real
 * address in *raddr. Returns 1 on failure with *fault_cause set.
 */
static int ppc_radix64_walk_tree(AddressSpace *as, vaddr eaddr,
                                 uint64_t base_addr, uint64_t nls,
                                 hwaddr *raddr, int *psize, uint64_t *pte,
                                 int *fault_cause, hwaddr *pte_addr)
{
    uint64_t index, pde, rpn , mask;

    if (nls < 5) { /* Directory maps less than 2**5 entries */
        *fault_cause |= DSISR_R_BADCONFIG;
        return 1;
    }

    index = eaddr >> (*psize - nls); /* Shift */
    index &= ((1UL << nls) - 1); /* Mask */
    *pte_addr = base_addr + (index * sizeof(pde));
    do {
        int ret;

        ret = ppc_radix64_next_level(as, eaddr, pte_addr, &nls, psize, &pde,
                                     fault_cause);
        if (ret) {
            return ret;
        }
    } while (!(pde & R_PTE_LEAF));

    *pte = pde;
    rpn = pde & R_PTE_RPN;
    mask = (1UL << *psize) - 1;

    /* Or high bits of rpn and low bits to ea to form whole real addr */
    *raddr = (rpn & ~mask) | (eaddr & mask);
    return 0;
}

/*
 * Sanity-check a partition table entry: it must be radix (HR set), LPID 0
 * is only usable from hypervisor state, and the process table size field
 * must map at least 2**5 entries.
 *
 * NOTE(review): the size check masks dw0 with PATE1_R_PRTS; this works
 * because that mask covers the same low bits as the dw0 RPDS field, but
 * confirm dw0 (vs dw1) is really intended here.
 */
static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
{
    CPUPPCState *env = &cpu->env;

    if (!(pate->dw0 & PATE0_HR)) {
        return false;
    }
    if (lpid == 0 && !msr_hv) {
        return false;
    }
    if ((pate->dw0 & PATE1_R_PRTS) < 5) {
        return false;
    }
    /* More checks ... */
    return true;
}

/*
 * Partition-scoped translation: Guest Real Address -> Host Real Address,
 * by walking the partition-scoped tree described by pate.dw0.
 *
 * pde_addr flags that the address being translated is that of a guest page
 * directory/table entry (adds DSISR_PRTABLE_FAULT on error). Faults raise
 * an HISI/HDSI only when guest_visible. Returns 0 on success, non-zero on
 * fault.
 */
static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu,
                                              MMUAccessType access_type,
                                              vaddr eaddr, hwaddr g_raddr,
                                              ppc_v3_pate_t pate,
                                              hwaddr *h_raddr, int *h_prot,
                                              int *h_page_size, bool pde_addr,
                                              int mmu_idx, bool guest_visible)
{
    int fault_cause = 0;
    hwaddr pte_addr;
    uint64_t pte;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u (prot %c%c%c) 0x%"HWADDR_PRIx"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx,
                  *h_prot & PAGE_READ ? 'r' : '-',
                  *h_prot & PAGE_WRITE ? 'w' : '-',
                  *h_prot & PAGE_EXEC ? 'x' : '-',
                  g_raddr);

    *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
    /* No valid pte or access denied due to protection */
    if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
                              pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
                              &pte, &fault_cause, &pte_addr) ||
        ppc_radix64_check_prot(cpu, access_type, pte,
                               &fault_cause, h_prot, mmu_idx, true)) {
        if (pde_addr) { /* address being translated was that of a guest pde */
            fault_cause |= DSISR_PRTABLE_FAULT;
        }
        if (guest_visible) {
            ppc_radix64_raise_hsi(cpu, access_type, eaddr, g_raddr, fault_cause);
        }
        return 1;
    }

    if (guest_visible) {
        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, h_prot);
    }

    return 0;
}

/*
 * Process-scoped translation: Guest Effective Address -> Guest Real Address.
 *
 * Looks up the process table entry for pid, then walks the process-scoped
 * radix tree it describes. When not running under a virtual hypervisor,
 * every process-table and page-table access is itself subject to
 * partition-scoped translation, so each tree level is translated
 * individually. Faults raise an ISI/DSI only when guest_visible. Returns 0
 * on success, non-zero on fault.
 */
static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu,
                                            MMUAccessType access_type,
                                            vaddr eaddr, uint64_t pid,
                                            ppc_v3_pate_t pate, hwaddr *g_raddr,
                                            int *g_prot, int *g_page_size,
                                            int mmu_idx, bool guest_visible)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
    int fault_cause = 0, h_page_size, h_prot;
    hwaddr h_raddr, pte_addr;
    int ret;

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u pid %"PRIu64"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx, pid);

    /* Index Process Table by PID to Find Corresponding Process Table Entry */
    offset = pid * sizeof(struct prtb_entry);
    size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
    if (offset >= size) {
        /* offset exceeds size of the process table */
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
        }
        return 1;
    }
    prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;

    if (cpu->vhyp) {
        prtbe0 = ldq_phys(cs->as, prtbe_addr);
    } else {
        /*
         * Process table addresses are subject to partition-scoped
         * translation
         *
         * On a Radix host, the partition-scoped page table for LPID=0
         * is only used to translate the effective addresses of the
         * process table entries.
         */
        ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
                                                 pate, &h_raddr, &h_prot,
                                                 &h_page_size, true,
            /* mmu_idx is 5 because we're translating from hypervisor scope */
                                                 5, guest_visible);
        if (ret) {
            return ret;
        }
        prtbe0 = ldq_phys(cs->as, h_raddr);
    }

    /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
    *g_page_size = PRTBE_R_GET_RTS(prtbe0);
    base_addr = prtbe0 & PRTBE_R_RPDB;
    nls = prtbe0 & PRTBE_R_RPDS;
    if (msr_hv || cpu->vhyp) {
        /*
         * Can treat process table addresses as real addresses
         */
        ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
                                    nls, g_raddr, g_page_size, &pte,
                                    &fault_cause, &pte_addr);
        if (ret) {
            /* No valid PTE */
            if (guest_visible) {
                ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
            }
            return ret;
        }
    } else {
        uint64_t rpn, mask;

        index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
        index &= ((1UL << nls) - 1); /* Mask */
        pte_addr = base_addr + (index * sizeof(pte));

        /*
         * Each process table address is subject to a partition-scoped
         * translation
         */
        do {
            ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
                                                     pate, &h_raddr, &h_prot,
                                                     &h_page_size, true,
            /* mmu_idx is 5 because we're translating from hypervisor scope */
                                                     5, guest_visible);
            if (ret) {
                return ret;
            }

            ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr,
                                         &nls, g_page_size, &pte, &fault_cause);
            if (ret) {
                /* No valid pte */
                if (guest_visible) {
                    ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
                }
                return ret;
            }
            pte_addr = h_raddr;
        } while (!(pte & R_PTE_LEAF));

        rpn = pte & R_PTE_RPN;
        mask = (1UL << *g_page_size) - 1;

        /* Or high bits of rpn and low bits to ea to form whole real addr */
        *g_raddr = (rpn & ~mask) | (eaddr & mask);
    }

    if (ppc_radix64_check_prot(cpu, access_type, pte, &fault_cause,
                               g_prot, mmu_idx, false)) {
        /* Access denied due to protection */
        if (guest_visible) {
            ppc_radix64_raise_si(cpu, access_type, eaddr, fault_cause);
        }
        return 1;
    }

    if (guest_visible) {
        ppc_radix64_set_rc(cpu, access_type, pte, pte_addr, g_prot);
    }

    return 0;
}

/*
 * Radix tree translation is a 2 steps translation process:
 *
 * 1. Process-scoped translation:   Guest Eff Addr  -> Guest Real Addr
 * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
 *
 *                                  MSR[HV]
 *              +-------------+----------------+---------------+
 *              |             |     HV = 0     |     HV = 1    |
 *              +-------------+----------------+---------------+
 *              | Relocation  |    Partition   |      No       |
 *              | = Off       |     Scoped     |  Translation  |
 *  Relocation  +-------------+----------------+---------------+
 *              | Relocation  |  Partition &   |    Process    |
 *              | = On        | Process Scoped |    Scoped     |
 *              +-------------+----------------+---------------+
 */
static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr,
                                   MMUAccessType access_type, hwaddr *raddr,
                                   int *psizep, int *protp, int mmu_idx,
                                   bool guest_visible)
{
    CPUPPCState *env = &cpu->env;
    uint64_t lpid, pid;
    ppc_v3_pate_t pate;
    int psize, prot;
    hwaddr g_raddr;
    bool relocation;

    /* A virtual hypervisor never runs in hypervisor state */
    assert(!(mmuidx_hv(mmu_idx) && cpu->vhyp));

    relocation = !mmuidx_real(mmu_idx);

    /* HV or virtual hypervisor Real Mode Access */
    if (!relocation && (mmuidx_hv(mmu_idx) || cpu->vhyp)) {
        /* In real mode top 4 effective addr bits (mostly) ignored */
        *raddr = eaddr & 0x0FFFFFFFFFFFFFFFULL;

        /* In HV mode, add HRMOR if top EA bit is clear */
        if (mmuidx_hv(mmu_idx) || !env->has_hv_mode) {
            if (!(eaddr >> 63)) {
                *raddr |= env->spr[SPR_HRMOR];
            }
        }
        *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
        *psizep = TARGET_PAGE_BITS;
        return true;
    }

    /*
     * Check UPRT (we avoid the check in real mode to deal with
     * transitional states during kexec).
     */
    if (guest_visible && !ppc64_use_proc_tbl(cpu)) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "LPCR:UPRT not set in radix mode ! LPCR="
                      TARGET_FMT_lx "\n", env->spr[SPR_LPCR]);
    }

    /* Virtual Mode Access - get the fully qualified address */
    if (!ppc_radix64_get_fully_qualified_addr(&cpu->env, eaddr, &lpid, &pid)) {
        if (guest_visible) {
            ppc_radix64_raise_segi(cpu, access_type, eaddr);
        }
        return false;
    }

    /* Get Process Table */
    if (cpu->vhyp) {
        PPCVirtualHypervisorClass *vhc;
        vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
        vhc->get_pate(cpu->vhyp, &pate);
    } else {
        if (!ppc64_v3_get_pate(cpu, lpid, &pate)) {
            if (guest_visible) {
                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_NOPTE);
            }
            return false;
        }
        if (!validate_pate(cpu, lpid, &pate)) {
            if (guest_visible) {
                ppc_radix64_raise_si(cpu, access_type, eaddr, DSISR_R_BADCONFIG);
            }
            return false;
        }
    }

    /* Start permissive; each translation step below narrows size/prot */
    *psizep = INT_MAX;
    *protp = PAGE_READ | PAGE_WRITE | PAGE_EXEC;

    /*
     * Perform process-scoped translation if relocation enabled.
     *
     * - Translates an effective address to a host real address in
     *   quadrants 0 and 3 when HV=1.
     *
     * - Translates an effective address to a guest real address.
     */
    if (relocation) {
        int ret = ppc_radix64_process_scoped_xlate(cpu, access_type, eaddr, pid,
                                                   pate, &g_raddr, &prot,
                                                   &psize, mmu_idx, guest_visible);
        if (ret) {
            return false;
        }
        *psizep = MIN(*psizep, psize);
        *protp &= prot;
    } else {
        g_raddr = eaddr & R_EADDR_MASK;
    }

    if (cpu->vhyp) {
        *raddr = g_raddr;
    } else {
        /*
         * Perform partition-scoped translation if !HV or HV access to
         * quadrants 1 or 2. Translates a guest real address to a host
         * real address.
         */
        if (lpid || !mmuidx_hv(mmu_idx)) {
            int ret;

            ret = ppc_radix64_partition_scoped_xlate(cpu, access_type, eaddr,
                                                     g_raddr, pate, raddr,
                                                     &prot, &psize, false,
                                                     mmu_idx, guest_visible);
            if (ret) {
                return false;
            }
            *psizep = MIN(*psizep, psize);
            *protp &= prot;
        } else {
            *raddr = g_raddr;
        }
    }

    return true;
}

/*
 * Public entry point: translate eaddr for the given access type and MMU
 * index, filling in the real address, page size (log2) and protection.
 * Returns true on success; on failure the appropriate interrupt state has
 * already been set up when guest_visible. Logging wrapper around
 * ppc_radix64_xlate_impl().
 */
bool ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type,
                       hwaddr *raddrp, int *psizep, int *protp, int mmu_idx,
                       bool guest_visible)
{
    bool ret = ppc_radix64_xlate_impl(cpu, eaddr, access_type, raddrp,
                                      psizep, protp, mmu_idx, guest_visible);

    qemu_log_mask(CPU_LOG_MMU, "%s for %s @0x%"VADDR_PRIx
                  " mmu_idx %u (prot %c%c%c) -> 0x%"HWADDR_PRIx"\n",
                  __func__, access_str(access_type),
                  eaddr, mmu_idx,
                  *protp & PAGE_READ ? 'r' : '-',
                  *protp & PAGE_WRITE ? 'w' : '-',
                  *protp & PAGE_EXEC ? 'x' : '-',
                  *raddrp);

    return ret;
}