// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
#include <linux/sizes.h>

#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/pte-walk.h>

/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *addr)
{
	return __va(ppc_find_vmap_phys((unsigned long)addr));
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm)
{
	int global;
	int cpu;

	/*
	 * If there is only one vcore, and it's currently running,
	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
	 * we can use tlbiel as long as we mark all other physical
	 * cores as potentially having stale TLB entries for this lpid.
	 * Otherwise, don't use tlbiel.
	 */
	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
		global = 0;
	else
		global = 1;

	/* LPID has been switched to host if in virt mode so can't do local */
	if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
		global = 1;

	if (!global) {
		/* any other core might now have stale TLB entries... */
		smp_wmb();
		cpumask_setall(&kvm->arch.need_tlb_flush);
		cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
		/*
		 * On POWER9, threads are independent but the TLB is shared,
		 * so use the bit for the first thread to represent the core.
		 */
		if (cpu_has_feature(CPU_FTR_ARCH_300))
			cpu = cpu_first_tlb_thread_sibling(cpu);
		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
	}

	return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
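 *
 * Descriptive note (inferred from the chain handling code in this file):
 * each guest real page has an rmap word (memslot->arch.rmap[] entry); when
 * KVMPPC_RMAP_PRESENT is set, its low KVMPPC_RMAP_INDEX bits hold the index
 * of one HPTE in a circular doubly-linked list of all HPTEs mapping the
 * page, linked through the forw/back fields of the revmap entries.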
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			     unsigned long *rmap, long pte_index, int realmode)
{
	struct revmap_entry *head, *tail;
	unsigned long i;

	if (*rmap & KVMPPC_RMAP_PRESENT) {
		i = *rmap & KVMPPC_RMAP_INDEX;
		head = &kvm->arch.hpt.rev[i];
		if (realmode)
			head = real_vmalloc_addr(head);
		tail = &kvm->arch.hpt.rev[head->back];
		if (realmode)
			tail = real_vmalloc_addr(tail);
		rev->forw = i;
		rev->back = head->back;
		tail->forw = pte_index;
		head->back = pte_index;
	} else {
		rev->forw = rev->back = pte_index;
		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
			pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
	}
	unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Update the dirty bitmap of a memslot */
void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
			     unsigned long gfn, unsigned long psize)
{
	unsigned long npages;

	if (!psize || !memslot->dirty_bitmap)
		return;
	npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
	gfn -= memslot->base_gfn;
	set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
}
EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);

static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
			unsigned long hpte_v, unsigned long hpte_gr)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn;
	unsigned long psize;

	psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
	gfn = hpte_rpn(hpte_gr, psize);
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslot && memslot->dirty_bitmap)
		kvmppc_update_dirty_map(memslot, gfn, psize);
}

/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
				      unsigned long hpte_gr,
				      struct kvm_memory_slot **memslotp,
				      unsigned long *gfnp)
{
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long gfn;

	gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslotp)
		*memslotp = memslot;
	if (gfnp)
		*gfnp = gfn;
	if (!memslot)
		return NULL;

	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
	return rmap;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *next, *prev;
	unsigned long ptel, head;
	unsigned long *rmap;
	unsigned long rcbits;
	struct kvm_memory_slot *memslot;
	unsigned long gfn;

	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
	if (!rmap)
		return;
	lock_rmap(rmap);

	head = *rmap & KVMPPC_RMAP_INDEX;
	next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
	prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
	next->back = rev->back;
	prev->forw = rev->forw;
	if (head == pte_index) {
		head = rev->forw;
		if (head == pte_index)
			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	if (rcbits & HPTE_R_C)
		kvmppc_update_dirty_map(memslot, gfn,
					kvmppc_actual_pgsz(hpte_v, hpte_r));
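	/*
	 * The accumulated R/C bits have been folded into the rmap word
	 * (and into the dirty map, for C) above, so the chain lock can
	 * be dropped now.
	 */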
	unlock_rmap(rmap);
}

long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	/*
	 * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
	 * these functions should work together -- we must ensure a guest
	 * cannot cause problems with the TLBIE that KVM executes.
	 */
	if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
		/* B=0b1x is a reserved value, disallow it. */
		return H_PARAMETER;
	}
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * The guest page size should always be <= the host page
		 * size, if the host is using hugepages.
		 */
		if (host_pte_size < psize) {
			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);

	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize - 1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/* If we had a host pte mapping then check WIMG */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
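		 * (The guest-visible copy, g_ptel, saved into guest_rpte
		 * below, keeps the WIMG bits the guest asked for; only the
		 * real HPTE is forced to W=I=G=0, M=1.)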
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it. Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
{
	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
				 vcpu->arch.pgdir, true,
				 &vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_enter);

#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif

static inline int is_mmio_hpte(unsigned long v, unsigned long r)
{
	return ((v & HPTE_V_ABSENT) &&
		(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}

static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* Radix flush for a hash guest */

		unsigned long rb, rs, prs, r, ric;

		rb = PPC_BIT(52); /* IS = 2 */
		rs = 0;		  /* lpid = 0 */
		prs = 0;	  /* partition scoped */
		r = 1;		  /* radix format */
		ric = 0;	  /* RIC_FLUSH_TLB */

		/*
		 * Need the extra ptesync to make sure we don't
		 * re-order the tlbie
		 */
		asm volatile("ptesync" : : : "memory");
		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
			     : : "r"(rb), "i"(r), "i"(prs),
			       "i"(ric), "r"(rs) : "memory");
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync" : : : "memory");
		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 0) : :
			     "r" (rb_value), "r" (lpid));
	}
}

static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long i;

	/*
	 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
	 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
	 * the RS field, this is backwards-compatible with P7 and P8.
	 */
	if (global) {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 0) : :
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		}

		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	} else {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIEL(%0, %1, 0, 0, 0) : :
				     "r" (rbvalues[i]), "r" (0));
		}
		asm volatile("ptesync" : : : "memory");
	}
}

long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
			unsigned long pte_index, unsigned long avpn,
			unsigned long *hpret)
{
	__be64 *hpte;
	unsigned long v, r, rb;
	struct revmap_entry *rev;
	u64 pte, orig_pte, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = orig_pte = be64_to_cpu(hpte[0]);
	pte_r = be64_to_cpu(hpte[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		pte = hpte_new_to_old_v(pte, pte_r);
		pte_r = hpte_new_to_old_r(pte_r);
	}
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
		__unlock_hpte(hpte, orig_pte);
		return H_NOT_FOUND;
	}

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	v = pte & ~HPTE_V_HVLOCK;
	if (v & HPTE_V_VALID) {
		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
		rb = compute_tlbie_rb(v, pte_r, pte_index);
		do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
		/*
		 * The reference (R) and change (C) bits in a HPT
		 * entry can be set by hardware at any time up until
		 * the HPTE is invalidated and the TLB invalidation
		 * sequence has completed. This means that when
		 * removing a HPTE, we need to re-read the HPTE after
		 * the invalidation sequence has completed in order to
		 * obtain reliable values of R and C.
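		 * That is why remove_revmap_chain() below is passed a
		 * fresh read of hpte[1] rather than the pte_r value
		 * sampled before the tlbie.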
		 */
		remove_revmap_chain(kvm, pte_index, rev, v,
				    be64_to_cpu(hpte[1]));
	}
	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	if (v & HPTE_V_ABSENT)
		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
		     unsigned long pte_index, unsigned long avpn)
{
	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
				  &vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_remove);

long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *args = &vcpu->arch.regs.gpr[4];
	__be64 *hp, *hptes[4];
	unsigned long tlbrb[4];
	long int i, j, k, n, found, indexes[4];
	unsigned long flags, req, pte_index, rcbits;
	int global;
	long int ret = H_SUCCESS;
	struct revmap_entry *rev, *revs[4];
	u64 hp0, hp1;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	global = global_invalidates(kvm);
	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
		n = 0;
		for (; i < 4; ++i) {
			j = i * 2;
			pte_index = args[j];
			flags = pte_index >> 56;
			pte_index &= ((1ul << 56) - 1);
			req = flags >> 6;
			flags &= 3;
			if (req == 3) {		/* no more requests */
				i = 4;
				break;
			}
			if (req != 1 || flags == 3 ||
			    pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
				/* parameter error */
				args[j] = ((0xa0 | flags) << 56) + pte_index;
				ret = H_PARAMETER;
				break;
			}
			hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
				if (n)
					break;
				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
					cpu_relax();
			}
			found = 0;
			hp0 = be64_to_cpu(hp[0]);
			hp1 = be64_to_cpu(hp[1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				hp0 = hpte_new_to_old_v(hp0, hp1);
				hp1 = hpte_new_to_old_r(hp1);
			}
			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
				case 0:		/* absolute */
					found = 1;
					break;
				case 1:		/* andcond */
					if (!(hp0 & args[j + 1]))
						found = 1;
					break;
				case 2:		/* AVPN */
					if ((hp0 & ~0x7fUL) == args[j + 1])
						found = 1;
					break;
				}
			}
			if (!found) {
				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
				args[j] = ((0x90 | flags) << 56) + pte_index;
				continue;
			}

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
			note_hpte_modification(kvm, rev);

			if (!(hp0 & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);
				hp[0] = 0;
				if (is_mmio_hpte(hp0, hp1))
					atomic64_inc(&kvm->arch.mmio_update);
				continue;
			}

			/* leave it locked */
			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
			tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
			indexes[n] = j;
			hptes[n] = hp;
			revs[n] = rev;
			++n;
		}

		if (!n)
			break;

		/* Now that we've collected a batch, do the tlbies */
		do_tlbies(kvm, tlbrb, n, global, true);

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, rev,
					    be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			__unlock_hpte(hp, 0);
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);

long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
		      unsigned long pte_index, unsigned long avpn)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long v, r, rb, mask, bits;
	u64 pte_v, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = pte_v = be64_to_cpu(hpte[0]);
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
	if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
		__unlock_hpte(hpte, pte_v);
		return H_NOT_FOUND;
	}

	pte_r = be64_to_cpu(hpte[1]);
	bits = (flags << 55) & HPTE_R_PP0;
	bits |= (flags << 48) & HPTE_R_KEY_HI;
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

	/* Update guest view of 2nd HPTE dword */
	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	if (rev) {
		r = (rev->guest_rpte & ~mask) | bits;
		rev->guest_rpte = r;
		note_hpte_modification(kvm, rev);
	}

	/* Update HPTE */
	if (v & HPTE_V_VALID) {
		/*
		 * If the page is valid, don't let it transition from
		 * readonly to writable. If it should be writable, we'll
		 * take a trap and let the page fault code sort it out.
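		 * Changing any other bits of a valid HPTE likewise means
		 * invalidating it and flushing the TLB first (the
		 * r != pte_r path below), so the MMU never uses a mix of
		 * old and new values.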
		 */
		r = (pte_r & ~mask) | bits;
		if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
			r = hpte_make_readonly(r);
		/* If the PTE is changing, invalidate it first */
		if (r != pte_r) {
			rb = compute_tlbie_rb(v, r, pte_index);
			hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
					      HPTE_V_ABSENT);
			do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
			/* Don't lose R/C bit updates done by hardware */
			r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
			hpte[1] = cpu_to_be64(r);
		}
	}
	unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
	asm volatile("ptesync" : : : "memory");
	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_protect);

long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		   unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r;
	int i, n = 1;
	struct revmap_entry *rev = NULL;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (flags & H_READ_4) {
		pte_index &= ~3;
		n = 4;
	}
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
			r &= ~HPTE_GR_RESERVED;
		}
		vcpu->arch.regs.gpr[4 + i * 2] = v;
		vcpu->arch.regs.gpr[5 + i * 2] = r;
	}
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_read);

long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (rev->guest_rpte & HPTE_R_R) {
		rev->guest_rpte &= ~HPTE_R_R;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_R) {
			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
			rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_REFERENCED;
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);

long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (gr & HPTE_R_C) {
		rev->guest_rpte &= ~HPTE_R_C;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		/* need to make it temporarily absent so C is stable */
		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
		r = be64_to_cpu(hpte[1]);
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_C) {
			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
			eieio();
			kvmppc_set_dirty_from_hpte(kvm, v, gr);
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);

static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
			  unsigned long gpa, int writing, unsigned long *hpa,
			  struct kvm_memory_slot **memslot_p)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep, pte;

	/* Find the memslot for this address */
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return H_PARAMETER;

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	/* Try to find the host pte for that virtual address */
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
	if (!ptep)
		return H_TOO_HARD;
	pte = kvmppc_read_update_linux_pte(ptep, writing);
	if (!pte_present(pte))
		return H_TOO_HARD;

	/* Convert to a physical address */
	if (shift)
		psize = 1UL << shift;
	pa = pte_pfn(pte) << PAGE_SHIFT;
	pa |= hva & (psize - 1);
	pa |= gpa & ~PAGE_MASK;

	if (hpa)
		*hpa = pa;
	if (memslot_p)
		*memslot_p = memslot;

	return H_SUCCESS;
}

static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
				       unsigned long dest)
{
	struct kvm_memory_slot *memslot;
	struct kvm *kvm = vcpu->kvm;
	unsigned long pa, mmu_seq;
	long ret = H_SUCCESS;
	int i;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);

	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Zero the page */
	for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
		dcbz((void *)pa);
	kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

 out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
				       unsigned long dest, unsigned long src)
{
	unsigned long dest_pa, src_pa, mmu_seq;
	struct kvm_memory_slot *dest_memslot;
	struct kvm *kvm = vcpu->kvm;
	long ret = H_SUCCESS;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
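	/*
	 * Both translations below must stay valid until the copy is done;
	 * taking the raw mmu_lock above keeps MMU notifier invalidations
	 * out in the meantime (the same scheme as
	 * kvmppc_do_h_page_init_zero()).
	 */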
	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Copy the page */
	memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);

	kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

 out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
			   unsigned long dest, unsigned long src)
{
	struct kvm *kvm = vcpu->kvm;
	u64 pg_mask = SZ_4K - 1;	/* 4K page size */
	long ret = H_SUCCESS;

	/* Don't handle radix mode here, go up to the virtual mode handler */
	if (kvm_is_radix(kvm))
		return H_TOO_HARD;

	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
		return H_PARAMETER;

	/* dest (and src if copy_page flag set) must be page aligned */
	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
		return H_PARAMETER;

	/* zero and/or copy the page as determined by the flags */
	if (flags & H_COPY_PAGE)
		ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
	else if (flags & H_ZERO_PAGE)
		ret = kvmppc_do_h_page_init_zero(vcpu, dest);

	/* We can ignore the other flags */

	return ret;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			    unsigned long pte_index)
{
	unsigned long rb;
	u64 hp0, hp1;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			   unsigned long pte_index)
{
	unsigned long rb;
	unsigned char rbyte;
	u64 hp0, hp1;

	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = rbyte;
	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};

static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
		unsigned long eaddr, unsigned long slb_v, long mmio_update)
{
	struct mmio_hpte_cache_entry *entry = NULL;
	unsigned int pshift;
	unsigned int i;

	for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
		entry = &vcpu->arch.mmio_cache.entry[i];
		if (entry->mmio_update == mmio_update) {
			pshift = entry->slb_base_pshift;
			if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
			    entry->slb_v == slb_v)
				return entry;
		}
	}
	return NULL;
}

static struct mmio_hpte_cache_entry *
			next_mmio_cache_entry(struct kvm_vcpu *vcpu)
{
	unsigned int index = vcpu->arch.mmio_cache.index;

	vcpu->arch.mmio_cache.index++;
	if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
		vcpu->arch.mmio_cache.index = 0;

	return &vcpu->arch.mmio_cache.entry[index];
}

/* When called from virtual mode, this function must be called under
 * preempt_disable(); otherwise, being preempted while holding
 * HPTE_V_HVLOCK can lead to deadlock.
 */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
			      unsigned long valid)
{
	unsigned int i;
	unsigned int pshift;
	unsigned long somask;
	unsigned long vsid, hash;
	unsigned long avpn;
	__be64 *hpte;
	unsigned long mask, val;
	unsigned long v, r, orig_v;

	/* Get page shift, work out hash and AVPN etc. */
	mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
	val = 0;
	pshift = 12;
	if (slb_v & SLB_VSID_L) {
		mask |= HPTE_V_LARGE;
		val |= HPTE_V_LARGE;
		pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
	}
	if (slb_v & SLB_VSID_B_1T) {
		somask = (1UL << 40) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
		vsid ^= vsid << 25;
	} else {
		somask = (1UL << 28) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
	}
	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
	avpn = slb_v & ~(somask >> 16);	/* also includes B */
	avpn |= (eaddr & somask) >> 16;

	if (pshift >= 24)
		avpn &= ~((1UL << (pshift - 16)) - 1);
	else
		avpn &= ~0x7fUL;
	val |= avpn;

	for (;;) {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));

		for (i = 0; i < 16; i += 2) {
			/* Read the PTE racily */
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			if (cpu_has_feature(CPU_FTR_ARCH_300))
				v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));

			/* Check valid/absent, hash, segment size and AVPN */
			if (!(v & valid) || (v & mask) != val)
				continue;

			/* Lock the PTE and read it under the lock */
			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			r = be64_to_cpu(hpte[i+1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				v = hpte_new_to_old_v(v, r);
				r = hpte_new_to_old_r(r);
			}

			/*
			 * Check the HPTE again, including base page size
			 */
			if ((v & valid) && (v & mask) == val &&
			    kvmppc_hpte_base_page_shift(v, r) == pshift)
				/* Return with the HPTE still locked */
				return (hash << 3) + (i >> 1);

			__unlock_hpte(&hpte[i], orig_v);
		}

		if (val & HPTE_V_SECONDARY)
			break;
		val |= HPTE_V_SECONDARY;
		hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
	}
	return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if the fault
 * was not handled here (i.e. the interrupt should be passed to the
 * guest), -1 to pass the fault up to host kernel mode code, -2 to do
 * that and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
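 *
 * For instruction faults the status parameter and the returned value
 * carry SRR1 bits rather than DSISR bits; the code below relies on
 * DSISR_NOHPTE and SRR1_ISI_NOPT being the same bit.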
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			  unsigned long slb_v, unsigned int status, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	long int index;
	unsigned long v, r, gr, orig_v;
	__be64 *hpte;
	unsigned long valid;
	struct revmap_entry *rev;
	unsigned long pp, key;
	struct mmio_hpte_cache_entry *cache_entry = NULL;
	long mmio_update = 0;

	/* For protection fault, expect to find a valid HPTE */
	valid = HPTE_V_VALID;
	if (status & DSISR_NOHPTE) {
		valid |= HPTE_V_ABSENT;
		mmio_update = atomic64_read(&kvm->arch.mmio_update);
		cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
	}
	if (cache_entry) {
		index = cache_entry->pte_index;
		v = cache_entry->hpte_v;
		r = cache_entry->hpte_r;
		gr = cache_entry->rpte;
	} else {
		index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
		if (index < 0) {
			if (status & DSISR_NOHPTE)
				return status;	/* there really was no HPTE */
			return 0;		/* for prot fault, HPTE disappeared */
		}
		hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
		v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
		gr = rev->guest_rpte;

		unlock_hpte(hpte, orig_v);
	}

	/* For not found, if the HPTE is valid by now, retry the instruction */
	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
		return 0;

	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ?
		SLB_VSID_KP : SLB_VSID_KS;
	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
	if (!data) {
		if (gr & (HPTE_R_N | HPTE_R_G))
			return status | SRR1_ISI_N_G_OR_CIP;
		if (!hpte_read_permission(pp, slb_v & key))
			return status | SRR1_ISI_PROT;
	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	}

	/* Check storage key, if applicable */
	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
			return status | DSISR_KEYFAULT;
	}

	/* Save HPTE info for virtual-mode handler */
	vcpu->arch.pgfault_addr = addr;
	vcpu->arch.pgfault_index = index;
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;
	vcpu->arch.pgfault_cache = cache_entry;

	/* Check the storage key to see if it is possibly emulated MMIO */
	if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
		if (!cache_entry) {
			unsigned int pshift = 12;
			unsigned int pshift_index;

			if (slb_v & SLB_VSID_L) {
				pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
				pshift = slb_base_page_shift[pshift_index];
			}
			cache_entry = next_mmio_cache_entry(vcpu);
			cache_entry->eaddr = addr;
			cache_entry->slb_base_pshift = pshift;
			cache_entry->pte_index = index;
			cache_entry->hpte_v = v;
			cache_entry->hpte_r = r;
			cache_entry->rpte = gr;
			cache_entry->slb_v = slb_v;
			cache_entry->mmio_update = mmio_update;
		}
		if (data && (vcpu->arch.shregs.msr & MSR_IR))
			return -2;	/* MMIO emulation - load instr word */
	}

	return -1;		/* send fault up to host kernel mode */
}
EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);