// SPDX-License-Identifier: GPL-2.0-only
/*
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
#include <linux/log2.h>
#include <linux/sizes.h>

#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/pte-walk.h>

/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *addr)
{
	return __va(ppc_find_vmap_phys((unsigned long)addr));
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm)
{
	int global;
	int cpu;

	/*
	 * If there is only one vcore, and it's currently running,
	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
	 * we can use tlbiel as long as we mark all other physical
	 * cores as potentially having stale TLB entries for this lpid.
	 * Otherwise, don't use tlbiel.
	 */
	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
		global = 0;
	else
		global = 1;

	/* LPID has been switched to host if in virt mode so can't do local */
	if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
		global = 1;

	if (!global) {
		/* any other core might now have stale TLB entries... */
		smp_wmb();
		cpumask_setall(&kvm->arch.need_tlb_flush);
		cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
		/*
		 * On POWER9, threads are independent but the TLB is shared,
		 * so use the bit for the first thread to represent the core.
		 */
		if (cpu_has_feature(CPU_FTR_ARCH_300))
			cpu = cpu_first_tlb_thread_sibling(cpu);
		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
	}

	return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
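 * In real mode the revmap entries are reached via real_vmalloc_addr(),
 * since kvm->arch.hpt.rev is vmalloc'd.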
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
			     unsigned long *rmap, long pte_index, int realmode)
{
	struct revmap_entry *head, *tail;
	unsigned long i;

	if (*rmap & KVMPPC_RMAP_PRESENT) {
		i = *rmap & KVMPPC_RMAP_INDEX;
		head = &kvm->arch.hpt.rev[i];
		if (realmode)
			head = real_vmalloc_addr(head);
		tail = &kvm->arch.hpt.rev[head->back];
		if (realmode)
			tail = real_vmalloc_addr(tail);
		rev->forw = i;
		rev->back = head->back;
		tail->forw = pte_index;
		head->back = pte_index;
	} else {
		rev->forw = rev->back = pte_index;
		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
			pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
	}
	unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/* Update the dirty bitmap of a memslot */
void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
			     unsigned long gfn, unsigned long psize)
{
	unsigned long npages;

	if (!psize || !memslot->dirty_bitmap)
		return;
	npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
	gfn -= memslot->base_gfn;
	set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
}
EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);

static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
				       unsigned long hpte_v, unsigned long hpte_gr)
{
	struct kvm_memory_slot *memslot;
	unsigned long gfn;
	unsigned long psize;

	psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
	gfn = hpte_rpn(hpte_gr, psize);
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslot && memslot->dirty_bitmap)
		kvmppc_update_dirty_map(memslot, gfn, psize);
}

/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
				      unsigned long hpte_gr,
				      struct kvm_memory_slot **memslotp,
				      unsigned long *gfnp)
{
	struct kvm_memory_slot *memslot;
	unsigned long *rmap;
	unsigned long gfn;

	gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (memslotp)
		*memslotp = memslot;
	if (gfnp)
		*gfnp = gfn;
	if (!memslot)
		return NULL;

	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
	return rmap;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
				struct revmap_entry *rev,
				unsigned long hpte_v, unsigned long hpte_r)
{
	struct revmap_entry *next, *prev;
	unsigned long ptel, head;
	unsigned long *rmap;
	unsigned long rcbits;
	struct kvm_memory_slot *memslot;
	unsigned long gfn;

	rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
	ptel = rev->guest_rpte |= rcbits;
	rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
	if (!rmap)
		return;
	lock_rmap(rmap);

	head = *rmap & KVMPPC_RMAP_INDEX;
	next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
	prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
	next->back = rev->back;
	prev->forw = rev->forw;
	if (head == pte_index) {
		head = rev->forw;
		if (head == pte_index)
			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
		else
			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
	}
	*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
	if (rcbits & HPTE_R_C)
		kvmppc_update_dirty_map(memslot, gfn,
					kvmppc_actual_pgsz(hpte_v, hpte_r));
	unlock_rmap(rmap);
}

long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * We should always find the guest page size
		 * to be <= the host page size, if the host is
		 * using hugepages.
		 */
		if (host_pte_size < psize) {
			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);

	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize - 1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/* If we had a host pte mapping then check WIMG */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
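		 * (This is done below by clearing W/I/G and setting M.)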
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it. Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
		    long pte_index, unsigned long pteh, unsigned long ptel)
{
	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
				 vcpu->arch.pgdir, true,
				 &vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_enter);

#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
#else
#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
#endif

static inline int is_mmio_hpte(unsigned long v, unsigned long r)
{
	return ((v & HPTE_V_ABSENT) &&
		(r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
		(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}

static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* Radix flush for a hash guest */

		unsigned long rb, rs, prs, r, ric;

		rb = PPC_BIT(52); /* IS = 2 */
		rs = 0;		  /* lpid = 0 */
		prs = 0;	  /* partition scoped */
		r = 1;		  /* radix format */
		ric = 0;	  /* RIC_FLUSH_TLB */

		/*
		 * Need the extra ptesync to make sure we don't
		 * re-order the tlbie
		 */
		asm volatile("ptesync": : :"memory");
		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
			     : : "r"(rb), "i"(r), "i"(prs),
			       "i"(ric), "r"(rs) : "memory");
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
			     "r" (rb_value), "r" (lpid));
	}
}

static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
		      long npages, int global, bool need_sync)
{
	long i;

	/*
	 * We use the POWER9 5-operand versions of tlbie and tlbiel here.
	 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
	 * the RS field, this is backwards-compatible with P7 and P8.
	 */
	if (global) {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
		}

		fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	} else {
		if (need_sync)
			asm volatile("ptesync" : : : "memory");
		for (i = 0; i < npages; ++i) {
			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
				     "r" (rbvalues[i]), "r" (0));
		}
		asm volatile("ptesync" : : : "memory");
	}
}

long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
			unsigned long pte_index, unsigned long avpn,
			unsigned long *hpret)
{
	__be64 *hpte;
	unsigned long v, r, rb;
	struct revmap_entry *rev;
	u64 pte, orig_pte, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	pte = orig_pte = be64_to_cpu(hpte[0]);
	pte_r = be64_to_cpu(hpte[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		pte = hpte_new_to_old_v(pte, pte_r);
		pte_r = hpte_new_to_old_r(pte_r);
	}
	if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
	    ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
		__unlock_hpte(hpte, orig_pte);
		return H_NOT_FOUND;
	}

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	v = pte & ~HPTE_V_HVLOCK;
	if (v & HPTE_V_VALID) {
		hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
		rb = compute_tlbie_rb(v, pte_r, pte_index);
		do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
		/*
		 * The reference (R) and change (C) bits in a HPT
		 * entry can be set by hardware at any time up until
		 * the HPTE is invalidated and the TLB invalidation
		 * sequence has completed. This means that when
		 * removing a HPTE, we need to re-read the HPTE after
		 * the invalidation sequence has completed in order to
		 * obtain reliable values of R and C.
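		 * That is why hpte[1] is re-read below, after do_tlbies().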
		 */
		remove_revmap_chain(kvm, pte_index, rev, v,
				    be64_to_cpu(hpte[1]));
	}
	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
	note_hpte_modification(kvm, rev);
	unlock_hpte(hpte, 0);

	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	if (v & HPTE_V_ABSENT)
		v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
	hpret[0] = v;
	hpret[1] = r;
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
		     unsigned long pte_index, unsigned long avpn)
{
	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
				  &vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_remove);

long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long *args = &vcpu->arch.regs.gpr[4];
	__be64 *hp, *hptes[4];
	unsigned long tlbrb[4];
	long int i, j, k, n, found, indexes[4];
	unsigned long flags, req, pte_index, rcbits;
	int global;
	long int ret = H_SUCCESS;
	struct revmap_entry *rev, *revs[4];
	u64 hp0, hp1;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	global = global_invalidates(kvm);
	for (i = 0; i < 4 && ret == H_SUCCESS; ) {
		n = 0;
		for (; i < 4; ++i) {
			j = i * 2;
			pte_index = args[j];
			flags = pte_index >> 56;
			pte_index &= ((1ul << 56) - 1);
			req = flags >> 6;
			flags &= 3;
			if (req == 3) {		/* no more requests */
				i = 4;
				break;
			}
			if (req != 1 || flags == 3 ||
			    pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
				/* parameter error */
				args[j] = ((0xa0 | flags) << 56) + pte_index;
				ret = H_PARAMETER;
				break;
			}
			hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
			/* to avoid deadlock, don't spin except for first */
			if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
				if (n)
					break;
				while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
					cpu_relax();
			}
			found = 0;
			hp0 = be64_to_cpu(hp[0]);
			hp1 = be64_to_cpu(hp[1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				hp0 = hpte_new_to_old_v(hp0, hp1);
				hp1 = hpte_new_to_old_r(hp1);
			}
			if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
				switch (flags & 3) {
				case 0:		/* absolute */
					found = 1;
					break;
				case 1:		/* andcond */
					if (!(hp0 & args[j + 1]))
						found = 1;
					break;
				case 2:		/* AVPN */
					if ((hp0 & ~0x7fUL) == args[j + 1])
						found = 1;
					break;
				}
			}
			if (!found) {
				hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
				args[j] = ((0x90 | flags) << 56) + pte_index;
				continue;
			}

			args[j] = ((0x80 | flags) << 56) + pte_index;
			rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
			note_hpte_modification(kvm, rev);

			if (!(hp0 & HPTE_V_VALID)) {
				/* insert R and C bits from PTE */
				rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
				args[j] |= rcbits << (56 - 5);
				hp[0] = 0;
				if (is_mmio_hpte(hp0, hp1))
					atomic64_inc(&kvm->arch.mmio_update);
				continue;
			}

			/* leave it locked */
			hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
			tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
			indexes[n] = j;
			hptes[n] = hp;
			revs[n] = rev;
			++n;
		}

		if (!n)
			break;

		/* Now that we've collected a batch, do the tlbies */
		do_tlbies(kvm, tlbrb, n, global, true);

		/* Read PTE low words after tlbie to get final R/C values */
		for (k = 0; k < n; ++k) {
			j = indexes[k];
			pte_index = args[j] & ((1ul << 56) - 1);
			hp = hptes[k];
			rev = revs[k];
			remove_revmap_chain(kvm, pte_index, rev,
					    be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
			rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
			args[j] |= rcbits << (56 - 5);
			__unlock_hpte(hp, 0);
		}
	}

	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);

long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
		      unsigned long pte_index, unsigned long avpn)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long v, r, rb, mask, bits;
	u64 pte_v, pte_r;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = pte_v = be64_to_cpu(hpte[0]);
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
	if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
	    ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
		__unlock_hpte(hpte, pte_v);
		return H_NOT_FOUND;
	}

	pte_r = be64_to_cpu(hpte[1]);
	bits = (flags << 55) & HPTE_R_PP0;
	bits |= (flags << 48) & HPTE_R_KEY_HI;
	bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

	/* Update guest view of 2nd HPTE dword */
	mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
		HPTE_R_KEY_HI | HPTE_R_KEY_LO;
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	if (rev) {
		r = (rev->guest_rpte & ~mask) | bits;
		rev->guest_rpte = r;
		note_hpte_modification(kvm, rev);
	}

	/* Update HPTE */
	if (v & HPTE_V_VALID) {
		/*
		 * If the page is valid, don't let it transition from
		 * readonly to writable. If it should be writable, we'll
		 * take a trap and let the page fault code sort it out.
		 */
		r = (pte_r & ~mask) | bits;
		if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
			r = hpte_make_readonly(r);
		/* If the PTE is changing, invalidate it first */
		if (r != pte_r) {
			rb = compute_tlbie_rb(v, r, pte_index);
			hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
					      HPTE_V_ABSENT);
			do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
			/* Don't lose R/C bit updates done by hardware */
			r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
			hpte[1] = cpu_to_be64(r);
		}
	}
	unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
	asm volatile("ptesync" : : : "memory");
	if (is_mmio_hpte(v, pte_r))
		atomic64_inc(&kvm->arch.mmio_update);

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_protect);

long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
		   unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r;
	int i, n = 1;
	struct revmap_entry *rev = NULL;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (flags & H_READ_4) {
		pte_index &= ~3;
		n = 4;
	}
	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	for (i = 0; i < n; ++i, ++pte_index) {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		if (v & HPTE_V_ABSENT) {
			v &= ~HPTE_V_ABSENT;
			v |= HPTE_V_VALID;
		}
		if (v & HPTE_V_VALID) {
			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
			r &= ~HPTE_GR_RESERVED;
		}
		vcpu->arch.regs.gpr[4 + i * 2] = v;
		vcpu->arch.regs.gpr[5 + i * 2] = r;
	}
	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_read);

long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	unsigned long *rmap;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (rev->guest_rpte & HPTE_R_R) {
		rev->guest_rpte &= ~HPTE_R_R;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_R) {
			kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
			rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
			if (rmap) {
				lock_rmap(rmap);
				*rmap |= KVMPPC_RMAP_REFERENCED;
				unlock_rmap(rmap);
			}
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);

long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
			unsigned long pte_index)
{
	struct kvm *kvm = vcpu->kvm;
	__be64 *hpte;
	unsigned long v, r, gr;
	struct revmap_entry *rev;
	long ret = H_NOT_FOUND;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;

	rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
	hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
	while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
		cpu_relax();
	v = be64_to_cpu(hpte[0]);
	r = be64_to_cpu(hpte[1]);
	if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
		goto out;

	gr = rev->guest_rpte;
	if (gr & HPTE_R_C) {
		rev->guest_rpte &= ~HPTE_R_C;
		note_hpte_modification(kvm, rev);
	}
	if (v & HPTE_V_VALID) {
		/* need to make it temporarily absent so C is stable */
		hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
		kvmppc_invalidate_hpte(kvm, hpte, pte_index);
		r = be64_to_cpu(hpte[1]);
		gr |= r & (HPTE_R_R | HPTE_R_C);
		if (r & HPTE_R_C) {
			hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
			eieio();
			kvmppc_set_dirty_from_hpte(kvm, v, gr);
		}
	}
	vcpu->arch.regs.gpr[4] = gr;
	ret = H_SUCCESS;
 out:
	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
	return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);

static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
			  unsigned long gpa, int writing, unsigned long *hpa,
			  struct kvm_memory_slot **memslot_p)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	unsigned long gfn, hva, pa, psize = PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep, pte;

	/* Find the memslot for this address */
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		return H_PARAMETER;

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	/* Try to find the host pte for that virtual address */
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
	if (!ptep)
		return H_TOO_HARD;
	pte = kvmppc_read_update_linux_pte(ptep, writing);
	if (!pte_present(pte))
		return H_TOO_HARD;

	/* Convert to a physical address */
	if (shift)
		psize = 1UL << shift;
	pa = pte_pfn(pte) << PAGE_SHIFT;
	pa |= hva & (psize - 1);
	pa |= gpa & ~PAGE_MASK;

	if (hpa)
		*hpa = pa;
	if (memslot_p)
		*memslot_p = memslot;

	return H_SUCCESS;
}

static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
				       unsigned long dest)
{
	struct kvm_memory_slot *memslot;
	struct kvm *kvm = vcpu->kvm;
	unsigned long pa, mmu_seq;
	long ret = H_SUCCESS;
	int i;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);

	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Zero the page */
	for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES)
		dcbz((void *)pa);
	kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
				       unsigned long dest, unsigned long src)
{
	unsigned long dest_pa, src_pa, mmu_seq;
	struct kvm_memory_slot *dest_memslot;
	struct kvm *kvm = vcpu->kvm;
	long ret = H_SUCCESS;

	/* Used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
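	/*
	 * Both translations are done while holding kvm->mmu_lock, so the
	 * mmu_notifier sequence check in find_kvm_host_pte() stays valid
	 * until the copy below has completed.
	 */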
	ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot);
	if (ret != H_SUCCESS)
		goto out_unlock;

	ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL);
	if (ret != H_SUCCESS)
		goto out_unlock;

	/* Copy the page */
	memcpy((void *)dest_pa, (void *)src_pa, SZ_4K);

	kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE);

out_unlock:
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
	return ret;
}

long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
			   unsigned long dest, unsigned long src)
{
	struct kvm *kvm = vcpu->kvm;
	u64 pg_mask = SZ_4K - 1;	/* 4K page size */
	long ret = H_SUCCESS;

	/* Don't handle radix mode here, go up to the virtual mode handler */
	if (kvm_is_radix(kvm))
		return H_TOO_HARD;

	/* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
		return H_PARAMETER;

	/* dest (and src if copy_page flag set) must be page aligned */
	if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
		return H_PARAMETER;

	/* zero and/or copy the page as determined by the flags */
	if (flags & H_COPY_PAGE)
		ret = kvmppc_do_h_page_init_copy(vcpu, dest, src);
	else if (flags & H_ZERO_PAGE)
		ret = kvmppc_do_h_page_init_zero(vcpu, dest);

	/* We can ignore the other flags */

	return ret;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
			    unsigned long pte_index)
{
	unsigned long rb;
	u64 hp0, hp1;

	hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
			   unsigned long pte_index)
{
	unsigned long rb;
	unsigned char rbyte;
	u64 hp0, hp1;

	hp0 = be64_to_cpu(hptep[0]);
	hp1 = be64_to_cpu(hptep[1]);
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hp0 = hpte_new_to_old_v(hp0, hp1);
		hp1 = hpte_new_to_old_r(hp1);
	}
	rb = compute_tlbie_rb(hp0, hp1, pte_index);
	rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
	/* modify only the second-last byte, which contains the ref bit */
	*((char *)hptep + 14) = rbyte;
	do_tlbies(kvm, &rb, 1, 1, false);
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
	24,	/* 16M */
	16,	/* 64k */
	34,	/* 16G */
	20,	/* 1M, unsupported */
};

static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
		unsigned long eaddr, unsigned long slb_v, long mmio_update)
{
	struct mmio_hpte_cache_entry *entry = NULL;
	unsigned int pshift;
	unsigned int i;

	for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
		entry = &vcpu->arch.mmio_cache.entry[i];
		if (entry->mmio_update == mmio_update) {
			pshift = entry->slb_base_pshift;
			if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
			    entry->slb_v == slb_v)
				return entry;
		}
	}
	return NULL;
}

static struct mmio_hpte_cache_entry *
			next_mmio_cache_entry(struct kvm_vcpu *vcpu)
{
	unsigned int index = vcpu->arch.mmio_cache.index;

	vcpu->arch.mmio_cache.index++;
	if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
		vcpu->arch.mmio_cache.index = 0;

	return &vcpu->arch.mmio_cache.entry[index];
}

/* When called from virtual mode, this function must be protected by
 * preempt_disable(); otherwise, holding HPTE_V_HVLOCK across a
 * preemption can lead to deadlock.
 */
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
			      unsigned long valid)
{
	unsigned int i;
	unsigned int pshift;
	unsigned long somask;
	unsigned long vsid, hash;
	unsigned long avpn;
	__be64 *hpte;
	unsigned long mask, val;
	unsigned long v, r, orig_v;

	/* Get page shift, work out hash and AVPN etc. */
	mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
	val = 0;
	pshift = 12;
	if (slb_v & SLB_VSID_L) {
		mask |= HPTE_V_LARGE;
		val |= HPTE_V_LARGE;
		pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
	}
	if (slb_v & SLB_VSID_B_1T) {
		somask = (1UL << 40) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
		vsid ^= vsid << 25;
	} else {
		somask = (1UL << 28) - 1;
		vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
	}
	hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
	avpn = slb_v & ~(somask >> 16);	/* also includes B */
	avpn |= (eaddr & somask) >> 16;

	if (pshift >= 24)
		avpn &= ~((1UL << (pshift - 16)) - 1);
	else
		avpn &= ~0x7fUL;
	val |= avpn;

	for (;;) {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));

		for (i = 0; i < 16; i += 2) {
			/* Read the PTE racily */
			v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			if (cpu_has_feature(CPU_FTR_ARCH_300))
				v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));

			/* Check valid/absent, hash, segment size and AVPN */
			if (!(v & valid) || (v & mask) != val)
				continue;

			/* Lock the PTE and read it under the lock */
			while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
				cpu_relax();
			v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
			r = be64_to_cpu(hpte[i+1]);
			if (cpu_has_feature(CPU_FTR_ARCH_300)) {
				v = hpte_new_to_old_v(v, r);
				r = hpte_new_to_old_r(r);
			}

			/*
			 * Check the HPTE again, including base page size
			 */
			if ((v & valid) && (v & mask) == val &&
			    kvmppc_hpte_base_page_shift(v, r) == pshift)
				/* Return with the HPTE still locked */
				return (hash << 3) + (i >> 1);

			__unlock_hpte(&hpte[i], orig_v);
		}

		if (val & HPTE_V_SECONDARY)
			break;
		val |= HPTE_V_SECONDARY;
		hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
	}
	return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if none of these
 * apply (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			  unsigned long slb_v, unsigned int status, bool data)
{
	struct kvm *kvm = vcpu->kvm;
	long int index;
	unsigned long v, r, gr, orig_v;
	__be64 *hpte;
	unsigned long valid;
	struct revmap_entry *rev;
	unsigned long pp, key;
	struct mmio_hpte_cache_entry *cache_entry = NULL;
	long mmio_update = 0;

	/* For protection fault, expect to find a valid HPTE */
	valid = HPTE_V_VALID;
	if (status & DSISR_NOHPTE) {
		valid |= HPTE_V_ABSENT;
		mmio_update = atomic64_read(&kvm->arch.mmio_update);
		cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
	}
	if (cache_entry) {
		index = cache_entry->pte_index;
		v = cache_entry->hpte_v;
		r = cache_entry->hpte_r;
		gr = cache_entry->rpte;
	} else {
		index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
		if (index < 0) {
			if (status & DSISR_NOHPTE)
				return status;	/* there really was no HPTE */
			return 0;	/* for prot fault, HPTE disappeared */
		}
		hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
		v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
		r = be64_to_cpu(hpte[1]);
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			v = hpte_new_to_old_v(v, r);
			r = hpte_new_to_old_r(r);
		}
		rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
		gr = rev->guest_rpte;

		unlock_hpte(hpte, orig_v);
	}

	/* For not found, if the HPTE is valid by now, retry the instruction */
	if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
		return 0;

	/* Check access permissions to the page */
	pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
	key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
	status &= ~DSISR_NOHPTE;	/* DSISR_NOHPTE == SRR1_ISI_NOPT */
	if (!data) {
		if (gr & (HPTE_R_N | HPTE_R_G))
			return status | SRR1_ISI_N_G_OR_CIP;
		if (!hpte_read_permission(pp, slb_v & key))
			return status | SRR1_ISI_PROT;
	} else if (status & DSISR_ISSTORE) {
		/* check write permission */
		if (!hpte_write_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	} else {
		if (!hpte_read_permission(pp, slb_v & key))
			return status | DSISR_PROTFAULT;
	}

	/* Check storage key, if applicable */
	if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
		unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
		if (status & DSISR_ISSTORE)
			perm >>= 1;
		if (perm & 1)
			return status | DSISR_KEYFAULT;
	}

	/* Save HPTE info for virtual-mode handler */
	vcpu->arch.pgfault_addr = addr;
	vcpu->arch.pgfault_index = index;
	vcpu->arch.pgfault_hpte[0] = v;
	vcpu->arch.pgfault_hpte[1] = r;
	vcpu->arch.pgfault_cache = cache_entry;

	/* Check the storage key to see if it is possibly emulated MMIO */
	if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
	    (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
		if (!cache_entry) {
			unsigned int pshift = 12;
			unsigned int pshift_index;

			if (slb_v & SLB_VSID_L) {
				pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
				pshift = slb_base_page_shift[pshift_index];
			}
			cache_entry = next_mmio_cache_entry(vcpu);
			cache_entry->eaddr = addr;
			cache_entry->slb_base_pshift = pshift;
			cache_entry->pte_index = index;
			cache_entry->hpte_v = v;
			cache_entry->hpte_r = r;
			cache_entry->rpte = gr;
			cache_entry->slb_v = slb_v;
			cache_entry->mmio_update = mmio_update;
		}
		if (data && (vcpu->arch.shregs.msr & MSR_IR))
			return -2;	/* MMIO emulation - load instr word */
	}

	return -1;		/* send fault up to host kernel mode */
}
EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);