1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 5 */ 6 7 #include <linux/types.h> 8 #include <linux/string.h> 9 #include <linux/kvm.h> 10 #include <linux/kvm_host.h> 11 #include <linux/hugetlb.h> 12 #include <linux/module.h> 13 #include <linux/log2.h> 14 #include <linux/sizes.h> 15 16 #include <asm/trace.h> 17 #include <asm/kvm_ppc.h> 18 #include <asm/kvm_book3s.h> 19 #include <asm/book3s/64/mmu-hash.h> 20 #include <asm/hvcall.h> 21 #include <asm/synch.h> 22 #include <asm/ppc-opcode.h> 23 #include <asm/pte-walk.h> 24 25 /* Translate address of a vmalloc'd thing to a linear map address */ 26 static void *real_vmalloc_addr(void *x) 27 { 28 unsigned long addr = (unsigned long) x; 29 pte_t *p; 30 /* 31 * assume we don't have huge pages in vmalloc space... 32 * So don't worry about THP collapse/split. Called 33 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. 34 */ 35 p = find_init_mm_pte(addr, NULL); 36 if (!p || !pte_present(*p)) 37 return NULL; 38 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); 39 return __va(addr); 40 } 41 42 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ 43 static int global_invalidates(struct kvm *kvm) 44 { 45 int global; 46 int cpu; 47 48 /* 49 * If there is only one vcore, and it's currently running, 50 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, 51 * we can use tlbiel as long as we mark all other physical 52 * cores as potentially having stale TLB entries for this lpid. 53 * Otherwise, don't use tlbiel. 54 */ 55 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) 56 global = 0; 57 else 58 global = 1; 59 60 if (!global) { 61 /* any other core might now have stale TLB entries... */ 62 smp_wmb(); 63 cpumask_setall(&kvm->arch.need_tlb_flush); 64 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 65 /* 66 * On POWER9, threads are independent but the TLB is shared, 67 * so use the bit for the first thread to represent the core. 68 */ 69 if (cpu_has_feature(CPU_FTR_ARCH_300)) 70 cpu = cpu_first_thread_sibling(cpu); 71 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 72 } 73 74 return global; 75 } 76 77 /* 78 * Add this HPTE into the chain for the real page. 79 * Must be called with the chain locked; it unlocks the chain. 80 */ 81 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 82 unsigned long *rmap, long pte_index, int realmode) 83 { 84 struct revmap_entry *head, *tail; 85 unsigned long i; 86 87 if (*rmap & KVMPPC_RMAP_PRESENT) { 88 i = *rmap & KVMPPC_RMAP_INDEX; 89 head = &kvm->arch.hpt.rev[i]; 90 if (realmode) 91 head = real_vmalloc_addr(head); 92 tail = &kvm->arch.hpt.rev[head->back]; 93 if (realmode) 94 tail = real_vmalloc_addr(tail); 95 rev->forw = i; 96 rev->back = head->back; 97 tail->forw = pte_index; 98 head->back = pte_index; 99 } else { 100 rev->forw = rev->back = pte_index; 101 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | 102 pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; 103 } 104 unlock_rmap(rmap); 105 } 106 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 107 108 /* Update the dirty bitmap of a memslot */ 109 void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, 110 unsigned long gfn, unsigned long psize) 111 { 112 unsigned long npages; 113 114 if (!psize || !memslot->dirty_bitmap) 115 return; 116 npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE; 117 gfn -= memslot->base_gfn; 118 set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages); 119 } 120 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map); 121 122 static void kvmppc_set_dirty_from_hpte(struct kvm *kvm, 123 unsigned long hpte_v, unsigned long hpte_gr) 124 { 125 struct kvm_memory_slot *memslot; 126 unsigned long gfn; 127 unsigned long psize; 128 129 psize = kvmppc_actual_pgsz(hpte_v, hpte_gr); 130 gfn = hpte_rpn(hpte_gr, psize); 131 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 132 if (memslot && memslot->dirty_bitmap) 133 kvmppc_update_dirty_map(memslot, gfn, psize); 134 } 135 136 /* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 137 static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 138 unsigned long hpte_gr, 139 struct kvm_memory_slot **memslotp, 140 unsigned long *gfnp) 141 { 142 struct kvm_memory_slot *memslot; 143 unsigned long *rmap; 144 unsigned long gfn; 145 146 gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr)); 147 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 148 if (memslotp) 149 *memslotp = memslot; 150 if (gfnp) 151 *gfnp = gfn; 152 if (!memslot) 153 return NULL; 154 155 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 156 return rmap; 157 } 158 159 /* Remove this HPTE from the chain for a real page */ 160 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 161 struct revmap_entry *rev, 162 unsigned long hpte_v, unsigned long hpte_r) 163 { 164 struct revmap_entry *next, *prev; 165 unsigned long ptel, head; 166 unsigned long *rmap; 167 unsigned long rcbits; 168 struct kvm_memory_slot *memslot; 169 unsigned long gfn; 170 171 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 172 ptel = rev->guest_rpte |= rcbits; 173 rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn); 174 if (!rmap) 175 return; 176 lock_rmap(rmap); 177 178 head = *rmap & KVMPPC_RMAP_INDEX; 179 next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); 180 prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); 181 next->back = rev->back; 182 prev->forw = rev->forw; 183 if (head == pte_index) { 184 head = rev->forw; 185 if (head == pte_index) 186 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 187 else 188 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 189 } 190 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 191 if (rcbits & HPTE_R_C) 192 kvmppc_update_dirty_map(memslot, gfn, 193 kvmppc_actual_pgsz(hpte_v, hpte_r)); 194 unlock_rmap(rmap); 195 } 196 197 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 198 long pte_index, unsigned long pteh, unsigned long ptel, 199 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) 200 { 201 unsigned long i, pa, gpa, gfn, psize; 202 unsigned long slot_fn, hva; 203 __be64 *hpte; 204 struct revmap_entry *rev; 205 unsigned long g_ptel; 206 struct kvm_memory_slot *memslot; 207 unsigned hpage_shift; 208 bool is_ci; 209 unsigned long *rmap; 210 pte_t *ptep; 211 unsigned int writing; 212 unsigned long mmu_seq; 213 unsigned long rcbits; 214 215 if (kvm_is_radix(kvm)) 216 return H_FUNCTION; 217 psize = kvmppc_actual_pgsz(pteh, ptel); 218 if (!psize) 219 return H_PARAMETER; 220 writing = hpte_is_writable(ptel); 221 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 222 ptel &= ~HPTE_GR_RESERVED; 223 g_ptel = ptel; 224 225 /* used later to detect if we might have been invalidated */ 226 mmu_seq = kvm->mmu_notifier_seq; 227 smp_rmb(); 228 229 /* Find the memslot (if any) for this address */ 230 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 231 gfn = gpa >> PAGE_SHIFT; 232 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 233 pa = 0; 234 is_ci = false; 235 rmap = NULL; 236 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { 237 /* Emulated MMIO - mark this with key=31 */ 238 pteh |= HPTE_V_ABSENT; 239 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; 240 goto do_insert; 241 } 242 243 /* Check if the requested page fits entirely in the memslot. */ 244 if (!slot_is_aligned(memslot, psize)) 245 return H_PARAMETER; 246 slot_fn = gfn - memslot->base_gfn; 247 rmap = &memslot->arch.rmap[slot_fn]; 248 249 /* Translate to host virtual address */ 250 hva = __gfn_to_hva_memslot(memslot, gfn); 251 252 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 253 ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); 254 if (ptep) { 255 pte_t pte; 256 unsigned int host_pte_size; 257 258 if (hpage_shift) 259 host_pte_size = 1ul << hpage_shift; 260 else 261 host_pte_size = PAGE_SIZE; 262 /* 263 * We should always find the guest page size 264 * to <= host page size, if host is using hugepage 265 */ 266 if (host_pte_size < psize) { 267 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 268 return H_PARAMETER; 269 } 270 pte = kvmppc_read_update_linux_pte(ptep, writing); 271 if (pte_present(pte) && !pte_protnone(pte)) { 272 if (writing && !__pte_write(pte)) 273 /* make the actual HPTE be read-only */ 274 ptel = hpte_make_readonly(ptel); 275 is_ci = pte_ci(pte); 276 pa = pte_pfn(pte) << PAGE_SHIFT; 277 pa |= hva & (host_pte_size - 1); 278 pa |= gpa & ~PAGE_MASK; 279 } 280 } 281 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 282 283 ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); 284 ptel |= pa; 285 286 if (pa) 287 pteh |= HPTE_V_VALID; 288 else { 289 pteh |= HPTE_V_ABSENT; 290 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 291 } 292 293 /*If we had host pte mapping then Check WIMG */ 294 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { 295 if (is_ci) 296 return H_PARAMETER; 297 /* 298 * Allow guest to map emulated device memory as 299 * uncacheable, but actually make it cacheable. 300 */ 301 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); 302 ptel |= HPTE_R_M; 303 } 304 305 /* Find and lock the HPTEG slot to use */ 306 do_insert: 307 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 308 return H_PARAMETER; 309 if (likely((flags & H_EXACT) == 0)) { 310 pte_index &= ~7UL; 311 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 312 for (i = 0; i < 8; ++i) { 313 if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && 314 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 315 HPTE_V_ABSENT)) 316 break; 317 hpte += 2; 318 } 319 if (i == 8) { 320 /* 321 * Since try_lock_hpte doesn't retry (not even stdcx. 322 * failures), it could be that there is a free slot 323 * but we transiently failed to lock it. Try again, 324 * actually locking each slot and checking it. 325 */ 326 hpte -= 16; 327 for (i = 0; i < 8; ++i) { 328 u64 pte; 329 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 330 cpu_relax(); 331 pte = be64_to_cpu(hpte[0]); 332 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) 333 break; 334 __unlock_hpte(hpte, pte); 335 hpte += 2; 336 } 337 if (i == 8) 338 return H_PTEG_FULL; 339 } 340 pte_index += i; 341 } else { 342 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 343 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 344 HPTE_V_ABSENT)) { 345 /* Lock the slot and check again */ 346 u64 pte; 347 348 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 349 cpu_relax(); 350 pte = be64_to_cpu(hpte[0]); 351 if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { 352 __unlock_hpte(hpte, pte); 353 return H_PTEG_FULL; 354 } 355 } 356 } 357 358 /* Save away the guest's idea of the second HPTE dword */ 359 rev = &kvm->arch.hpt.rev[pte_index]; 360 if (realmode) 361 rev = real_vmalloc_addr(rev); 362 if (rev) { 363 rev->guest_rpte = g_ptel; 364 note_hpte_modification(kvm, rev); 365 } 366 367 /* Link HPTE into reverse-map chain */ 368 if (pteh & HPTE_V_VALID) { 369 if (realmode) 370 rmap = real_vmalloc_addr(rmap); 371 lock_rmap(rmap); 372 /* Check for pending invalidations under the rmap chain lock */ 373 if (mmu_notifier_retry(kvm, mmu_seq)) { 374 /* inval in progress, write a non-present HPTE */ 375 pteh |= HPTE_V_ABSENT; 376 pteh &= ~HPTE_V_VALID; 377 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 378 unlock_rmap(rmap); 379 } else { 380 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 381 realmode); 382 /* Only set R/C in real HPTE if already set in *rmap */ 383 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 384 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); 385 } 386 } 387 388 /* Convert to new format on P9 */ 389 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 390 ptel = hpte_old_to_new_r(pteh, ptel); 391 pteh = hpte_old_to_new_v(pteh); 392 } 393 hpte[1] = cpu_to_be64(ptel); 394 395 /* Write the first HPTE dword, unlocking the HPTE and making it valid */ 396 eieio(); 397 __unlock_hpte(hpte, pteh); 398 asm volatile("ptesync" : : : "memory"); 399 400 *pte_idx_ret = pte_index; 401 return H_SUCCESS; 402 } 403 EXPORT_SYMBOL_GPL(kvmppc_do_h_enter); 404 405 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 406 long pte_index, unsigned long pteh, unsigned long ptel) 407 { 408 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, 409 vcpu->arch.pgdir, true, 410 &vcpu->arch.regs.gpr[4]); 411 } 412 413 #ifdef __BIG_ENDIAN__ 414 #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 415 #else 416 #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) 417 #endif 418 419 static inline int is_mmio_hpte(unsigned long v, unsigned long r) 420 { 421 return ((v & HPTE_V_ABSENT) && 422 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 423 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); 424 } 425 426 static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) 427 { 428 429 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 430 /* Radix flush for a hash guest */ 431 432 unsigned long rb,rs,prs,r,ric; 433 434 rb = PPC_BIT(52); /* IS = 2 */ 435 rs = 0; /* lpid = 0 */ 436 prs = 0; /* partition scoped */ 437 r = 1; /* radix format */ 438 ric = 0; /* RIC_FLSUH_TLB */ 439 440 /* 441 * Need the extra ptesync to make sure we don't 442 * re-order the tlbie 443 */ 444 asm volatile("ptesync": : :"memory"); 445 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 446 : : "r"(rb), "i"(r), "i"(prs), 447 "i"(ric), "r"(rs) : "memory"); 448 } 449 450 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 451 asm volatile("ptesync": : :"memory"); 452 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 453 "r" (rb_value), "r" (lpid)); 454 } 455 } 456 457 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 458 long npages, int global, bool need_sync) 459 { 460 long i; 461 462 /* 463 * We use the POWER9 5-operand versions of tlbie and tlbiel here. 464 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores 465 * the RS field, this is backwards-compatible with P7 and P8. 466 */ 467 if (global) { 468 if (need_sync) 469 asm volatile("ptesync" : : : "memory"); 470 for (i = 0; i < npages; ++i) { 471 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 472 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 473 } 474 475 fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); 476 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 477 } else { 478 if (need_sync) 479 asm volatile("ptesync" : : : "memory"); 480 for (i = 0; i < npages; ++i) { 481 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : 482 "r" (rbvalues[i]), "r" (0)); 483 } 484 asm volatile("ptesync" : : : "memory"); 485 } 486 } 487 488 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 489 unsigned long pte_index, unsigned long avpn, 490 unsigned long *hpret) 491 { 492 __be64 *hpte; 493 unsigned long v, r, rb; 494 struct revmap_entry *rev; 495 u64 pte, orig_pte, pte_r; 496 497 if (kvm_is_radix(kvm)) 498 return H_FUNCTION; 499 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 500 return H_PARAMETER; 501 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 502 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 503 cpu_relax(); 504 pte = orig_pte = be64_to_cpu(hpte[0]); 505 pte_r = be64_to_cpu(hpte[1]); 506 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 507 pte = hpte_new_to_old_v(pte, pte_r); 508 pte_r = hpte_new_to_old_r(pte_r); 509 } 510 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 511 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || 512 ((flags & H_ANDCOND) && (pte & avpn) != 0)) { 513 __unlock_hpte(hpte, orig_pte); 514 return H_NOT_FOUND; 515 } 516 517 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 518 v = pte & ~HPTE_V_HVLOCK; 519 if (v & HPTE_V_VALID) { 520 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 521 rb = compute_tlbie_rb(v, pte_r, pte_index); 522 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 523 /* 524 * The reference (R) and change (C) bits in a HPT 525 * entry can be set by hardware at any time up until 526 * the HPTE is invalidated and the TLB invalidation 527 * sequence has completed. This means that when 528 * removing a HPTE, we need to re-read the HPTE after 529 * the invalidation sequence has completed in order to 530 * obtain reliable values of R and C. 531 */ 532 remove_revmap_chain(kvm, pte_index, rev, v, 533 be64_to_cpu(hpte[1])); 534 } 535 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 536 note_hpte_modification(kvm, rev); 537 unlock_hpte(hpte, 0); 538 539 if (is_mmio_hpte(v, pte_r)) 540 atomic64_inc(&kvm->arch.mmio_update); 541 542 if (v & HPTE_V_ABSENT) 543 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; 544 hpret[0] = v; 545 hpret[1] = r; 546 return H_SUCCESS; 547 } 548 EXPORT_SYMBOL_GPL(kvmppc_do_h_remove); 549 550 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 551 unsigned long pte_index, unsigned long avpn) 552 { 553 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, 554 &vcpu->arch.regs.gpr[4]); 555 } 556 557 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 558 { 559 struct kvm *kvm = vcpu->kvm; 560 unsigned long *args = &vcpu->arch.regs.gpr[4]; 561 __be64 *hp, *hptes[4]; 562 unsigned long tlbrb[4]; 563 long int i, j, k, n, found, indexes[4]; 564 unsigned long flags, req, pte_index, rcbits; 565 int global; 566 long int ret = H_SUCCESS; 567 struct revmap_entry *rev, *revs[4]; 568 u64 hp0, hp1; 569 570 if (kvm_is_radix(kvm)) 571 return H_FUNCTION; 572 global = global_invalidates(kvm); 573 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 574 n = 0; 575 for (; i < 4; ++i) { 576 j = i * 2; 577 pte_index = args[j]; 578 flags = pte_index >> 56; 579 pte_index &= ((1ul << 56) - 1); 580 req = flags >> 6; 581 flags &= 3; 582 if (req == 3) { /* no more requests */ 583 i = 4; 584 break; 585 } 586 if (req != 1 || flags == 3 || 587 pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { 588 /* parameter error */ 589 args[j] = ((0xa0 | flags) << 56) + pte_index; 590 ret = H_PARAMETER; 591 break; 592 } 593 hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); 594 /* to avoid deadlock, don't spin except for first */ 595 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { 596 if (n) 597 break; 598 while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) 599 cpu_relax(); 600 } 601 found = 0; 602 hp0 = be64_to_cpu(hp[0]); 603 hp1 = be64_to_cpu(hp[1]); 604 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 605 hp0 = hpte_new_to_old_v(hp0, hp1); 606 hp1 = hpte_new_to_old_r(hp1); 607 } 608 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { 609 switch (flags & 3) { 610 case 0: /* absolute */ 611 found = 1; 612 break; 613 case 1: /* andcond */ 614 if (!(hp0 & args[j + 1])) 615 found = 1; 616 break; 617 case 2: /* AVPN */ 618 if ((hp0 & ~0x7fUL) == args[j + 1]) 619 found = 1; 620 break; 621 } 622 } 623 if (!found) { 624 hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); 625 args[j] = ((0x90 | flags) << 56) + pte_index; 626 continue; 627 } 628 629 args[j] = ((0x80 | flags) << 56) + pte_index; 630 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 631 note_hpte_modification(kvm, rev); 632 633 if (!(hp0 & HPTE_V_VALID)) { 634 /* insert R and C bits from PTE */ 635 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 636 args[j] |= rcbits << (56 - 5); 637 hp[0] = 0; 638 if (is_mmio_hpte(hp0, hp1)) 639 atomic64_inc(&kvm->arch.mmio_update); 640 continue; 641 } 642 643 /* leave it locked */ 644 hp[0] &= ~cpu_to_be64(HPTE_V_VALID); 645 tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); 646 indexes[n] = j; 647 hptes[n] = hp; 648 revs[n] = rev; 649 ++n; 650 } 651 652 if (!n) 653 break; 654 655 /* Now that we've collected a batch, do the tlbies */ 656 do_tlbies(kvm, tlbrb, n, global, true); 657 658 /* Read PTE low words after tlbie to get final R/C values */ 659 for (k = 0; k < n; ++k) { 660 j = indexes[k]; 661 pte_index = args[j] & ((1ul << 56) - 1); 662 hp = hptes[k]; 663 rev = revs[k]; 664 remove_revmap_chain(kvm, pte_index, rev, 665 be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); 666 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 667 args[j] |= rcbits << (56 - 5); 668 __unlock_hpte(hp, 0); 669 } 670 } 671 672 return ret; 673 } 674 675 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, 676 unsigned long pte_index, unsigned long avpn, 677 unsigned long va) 678 { 679 struct kvm *kvm = vcpu->kvm; 680 __be64 *hpte; 681 struct revmap_entry *rev; 682 unsigned long v, r, rb, mask, bits; 683 u64 pte_v, pte_r; 684 685 if (kvm_is_radix(kvm)) 686 return H_FUNCTION; 687 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 688 return H_PARAMETER; 689 690 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 691 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 692 cpu_relax(); 693 v = pte_v = be64_to_cpu(hpte[0]); 694 if (cpu_has_feature(CPU_FTR_ARCH_300)) 695 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); 696 if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 697 ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { 698 __unlock_hpte(hpte, pte_v); 699 return H_NOT_FOUND; 700 } 701 702 pte_r = be64_to_cpu(hpte[1]); 703 bits = (flags << 55) & HPTE_R_PP0; 704 bits |= (flags << 48) & HPTE_R_KEY_HI; 705 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 706 707 /* Update guest view of 2nd HPTE dword */ 708 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 709 HPTE_R_KEY_HI | HPTE_R_KEY_LO; 710 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 711 if (rev) { 712 r = (rev->guest_rpte & ~mask) | bits; 713 rev->guest_rpte = r; 714 note_hpte_modification(kvm, rev); 715 } 716 717 /* Update HPTE */ 718 if (v & HPTE_V_VALID) { 719 /* 720 * If the page is valid, don't let it transition from 721 * readonly to writable. If it should be writable, we'll 722 * take a trap and let the page fault code sort it out. 723 */ 724 r = (pte_r & ~mask) | bits; 725 if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) 726 r = hpte_make_readonly(r); 727 /* If the PTE is changing, invalidate it first */ 728 if (r != pte_r) { 729 rb = compute_tlbie_rb(v, r, pte_index); 730 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | 731 HPTE_V_ABSENT); 732 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 733 /* Don't lose R/C bit updates done by hardware */ 734 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); 735 hpte[1] = cpu_to_be64(r); 736 } 737 } 738 unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); 739 asm volatile("ptesync" : : : "memory"); 740 if (is_mmio_hpte(v, pte_r)) 741 atomic64_inc(&kvm->arch.mmio_update); 742 743 return H_SUCCESS; 744 } 745 746 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 747 unsigned long pte_index) 748 { 749 struct kvm *kvm = vcpu->kvm; 750 __be64 *hpte; 751 unsigned long v, r; 752 int i, n = 1; 753 struct revmap_entry *rev = NULL; 754 755 if (kvm_is_radix(kvm)) 756 return H_FUNCTION; 757 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 758 return H_PARAMETER; 759 if (flags & H_READ_4) { 760 pte_index &= ~3; 761 n = 4; 762 } 763 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 764 for (i = 0; i < n; ++i, ++pte_index) { 765 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 766 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 767 r = be64_to_cpu(hpte[1]); 768 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 769 v = hpte_new_to_old_v(v, r); 770 r = hpte_new_to_old_r(r); 771 } 772 if (v & HPTE_V_ABSENT) { 773 v &= ~HPTE_V_ABSENT; 774 v |= HPTE_V_VALID; 775 } 776 if (v & HPTE_V_VALID) { 777 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 778 r &= ~HPTE_GR_RESERVED; 779 } 780 vcpu->arch.regs.gpr[4 + i * 2] = v; 781 vcpu->arch.regs.gpr[5 + i * 2] = r; 782 } 783 return H_SUCCESS; 784 } 785 786 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, 787 unsigned long pte_index) 788 { 789 struct kvm *kvm = vcpu->kvm; 790 __be64 *hpte; 791 unsigned long v, r, gr; 792 struct revmap_entry *rev; 793 unsigned long *rmap; 794 long ret = H_NOT_FOUND; 795 796 if (kvm_is_radix(kvm)) 797 return H_FUNCTION; 798 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 799 return H_PARAMETER; 800 801 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 802 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 803 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 804 cpu_relax(); 805 v = be64_to_cpu(hpte[0]); 806 r = be64_to_cpu(hpte[1]); 807 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 808 goto out; 809 810 gr = rev->guest_rpte; 811 if (rev->guest_rpte & HPTE_R_R) { 812 rev->guest_rpte &= ~HPTE_R_R; 813 note_hpte_modification(kvm, rev); 814 } 815 if (v & HPTE_V_VALID) { 816 gr |= r & (HPTE_R_R | HPTE_R_C); 817 if (r & HPTE_R_R) { 818 kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 819 rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL); 820 if (rmap) { 821 lock_rmap(rmap); 822 *rmap |= KVMPPC_RMAP_REFERENCED; 823 unlock_rmap(rmap); 824 } 825 } 826 } 827 vcpu->arch.regs.gpr[4] = gr; 828 ret = H_SUCCESS; 829 out: 830 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 831 return ret; 832 } 833 834 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, 835 unsigned long pte_index) 836 { 837 struct kvm *kvm = vcpu->kvm; 838 __be64 *hpte; 839 unsigned long v, r, gr; 840 struct revmap_entry *rev; 841 long ret = H_NOT_FOUND; 842 843 if (kvm_is_radix(kvm)) 844 return H_FUNCTION; 845 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 846 return H_PARAMETER; 847 848 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 849 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 850 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 851 cpu_relax(); 852 v = be64_to_cpu(hpte[0]); 853 r = be64_to_cpu(hpte[1]); 854 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 855 goto out; 856 857 gr = rev->guest_rpte; 858 if (gr & HPTE_R_C) { 859 rev->guest_rpte &= ~HPTE_R_C; 860 note_hpte_modification(kvm, rev); 861 } 862 if (v & HPTE_V_VALID) { 863 /* need to make it temporarily absent so C is stable */ 864 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); 865 kvmppc_invalidate_hpte(kvm, hpte, pte_index); 866 r = be64_to_cpu(hpte[1]); 867 gr |= r & (HPTE_R_R | HPTE_R_C); 868 if (r & HPTE_R_C) { 869 hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 870 eieio(); 871 kvmppc_set_dirty_from_hpte(kvm, v, gr); 872 } 873 } 874 vcpu->arch.regs.gpr[4] = gr; 875 ret = H_SUCCESS; 876 out: 877 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 878 return ret; 879 } 880 881 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, 882 unsigned long gpa, int writing, unsigned long *hpa, 883 struct kvm_memory_slot **memslot_p) 884 { 885 struct kvm *kvm = vcpu->kvm; 886 struct kvm_memory_slot *memslot; 887 unsigned long gfn, hva, pa, psize = PAGE_SHIFT; 888 unsigned int shift; 889 pte_t *ptep, pte; 890 891 /* Find the memslot for this address */ 892 gfn = gpa >> PAGE_SHIFT; 893 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 894 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 895 return H_PARAMETER; 896 897 /* Translate to host virtual address */ 898 hva = __gfn_to_hva_memslot(memslot, gfn); 899 900 /* Try to find the host pte for that virtual address */ 901 ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); 902 if (!ptep) 903 return H_TOO_HARD; 904 pte = kvmppc_read_update_linux_pte(ptep, writing); 905 if (!pte_present(pte)) 906 return H_TOO_HARD; 907 908 /* Convert to a physical address */ 909 if (shift) 910 psize = 1UL << shift; 911 pa = pte_pfn(pte) << PAGE_SHIFT; 912 pa |= hva & (psize - 1); 913 pa |= gpa & ~PAGE_MASK; 914 915 if (hpa) 916 *hpa = pa; 917 if (memslot_p) 918 *memslot_p = memslot; 919 920 return H_SUCCESS; 921 } 922 923 static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, 924 unsigned long dest) 925 { 926 struct kvm_memory_slot *memslot; 927 struct kvm *kvm = vcpu->kvm; 928 unsigned long pa, mmu_seq; 929 long ret = H_SUCCESS; 930 int i; 931 932 /* Used later to detect if we might have been invalidated */ 933 mmu_seq = kvm->mmu_notifier_seq; 934 smp_rmb(); 935 936 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 937 938 ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); 939 if (ret != H_SUCCESS) 940 goto out_unlock; 941 942 /* Zero the page */ 943 for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) 944 dcbz((void *)pa); 945 kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 946 947 out_unlock: 948 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 949 return ret; 950 } 951 952 static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, 953 unsigned long dest, unsigned long src) 954 { 955 unsigned long dest_pa, src_pa, mmu_seq; 956 struct kvm_memory_slot *dest_memslot; 957 struct kvm *kvm = vcpu->kvm; 958 long ret = H_SUCCESS; 959 960 /* Used later to detect if we might have been invalidated */ 961 mmu_seq = kvm->mmu_notifier_seq; 962 smp_rmb(); 963 964 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 965 ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); 966 if (ret != H_SUCCESS) 967 goto out_unlock; 968 969 ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); 970 if (ret != H_SUCCESS) 971 goto out_unlock; 972 973 /* Copy the page */ 974 memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); 975 976 kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 977 978 out_unlock: 979 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 980 return ret; 981 } 982 983 long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, 984 unsigned long dest, unsigned long src) 985 { 986 struct kvm *kvm = vcpu->kvm; 987 u64 pg_mask = SZ_4K - 1; /* 4K page size */ 988 long ret = H_SUCCESS; 989 990 /* Don't handle radix mode here, go up to the virtual mode handler */ 991 if (kvm_is_radix(kvm)) 992 return H_TOO_HARD; 993 994 /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ 995 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | 996 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) 997 return H_PARAMETER; 998 999 /* dest (and src if copy_page flag set) must be page aligned */ 1000 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) 1001 return H_PARAMETER; 1002 1003 /* zero and/or copy the page as determined by the flags */ 1004 if (flags & H_COPY_PAGE) 1005 ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); 1006 else if (flags & H_ZERO_PAGE) 1007 ret = kvmppc_do_h_page_init_zero(vcpu, dest); 1008 1009 /* We can ignore the other flags */ 1010 1011 return ret; 1012 } 1013 1014 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 1015 unsigned long pte_index) 1016 { 1017 unsigned long rb; 1018 u64 hp0, hp1; 1019 1020 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); 1021 hp0 = be64_to_cpu(hptep[0]); 1022 hp1 = be64_to_cpu(hptep[1]); 1023 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1024 hp0 = hpte_new_to_old_v(hp0, hp1); 1025 hp1 = hpte_new_to_old_r(hp1); 1026 } 1027 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1028 do_tlbies(kvm, &rb, 1, 1, true); 1029 } 1030 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); 1031 1032 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 1033 unsigned long pte_index) 1034 { 1035 unsigned long rb; 1036 unsigned char rbyte; 1037 u64 hp0, hp1; 1038 1039 hp0 = be64_to_cpu(hptep[0]); 1040 hp1 = be64_to_cpu(hptep[1]); 1041 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1042 hp0 = hpte_new_to_old_v(hp0, hp1); 1043 hp1 = hpte_new_to_old_r(hp1); 1044 } 1045 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1046 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; 1047 /* modify only the second-last byte, which contains the ref bit */ 1048 *((char *)hptep + 14) = rbyte; 1049 do_tlbies(kvm, &rb, 1, 1, false); 1050 } 1051 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); 1052 1053 static int slb_base_page_shift[4] = { 1054 24, /* 16M */ 1055 16, /* 64k */ 1056 34, /* 16G */ 1057 20, /* 1M, unsupported */ 1058 }; 1059 1060 static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, 1061 unsigned long eaddr, unsigned long slb_v, long mmio_update) 1062 { 1063 struct mmio_hpte_cache_entry *entry = NULL; 1064 unsigned int pshift; 1065 unsigned int i; 1066 1067 for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { 1068 entry = &vcpu->arch.mmio_cache.entry[i]; 1069 if (entry->mmio_update == mmio_update) { 1070 pshift = entry->slb_base_pshift; 1071 if ((entry->eaddr >> pshift) == (eaddr >> pshift) && 1072 entry->slb_v == slb_v) 1073 return entry; 1074 } 1075 } 1076 return NULL; 1077 } 1078 1079 static struct mmio_hpte_cache_entry * 1080 next_mmio_cache_entry(struct kvm_vcpu *vcpu) 1081 { 1082 unsigned int index = vcpu->arch.mmio_cache.index; 1083 1084 vcpu->arch.mmio_cache.index++; 1085 if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) 1086 vcpu->arch.mmio_cache.index = 0; 1087 1088 return &vcpu->arch.mmio_cache.entry[index]; 1089 } 1090 1091 /* When called from virtmode, this func should be protected by 1092 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK 1093 * can trigger deadlock issue. 1094 */ 1095 long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, 1096 unsigned long valid) 1097 { 1098 unsigned int i; 1099 unsigned int pshift; 1100 unsigned long somask; 1101 unsigned long vsid, hash; 1102 unsigned long avpn; 1103 __be64 *hpte; 1104 unsigned long mask, val; 1105 unsigned long v, r, orig_v; 1106 1107 /* Get page shift, work out hash and AVPN etc. */ 1108 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; 1109 val = 0; 1110 pshift = 12; 1111 if (slb_v & SLB_VSID_L) { 1112 mask |= HPTE_V_LARGE; 1113 val |= HPTE_V_LARGE; 1114 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; 1115 } 1116 if (slb_v & SLB_VSID_B_1T) { 1117 somask = (1UL << 40) - 1; 1118 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; 1119 vsid ^= vsid << 25; 1120 } else { 1121 somask = (1UL << 28) - 1; 1122 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; 1123 } 1124 hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); 1125 avpn = slb_v & ~(somask >> 16); /* also includes B */ 1126 avpn |= (eaddr & somask) >> 16; 1127 1128 if (pshift >= 24) 1129 avpn &= ~((1UL << (pshift - 16)) - 1); 1130 else 1131 avpn &= ~0x7fUL; 1132 val |= avpn; 1133 1134 for (;;) { 1135 hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); 1136 1137 for (i = 0; i < 16; i += 2) { 1138 /* Read the PTE racily */ 1139 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1140 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1141 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); 1142 1143 /* Check valid/absent, hash, segment size and AVPN */ 1144 if (!(v & valid) || (v & mask) != val) 1145 continue; 1146 1147 /* Lock the PTE and read it under the lock */ 1148 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) 1149 cpu_relax(); 1150 v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1151 r = be64_to_cpu(hpte[i+1]); 1152 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1153 v = hpte_new_to_old_v(v, r); 1154 r = hpte_new_to_old_r(r); 1155 } 1156 1157 /* 1158 * Check the HPTE again, including base page size 1159 */ 1160 if ((v & valid) && (v & mask) == val && 1161 kvmppc_hpte_base_page_shift(v, r) == pshift) 1162 /* Return with the HPTE still locked */ 1163 return (hash << 3) + (i >> 1); 1164 1165 __unlock_hpte(&hpte[i], orig_v); 1166 } 1167 1168 if (val & HPTE_V_SECONDARY) 1169 break; 1170 val |= HPTE_V_SECONDARY; 1171 hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); 1172 } 1173 return -1; 1174 } 1175 EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); 1176 1177 /* 1178 * Called in real mode to check whether an HPTE not found fault 1179 * is due to accessing a paged-out page or an emulated MMIO page, 1180 * or if a protection fault is due to accessing a page that the 1181 * guest wanted read/write access to but which we made read-only. 1182 * Returns a possibly modified status (DSISR) value if not 1183 * (i.e. pass the interrupt to the guest), 1184 * -1 to pass the fault up to host kernel mode code, -2 to do that 1185 * and also load the instruction word (for MMIO emulation), 1186 * or 0 if we should make the guest retry the access. 1187 */ 1188 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 1189 unsigned long slb_v, unsigned int status, bool data) 1190 { 1191 struct kvm *kvm = vcpu->kvm; 1192 long int index; 1193 unsigned long v, r, gr, orig_v; 1194 __be64 *hpte; 1195 unsigned long valid; 1196 struct revmap_entry *rev; 1197 unsigned long pp, key; 1198 struct mmio_hpte_cache_entry *cache_entry = NULL; 1199 long mmio_update = 0; 1200 1201 /* For protection fault, expect to find a valid HPTE */ 1202 valid = HPTE_V_VALID; 1203 if (status & DSISR_NOHPTE) { 1204 valid |= HPTE_V_ABSENT; 1205 mmio_update = atomic64_read(&kvm->arch.mmio_update); 1206 cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); 1207 } 1208 if (cache_entry) { 1209 index = cache_entry->pte_index; 1210 v = cache_entry->hpte_v; 1211 r = cache_entry->hpte_r; 1212 gr = cache_entry->rpte; 1213 } else { 1214 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); 1215 if (index < 0) { 1216 if (status & DSISR_NOHPTE) 1217 return status; /* there really was no HPTE */ 1218 return 0; /* for prot fault, HPTE disappeared */ 1219 } 1220 hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); 1221 v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 1222 r = be64_to_cpu(hpte[1]); 1223 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1224 v = hpte_new_to_old_v(v, r); 1225 r = hpte_new_to_old_r(r); 1226 } 1227 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); 1228 gr = rev->guest_rpte; 1229 1230 unlock_hpte(hpte, orig_v); 1231 } 1232 1233 /* For not found, if the HPTE is valid by now, retry the instruction */ 1234 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) 1235 return 0; 1236 1237 /* Check access permissions to the page */ 1238 pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 1239 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 1240 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ 1241 if (!data) { 1242 if (gr & (HPTE_R_N | HPTE_R_G)) 1243 return status | SRR1_ISI_N_G_OR_CIP; 1244 if (!hpte_read_permission(pp, slb_v & key)) 1245 return status | SRR1_ISI_PROT; 1246 } else if (status & DSISR_ISSTORE) { 1247 /* check write permission */ 1248 if (!hpte_write_permission(pp, slb_v & key)) 1249 return status | DSISR_PROTFAULT; 1250 } else { 1251 if (!hpte_read_permission(pp, slb_v & key)) 1252 return status | DSISR_PROTFAULT; 1253 } 1254 1255 /* Check storage key, if applicable */ 1256 if (data && (vcpu->arch.shregs.msr & MSR_DR)) { 1257 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); 1258 if (status & DSISR_ISSTORE) 1259 perm >>= 1; 1260 if (perm & 1) 1261 return status | DSISR_KEYFAULT; 1262 } 1263 1264 /* Save HPTE info for virtual-mode handler */ 1265 vcpu->arch.pgfault_addr = addr; 1266 vcpu->arch.pgfault_index = index; 1267 vcpu->arch.pgfault_hpte[0] = v; 1268 vcpu->arch.pgfault_hpte[1] = r; 1269 vcpu->arch.pgfault_cache = cache_entry; 1270 1271 /* Check the storage key to see if it is possibly emulated MMIO */ 1272 if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 1273 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { 1274 if (!cache_entry) { 1275 unsigned int pshift = 12; 1276 unsigned int pshift_index; 1277 1278 if (slb_v & SLB_VSID_L) { 1279 pshift_index = ((slb_v & SLB_VSID_LP) >> 4); 1280 pshift = slb_base_page_shift[pshift_index]; 1281 } 1282 cache_entry = next_mmio_cache_entry(vcpu); 1283 cache_entry->eaddr = addr; 1284 cache_entry->slb_base_pshift = pshift; 1285 cache_entry->pte_index = index; 1286 cache_entry->hpte_v = v; 1287 cache_entry->hpte_r = r; 1288 cache_entry->rpte = gr; 1289 cache_entry->slb_v = slb_v; 1290 cache_entry->mmio_update = mmio_update; 1291 } 1292 if (data && (vcpu->arch.shregs.msr & MSR_IR)) 1293 return -2; /* MMIO emulation - load instr word */ 1294 } 1295 1296 return -1; /* send fault up to host kernel mode */ 1297 } 1298