1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 5 */ 6 7 #include <linux/types.h> 8 #include <linux/string.h> 9 #include <linux/kvm.h> 10 #include <linux/kvm_host.h> 11 #include <linux/hugetlb.h> 12 #include <linux/module.h> 13 #include <linux/log2.h> 14 #include <linux/sizes.h> 15 16 #include <asm/trace.h> 17 #include <asm/kvm_ppc.h> 18 #include <asm/kvm_book3s.h> 19 #include <asm/book3s/64/mmu-hash.h> 20 #include <asm/hvcall.h> 21 #include <asm/synch.h> 22 #include <asm/ppc-opcode.h> 23 #include <asm/pte-walk.h> 24 25 /* Translate address of a vmalloc'd thing to a linear map address */ 26 static void *real_vmalloc_addr(void *x) 27 { 28 unsigned long addr = (unsigned long) x; 29 pte_t *p; 30 /* 31 * assume we don't have huge pages in vmalloc space... 32 * So don't worry about THP collapse/split. Called 33 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. 34 */ 35 p = find_init_mm_pte(addr, NULL); 36 if (!p || !pte_present(*p)) 37 return NULL; 38 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); 39 return __va(addr); 40 } 41 42 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ 43 static int global_invalidates(struct kvm *kvm) 44 { 45 int global; 46 int cpu; 47 48 /* 49 * If there is only one vcore, and it's currently running, 50 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, 51 * we can use tlbiel as long as we mark all other physical 52 * cores as potentially having stale TLB entries for this lpid. 53 * Otherwise, don't use tlbiel. 54 */ 55 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) 56 global = 0; 57 else 58 global = 1; 59 60 if (!global) { 61 /* any other core might now have stale TLB entries... */ 62 smp_wmb(); 63 cpumask_setall(&kvm->arch.need_tlb_flush); 64 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 65 /* 66 * On POWER9, threads are independent but the TLB is shared, 67 * so use the bit for the first thread to represent the core. 68 */ 69 if (cpu_has_feature(CPU_FTR_ARCH_300)) 70 cpu = cpu_first_thread_sibling(cpu); 71 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 72 } 73 74 return global; 75 } 76 77 /* 78 * Add this HPTE into the chain for the real page. 79 * Must be called with the chain locked; it unlocks the chain. 80 */ 81 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 82 unsigned long *rmap, long pte_index, int realmode) 83 { 84 struct revmap_entry *head, *tail; 85 unsigned long i; 86 87 if (*rmap & KVMPPC_RMAP_PRESENT) { 88 i = *rmap & KVMPPC_RMAP_INDEX; 89 head = &kvm->arch.hpt.rev[i]; 90 if (realmode) 91 head = real_vmalloc_addr(head); 92 tail = &kvm->arch.hpt.rev[head->back]; 93 if (realmode) 94 tail = real_vmalloc_addr(tail); 95 rev->forw = i; 96 rev->back = head->back; 97 tail->forw = pte_index; 98 head->back = pte_index; 99 } else { 100 rev->forw = rev->back = pte_index; 101 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | 102 pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; 103 } 104 unlock_rmap(rmap); 105 } 106 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 107 108 /* Update the dirty bitmap of a memslot */ 109 void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, 110 unsigned long gfn, unsigned long psize) 111 { 112 unsigned long npages; 113 114 if (!psize || !memslot->dirty_bitmap) 115 return; 116 npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE; 117 gfn -= memslot->base_gfn; 118 set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages); 119 } 120 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map); 121 122 static void kvmppc_set_dirty_from_hpte(struct kvm *kvm, 123 unsigned long hpte_v, unsigned long hpte_gr) 124 { 125 struct kvm_memory_slot *memslot; 126 unsigned long gfn; 127 unsigned long psize; 128 129 psize = kvmppc_actual_pgsz(hpte_v, hpte_gr); 130 gfn = hpte_rpn(hpte_gr, psize); 131 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 132 if (memslot && memslot->dirty_bitmap) 133 kvmppc_update_dirty_map(memslot, gfn, psize); 134 } 135 136 /* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 137 static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 138 unsigned long hpte_gr, 139 struct kvm_memory_slot **memslotp, 140 unsigned long *gfnp) 141 { 142 struct kvm_memory_slot *memslot; 143 unsigned long *rmap; 144 unsigned long gfn; 145 146 gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr)); 147 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 148 if (memslotp) 149 *memslotp = memslot; 150 if (gfnp) 151 *gfnp = gfn; 152 if (!memslot) 153 return NULL; 154 155 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 156 return rmap; 157 } 158 159 /* Remove this HPTE from the chain for a real page */ 160 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 161 struct revmap_entry *rev, 162 unsigned long hpte_v, unsigned long hpte_r) 163 { 164 struct revmap_entry *next, *prev; 165 unsigned long ptel, head; 166 unsigned long *rmap; 167 unsigned long rcbits; 168 struct kvm_memory_slot *memslot; 169 unsigned long gfn; 170 171 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 172 ptel = rev->guest_rpte |= rcbits; 173 rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn); 174 if (!rmap) 175 return; 176 lock_rmap(rmap); 177 178 head = *rmap & KVMPPC_RMAP_INDEX; 179 next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); 180 prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); 181 next->back = rev->back; 182 prev->forw = rev->forw; 183 if (head == pte_index) { 184 head = rev->forw; 185 if (head == pte_index) 186 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 187 else 188 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 189 } 190 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 191 if (rcbits & HPTE_R_C) 192 kvmppc_update_dirty_map(memslot, gfn, 193 kvmppc_actual_pgsz(hpte_v, hpte_r)); 194 unlock_rmap(rmap); 195 } 196 197 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 198 long pte_index, unsigned long pteh, unsigned long ptel, 199 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) 200 { 201 unsigned long i, pa, gpa, gfn, psize; 202 unsigned long slot_fn, hva; 203 __be64 *hpte; 204 struct revmap_entry *rev; 205 unsigned long g_ptel; 206 struct kvm_memory_slot *memslot; 207 unsigned hpage_shift; 208 bool is_ci; 209 unsigned long *rmap; 210 pte_t *ptep; 211 unsigned int writing; 212 unsigned long mmu_seq; 213 unsigned long rcbits; 214 215 if (kvm_is_radix(kvm)) 216 return H_FUNCTION; 217 psize = kvmppc_actual_pgsz(pteh, ptel); 218 if (!psize) 219 return H_PARAMETER; 220 writing = hpte_is_writable(ptel); 221 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 222 ptel &= ~HPTE_GR_RESERVED; 223 g_ptel = ptel; 224 225 /* used later to detect if we might have been invalidated */ 226 mmu_seq = kvm->mmu_notifier_seq; 227 smp_rmb(); 228 229 /* Find the memslot (if any) for this address */ 230 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 231 gfn = gpa >> PAGE_SHIFT; 232 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 233 pa = 0; 234 is_ci = false; 235 rmap = NULL; 236 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { 237 /* Emulated MMIO - mark this with key=31 */ 238 pteh |= HPTE_V_ABSENT; 239 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; 240 goto do_insert; 241 } 242 243 /* Check if the requested page fits entirely in the memslot. */ 244 if (!slot_is_aligned(memslot, psize)) 245 return H_PARAMETER; 246 slot_fn = gfn - memslot->base_gfn; 247 rmap = &memslot->arch.rmap[slot_fn]; 248 249 /* Translate to host virtual address */ 250 hva = __gfn_to_hva_memslot(memslot, gfn); 251 252 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 253 ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); 254 if (ptep) { 255 pte_t pte; 256 unsigned int host_pte_size; 257 258 if (hpage_shift) 259 host_pte_size = 1ul << hpage_shift; 260 else 261 host_pte_size = PAGE_SIZE; 262 /* 263 * We should always find the guest page size 264 * to <= host page size, if host is using hugepage 265 */ 266 if (host_pte_size < psize) { 267 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 268 return H_PARAMETER; 269 } 270 pte = kvmppc_read_update_linux_pte(ptep, writing); 271 if (pte_present(pte) && !pte_protnone(pte)) { 272 if (writing && !__pte_write(pte)) 273 /* make the actual HPTE be read-only */ 274 ptel = hpte_make_readonly(ptel); 275 is_ci = pte_ci(pte); 276 pa = pte_pfn(pte) << PAGE_SHIFT; 277 pa |= hva & (host_pte_size - 1); 278 pa |= gpa & ~PAGE_MASK; 279 } 280 } 281 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 282 283 ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); 284 ptel |= pa; 285 286 if (pa) 287 pteh |= HPTE_V_VALID; 288 else { 289 pteh |= HPTE_V_ABSENT; 290 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 291 } 292 293 /*If we had host pte mapping then Check WIMG */ 294 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { 295 if (is_ci) 296 return H_PARAMETER; 297 /* 298 * Allow guest to map emulated device memory as 299 * uncacheable, but actually make it cacheable. 300 */ 301 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); 302 ptel |= HPTE_R_M; 303 } 304 305 /* Find and lock the HPTEG slot to use */ 306 do_insert: 307 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 308 return H_PARAMETER; 309 if (likely((flags & H_EXACT) == 0)) { 310 pte_index &= ~7UL; 311 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 312 for (i = 0; i < 8; ++i) { 313 if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && 314 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 315 HPTE_V_ABSENT)) 316 break; 317 hpte += 2; 318 } 319 if (i == 8) { 320 /* 321 * Since try_lock_hpte doesn't retry (not even stdcx. 322 * failures), it could be that there is a free slot 323 * but we transiently failed to lock it. Try again, 324 * actually locking each slot and checking it. 325 */ 326 hpte -= 16; 327 for (i = 0; i < 8; ++i) { 328 u64 pte; 329 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 330 cpu_relax(); 331 pte = be64_to_cpu(hpte[0]); 332 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) 333 break; 334 __unlock_hpte(hpte, pte); 335 hpte += 2; 336 } 337 if (i == 8) 338 return H_PTEG_FULL; 339 } 340 pte_index += i; 341 } else { 342 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 343 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 344 HPTE_V_ABSENT)) { 345 /* Lock the slot and check again */ 346 u64 pte; 347 348 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 349 cpu_relax(); 350 pte = be64_to_cpu(hpte[0]); 351 if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { 352 __unlock_hpte(hpte, pte); 353 return H_PTEG_FULL; 354 } 355 } 356 } 357 358 /* Save away the guest's idea of the second HPTE dword */ 359 rev = &kvm->arch.hpt.rev[pte_index]; 360 if (realmode) 361 rev = real_vmalloc_addr(rev); 362 if (rev) { 363 rev->guest_rpte = g_ptel; 364 note_hpte_modification(kvm, rev); 365 } 366 367 /* Link HPTE into reverse-map chain */ 368 if (pteh & HPTE_V_VALID) { 369 if (realmode) 370 rmap = real_vmalloc_addr(rmap); 371 lock_rmap(rmap); 372 /* Check for pending invalidations under the rmap chain lock */ 373 if (mmu_notifier_retry(kvm, mmu_seq)) { 374 /* inval in progress, write a non-present HPTE */ 375 pteh |= HPTE_V_ABSENT; 376 pteh &= ~HPTE_V_VALID; 377 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 378 unlock_rmap(rmap); 379 } else { 380 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 381 realmode); 382 /* Only set R/C in real HPTE if already set in *rmap */ 383 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 384 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); 385 } 386 } 387 388 /* Convert to new format on P9 */ 389 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 390 ptel = hpte_old_to_new_r(pteh, ptel); 391 pteh = hpte_old_to_new_v(pteh); 392 } 393 hpte[1] = cpu_to_be64(ptel); 394 395 /* Write the first HPTE dword, unlocking the HPTE and making it valid */ 396 eieio(); 397 __unlock_hpte(hpte, pteh); 398 asm volatile("ptesync" : : : "memory"); 399 400 *pte_idx_ret = pte_index; 401 return H_SUCCESS; 402 } 403 EXPORT_SYMBOL_GPL(kvmppc_do_h_enter); 404 405 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 406 long pte_index, unsigned long pteh, unsigned long ptel) 407 { 408 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, 409 vcpu->arch.pgdir, true, 410 &vcpu->arch.regs.gpr[4]); 411 } 412 413 #ifdef __BIG_ENDIAN__ 414 #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 415 #else 416 #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) 417 #endif 418 419 static inline int is_mmio_hpte(unsigned long v, unsigned long r) 420 { 421 return ((v & HPTE_V_ABSENT) && 422 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 423 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); 424 } 425 426 static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) 427 { 428 429 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 430 /* Radix flush for a hash guest */ 431 432 unsigned long rb,rs,prs,r,ric; 433 434 rb = PPC_BIT(52); /* IS = 2 */ 435 rs = 0; /* lpid = 0 */ 436 prs = 0; /* partition scoped */ 437 r = 1; /* radix format */ 438 ric = 0; /* RIC_FLSUH_TLB */ 439 440 /* 441 * Need the extra ptesync to make sure we don't 442 * re-order the tlbie 443 */ 444 asm volatile("ptesync": : :"memory"); 445 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 446 : : "r"(rb), "i"(r), "i"(prs), 447 "i"(ric), "r"(rs) : "memory"); 448 } 449 450 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 451 asm volatile("ptesync": : :"memory"); 452 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 453 "r" (rb_value), "r" (lpid)); 454 } 455 } 456 457 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 458 long npages, int global, bool need_sync) 459 { 460 long i; 461 462 /* 463 * We use the POWER9 5-operand versions of tlbie and tlbiel here. 464 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores 465 * the RS field, this is backwards-compatible with P7 and P8. 466 */ 467 if (global) { 468 if (need_sync) 469 asm volatile("ptesync" : : : "memory"); 470 for (i = 0; i < npages; ++i) { 471 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 472 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 473 } 474 475 fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); 476 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 477 } else { 478 if (need_sync) 479 asm volatile("ptesync" : : : "memory"); 480 for (i = 0; i < npages; ++i) { 481 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : 482 "r" (rbvalues[i]), "r" (0)); 483 } 484 asm volatile("ptesync" : : : "memory"); 485 } 486 } 487 488 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 489 unsigned long pte_index, unsigned long avpn, 490 unsigned long *hpret) 491 { 492 __be64 *hpte; 493 unsigned long v, r, rb; 494 struct revmap_entry *rev; 495 u64 pte, orig_pte, pte_r; 496 497 if (kvm_is_radix(kvm)) 498 return H_FUNCTION; 499 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 500 return H_PARAMETER; 501 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 502 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 503 cpu_relax(); 504 pte = orig_pte = be64_to_cpu(hpte[0]); 505 pte_r = be64_to_cpu(hpte[1]); 506 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 507 pte = hpte_new_to_old_v(pte, pte_r); 508 pte_r = hpte_new_to_old_r(pte_r); 509 } 510 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 511 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || 512 ((flags & H_ANDCOND) && (pte & avpn) != 0)) { 513 __unlock_hpte(hpte, orig_pte); 514 return H_NOT_FOUND; 515 } 516 517 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 518 v = pte & ~HPTE_V_HVLOCK; 519 if (v & HPTE_V_VALID) { 520 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 521 rb = compute_tlbie_rb(v, pte_r, pte_index); 522 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 523 /* 524 * The reference (R) and change (C) bits in a HPT 525 * entry can be set by hardware at any time up until 526 * the HPTE is invalidated and the TLB invalidation 527 * sequence has completed. This means that when 528 * removing a HPTE, we need to re-read the HPTE after 529 * the invalidation sequence has completed in order to 530 * obtain reliable values of R and C. 531 */ 532 remove_revmap_chain(kvm, pte_index, rev, v, 533 be64_to_cpu(hpte[1])); 534 } 535 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 536 note_hpte_modification(kvm, rev); 537 unlock_hpte(hpte, 0); 538 539 if (is_mmio_hpte(v, pte_r)) 540 atomic64_inc(&kvm->arch.mmio_update); 541 542 if (v & HPTE_V_ABSENT) 543 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; 544 hpret[0] = v; 545 hpret[1] = r; 546 return H_SUCCESS; 547 } 548 EXPORT_SYMBOL_GPL(kvmppc_do_h_remove); 549 550 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 551 unsigned long pte_index, unsigned long avpn) 552 { 553 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, 554 &vcpu->arch.regs.gpr[4]); 555 } 556 557 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 558 { 559 struct kvm *kvm = vcpu->kvm; 560 unsigned long *args = &vcpu->arch.regs.gpr[4]; 561 __be64 *hp, *hptes[4]; 562 unsigned long tlbrb[4]; 563 long int i, j, k, n, found, indexes[4]; 564 unsigned long flags, req, pte_index, rcbits; 565 int global; 566 long int ret = H_SUCCESS; 567 struct revmap_entry *rev, *revs[4]; 568 u64 hp0, hp1; 569 570 if (kvm_is_radix(kvm)) 571 return H_FUNCTION; 572 global = global_invalidates(kvm); 573 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 574 n = 0; 575 for (; i < 4; ++i) { 576 j = i * 2; 577 pte_index = args[j]; 578 flags = pte_index >> 56; 579 pte_index &= ((1ul << 56) - 1); 580 req = flags >> 6; 581 flags &= 3; 582 if (req == 3) { /* no more requests */ 583 i = 4; 584 break; 585 } 586 if (req != 1 || flags == 3 || 587 pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { 588 /* parameter error */ 589 args[j] = ((0xa0 | flags) << 56) + pte_index; 590 ret = H_PARAMETER; 591 break; 592 } 593 hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); 594 /* to avoid deadlock, don't spin except for first */ 595 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { 596 if (n) 597 break; 598 while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) 599 cpu_relax(); 600 } 601 found = 0; 602 hp0 = be64_to_cpu(hp[0]); 603 hp1 = be64_to_cpu(hp[1]); 604 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 605 hp0 = hpte_new_to_old_v(hp0, hp1); 606 hp1 = hpte_new_to_old_r(hp1); 607 } 608 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { 609 switch (flags & 3) { 610 case 0: /* absolute */ 611 found = 1; 612 break; 613 case 1: /* andcond */ 614 if (!(hp0 & args[j + 1])) 615 found = 1; 616 break; 617 case 2: /* AVPN */ 618 if ((hp0 & ~0x7fUL) == args[j + 1]) 619 found = 1; 620 break; 621 } 622 } 623 if (!found) { 624 hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); 625 args[j] = ((0x90 | flags) << 56) + pte_index; 626 continue; 627 } 628 629 args[j] = ((0x80 | flags) << 56) + pte_index; 630 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 631 note_hpte_modification(kvm, rev); 632 633 if (!(hp0 & HPTE_V_VALID)) { 634 /* insert R and C bits from PTE */ 635 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 636 args[j] |= rcbits << (56 - 5); 637 hp[0] = 0; 638 if (is_mmio_hpte(hp0, hp1)) 639 atomic64_inc(&kvm->arch.mmio_update); 640 continue; 641 } 642 643 /* leave it locked */ 644 hp[0] &= ~cpu_to_be64(HPTE_V_VALID); 645 tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); 646 indexes[n] = j; 647 hptes[n] = hp; 648 revs[n] = rev; 649 ++n; 650 } 651 652 if (!n) 653 break; 654 655 /* Now that we've collected a batch, do the tlbies */ 656 do_tlbies(kvm, tlbrb, n, global, true); 657 658 /* Read PTE low words after tlbie to get final R/C values */ 659 for (k = 0; k < n; ++k) { 660 j = indexes[k]; 661 pte_index = args[j] & ((1ul << 56) - 1); 662 hp = hptes[k]; 663 rev = revs[k]; 664 remove_revmap_chain(kvm, pte_index, rev, 665 be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); 666 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 667 args[j] |= rcbits << (56 - 5); 668 __unlock_hpte(hp, 0); 669 } 670 } 671 672 return ret; 673 } 674 675 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, 676 unsigned long pte_index, unsigned long avpn) 677 { 678 struct kvm *kvm = vcpu->kvm; 679 __be64 *hpte; 680 struct revmap_entry *rev; 681 unsigned long v, r, rb, mask, bits; 682 u64 pte_v, pte_r; 683 684 if (kvm_is_radix(kvm)) 685 return H_FUNCTION; 686 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 687 return H_PARAMETER; 688 689 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 690 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 691 cpu_relax(); 692 v = pte_v = be64_to_cpu(hpte[0]); 693 if (cpu_has_feature(CPU_FTR_ARCH_300)) 694 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); 695 if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 696 ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { 697 __unlock_hpte(hpte, pte_v); 698 return H_NOT_FOUND; 699 } 700 701 pte_r = be64_to_cpu(hpte[1]); 702 bits = (flags << 55) & HPTE_R_PP0; 703 bits |= (flags << 48) & HPTE_R_KEY_HI; 704 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 705 706 /* Update guest view of 2nd HPTE dword */ 707 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 708 HPTE_R_KEY_HI | HPTE_R_KEY_LO; 709 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 710 if (rev) { 711 r = (rev->guest_rpte & ~mask) | bits; 712 rev->guest_rpte = r; 713 note_hpte_modification(kvm, rev); 714 } 715 716 /* Update HPTE */ 717 if (v & HPTE_V_VALID) { 718 /* 719 * If the page is valid, don't let it transition from 720 * readonly to writable. If it should be writable, we'll 721 * take a trap and let the page fault code sort it out. 722 */ 723 r = (pte_r & ~mask) | bits; 724 if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) 725 r = hpte_make_readonly(r); 726 /* If the PTE is changing, invalidate it first */ 727 if (r != pte_r) { 728 rb = compute_tlbie_rb(v, r, pte_index); 729 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | 730 HPTE_V_ABSENT); 731 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 732 /* Don't lose R/C bit updates done by hardware */ 733 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); 734 hpte[1] = cpu_to_be64(r); 735 } 736 } 737 unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); 738 asm volatile("ptesync" : : : "memory"); 739 if (is_mmio_hpte(v, pte_r)) 740 atomic64_inc(&kvm->arch.mmio_update); 741 742 return H_SUCCESS; 743 } 744 745 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 746 unsigned long pte_index) 747 { 748 struct kvm *kvm = vcpu->kvm; 749 __be64 *hpte; 750 unsigned long v, r; 751 int i, n = 1; 752 struct revmap_entry *rev = NULL; 753 754 if (kvm_is_radix(kvm)) 755 return H_FUNCTION; 756 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 757 return H_PARAMETER; 758 if (flags & H_READ_4) { 759 pte_index &= ~3; 760 n = 4; 761 } 762 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 763 for (i = 0; i < n; ++i, ++pte_index) { 764 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 765 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 766 r = be64_to_cpu(hpte[1]); 767 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 768 v = hpte_new_to_old_v(v, r); 769 r = hpte_new_to_old_r(r); 770 } 771 if (v & HPTE_V_ABSENT) { 772 v &= ~HPTE_V_ABSENT; 773 v |= HPTE_V_VALID; 774 } 775 if (v & HPTE_V_VALID) { 776 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 777 r &= ~HPTE_GR_RESERVED; 778 } 779 vcpu->arch.regs.gpr[4 + i * 2] = v; 780 vcpu->arch.regs.gpr[5 + i * 2] = r; 781 } 782 return H_SUCCESS; 783 } 784 785 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, 786 unsigned long pte_index) 787 { 788 struct kvm *kvm = vcpu->kvm; 789 __be64 *hpte; 790 unsigned long v, r, gr; 791 struct revmap_entry *rev; 792 unsigned long *rmap; 793 long ret = H_NOT_FOUND; 794 795 if (kvm_is_radix(kvm)) 796 return H_FUNCTION; 797 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 798 return H_PARAMETER; 799 800 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 801 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 802 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 803 cpu_relax(); 804 v = be64_to_cpu(hpte[0]); 805 r = be64_to_cpu(hpte[1]); 806 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 807 goto out; 808 809 gr = rev->guest_rpte; 810 if (rev->guest_rpte & HPTE_R_R) { 811 rev->guest_rpte &= ~HPTE_R_R; 812 note_hpte_modification(kvm, rev); 813 } 814 if (v & HPTE_V_VALID) { 815 gr |= r & (HPTE_R_R | HPTE_R_C); 816 if (r & HPTE_R_R) { 817 kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 818 rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL); 819 if (rmap) { 820 lock_rmap(rmap); 821 *rmap |= KVMPPC_RMAP_REFERENCED; 822 unlock_rmap(rmap); 823 } 824 } 825 } 826 vcpu->arch.regs.gpr[4] = gr; 827 ret = H_SUCCESS; 828 out: 829 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 830 return ret; 831 } 832 833 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, 834 unsigned long pte_index) 835 { 836 struct kvm *kvm = vcpu->kvm; 837 __be64 *hpte; 838 unsigned long v, r, gr; 839 struct revmap_entry *rev; 840 long ret = H_NOT_FOUND; 841 842 if (kvm_is_radix(kvm)) 843 return H_FUNCTION; 844 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 845 return H_PARAMETER; 846 847 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 848 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 849 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 850 cpu_relax(); 851 v = be64_to_cpu(hpte[0]); 852 r = be64_to_cpu(hpte[1]); 853 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 854 goto out; 855 856 gr = rev->guest_rpte; 857 if (gr & HPTE_R_C) { 858 rev->guest_rpte &= ~HPTE_R_C; 859 note_hpte_modification(kvm, rev); 860 } 861 if (v & HPTE_V_VALID) { 862 /* need to make it temporarily absent so C is stable */ 863 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); 864 kvmppc_invalidate_hpte(kvm, hpte, pte_index); 865 r = be64_to_cpu(hpte[1]); 866 gr |= r & (HPTE_R_R | HPTE_R_C); 867 if (r & HPTE_R_C) { 868 hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 869 eieio(); 870 kvmppc_set_dirty_from_hpte(kvm, v, gr); 871 } 872 } 873 vcpu->arch.regs.gpr[4] = gr; 874 ret = H_SUCCESS; 875 out: 876 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 877 return ret; 878 } 879 880 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, 881 unsigned long gpa, int writing, unsigned long *hpa, 882 struct kvm_memory_slot **memslot_p) 883 { 884 struct kvm *kvm = vcpu->kvm; 885 struct kvm_memory_slot *memslot; 886 unsigned long gfn, hva, pa, psize = PAGE_SHIFT; 887 unsigned int shift; 888 pte_t *ptep, pte; 889 890 /* Find the memslot for this address */ 891 gfn = gpa >> PAGE_SHIFT; 892 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 893 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 894 return H_PARAMETER; 895 896 /* Translate to host virtual address */ 897 hva = __gfn_to_hva_memslot(memslot, gfn); 898 899 /* Try to find the host pte for that virtual address */ 900 ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); 901 if (!ptep) 902 return H_TOO_HARD; 903 pte = kvmppc_read_update_linux_pte(ptep, writing); 904 if (!pte_present(pte)) 905 return H_TOO_HARD; 906 907 /* Convert to a physical address */ 908 if (shift) 909 psize = 1UL << shift; 910 pa = pte_pfn(pte) << PAGE_SHIFT; 911 pa |= hva & (psize - 1); 912 pa |= gpa & ~PAGE_MASK; 913 914 if (hpa) 915 *hpa = pa; 916 if (memslot_p) 917 *memslot_p = memslot; 918 919 return H_SUCCESS; 920 } 921 922 static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, 923 unsigned long dest) 924 { 925 struct kvm_memory_slot *memslot; 926 struct kvm *kvm = vcpu->kvm; 927 unsigned long pa, mmu_seq; 928 long ret = H_SUCCESS; 929 int i; 930 931 /* Used later to detect if we might have been invalidated */ 932 mmu_seq = kvm->mmu_notifier_seq; 933 smp_rmb(); 934 935 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 936 937 ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); 938 if (ret != H_SUCCESS) 939 goto out_unlock; 940 941 /* Zero the page */ 942 for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) 943 dcbz((void *)pa); 944 kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 945 946 out_unlock: 947 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 948 return ret; 949 } 950 951 static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, 952 unsigned long dest, unsigned long src) 953 { 954 unsigned long dest_pa, src_pa, mmu_seq; 955 struct kvm_memory_slot *dest_memslot; 956 struct kvm *kvm = vcpu->kvm; 957 long ret = H_SUCCESS; 958 959 /* Used later to detect if we might have been invalidated */ 960 mmu_seq = kvm->mmu_notifier_seq; 961 smp_rmb(); 962 963 arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); 964 ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); 965 if (ret != H_SUCCESS) 966 goto out_unlock; 967 968 ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); 969 if (ret != H_SUCCESS) 970 goto out_unlock; 971 972 /* Copy the page */ 973 memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); 974 975 kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 976 977 out_unlock: 978 arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); 979 return ret; 980 } 981 982 long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, 983 unsigned long dest, unsigned long src) 984 { 985 struct kvm *kvm = vcpu->kvm; 986 u64 pg_mask = SZ_4K - 1; /* 4K page size */ 987 long ret = H_SUCCESS; 988 989 /* Don't handle radix mode here, go up to the virtual mode handler */ 990 if (kvm_is_radix(kvm)) 991 return H_TOO_HARD; 992 993 /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ 994 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | 995 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) 996 return H_PARAMETER; 997 998 /* dest (and src if copy_page flag set) must be page aligned */ 999 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) 1000 return H_PARAMETER; 1001 1002 /* zero and/or copy the page as determined by the flags */ 1003 if (flags & H_COPY_PAGE) 1004 ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); 1005 else if (flags & H_ZERO_PAGE) 1006 ret = kvmppc_do_h_page_init_zero(vcpu, dest); 1007 1008 /* We can ignore the other flags */ 1009 1010 return ret; 1011 } 1012 1013 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 1014 unsigned long pte_index) 1015 { 1016 unsigned long rb; 1017 u64 hp0, hp1; 1018 1019 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); 1020 hp0 = be64_to_cpu(hptep[0]); 1021 hp1 = be64_to_cpu(hptep[1]); 1022 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1023 hp0 = hpte_new_to_old_v(hp0, hp1); 1024 hp1 = hpte_new_to_old_r(hp1); 1025 } 1026 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1027 do_tlbies(kvm, &rb, 1, 1, true); 1028 } 1029 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); 1030 1031 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 1032 unsigned long pte_index) 1033 { 1034 unsigned long rb; 1035 unsigned char rbyte; 1036 u64 hp0, hp1; 1037 1038 hp0 = be64_to_cpu(hptep[0]); 1039 hp1 = be64_to_cpu(hptep[1]); 1040 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1041 hp0 = hpte_new_to_old_v(hp0, hp1); 1042 hp1 = hpte_new_to_old_r(hp1); 1043 } 1044 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1045 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; 1046 /* modify only the second-last byte, which contains the ref bit */ 1047 *((char *)hptep + 14) = rbyte; 1048 do_tlbies(kvm, &rb, 1, 1, false); 1049 } 1050 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); 1051 1052 static int slb_base_page_shift[4] = { 1053 24, /* 16M */ 1054 16, /* 64k */ 1055 34, /* 16G */ 1056 20, /* 1M, unsupported */ 1057 }; 1058 1059 static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, 1060 unsigned long eaddr, unsigned long slb_v, long mmio_update) 1061 { 1062 struct mmio_hpte_cache_entry *entry = NULL; 1063 unsigned int pshift; 1064 unsigned int i; 1065 1066 for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { 1067 entry = &vcpu->arch.mmio_cache.entry[i]; 1068 if (entry->mmio_update == mmio_update) { 1069 pshift = entry->slb_base_pshift; 1070 if ((entry->eaddr >> pshift) == (eaddr >> pshift) && 1071 entry->slb_v == slb_v) 1072 return entry; 1073 } 1074 } 1075 return NULL; 1076 } 1077 1078 static struct mmio_hpte_cache_entry * 1079 next_mmio_cache_entry(struct kvm_vcpu *vcpu) 1080 { 1081 unsigned int index = vcpu->arch.mmio_cache.index; 1082 1083 vcpu->arch.mmio_cache.index++; 1084 if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) 1085 vcpu->arch.mmio_cache.index = 0; 1086 1087 return &vcpu->arch.mmio_cache.entry[index]; 1088 } 1089 1090 /* When called from virtmode, this func should be protected by 1091 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK 1092 * can trigger deadlock issue. 1093 */ 1094 long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, 1095 unsigned long valid) 1096 { 1097 unsigned int i; 1098 unsigned int pshift; 1099 unsigned long somask; 1100 unsigned long vsid, hash; 1101 unsigned long avpn; 1102 __be64 *hpte; 1103 unsigned long mask, val; 1104 unsigned long v, r, orig_v; 1105 1106 /* Get page shift, work out hash and AVPN etc. */ 1107 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; 1108 val = 0; 1109 pshift = 12; 1110 if (slb_v & SLB_VSID_L) { 1111 mask |= HPTE_V_LARGE; 1112 val |= HPTE_V_LARGE; 1113 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; 1114 } 1115 if (slb_v & SLB_VSID_B_1T) { 1116 somask = (1UL << 40) - 1; 1117 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; 1118 vsid ^= vsid << 25; 1119 } else { 1120 somask = (1UL << 28) - 1; 1121 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; 1122 } 1123 hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); 1124 avpn = slb_v & ~(somask >> 16); /* also includes B */ 1125 avpn |= (eaddr & somask) >> 16; 1126 1127 if (pshift >= 24) 1128 avpn &= ~((1UL << (pshift - 16)) - 1); 1129 else 1130 avpn &= ~0x7fUL; 1131 val |= avpn; 1132 1133 for (;;) { 1134 hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); 1135 1136 for (i = 0; i < 16; i += 2) { 1137 /* Read the PTE racily */ 1138 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1139 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1140 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); 1141 1142 /* Check valid/absent, hash, segment size and AVPN */ 1143 if (!(v & valid) || (v & mask) != val) 1144 continue; 1145 1146 /* Lock the PTE and read it under the lock */ 1147 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) 1148 cpu_relax(); 1149 v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1150 r = be64_to_cpu(hpte[i+1]); 1151 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1152 v = hpte_new_to_old_v(v, r); 1153 r = hpte_new_to_old_r(r); 1154 } 1155 1156 /* 1157 * Check the HPTE again, including base page size 1158 */ 1159 if ((v & valid) && (v & mask) == val && 1160 kvmppc_hpte_base_page_shift(v, r) == pshift) 1161 /* Return with the HPTE still locked */ 1162 return (hash << 3) + (i >> 1); 1163 1164 __unlock_hpte(&hpte[i], orig_v); 1165 } 1166 1167 if (val & HPTE_V_SECONDARY) 1168 break; 1169 val |= HPTE_V_SECONDARY; 1170 hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); 1171 } 1172 return -1; 1173 } 1174 EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); 1175 1176 /* 1177 * Called in real mode to check whether an HPTE not found fault 1178 * is due to accessing a paged-out page or an emulated MMIO page, 1179 * or if a protection fault is due to accessing a page that the 1180 * guest wanted read/write access to but which we made read-only. 1181 * Returns a possibly modified status (DSISR) value if not 1182 * (i.e. pass the interrupt to the guest), 1183 * -1 to pass the fault up to host kernel mode code, -2 to do that 1184 * and also load the instruction word (for MMIO emulation), 1185 * or 0 if we should make the guest retry the access. 1186 */ 1187 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 1188 unsigned long slb_v, unsigned int status, bool data) 1189 { 1190 struct kvm *kvm = vcpu->kvm; 1191 long int index; 1192 unsigned long v, r, gr, orig_v; 1193 __be64 *hpte; 1194 unsigned long valid; 1195 struct revmap_entry *rev; 1196 unsigned long pp, key; 1197 struct mmio_hpte_cache_entry *cache_entry = NULL; 1198 long mmio_update = 0; 1199 1200 /* For protection fault, expect to find a valid HPTE */ 1201 valid = HPTE_V_VALID; 1202 if (status & DSISR_NOHPTE) { 1203 valid |= HPTE_V_ABSENT; 1204 mmio_update = atomic64_read(&kvm->arch.mmio_update); 1205 cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); 1206 } 1207 if (cache_entry) { 1208 index = cache_entry->pte_index; 1209 v = cache_entry->hpte_v; 1210 r = cache_entry->hpte_r; 1211 gr = cache_entry->rpte; 1212 } else { 1213 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); 1214 if (index < 0) { 1215 if (status & DSISR_NOHPTE) 1216 return status; /* there really was no HPTE */ 1217 return 0; /* for prot fault, HPTE disappeared */ 1218 } 1219 hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); 1220 v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 1221 r = be64_to_cpu(hpte[1]); 1222 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1223 v = hpte_new_to_old_v(v, r); 1224 r = hpte_new_to_old_r(r); 1225 } 1226 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); 1227 gr = rev->guest_rpte; 1228 1229 unlock_hpte(hpte, orig_v); 1230 } 1231 1232 /* For not found, if the HPTE is valid by now, retry the instruction */ 1233 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) 1234 return 0; 1235 1236 /* Check access permissions to the page */ 1237 pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 1238 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 1239 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ 1240 if (!data) { 1241 if (gr & (HPTE_R_N | HPTE_R_G)) 1242 return status | SRR1_ISI_N_G_OR_CIP; 1243 if (!hpte_read_permission(pp, slb_v & key)) 1244 return status | SRR1_ISI_PROT; 1245 } else if (status & DSISR_ISSTORE) { 1246 /* check write permission */ 1247 if (!hpte_write_permission(pp, slb_v & key)) 1248 return status | DSISR_PROTFAULT; 1249 } else { 1250 if (!hpte_read_permission(pp, slb_v & key)) 1251 return status | DSISR_PROTFAULT; 1252 } 1253 1254 /* Check storage key, if applicable */ 1255 if (data && (vcpu->arch.shregs.msr & MSR_DR)) { 1256 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); 1257 if (status & DSISR_ISSTORE) 1258 perm >>= 1; 1259 if (perm & 1) 1260 return status | DSISR_KEYFAULT; 1261 } 1262 1263 /* Save HPTE info for virtual-mode handler */ 1264 vcpu->arch.pgfault_addr = addr; 1265 vcpu->arch.pgfault_index = index; 1266 vcpu->arch.pgfault_hpte[0] = v; 1267 vcpu->arch.pgfault_hpte[1] = r; 1268 vcpu->arch.pgfault_cache = cache_entry; 1269 1270 /* Check the storage key to see if it is possibly emulated MMIO */ 1271 if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 1272 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { 1273 if (!cache_entry) { 1274 unsigned int pshift = 12; 1275 unsigned int pshift_index; 1276 1277 if (slb_v & SLB_VSID_L) { 1278 pshift_index = ((slb_v & SLB_VSID_LP) >> 4); 1279 pshift = slb_base_page_shift[pshift_index]; 1280 } 1281 cache_entry = next_mmio_cache_entry(vcpu); 1282 cache_entry->eaddr = addr; 1283 cache_entry->slb_base_pshift = pshift; 1284 cache_entry->pte_index = index; 1285 cache_entry->hpte_v = v; 1286 cache_entry->hpte_r = r; 1287 cache_entry->rpte = gr; 1288 cache_entry->slb_v = slb_v; 1289 cache_entry->mmio_update = mmio_update; 1290 } 1291 if (data && (vcpu->arch.shregs.msr & MSR_IR)) 1292 return -2; /* MMIO emulation - load instr word */ 1293 } 1294 1295 return -1; /* send fault up to host kernel mode */ 1296 } 1297