1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * 4 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 5 */ 6 7 #include <linux/types.h> 8 #include <linux/string.h> 9 #include <linux/kvm.h> 10 #include <linux/kvm_host.h> 11 #include <linux/hugetlb.h> 12 #include <linux/module.h> 13 #include <linux/log2.h> 14 #include <linux/sizes.h> 15 16 #include <asm/trace.h> 17 #include <asm/kvm_ppc.h> 18 #include <asm/kvm_book3s.h> 19 #include <asm/book3s/64/mmu-hash.h> 20 #include <asm/hvcall.h> 21 #include <asm/synch.h> 22 #include <asm/ppc-opcode.h> 23 #include <asm/pte-walk.h> 24 25 /* Translate address of a vmalloc'd thing to a linear map address */ 26 static void *real_vmalloc_addr(void *x) 27 { 28 unsigned long addr = (unsigned long) x; 29 pte_t *p; 30 /* 31 * assume we don't have huge pages in vmalloc space... 32 * So don't worry about THP collapse/split. Called 33 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore. 34 */ 35 p = find_init_mm_pte(addr, NULL); 36 if (!p || !pte_present(*p)) 37 return NULL; 38 addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK); 39 return __va(addr); 40 } 41 42 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ 43 static int global_invalidates(struct kvm *kvm) 44 { 45 int global; 46 int cpu; 47 48 /* 49 * If there is only one vcore, and it's currently running, 50 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set, 51 * we can use tlbiel as long as we mark all other physical 52 * cores as potentially having stale TLB entries for this lpid. 53 * Otherwise, don't use tlbiel. 54 */ 55 if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu) 56 global = 0; 57 else 58 global = 1; 59 60 if (!global) { 61 /* any other core might now have stale TLB entries... */ 62 smp_wmb(); 63 cpumask_setall(&kvm->arch.need_tlb_flush); 64 cpu = local_paca->kvm_hstate.kvm_vcore->pcpu; 65 /* 66 * On POWER9, threads are independent but the TLB is shared, 67 * so use the bit for the first thread to represent the core. 68 */ 69 if (cpu_has_feature(CPU_FTR_ARCH_300)) 70 cpu = cpu_first_thread_sibling(cpu); 71 cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush); 72 } 73 74 return global; 75 } 76 77 /* 78 * Add this HPTE into the chain for the real page. 79 * Must be called with the chain locked; it unlocks the chain. 80 */ 81 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, 82 unsigned long *rmap, long pte_index, int realmode) 83 { 84 struct revmap_entry *head, *tail; 85 unsigned long i; 86 87 if (*rmap & KVMPPC_RMAP_PRESENT) { 88 i = *rmap & KVMPPC_RMAP_INDEX; 89 head = &kvm->arch.hpt.rev[i]; 90 if (realmode) 91 head = real_vmalloc_addr(head); 92 tail = &kvm->arch.hpt.rev[head->back]; 93 if (realmode) 94 tail = real_vmalloc_addr(tail); 95 rev->forw = i; 96 rev->back = head->back; 97 tail->forw = pte_index; 98 head->back = pte_index; 99 } else { 100 rev->forw = rev->back = pte_index; 101 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | 102 pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; 103 } 104 unlock_rmap(rmap); 105 } 106 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); 107 108 /* Update the dirty bitmap of a memslot */ 109 void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, 110 unsigned long gfn, unsigned long psize) 111 { 112 unsigned long npages; 113 114 if (!psize || !memslot->dirty_bitmap) 115 return; 116 npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE; 117 gfn -= memslot->base_gfn; 118 set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages); 119 } 120 EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map); 121 122 static void kvmppc_set_dirty_from_hpte(struct kvm *kvm, 123 unsigned long hpte_v, unsigned long hpte_gr) 124 { 125 struct kvm_memory_slot *memslot; 126 unsigned long gfn; 127 unsigned long psize; 128 129 psize = kvmppc_actual_pgsz(hpte_v, hpte_gr); 130 gfn = hpte_rpn(hpte_gr, psize); 131 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 132 if (memslot && memslot->dirty_bitmap) 133 kvmppc_update_dirty_map(memslot, gfn, psize); 134 } 135 136 /* Returns a pointer to the revmap entry for the page mapped by a HPTE */ 137 static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, 138 unsigned long hpte_gr, 139 struct kvm_memory_slot **memslotp, 140 unsigned long *gfnp) 141 { 142 struct kvm_memory_slot *memslot; 143 unsigned long *rmap; 144 unsigned long gfn; 145 146 gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr)); 147 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 148 if (memslotp) 149 *memslotp = memslot; 150 if (gfnp) 151 *gfnp = gfn; 152 if (!memslot) 153 return NULL; 154 155 rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); 156 return rmap; 157 } 158 159 /* Remove this HPTE from the chain for a real page */ 160 static void remove_revmap_chain(struct kvm *kvm, long pte_index, 161 struct revmap_entry *rev, 162 unsigned long hpte_v, unsigned long hpte_r) 163 { 164 struct revmap_entry *next, *prev; 165 unsigned long ptel, head; 166 unsigned long *rmap; 167 unsigned long rcbits; 168 struct kvm_memory_slot *memslot; 169 unsigned long gfn; 170 171 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 172 ptel = rev->guest_rpte |= rcbits; 173 rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn); 174 if (!rmap) 175 return; 176 lock_rmap(rmap); 177 178 head = *rmap & KVMPPC_RMAP_INDEX; 179 next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]); 180 prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]); 181 next->back = rev->back; 182 prev->forw = rev->forw; 183 if (head == pte_index) { 184 head = rev->forw; 185 if (head == pte_index) 186 *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 187 else 188 *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head; 189 } 190 *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT; 191 if (rcbits & HPTE_R_C) 192 kvmppc_update_dirty_map(memslot, gfn, 193 kvmppc_actual_pgsz(hpte_v, hpte_r)); 194 unlock_rmap(rmap); 195 } 196 197 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, 198 long pte_index, unsigned long pteh, unsigned long ptel, 199 pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) 200 { 201 unsigned long i, pa, gpa, gfn, psize; 202 unsigned long slot_fn, hva; 203 __be64 *hpte; 204 struct revmap_entry *rev; 205 unsigned long g_ptel; 206 struct kvm_memory_slot *memslot; 207 unsigned hpage_shift; 208 bool is_ci; 209 unsigned long *rmap; 210 pte_t *ptep; 211 unsigned int writing; 212 unsigned long mmu_seq; 213 unsigned long rcbits, irq_flags = 0; 214 215 if (kvm_is_radix(kvm)) 216 return H_FUNCTION; 217 psize = kvmppc_actual_pgsz(pteh, ptel); 218 if (!psize) 219 return H_PARAMETER; 220 writing = hpte_is_writable(ptel); 221 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 222 ptel &= ~HPTE_GR_RESERVED; 223 g_ptel = ptel; 224 225 /* used later to detect if we might have been invalidated */ 226 mmu_seq = kvm->mmu_notifier_seq; 227 smp_rmb(); 228 229 /* Find the memslot (if any) for this address */ 230 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 231 gfn = gpa >> PAGE_SHIFT; 232 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 233 pa = 0; 234 is_ci = false; 235 rmap = NULL; 236 if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { 237 /* Emulated MMIO - mark this with key=31 */ 238 pteh |= HPTE_V_ABSENT; 239 ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO; 240 goto do_insert; 241 } 242 243 /* Check if the requested page fits entirely in the memslot. */ 244 if (!slot_is_aligned(memslot, psize)) 245 return H_PARAMETER; 246 slot_fn = gfn - memslot->base_gfn; 247 rmap = &memslot->arch.rmap[slot_fn]; 248 249 /* Translate to host virtual address */ 250 hva = __gfn_to_hva_memslot(memslot, gfn); 251 /* 252 * If we had a page table table change after lookup, we would 253 * retry via mmu_notifier_retry. 254 */ 255 if (!realmode) 256 local_irq_save(irq_flags); 257 /* 258 * If called in real mode we have MSR_EE = 0. Otherwise 259 * we disable irq above. 260 */ 261 ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); 262 if (ptep) { 263 pte_t pte; 264 unsigned int host_pte_size; 265 266 if (hpage_shift) 267 host_pte_size = 1ul << hpage_shift; 268 else 269 host_pte_size = PAGE_SIZE; 270 /* 271 * We should always find the guest page size 272 * to <= host page size, if host is using hugepage 273 */ 274 if (host_pte_size < psize) { 275 if (!realmode) 276 local_irq_restore(flags); 277 return H_PARAMETER; 278 } 279 pte = kvmppc_read_update_linux_pte(ptep, writing); 280 if (pte_present(pte) && !pte_protnone(pte)) { 281 if (writing && !__pte_write(pte)) 282 /* make the actual HPTE be read-only */ 283 ptel = hpte_make_readonly(ptel); 284 is_ci = pte_ci(pte); 285 pa = pte_pfn(pte) << PAGE_SHIFT; 286 pa |= hva & (host_pte_size - 1); 287 pa |= gpa & ~PAGE_MASK; 288 } 289 } 290 if (!realmode) 291 local_irq_restore(irq_flags); 292 293 ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); 294 ptel |= pa; 295 296 if (pa) 297 pteh |= HPTE_V_VALID; 298 else { 299 pteh |= HPTE_V_ABSENT; 300 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 301 } 302 303 /*If we had host pte mapping then Check WIMG */ 304 if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { 305 if (is_ci) 306 return H_PARAMETER; 307 /* 308 * Allow guest to map emulated device memory as 309 * uncacheable, but actually make it cacheable. 310 */ 311 ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G); 312 ptel |= HPTE_R_M; 313 } 314 315 /* Find and lock the HPTEG slot to use */ 316 do_insert: 317 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 318 return H_PARAMETER; 319 if (likely((flags & H_EXACT) == 0)) { 320 pte_index &= ~7UL; 321 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 322 for (i = 0; i < 8; ++i) { 323 if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && 324 try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 325 HPTE_V_ABSENT)) 326 break; 327 hpte += 2; 328 } 329 if (i == 8) { 330 /* 331 * Since try_lock_hpte doesn't retry (not even stdcx. 332 * failures), it could be that there is a free slot 333 * but we transiently failed to lock it. Try again, 334 * actually locking each slot and checking it. 335 */ 336 hpte -= 16; 337 for (i = 0; i < 8; ++i) { 338 u64 pte; 339 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 340 cpu_relax(); 341 pte = be64_to_cpu(hpte[0]); 342 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) 343 break; 344 __unlock_hpte(hpte, pte); 345 hpte += 2; 346 } 347 if (i == 8) 348 return H_PTEG_FULL; 349 } 350 pte_index += i; 351 } else { 352 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 353 if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | 354 HPTE_V_ABSENT)) { 355 /* Lock the slot and check again */ 356 u64 pte; 357 358 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 359 cpu_relax(); 360 pte = be64_to_cpu(hpte[0]); 361 if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { 362 __unlock_hpte(hpte, pte); 363 return H_PTEG_FULL; 364 } 365 } 366 } 367 368 /* Save away the guest's idea of the second HPTE dword */ 369 rev = &kvm->arch.hpt.rev[pte_index]; 370 if (realmode) 371 rev = real_vmalloc_addr(rev); 372 if (rev) { 373 rev->guest_rpte = g_ptel; 374 note_hpte_modification(kvm, rev); 375 } 376 377 /* Link HPTE into reverse-map chain */ 378 if (pteh & HPTE_V_VALID) { 379 if (realmode) 380 rmap = real_vmalloc_addr(rmap); 381 lock_rmap(rmap); 382 /* Check for pending invalidations under the rmap chain lock */ 383 if (mmu_notifier_retry(kvm, mmu_seq)) { 384 /* inval in progress, write a non-present HPTE */ 385 pteh |= HPTE_V_ABSENT; 386 pteh &= ~HPTE_V_VALID; 387 ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); 388 unlock_rmap(rmap); 389 } else { 390 kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, 391 realmode); 392 /* Only set R/C in real HPTE if already set in *rmap */ 393 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 394 ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C); 395 } 396 } 397 398 /* Convert to new format on P9 */ 399 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 400 ptel = hpte_old_to_new_r(pteh, ptel); 401 pteh = hpte_old_to_new_v(pteh); 402 } 403 hpte[1] = cpu_to_be64(ptel); 404 405 /* Write the first HPTE dword, unlocking the HPTE and making it valid */ 406 eieio(); 407 __unlock_hpte(hpte, pteh); 408 asm volatile("ptesync" : : : "memory"); 409 410 *pte_idx_ret = pte_index; 411 return H_SUCCESS; 412 } 413 EXPORT_SYMBOL_GPL(kvmppc_do_h_enter); 414 415 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 416 long pte_index, unsigned long pteh, unsigned long ptel) 417 { 418 return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, 419 vcpu->arch.pgdir, true, 420 &vcpu->arch.regs.gpr[4]); 421 } 422 423 #ifdef __BIG_ENDIAN__ 424 #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) 425 #else 426 #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) 427 #endif 428 429 static inline int is_mmio_hpte(unsigned long v, unsigned long r) 430 { 431 return ((v & HPTE_V_ABSENT) && 432 (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 433 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); 434 } 435 436 static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) 437 { 438 439 if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { 440 /* Radix flush for a hash guest */ 441 442 unsigned long rb,rs,prs,r,ric; 443 444 rb = PPC_BIT(52); /* IS = 2 */ 445 rs = 0; /* lpid = 0 */ 446 prs = 0; /* partition scoped */ 447 r = 1; /* radix format */ 448 ric = 0; /* RIC_FLSUH_TLB */ 449 450 /* 451 * Need the extra ptesync to make sure we don't 452 * re-order the tlbie 453 */ 454 asm volatile("ptesync": : :"memory"); 455 asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) 456 : : "r"(rb), "i"(r), "i"(prs), 457 "i"(ric), "r"(rs) : "memory"); 458 } 459 460 if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { 461 asm volatile("ptesync": : :"memory"); 462 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 463 "r" (rb_value), "r" (lpid)); 464 } 465 } 466 467 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, 468 long npages, int global, bool need_sync) 469 { 470 long i; 471 472 /* 473 * We use the POWER9 5-operand versions of tlbie and tlbiel here. 474 * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores 475 * the RS field, this is backwards-compatible with P7 and P8. 476 */ 477 if (global) { 478 if (need_sync) 479 asm volatile("ptesync" : : : "memory"); 480 for (i = 0; i < npages; ++i) { 481 asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : 482 "r" (rbvalues[i]), "r" (kvm->arch.lpid)); 483 } 484 485 fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); 486 asm volatile("eieio; tlbsync; ptesync" : : : "memory"); 487 } else { 488 if (need_sync) 489 asm volatile("ptesync" : : : "memory"); 490 for (i = 0; i < npages; ++i) { 491 asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : 492 "r" (rbvalues[i]), "r" (0)); 493 } 494 asm volatile("ptesync" : : : "memory"); 495 } 496 } 497 498 long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, 499 unsigned long pte_index, unsigned long avpn, 500 unsigned long *hpret) 501 { 502 __be64 *hpte; 503 unsigned long v, r, rb; 504 struct revmap_entry *rev; 505 u64 pte, orig_pte, pte_r; 506 507 if (kvm_is_radix(kvm)) 508 return H_FUNCTION; 509 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 510 return H_PARAMETER; 511 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 512 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 513 cpu_relax(); 514 pte = orig_pte = be64_to_cpu(hpte[0]); 515 pte_r = be64_to_cpu(hpte[1]); 516 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 517 pte = hpte_new_to_old_v(pte, pte_r); 518 pte_r = hpte_new_to_old_r(pte_r); 519 } 520 if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 521 ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || 522 ((flags & H_ANDCOND) && (pte & avpn) != 0)) { 523 __unlock_hpte(hpte, orig_pte); 524 return H_NOT_FOUND; 525 } 526 527 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 528 v = pte & ~HPTE_V_HVLOCK; 529 if (v & HPTE_V_VALID) { 530 hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); 531 rb = compute_tlbie_rb(v, pte_r, pte_index); 532 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 533 /* 534 * The reference (R) and change (C) bits in a HPT 535 * entry can be set by hardware at any time up until 536 * the HPTE is invalidated and the TLB invalidation 537 * sequence has completed. This means that when 538 * removing a HPTE, we need to re-read the HPTE after 539 * the invalidation sequence has completed in order to 540 * obtain reliable values of R and C. 541 */ 542 remove_revmap_chain(kvm, pte_index, rev, v, 543 be64_to_cpu(hpte[1])); 544 } 545 r = rev->guest_rpte & ~HPTE_GR_RESERVED; 546 note_hpte_modification(kvm, rev); 547 unlock_hpte(hpte, 0); 548 549 if (is_mmio_hpte(v, pte_r)) 550 atomic64_inc(&kvm->arch.mmio_update); 551 552 if (v & HPTE_V_ABSENT) 553 v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; 554 hpret[0] = v; 555 hpret[1] = r; 556 return H_SUCCESS; 557 } 558 EXPORT_SYMBOL_GPL(kvmppc_do_h_remove); 559 560 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, 561 unsigned long pte_index, unsigned long avpn) 562 { 563 return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, 564 &vcpu->arch.regs.gpr[4]); 565 } 566 567 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) 568 { 569 struct kvm *kvm = vcpu->kvm; 570 unsigned long *args = &vcpu->arch.regs.gpr[4]; 571 __be64 *hp, *hptes[4]; 572 unsigned long tlbrb[4]; 573 long int i, j, k, n, found, indexes[4]; 574 unsigned long flags, req, pte_index, rcbits; 575 int global; 576 long int ret = H_SUCCESS; 577 struct revmap_entry *rev, *revs[4]; 578 u64 hp0, hp1; 579 580 if (kvm_is_radix(kvm)) 581 return H_FUNCTION; 582 global = global_invalidates(kvm); 583 for (i = 0; i < 4 && ret == H_SUCCESS; ) { 584 n = 0; 585 for (; i < 4; ++i) { 586 j = i * 2; 587 pte_index = args[j]; 588 flags = pte_index >> 56; 589 pte_index &= ((1ul << 56) - 1); 590 req = flags >> 6; 591 flags &= 3; 592 if (req == 3) { /* no more requests */ 593 i = 4; 594 break; 595 } 596 if (req != 1 || flags == 3 || 597 pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) { 598 /* parameter error */ 599 args[j] = ((0xa0 | flags) << 56) + pte_index; 600 ret = H_PARAMETER; 601 break; 602 } 603 hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4)); 604 /* to avoid deadlock, don't spin except for first */ 605 if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { 606 if (n) 607 break; 608 while (!try_lock_hpte(hp, HPTE_V_HVLOCK)) 609 cpu_relax(); 610 } 611 found = 0; 612 hp0 = be64_to_cpu(hp[0]); 613 hp1 = be64_to_cpu(hp[1]); 614 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 615 hp0 = hpte_new_to_old_v(hp0, hp1); 616 hp1 = hpte_new_to_old_r(hp1); 617 } 618 if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { 619 switch (flags & 3) { 620 case 0: /* absolute */ 621 found = 1; 622 break; 623 case 1: /* andcond */ 624 if (!(hp0 & args[j + 1])) 625 found = 1; 626 break; 627 case 2: /* AVPN */ 628 if ((hp0 & ~0x7fUL) == args[j + 1]) 629 found = 1; 630 break; 631 } 632 } 633 if (!found) { 634 hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); 635 args[j] = ((0x90 | flags) << 56) + pte_index; 636 continue; 637 } 638 639 args[j] = ((0x80 | flags) << 56) + pte_index; 640 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 641 note_hpte_modification(kvm, rev); 642 643 if (!(hp0 & HPTE_V_VALID)) { 644 /* insert R and C bits from PTE */ 645 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 646 args[j] |= rcbits << (56 - 5); 647 hp[0] = 0; 648 if (is_mmio_hpte(hp0, hp1)) 649 atomic64_inc(&kvm->arch.mmio_update); 650 continue; 651 } 652 653 /* leave it locked */ 654 hp[0] &= ~cpu_to_be64(HPTE_V_VALID); 655 tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); 656 indexes[n] = j; 657 hptes[n] = hp; 658 revs[n] = rev; 659 ++n; 660 } 661 662 if (!n) 663 break; 664 665 /* Now that we've collected a batch, do the tlbies */ 666 do_tlbies(kvm, tlbrb, n, global, true); 667 668 /* Read PTE low words after tlbie to get final R/C values */ 669 for (k = 0; k < n; ++k) { 670 j = indexes[k]; 671 pte_index = args[j] & ((1ul << 56) - 1); 672 hp = hptes[k]; 673 rev = revs[k]; 674 remove_revmap_chain(kvm, pte_index, rev, 675 be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); 676 rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); 677 args[j] |= rcbits << (56 - 5); 678 __unlock_hpte(hp, 0); 679 } 680 } 681 682 return ret; 683 } 684 685 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, 686 unsigned long pte_index, unsigned long avpn, 687 unsigned long va) 688 { 689 struct kvm *kvm = vcpu->kvm; 690 __be64 *hpte; 691 struct revmap_entry *rev; 692 unsigned long v, r, rb, mask, bits; 693 u64 pte_v, pte_r; 694 695 if (kvm_is_radix(kvm)) 696 return H_FUNCTION; 697 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 698 return H_PARAMETER; 699 700 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 701 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 702 cpu_relax(); 703 v = pte_v = be64_to_cpu(hpte[0]); 704 if (cpu_has_feature(CPU_FTR_ARCH_300)) 705 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); 706 if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || 707 ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { 708 __unlock_hpte(hpte, pte_v); 709 return H_NOT_FOUND; 710 } 711 712 pte_r = be64_to_cpu(hpte[1]); 713 bits = (flags << 55) & HPTE_R_PP0; 714 bits |= (flags << 48) & HPTE_R_KEY_HI; 715 bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); 716 717 /* Update guest view of 2nd HPTE dword */ 718 mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N | 719 HPTE_R_KEY_HI | HPTE_R_KEY_LO; 720 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 721 if (rev) { 722 r = (rev->guest_rpte & ~mask) | bits; 723 rev->guest_rpte = r; 724 note_hpte_modification(kvm, rev); 725 } 726 727 /* Update HPTE */ 728 if (v & HPTE_V_VALID) { 729 /* 730 * If the page is valid, don't let it transition from 731 * readonly to writable. If it should be writable, we'll 732 * take a trap and let the page fault code sort it out. 733 */ 734 r = (pte_r & ~mask) | bits; 735 if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) 736 r = hpte_make_readonly(r); 737 /* If the PTE is changing, invalidate it first */ 738 if (r != pte_r) { 739 rb = compute_tlbie_rb(v, r, pte_index); 740 hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | 741 HPTE_V_ABSENT); 742 do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); 743 /* Don't lose R/C bit updates done by hardware */ 744 r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); 745 hpte[1] = cpu_to_be64(r); 746 } 747 } 748 unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); 749 asm volatile("ptesync" : : : "memory"); 750 if (is_mmio_hpte(v, pte_r)) 751 atomic64_inc(&kvm->arch.mmio_update); 752 753 return H_SUCCESS; 754 } 755 756 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, 757 unsigned long pte_index) 758 { 759 struct kvm *kvm = vcpu->kvm; 760 __be64 *hpte; 761 unsigned long v, r; 762 int i, n = 1; 763 struct revmap_entry *rev = NULL; 764 765 if (kvm_is_radix(kvm)) 766 return H_FUNCTION; 767 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 768 return H_PARAMETER; 769 if (flags & H_READ_4) { 770 pte_index &= ~3; 771 n = 4; 772 } 773 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 774 for (i = 0; i < n; ++i, ++pte_index) { 775 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 776 v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 777 r = be64_to_cpu(hpte[1]); 778 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 779 v = hpte_new_to_old_v(v, r); 780 r = hpte_new_to_old_r(r); 781 } 782 if (v & HPTE_V_ABSENT) { 783 v &= ~HPTE_V_ABSENT; 784 v |= HPTE_V_VALID; 785 } 786 if (v & HPTE_V_VALID) { 787 r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); 788 r &= ~HPTE_GR_RESERVED; 789 } 790 vcpu->arch.regs.gpr[4 + i * 2] = v; 791 vcpu->arch.regs.gpr[5 + i * 2] = r; 792 } 793 return H_SUCCESS; 794 } 795 796 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, 797 unsigned long pte_index) 798 { 799 struct kvm *kvm = vcpu->kvm; 800 __be64 *hpte; 801 unsigned long v, r, gr; 802 struct revmap_entry *rev; 803 unsigned long *rmap; 804 long ret = H_NOT_FOUND; 805 806 if (kvm_is_radix(kvm)) 807 return H_FUNCTION; 808 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 809 return H_PARAMETER; 810 811 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 812 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 813 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 814 cpu_relax(); 815 v = be64_to_cpu(hpte[0]); 816 r = be64_to_cpu(hpte[1]); 817 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 818 goto out; 819 820 gr = rev->guest_rpte; 821 if (rev->guest_rpte & HPTE_R_R) { 822 rev->guest_rpte &= ~HPTE_R_R; 823 note_hpte_modification(kvm, rev); 824 } 825 if (v & HPTE_V_VALID) { 826 gr |= r & (HPTE_R_R | HPTE_R_C); 827 if (r & HPTE_R_R) { 828 kvmppc_clear_ref_hpte(kvm, hpte, pte_index); 829 rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL); 830 if (rmap) { 831 lock_rmap(rmap); 832 *rmap |= KVMPPC_RMAP_REFERENCED; 833 unlock_rmap(rmap); 834 } 835 } 836 } 837 vcpu->arch.regs.gpr[4] = gr; 838 ret = H_SUCCESS; 839 out: 840 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 841 return ret; 842 } 843 844 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, 845 unsigned long pte_index) 846 { 847 struct kvm *kvm = vcpu->kvm; 848 __be64 *hpte; 849 unsigned long v, r, gr; 850 struct revmap_entry *rev; 851 long ret = H_NOT_FOUND; 852 853 if (kvm_is_radix(kvm)) 854 return H_FUNCTION; 855 if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) 856 return H_PARAMETER; 857 858 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]); 859 hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4)); 860 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) 861 cpu_relax(); 862 v = be64_to_cpu(hpte[0]); 863 r = be64_to_cpu(hpte[1]); 864 if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) 865 goto out; 866 867 gr = rev->guest_rpte; 868 if (gr & HPTE_R_C) { 869 rev->guest_rpte &= ~HPTE_R_C; 870 note_hpte_modification(kvm, rev); 871 } 872 if (v & HPTE_V_VALID) { 873 /* need to make it temporarily absent so C is stable */ 874 hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); 875 kvmppc_invalidate_hpte(kvm, hpte, pte_index); 876 r = be64_to_cpu(hpte[1]); 877 gr |= r & (HPTE_R_R | HPTE_R_C); 878 if (r & HPTE_R_C) { 879 hpte[1] = cpu_to_be64(r & ~HPTE_R_C); 880 eieio(); 881 kvmppc_set_dirty_from_hpte(kvm, v, gr); 882 } 883 } 884 vcpu->arch.regs.gpr[4] = gr; 885 ret = H_SUCCESS; 886 out: 887 unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); 888 return ret; 889 } 890 891 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, 892 int writing, unsigned long *hpa, 893 struct kvm_memory_slot **memslot_p) 894 { 895 struct kvm *kvm = vcpu->kvm; 896 struct kvm_memory_slot *memslot; 897 unsigned long gfn, hva, pa, psize = PAGE_SHIFT; 898 unsigned int shift; 899 pte_t *ptep, pte; 900 901 /* Find the memslot for this address */ 902 gfn = gpa >> PAGE_SHIFT; 903 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); 904 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 905 return H_PARAMETER; 906 907 /* Translate to host virtual address */ 908 hva = __gfn_to_hva_memslot(memslot, gfn); 909 910 /* Try to find the host pte for that virtual address */ 911 ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); 912 if (!ptep) 913 return H_TOO_HARD; 914 pte = kvmppc_read_update_linux_pte(ptep, writing); 915 if (!pte_present(pte)) 916 return H_TOO_HARD; 917 918 /* Convert to a physical address */ 919 if (shift) 920 psize = 1UL << shift; 921 pa = pte_pfn(pte) << PAGE_SHIFT; 922 pa |= hva & (psize - 1); 923 pa |= gpa & ~PAGE_MASK; 924 925 if (hpa) 926 *hpa = pa; 927 if (memslot_p) 928 *memslot_p = memslot; 929 930 return H_SUCCESS; 931 } 932 933 static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, 934 unsigned long dest) 935 { 936 struct kvm_memory_slot *memslot; 937 struct kvm *kvm = vcpu->kvm; 938 unsigned long pa, mmu_seq; 939 long ret = H_SUCCESS; 940 int i; 941 942 /* Used later to detect if we might have been invalidated */ 943 mmu_seq = kvm->mmu_notifier_seq; 944 smp_rmb(); 945 946 ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); 947 if (ret != H_SUCCESS) 948 return ret; 949 950 /* Check if we've been invalidated */ 951 raw_spin_lock(&kvm->mmu_lock.rlock); 952 if (mmu_notifier_retry(kvm, mmu_seq)) { 953 ret = H_TOO_HARD; 954 goto out_unlock; 955 } 956 957 /* Zero the page */ 958 for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) 959 dcbz((void *)pa); 960 kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 961 962 out_unlock: 963 raw_spin_unlock(&kvm->mmu_lock.rlock); 964 return ret; 965 } 966 967 static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, 968 unsigned long dest, unsigned long src) 969 { 970 unsigned long dest_pa, src_pa, mmu_seq; 971 struct kvm_memory_slot *dest_memslot; 972 struct kvm *kvm = vcpu->kvm; 973 long ret = H_SUCCESS; 974 975 /* Used later to detect if we might have been invalidated */ 976 mmu_seq = kvm->mmu_notifier_seq; 977 smp_rmb(); 978 979 ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); 980 if (ret != H_SUCCESS) 981 return ret; 982 ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); 983 if (ret != H_SUCCESS) 984 return ret; 985 986 /* Check if we've been invalidated */ 987 raw_spin_lock(&kvm->mmu_lock.rlock); 988 if (mmu_notifier_retry(kvm, mmu_seq)) { 989 ret = H_TOO_HARD; 990 goto out_unlock; 991 } 992 993 /* Copy the page */ 994 memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); 995 996 kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); 997 998 out_unlock: 999 raw_spin_unlock(&kvm->mmu_lock.rlock); 1000 return ret; 1001 } 1002 1003 long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags, 1004 unsigned long dest, unsigned long src) 1005 { 1006 struct kvm *kvm = vcpu->kvm; 1007 u64 pg_mask = SZ_4K - 1; /* 4K page size */ 1008 long ret = H_SUCCESS; 1009 1010 /* Don't handle radix mode here, go up to the virtual mode handler */ 1011 if (kvm_is_radix(kvm)) 1012 return H_TOO_HARD; 1013 1014 /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */ 1015 if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE | 1016 H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED)) 1017 return H_PARAMETER; 1018 1019 /* dest (and src if copy_page flag set) must be page aligned */ 1020 if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask))) 1021 return H_PARAMETER; 1022 1023 /* zero and/or copy the page as determined by the flags */ 1024 if (flags & H_COPY_PAGE) 1025 ret = kvmppc_do_h_page_init_copy(vcpu, dest, src); 1026 else if (flags & H_ZERO_PAGE) 1027 ret = kvmppc_do_h_page_init_zero(vcpu, dest); 1028 1029 /* We can ignore the other flags */ 1030 1031 return ret; 1032 } 1033 1034 void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, 1035 unsigned long pte_index) 1036 { 1037 unsigned long rb; 1038 u64 hp0, hp1; 1039 1040 hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); 1041 hp0 = be64_to_cpu(hptep[0]); 1042 hp1 = be64_to_cpu(hptep[1]); 1043 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1044 hp0 = hpte_new_to_old_v(hp0, hp1); 1045 hp1 = hpte_new_to_old_r(hp1); 1046 } 1047 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1048 do_tlbies(kvm, &rb, 1, 1, true); 1049 } 1050 EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); 1051 1052 void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, 1053 unsigned long pte_index) 1054 { 1055 unsigned long rb; 1056 unsigned char rbyte; 1057 u64 hp0, hp1; 1058 1059 hp0 = be64_to_cpu(hptep[0]); 1060 hp1 = be64_to_cpu(hptep[1]); 1061 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1062 hp0 = hpte_new_to_old_v(hp0, hp1); 1063 hp1 = hpte_new_to_old_r(hp1); 1064 } 1065 rb = compute_tlbie_rb(hp0, hp1, pte_index); 1066 rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; 1067 /* modify only the second-last byte, which contains the ref bit */ 1068 *((char *)hptep + 14) = rbyte; 1069 do_tlbies(kvm, &rb, 1, 1, false); 1070 } 1071 EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte); 1072 1073 static int slb_base_page_shift[4] = { 1074 24, /* 16M */ 1075 16, /* 64k */ 1076 34, /* 16G */ 1077 20, /* 1M, unsupported */ 1078 }; 1079 1080 static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, 1081 unsigned long eaddr, unsigned long slb_v, long mmio_update) 1082 { 1083 struct mmio_hpte_cache_entry *entry = NULL; 1084 unsigned int pshift; 1085 unsigned int i; 1086 1087 for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { 1088 entry = &vcpu->arch.mmio_cache.entry[i]; 1089 if (entry->mmio_update == mmio_update) { 1090 pshift = entry->slb_base_pshift; 1091 if ((entry->eaddr >> pshift) == (eaddr >> pshift) && 1092 entry->slb_v == slb_v) 1093 return entry; 1094 } 1095 } 1096 return NULL; 1097 } 1098 1099 static struct mmio_hpte_cache_entry * 1100 next_mmio_cache_entry(struct kvm_vcpu *vcpu) 1101 { 1102 unsigned int index = vcpu->arch.mmio_cache.index; 1103 1104 vcpu->arch.mmio_cache.index++; 1105 if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) 1106 vcpu->arch.mmio_cache.index = 0; 1107 1108 return &vcpu->arch.mmio_cache.entry[index]; 1109 } 1110 1111 /* When called from virtmode, this func should be protected by 1112 * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK 1113 * can trigger deadlock issue. 1114 */ 1115 long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, 1116 unsigned long valid) 1117 { 1118 unsigned int i; 1119 unsigned int pshift; 1120 unsigned long somask; 1121 unsigned long vsid, hash; 1122 unsigned long avpn; 1123 __be64 *hpte; 1124 unsigned long mask, val; 1125 unsigned long v, r, orig_v; 1126 1127 /* Get page shift, work out hash and AVPN etc. */ 1128 mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; 1129 val = 0; 1130 pshift = 12; 1131 if (slb_v & SLB_VSID_L) { 1132 mask |= HPTE_V_LARGE; 1133 val |= HPTE_V_LARGE; 1134 pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4]; 1135 } 1136 if (slb_v & SLB_VSID_B_1T) { 1137 somask = (1UL << 40) - 1; 1138 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T; 1139 vsid ^= vsid << 25; 1140 } else { 1141 somask = (1UL << 28) - 1; 1142 vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; 1143 } 1144 hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt); 1145 avpn = slb_v & ~(somask >> 16); /* also includes B */ 1146 avpn |= (eaddr & somask) >> 16; 1147 1148 if (pshift >= 24) 1149 avpn &= ~((1UL << (pshift - 16)) - 1); 1150 else 1151 avpn &= ~0x7fUL; 1152 val |= avpn; 1153 1154 for (;;) { 1155 hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7)); 1156 1157 for (i = 0; i < 16; i += 2) { 1158 /* Read the PTE racily */ 1159 v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1160 if (cpu_has_feature(CPU_FTR_ARCH_300)) 1161 v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); 1162 1163 /* Check valid/absent, hash, segment size and AVPN */ 1164 if (!(v & valid) || (v & mask) != val) 1165 continue; 1166 1167 /* Lock the PTE and read it under the lock */ 1168 while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) 1169 cpu_relax(); 1170 v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; 1171 r = be64_to_cpu(hpte[i+1]); 1172 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1173 v = hpte_new_to_old_v(v, r); 1174 r = hpte_new_to_old_r(r); 1175 } 1176 1177 /* 1178 * Check the HPTE again, including base page size 1179 */ 1180 if ((v & valid) && (v & mask) == val && 1181 kvmppc_hpte_base_page_shift(v, r) == pshift) 1182 /* Return with the HPTE still locked */ 1183 return (hash << 3) + (i >> 1); 1184 1185 __unlock_hpte(&hpte[i], orig_v); 1186 } 1187 1188 if (val & HPTE_V_SECONDARY) 1189 break; 1190 val |= HPTE_V_SECONDARY; 1191 hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt); 1192 } 1193 return -1; 1194 } 1195 EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte); 1196 1197 /* 1198 * Called in real mode to check whether an HPTE not found fault 1199 * is due to accessing a paged-out page or an emulated MMIO page, 1200 * or if a protection fault is due to accessing a page that the 1201 * guest wanted read/write access to but which we made read-only. 1202 * Returns a possibly modified status (DSISR) value if not 1203 * (i.e. pass the interrupt to the guest), 1204 * -1 to pass the fault up to host kernel mode code, -2 to do that 1205 * and also load the instruction word (for MMIO emulation), 1206 * or 0 if we should make the guest retry the access. 1207 */ 1208 long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, 1209 unsigned long slb_v, unsigned int status, bool data) 1210 { 1211 struct kvm *kvm = vcpu->kvm; 1212 long int index; 1213 unsigned long v, r, gr, orig_v; 1214 __be64 *hpte; 1215 unsigned long valid; 1216 struct revmap_entry *rev; 1217 unsigned long pp, key; 1218 struct mmio_hpte_cache_entry *cache_entry = NULL; 1219 long mmio_update = 0; 1220 1221 /* For protection fault, expect to find a valid HPTE */ 1222 valid = HPTE_V_VALID; 1223 if (status & DSISR_NOHPTE) { 1224 valid |= HPTE_V_ABSENT; 1225 mmio_update = atomic64_read(&kvm->arch.mmio_update); 1226 cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); 1227 } 1228 if (cache_entry) { 1229 index = cache_entry->pte_index; 1230 v = cache_entry->hpte_v; 1231 r = cache_entry->hpte_r; 1232 gr = cache_entry->rpte; 1233 } else { 1234 index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); 1235 if (index < 0) { 1236 if (status & DSISR_NOHPTE) 1237 return status; /* there really was no HPTE */ 1238 return 0; /* for prot fault, HPTE disappeared */ 1239 } 1240 hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4)); 1241 v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; 1242 r = be64_to_cpu(hpte[1]); 1243 if (cpu_has_feature(CPU_FTR_ARCH_300)) { 1244 v = hpte_new_to_old_v(v, r); 1245 r = hpte_new_to_old_r(r); 1246 } 1247 rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]); 1248 gr = rev->guest_rpte; 1249 1250 unlock_hpte(hpte, orig_v); 1251 } 1252 1253 /* For not found, if the HPTE is valid by now, retry the instruction */ 1254 if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) 1255 return 0; 1256 1257 /* Check access permissions to the page */ 1258 pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 1259 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 1260 status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ 1261 if (!data) { 1262 if (gr & (HPTE_R_N | HPTE_R_G)) 1263 return status | SRR1_ISI_N_OR_G; 1264 if (!hpte_read_permission(pp, slb_v & key)) 1265 return status | SRR1_ISI_PROT; 1266 } else if (status & DSISR_ISSTORE) { 1267 /* check write permission */ 1268 if (!hpte_write_permission(pp, slb_v & key)) 1269 return status | DSISR_PROTFAULT; 1270 } else { 1271 if (!hpte_read_permission(pp, slb_v & key)) 1272 return status | DSISR_PROTFAULT; 1273 } 1274 1275 /* Check storage key, if applicable */ 1276 if (data && (vcpu->arch.shregs.msr & MSR_DR)) { 1277 unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr); 1278 if (status & DSISR_ISSTORE) 1279 perm >>= 1; 1280 if (perm & 1) 1281 return status | DSISR_KEYFAULT; 1282 } 1283 1284 /* Save HPTE info for virtual-mode handler */ 1285 vcpu->arch.pgfault_addr = addr; 1286 vcpu->arch.pgfault_index = index; 1287 vcpu->arch.pgfault_hpte[0] = v; 1288 vcpu->arch.pgfault_hpte[1] = r; 1289 vcpu->arch.pgfault_cache = cache_entry; 1290 1291 /* Check the storage key to see if it is possibly emulated MMIO */ 1292 if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 1293 (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { 1294 if (!cache_entry) { 1295 unsigned int pshift = 12; 1296 unsigned int pshift_index; 1297 1298 if (slb_v & SLB_VSID_L) { 1299 pshift_index = ((slb_v & SLB_VSID_LP) >> 4); 1300 pshift = slb_base_page_shift[pshift_index]; 1301 } 1302 cache_entry = next_mmio_cache_entry(vcpu); 1303 cache_entry->eaddr = addr; 1304 cache_entry->slb_base_pshift = pshift; 1305 cache_entry->pte_index = index; 1306 cache_entry->hpte_v = v; 1307 cache_entry->hpte_r = r; 1308 cache_entry->rpte = gr; 1309 cache_entry->slb_v = slb_v; 1310 cache_entry->mmio_update = mmio_update; 1311 } 1312 if (data && (vcpu->arch.shregs.msr & MSR_IR)) 1313 return -2; /* MMIO emulation - load instr word */ 1314 } 1315 1316 return -1; /* send fault up to host kernel mode */ 1317 } 1318