/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 */

#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>

#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu-hash64.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>

/* Translate address of a vmalloc'd thing to a linear map address */
static void *real_vmalloc_addr(void *x)
{
        unsigned long addr = (unsigned long) x;
        pte_t *p;

        p = find_linux_pte(swapper_pg_dir, addr);
        if (!p || !pte_present(*p))
                return NULL;
        /* assume we don't have huge pages in vmalloc space... */
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
        return __va(addr);
}

/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
static int global_invalidates(struct kvm *kvm, unsigned long flags)
{
        int global;

        /*
         * If there is only one vcore, and it's currently running,
         * we can use tlbiel as long as we mark all other physical
         * cores as potentially having stale TLB entries for this lpid.
         * If we're not using MMU notifiers, we never take pages away
         * from the guest, so we can use tlbiel if requested.
         * Otherwise, don't use tlbiel.
         */
        if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
                global = 0;
        else if (kvm->arch.using_mmu_notifiers)
                global = 1;
        else
                global = !(flags & H_LOCAL);

        if (!global) {
                /* any other core might now have stale TLB entries... */
                smp_wmb();
                cpumask_setall(&kvm->arch.need_tlb_flush);
                cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
                                  &kvm->arch.need_tlb_flush);
        }

        return global;
}

/*
 * Add this HPTE into the chain for the real page.
 * Must be called with the chain locked; it unlocks the chain.
 */
void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
                             unsigned long *rmap, long pte_index, int realmode)
{
        struct revmap_entry *head, *tail;
        unsigned long i;

        if (*rmap & KVMPPC_RMAP_PRESENT) {
                i = *rmap & KVMPPC_RMAP_INDEX;
                head = &kvm->arch.revmap[i];
                if (realmode)
                        head = real_vmalloc_addr(head);
                tail = &kvm->arch.revmap[head->back];
                if (realmode)
                        tail = real_vmalloc_addr(tail);
                rev->forw = i;
                rev->back = head->back;
                tail->forw = pte_index;
                head->back = pte_index;
        } else {
                rev->forw = rev->back = pte_index;
                *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
                        pte_index | KVMPPC_RMAP_PRESENT;
        }
        unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);

/*
 * Note modification of an HPTE; set the HPTE modified bit
 * if anyone is interested.
 */
static inline void note_hpte_modification(struct kvm *kvm,
                                          struct revmap_entry *rev)
{
        if (atomic_read(&kvm->arch.hpte_mod_interest))
                rev->guest_rpte |= HPTE_GR_MODIFIED;
}

/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
                                struct revmap_entry *rev,
                                unsigned long hpte_v, unsigned long hpte_r)
{
        struct revmap_entry *next, *prev;
        unsigned long gfn, ptel, head;
        struct kvm_memory_slot *memslot;
        unsigned long *rmap;
        unsigned long rcbits;

        rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
        ptel = rev->guest_rpte |= rcbits;
        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        if (!memslot)
                return;

        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
        lock_rmap(rmap);

        head = *rmap & KVMPPC_RMAP_INDEX;
        next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
        prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
        next->back = rev->back;
        prev->forw = rev->forw;
        if (head == pte_index) {
                head = rev->forw;
                if (head == pte_index)
                        *rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
                else
                        *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
        }
        *rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
        unlock_rmap(rmap);
}
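
/*
 * Look up the Linux PTE for the host virtual address hva and return it,
 * setting *pte_sizep to the size of the page backing it.  Returns an
 * empty PTE if there is no present mapping or if the backing page is
 * smaller than the size the caller passed in via *pte_sizep.
 */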
static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
                              int writing, unsigned long *pte_sizep)
{
        pte_t *ptep;
        unsigned long ps = *pte_sizep;
        unsigned int shift;

        ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
        if (!ptep)
                return __pte(0);
        if (shift)
                *pte_sizep = 1ul << shift;
        else
                *pte_sizep = PAGE_SIZE;
        if (ps > *pte_sizep)
                return __pte(0);
        if (!pte_present(*ptep))
                return __pte(0);
        return kvmppc_read_update_linux_pte(ptep, writing);
}

static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
{
        asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
        hpte[0] = hpte_v;
}
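
/*
 * Core of the H_ENTER hypercall: insert an HPTE into the guest's hashed
 * page table.  pte_index selects the HPTE group to use (or the exact
 * slot if H_EXACT is set in flags); the index of the slot actually used
 * is returned via *pte_idx_ret.
 */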
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                       long pte_index, unsigned long pteh, unsigned long ptel,
                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
        unsigned long i, pa, gpa, gfn, psize;
        unsigned long slot_fn, hva;
        unsigned long *hpte;
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
        unsigned long *physp, pte_size;
        unsigned long is_io;
        unsigned long *rmap;
        pte_t pte;
        unsigned int writing;
        unsigned long mmu_seq;
        unsigned long rcbits;

        psize = hpte_page_size(pteh, ptel);
        if (!psize)
                return H_PARAMETER;
        writing = hpte_is_writable(ptel);
        pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
        ptel &= ~HPTE_GR_RESERVED;
        g_ptel = ptel;

        /* used later to detect if we might have been invalidated */
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();

        /* Find the memslot (if any) for this address */
        gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
        gfn = gpa >> PAGE_SHIFT;
        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
        pa = 0;
        is_io = ~0ul;
        rmap = NULL;
        if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
                /* PPC970 can't do emulated MMIO */
                if (!cpu_has_feature(CPU_FTR_ARCH_206))
                        return H_PARAMETER;
                /* Emulated MMIO - mark this with key=31 */
                pteh |= HPTE_V_ABSENT;
                ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
                goto do_insert;
        }

        /* Check if the requested page fits entirely in the memslot. */
        if (!slot_is_aligned(memslot, psize))
                return H_PARAMETER;
        slot_fn = gfn - memslot->base_gfn;
        rmap = &memslot->arch.rmap[slot_fn];

        if (!kvm->arch.using_mmu_notifiers) {
                physp = memslot->arch.slot_phys;
                if (!physp)
                        return H_PARAMETER;
                physp += slot_fn;
                if (realmode)
                        physp = real_vmalloc_addr(physp);
                pa = *physp;
                if (!pa)
                        return H_TOO_HARD;
                is_io = pa & (HPTE_R_I | HPTE_R_W);
                pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
                pa &= PAGE_MASK;
        } else {
                /* Translate to host virtual address */
                hva = __gfn_to_hva_memslot(memslot, gfn);

                /* Look up the Linux PTE for the backing page */
                pte_size = psize;
                pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
                if (pte_present(pte)) {
                        if (writing && !pte_write(pte))
                                /* make the actual HPTE be read-only */
                                ptel = hpte_make_readonly(ptel);
                        is_io = hpte_cache_bits(pte_val(pte));
                        pa = pte_pfn(pte) << PAGE_SHIFT;
                }
        }

        if (pte_size < psize)
                return H_PARAMETER;
        if (pa && pte_size > psize)
                pa |= gpa & (pte_size - 1);

        ptel &= ~(HPTE_R_PP0 - psize);
        ptel |= pa;

        if (pa)
                pteh |= HPTE_V_VALID;
        else
                pteh |= HPTE_V_ABSENT;

        /* Check WIMG */
        if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
                if (is_io)
                        return H_PARAMETER;
                /*
                 * Allow guest to map emulated device memory as
                 * uncacheable, but actually make it cacheable.
                 */
                ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
                ptel |= HPTE_R_M;
        }

        /* Find and lock the HPTEG slot to use */
 do_insert:
        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (likely((flags & H_EXACT) == 0)) {
                pte_index &= ~7UL;
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                for (i = 0; i < 8; ++i) {
                        if ((*hpte & HPTE_V_VALID) == 0 &&
                            try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                          HPTE_V_ABSENT))
                                break;
                        hpte += 2;
                }
                if (i == 8) {
                        /*
                         * Since try_lock_hpte doesn't retry (not even stdcx.
                         * failures), it could be that there is a free slot
                         * but we transiently failed to lock it.  Try again,
                         * actually locking each slot and checking it.
                         */
                        hpte -= 16;
                        for (i = 0; i < 8; ++i) {
                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                        cpu_relax();
                                if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                        break;
                                *hpte &= ~HPTE_V_HVLOCK;
                                hpte += 2;
                        }
                        if (i == 8)
                                return H_PTEG_FULL;
                }
                pte_index += i;
        } else {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
                                   HPTE_V_ABSENT)) {
                        /* Lock the slot and check again */
                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                cpu_relax();
                        if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
                                *hpte &= ~HPTE_V_HVLOCK;
                                return H_PTEG_FULL;
                        }
                }
        }

        /* Save away the guest's idea of the second HPTE dword */
        rev = &kvm->arch.revmap[pte_index];
        if (realmode)
                rev = real_vmalloc_addr(rev);
        if (rev) {
                rev->guest_rpte = g_ptel;
                note_hpte_modification(kvm, rev);
        }

        /* Link HPTE into reverse-map chain */
        if (pteh & HPTE_V_VALID) {
                if (realmode)
                        rmap = real_vmalloc_addr(rmap);
                lock_rmap(rmap);
                /* Check for pending invalidations under the rmap chain lock */
                if (kvm->arch.using_mmu_notifiers &&
                    mmu_notifier_retry(kvm, mmu_seq)) {
                        /* inval in progress, write a non-present HPTE */
                        pteh |= HPTE_V_ABSENT;
                        pteh &= ~HPTE_V_VALID;
                        unlock_rmap(rmap);
                } else {
                        kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
                                                realmode);
                        /* Only set R/C in real HPTE if already set in *rmap */
                        rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
                        ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
                }
        }

        hpte[1] = ptel;

        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
        eieio();
        hpte[0] = pteh;
        asm volatile("ptesync" : : : "memory");

        *pte_idx_ret = pte_index;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);

long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                    long pte_index, unsigned long pteh, unsigned long ptel)
{
        return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
                                 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
}

#define LOCK_TOKEN      (*(u32 *)(&get_paca()->lock_token))
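
/*
 * Try to acquire the global tlbie lock without spinning, using this
 * cpu's lock token as the lock value.  Returns non-zero if the lock was
 * obtained and zero if another cpu already holds it; callers spin with
 * cpu_relax() and release the lock by storing 0 to kvm->arch.tlbie_lock.
 */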
static inline int try_lock_tlbie(unsigned int *lock)
{
        unsigned int tmp, old;
        unsigned int token = LOCK_TOKEN;

        asm volatile("1:lwarx %1,0,%2\n"
                     " cmpwi cr0,%1,0\n"
                     " bne 2f\n"
                     " stwcx. %3,0,%2\n"
                     " bne- 1b\n"
                     " isync\n"
                     "2:"
                     : "=&r" (tmp), "=&r" (old)
                     : "r" (lock), "r" (token)
                     : "cc", "memory");
        return old == 0;
}
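
/*
 * Core of the H_REMOVE hypercall: invalidate the HPTE at pte_index
 * (subject to the H_AVPN and H_ANDCOND checks in flags), flush the
 * translation from the TLB, and return the old V and R doublewords
 * via hpret[].
 */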
long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
                        unsigned long pte_index, unsigned long avpn,
                        unsigned long *hpret)
{
        unsigned long *hpte;
        unsigned long v, r, rb;
        struct revmap_entry *rev;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) ||
            ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) {
                hpte[0] &= ~HPTE_V_HVLOCK;
                return H_NOT_FOUND;
        }

        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        v = hpte[0] & ~HPTE_V_HVLOCK;
        if (v & HPTE_V_VALID) {
                hpte[0] &= ~HPTE_V_VALID;
                rb = compute_tlbie_rb(v, hpte[1], pte_index);
                if (global_invalidates(kvm, flags)) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                                     : : "r" (rb), "r" (kvm->arch.lpid));
                        asm volatile("ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        asm volatile("tlbiel %0" : : "r" (rb));
                        asm volatile("ptesync" : : : "memory");
                }
                /* Read PTE low word after tlbie to get final R/C values */
                remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
        }
        r = rev->guest_rpte & ~HPTE_GR_RESERVED;
        note_hpte_modification(kvm, rev);
        unlock_hpte(hpte, 0);

        hpret[0] = v;
        hpret[1] = r;
        return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);

long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
                     unsigned long pte_index, unsigned long avpn)
{
        return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
                                  &vcpu->arch.gpr[4]);
}
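
/*
 * H_BULK_REMOVE: process up to four remove requests taken from the
 * guest's GPRs.  Matching HPTEs are invalidated in batches, so the
 * tlbie lock is taken and the eieio/tlbsync/ptesync sequence issued
 * only once per batch; the return code and final R/C bits for each
 * request are passed back in args[].
 */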
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *args = &vcpu->arch.gpr[4];
        unsigned long *hp, *hptes[4], tlbrb[4];
        long int i, j, k, n, found, indexes[4];
        unsigned long flags, req, pte_index, rcbits;
        long int local = 0;
        long int ret = H_SUCCESS;
        struct revmap_entry *rev, *revs[4];

        if (atomic_read(&kvm->online_vcpus) == 1)
                local = 1;
        for (i = 0; i < 4 && ret == H_SUCCESS; ) {
                n = 0;
                for (; i < 4; ++i) {
                        j = i * 2;
                        pte_index = args[j];
                        flags = pte_index >> 56;
                        pte_index &= ((1ul << 56) - 1);
                        req = flags >> 6;
                        flags &= 3;
                        if (req == 3) {         /* no more requests */
                                i = 4;
                                break;
                        }
                        if (req != 1 || flags == 3 ||
                            pte_index >= kvm->arch.hpt_npte) {
                                /* parameter error */
                                args[j] = ((0xa0 | flags) << 56) + pte_index;
                                ret = H_PARAMETER;
                                break;
                        }
                        hp = (unsigned long *)
                                (kvm->arch.hpt_virt + (pte_index << 4));
                        /* to avoid deadlock, don't spin except for first */
                        if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
                                if (n)
                                        break;
                                while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
                                        cpu_relax();
                        }
                        found = 0;
                        if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
                                switch (flags & 3) {
                                case 0:         /* absolute */
                                        found = 1;
                                        break;
                                case 1:         /* andcond */
                                        if (!(hp[0] & args[j + 1]))
                                                found = 1;
                                        break;
                                case 2:         /* AVPN */
                                        if ((hp[0] & ~0x7fUL) == args[j + 1])
                                                found = 1;
                                        break;
                                }
                        }
                        if (!found) {
                                hp[0] &= ~HPTE_V_HVLOCK;
                                args[j] = ((0x90 | flags) << 56) + pte_index;
                                continue;
                        }

                        args[j] = ((0x80 | flags) << 56) + pte_index;
                        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
                        note_hpte_modification(kvm, rev);

                        if (!(hp[0] & HPTE_V_VALID)) {
                                /* insert R and C bits from PTE */
                                rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                                args[j] |= rcbits << (56 - 5);
                                hp[0] = 0;
                                continue;
                        }

                        hp[0] &= ~HPTE_V_VALID;         /* leave it locked */
                        tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
                        indexes[n] = j;
                        hptes[n] = hp;
                        revs[n] = rev;
                        ++n;
                }

                if (!n)
                        break;

                /* Now that we've collected a batch, do the tlbies */
                if (!local) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        for (k = 0; k < n; ++k)
                                asm volatile(PPC_TLBIE(%1,%0) : :
                                             "r" (tlbrb[k]),
                                             "r" (kvm->arch.lpid));
                        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        for (k = 0; k < n; ++k)
                                asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
                        asm volatile("ptesync" : : : "memory");
                }

                /* Read PTE low words after tlbie to get final R/C values */
                for (k = 0; k < n; ++k) {
                        j = indexes[k];
                        pte_index = args[j] & ((1ul << 56) - 1);
                        hp = hptes[k];
                        rev = revs[k];
                        remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]);
                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                        args[j] |= rcbits << (56 - 5);
                        hp[0] = 0;
                }
        }

        return ret;
}
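
/*
 * H_PROTECT: update the pp, N and key bits in the second doubleword of
 * an existing HPTE and flush the old translation from the TLB.  If the
 * guest asks for write access to a page that the host has mapped
 * read-only (when MMU notifiers are in use), the new HPTE is downgraded
 * to read-only.
 */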
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
                      unsigned long pte_index, unsigned long avpn,
                      unsigned long va)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *hpte;
        struct revmap_entry *rev;
        unsigned long v, r, rb, mask, bits;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;

        hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                cpu_relax();
        if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) {
                hpte[0] &= ~HPTE_V_HVLOCK;
                return H_NOT_FOUND;
        }

        v = hpte[0];
        bits = (flags << 55) & HPTE_R_PP0;
        bits |= (flags << 48) & HPTE_R_KEY_HI;
        bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);

        /* Update guest view of 2nd HPTE dword */
        mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
                HPTE_R_KEY_HI | HPTE_R_KEY_LO;
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        if (rev) {
                r = (rev->guest_rpte & ~mask) | bits;
                rev->guest_rpte = r;
                note_hpte_modification(kvm, rev);
        }
        r = (hpte[1] & ~mask) | bits;

        /* Update HPTE */
        if (v & HPTE_V_VALID) {
                rb = compute_tlbie_rb(v, r, pte_index);
                hpte[0] = v & ~HPTE_V_VALID;
                if (global_invalidates(kvm, flags)) {
                        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                                cpu_relax();
                        asm volatile("ptesync" : : : "memory");
                        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                                     : : "r" (rb), "r" (kvm->arch.lpid));
                        asm volatile("ptesync" : : : "memory");
                        kvm->arch.tlbie_lock = 0;
                } else {
                        asm volatile("ptesync" : : : "memory");
                        asm volatile("tlbiel %0" : : "r" (rb));
                        asm volatile("ptesync" : : : "memory");
                }
                /*
                 * If the host has this page as readonly but the guest
                 * wants to make it read/write, reduce the permissions.
                 * Checking the host permissions involves finding the
                 * memslot and then the Linux PTE for the page.
                 */
                if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
                        unsigned long psize, gfn, hva;
                        struct kvm_memory_slot *memslot;
                        pgd_t *pgdir = vcpu->arch.pgdir;
                        pte_t pte;

                        psize = hpte_page_size(v, r);
                        gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
                        memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
                        if (memslot) {
                                hva = __gfn_to_hva_memslot(memslot, gfn);
                                pte = lookup_linux_pte(pgdir, hva, 1, &psize);
                                if (pte_present(pte) && !pte_write(pte))
                                        r = hpte_make_readonly(r);
                        }
                }
        }
        hpte[1] = r;
        eieio();
        hpte[0] = v & ~HPTE_V_HVLOCK;
        asm volatile("ptesync" : : : "memory");
        return H_SUCCESS;
}

long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
                   unsigned long pte_index)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long *hpte, v, r;
        int i, n = 1;
        struct revmap_entry *rev = NULL;

        if (pte_index >= kvm->arch.hpt_npte)
                return H_PARAMETER;
        if (flags & H_READ_4) {
                pte_index &= ~3;
                n = 4;
        }
        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
        for (i = 0; i < n; ++i, ++pte_index) {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
                v = hpte[0] & ~HPTE_V_HVLOCK;
                r = hpte[1];
                if (v & HPTE_V_ABSENT) {
                        v &= ~HPTE_V_ABSENT;
                        v |= HPTE_V_VALID;
                }
                if (v & HPTE_V_VALID) {
                        r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
                        r &= ~HPTE_GR_RESERVED;
                }
                vcpu->arch.gpr[4 + i * 2] = v;
                vcpu->arch.gpr[5 + i * 2] = r;
        }
        return H_SUCCESS;
}

void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep,
                        unsigned long pte_index)
{
        unsigned long rb;

        hptep[0] &= ~HPTE_V_VALID;
        rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
        asm volatile("ptesync" : : : "memory");
        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                     : : "r" (rb), "r" (kvm->arch.lpid));
        asm volatile("ptesync" : : : "memory");
        kvm->arch.tlbie_lock = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);

void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep,
                        unsigned long pte_index)
{
        unsigned long rb;
        unsigned char rbyte;

        rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index);
        rbyte = (hptep[1] & ~HPTE_R_R) >> 8;
        /* modify only the second-last byte, which contains the ref bit */
        *((char *)hptep + 14) = rbyte;
        while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
                cpu_relax();
        asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
                     : : "r" (rb), "r" (kvm->arch.lpid));
        asm volatile("ptesync" : : : "memory");
        kvm->arch.tlbie_lock = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_clear_ref_hpte);

static int slb_base_page_shift[4] = {
        24,     /* 16M */
        16,     /* 64k */
        34,     /* 16G */
        20,     /* 1M, unsupported */
};
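
/*
 * Search the guest's hashed page table for an HPTE that matches the
 * given effective address and SLB entry contents.  On success the
 * matching HPTE is left locked (HPTE_V_HVLOCK set) and its index is
 * returned; -1 is returned if neither hash bucket contains a match.
 */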
long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                              unsigned long valid)
{
        unsigned int i;
        unsigned int pshift;
        unsigned long somask;
        unsigned long vsid, hash;
        unsigned long avpn;
        unsigned long *hpte;
        unsigned long mask, val;
        unsigned long v, r;

        /* Get page shift, work out hash and AVPN etc. */
        mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
        val = 0;
        pshift = 12;
        if (slb_v & SLB_VSID_L) {
                mask |= HPTE_V_LARGE;
                val |= HPTE_V_LARGE;
                pshift = slb_base_page_shift[(slb_v & SLB_VSID_LP) >> 4];
        }
        if (slb_v & SLB_VSID_B_1T) {
                somask = (1UL << 40) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T;
                vsid ^= vsid << 25;
        } else {
                somask = (1UL << 28) - 1;
                vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
        }
        hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
        avpn = slb_v & ~(somask >> 16); /* also includes B */
        avpn |= (eaddr & somask) >> 16;

        if (pshift >= 24)
                avpn &= ~((1UL << (pshift - 16)) - 1);
        else
                avpn &= ~0x7fUL;
        val |= avpn;

        for (;;) {
                hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7));

                for (i = 0; i < 16; i += 2) {
                        /* Read the PTE racily */
                        v = hpte[i] & ~HPTE_V_HVLOCK;

                        /* Check valid/absent, hash, segment size and AVPN */
                        if (!(v & valid) || (v & mask) != val)
                                continue;

                        /* Lock the PTE and read it under the lock */
                        while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
                                cpu_relax();
                        v = hpte[i] & ~HPTE_V_HVLOCK;
                        r = hpte[i+1];

                        /*
                         * Check the HPTE again, including large page size
                         * Since we don't currently allow any MPSS (mixed
                         * page-size segment) page sizes, it is sufficient
                         * to check against the actual page size.
                         */
                        if ((v & valid) && (v & mask) == val &&
                            hpte_page_size(v, r) == (1ul << pshift))
                                /* Return with the HPTE still locked */
                                return (hash << 3) + (i >> 1);

                        /* Unlock and move on */
                        hpte[i] = v;
                }

                if (val & HPTE_V_SECONDARY)
                        break;
                val |= HPTE_V_SECONDARY;
                hash = hash ^ kvm->arch.hpt_mask;
        }
        return -1;
}
EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);

/*
 * Called in real mode to check whether an HPTE not found fault
 * is due to accessing a paged-out page or an emulated MMIO page,
 * or if a protection fault is due to accessing a page that the
 * guest wanted read/write access to but which we made read-only.
 * Returns a possibly modified status (DSISR) value if not
 * (i.e. pass the interrupt to the guest),
 * -1 to pass the fault up to host kernel mode code, -2 to do that
 * and also load the instruction word (for MMIO emulation),
 * or 0 if we should make the guest retry the access.
 */
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
                          unsigned long slb_v, unsigned int status, bool data)
{
        struct kvm *kvm = vcpu->kvm;
        long int index;
        unsigned long v, r, gr;
        unsigned long *hpte;
        unsigned long valid;
        struct revmap_entry *rev;
        unsigned long pp, key;

        /* For protection fault, expect to find a valid HPTE */
        valid = HPTE_V_VALID;
        if (status & DSISR_NOHPTE)
                valid |= HPTE_V_ABSENT;

        index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
        if (index < 0) {
                if (status & DSISR_NOHPTE)
                        return status;  /* there really was no HPTE */
                return 0;               /* for prot fault, HPTE disappeared */
        }
        hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
        v = hpte[0] & ~HPTE_V_HVLOCK;
        r = hpte[1];
        rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
        gr = rev->guest_rpte;

        unlock_hpte(hpte, v);

        /* For not found, if the HPTE is valid by now, retry the instruction */
        if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
                return 0;

        /* Check access permissions to the page */
        pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
        key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
        status &= ~DSISR_NOHPTE;        /* DSISR_NOHPTE == SRR1_ISI_NOPT */
        if (!data) {
                if (gr & (HPTE_R_N | HPTE_R_G))
                        return status | SRR1_ISI_N_OR_G;
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | SRR1_ISI_PROT;
        } else if (status & DSISR_ISSTORE) {
                /* check write permission */
                if (!hpte_write_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        } else {
                if (!hpte_read_permission(pp, slb_v & key))
                        return status | DSISR_PROTFAULT;
        }

        /* Check storage key, if applicable */
        if (data && (vcpu->arch.shregs.msr & MSR_DR)) {
                unsigned int perm = hpte_get_skey_perm(gr, vcpu->arch.amr);
                if (status & DSISR_ISSTORE)
                        perm >>= 1;
                if (perm & 1)
                        return status | DSISR_KEYFAULT;
        }

        /* Save HPTE info for virtual-mode handler */
        vcpu->arch.pgfault_addr = addr;
        vcpu->arch.pgfault_index = index;
        vcpu->arch.pgfault_hpte[0] = v;
        vcpu->arch.pgfault_hpte[1] = r;

        /* Check the storage key to see if it is possibly emulated MMIO */
        if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
            (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
             (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
                return -2;      /* MMIO emulation - load instr word */

        return -1;              /* send fault up to host kernel mode */
}