// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *         Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);

void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->pcr = vc->pcr;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	hr->dawr0 = vcpu->arch.dawr;
	hr->dawrx0 = vcpu->arch.dawrx;
	hr->ciabr = vcpu->arch.ciabr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
}

static void byteswap_pt_regs(struct pt_regs *regs)
{
	unsigned long *addr = (unsigned long *) regs;

	for (; addr < ((unsigned long *) (regs + 1)); addr++)
		*addr = swab64(*addr);
}

static void byteswap_hv_regs(struct hv_guest_state *hr)
{
	hr->version = swab64(hr->version);
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
	hr->pcr = swab64(hr->pcr);
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
	hr->tb_offset = swab64(hr->tb_offset);
	hr->dawr0 = swab64(hr->dawr0);
	hr->dawrx0 = swab64(hr->dawrx0);
	hr->ciabr = swab64(hr->ciabr);
	hr->hdec_expiry = swab64(hr->hdec_expiry);
	hr->purr = swab64(hr->purr);
	hr->spurr = swab64(hr->spurr);
	hr->ic = swab64(hr->ic);
	hr->vtb = swab64(hr->vtb);
	hr->hdar = swab64(hr->hdar);
	hr->hdsisr = swab64(hr->hdsisr);
	hr->heir = swab64(hr->heir);
	hr->asdr = swab64(hr->asdr);
	hr->srr0 = swab64(hr->srr0);
	hr->srr1 = swab64(hr->srr1);
	hr->sprg[0] = swab64(hr->sprg[0]);
	hr->sprg[1] = swab64(hr->sprg[1]);
	hr->sprg[2] = swab64(hr->sprg[2]);
	hr->sprg[3] = swab64(hr->sprg[3]);
	hr->pidr = swab64(hr->pidr);
	hr->cfar = swab64(hr->cfar);
	hr->ppr = swab64(hr->ppr);
}
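
/*
 * Save the HV state that the nested (L2) guest may have changed back
 * into the hv_guest_state that will be returned to the L1 guest,
 * together with the fault information for the interrupt that caused
 * the exit from the L2 guest.
 */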
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}

static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	/*
	 * Don't let L1 enable features for L2 which we've disabled for L1,
	 * but preserve the interrupt cause field.
	 */
	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);

	/* Don't let data address watchpoint match in hypervisor state */
	hr->dawrx0 &= ~DAWRX_HYP;

	/* Don't let completed instruction address breakpt match in HV state */
	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
		hr->ciabr &= ~CIABR_PRIV;
}

static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr = hr->dawr0;
	vcpu->arch.dawrx = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}
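
/*
 * Handle the H_ENTER_NESTED hcall from the L1 guest.
 * r4 = L1 guest real address of a hv_guest_state structure.
 * r5 = L1 guest real address of a pt_regs structure.
 * Runs the nested (L2) vcpu until it exits for a reason that L1 must
 * handle, copies the updated state back to L1, and returns the trap
 * number (or an H_xxx error code) to the caller.
 */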
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
	u64 mask;
	unsigned long lpcr;

	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
				  sizeof(struct hv_guest_state));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_hv_regs(&l2_hv);
	if (l2_hv.version != HV_GUEST_STATE_VERSION)
		return H_P2;

	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
				  sizeof(struct pt_regs));
	if (err)
		return H_PARAMETER;
	if (kvmppc_need_byteswap(vcpu))
		byteswap_pt_regs(&l2_regs);
	if (l2_hv.vcpu_token >= NR_CPUS)
		return H_PARAMETER;

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.regs = l2_regs;
	vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;
	lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
	sanitise_hv_regs(vcpu, &l2_hv);
	restore_hv_regs(vcpu, &l2_hv);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu->arch.kvm_run, vcpu, hdec_exp,
					  lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	if (kvmppc_need_byteswap(vcpu)) {
		byteswap_hv_regs(&l2_hv);
		byteswap_pt_regs(&l2_regs);
	}
	err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
				   sizeof(struct hv_guest_state));
	if (err)
		return H_AUTHORITY;
	err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
				   sizeof(struct pt_regs));
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	return vcpu->arch.trap;
}
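
/*
 * Set up this hypervisor instance to run nested guests.
 * When running as an L1 under a parent (L0) hypervisor on pseries,
 * allocate a partition table for the nested guests' shadow LPIDs and
 * register it with the L0 via H_SET_PARTITION_TABLE.
 */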
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}

static void kvmhv_flush_lpid(unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_tlb_lpid(lpid);
		return;
	}

	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
				lpid, TLBIEL_INVAL_SET_LPID);
	if (rc)
		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}
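
/*
 * Set a partition table entry for a shadow LPID.  When running
 * bare-metal (L0) the hardware partition table is written directly;
 * when running as an L1 on pseries, update the table registered with
 * the L0 via H_SET_PARTITION_TABLE and invalidate the LPID's cached
 * translations.
 */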
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!kvmhv_on_pseries()) {
		mmu_partition_table_set_entry(lpid, dw0, dw1);
		return;
	}

	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	/* L0 will do the necessary barriers */
	kvmhv_flush_lpid(lpid);
}

static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}
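
/*
 * Allocate and initialise the state for a nested guest identified by
 * the LPID that L1 uses for it, including an empty shadow page table
 * and a fresh shadow LPID for use on the real hardware.
 * Returns NULL on failure.
 */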
struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;

	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

	return gp;

out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}

static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmhv_free_memslot_nest_rmap(memslot);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/* caller must hold gp->tlb_lock */
static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	kvmhv_flush_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}

struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
{
	if (lpid > kvm->arch.max_nested_lpid)
		return NULL;
	return kvm->arch.nested_guests[lpid];
}

static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
				      RMAP_NESTED_GPA_MASK));
}
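
/*
 * Insert a nested-guest reverse-map entry into a memslot rmap slot.
 * Each entry encodes the L1 lpid and the nested guest physical address
 * that map to this L1 guest page, so the corresponding shadow ptes can
 * be found and invalidated when the underlying mapping changes.  A
 * single entry is stored directly in *rmapp with
 * RMAP_NESTED_IS_SINGLE_ENTRY set; further entries are chained as an
 * llist of struct rmap_nested.
 */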
void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
				   unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find and invalidate the pte */
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	/* Don't spuriously invalidate ptes if the pfn has changed */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
}

static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
					unsigned long hpa, unsigned long mask)
{
	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
	struct rmap_nested *cursor;
	unsigned long rmap;

	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
		kfree(cursor);
	}
}

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
				  struct kvm_memory_slot *memslot,
				  unsigned long gpa, unsigned long hpa,
				  unsigned long nbytes)
{
	unsigned long gfn, end_gfn;
	unsigned long addr_mask;

	if (!memslot)
		return;
	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
	end_gfn = gfn + (nbytes >> PAGE_SHIFT);

	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= addr_mask;

	for (; gfn < end_gfn; gfn++) {
		unsigned long *rmap = &memslot->arch.rmap[gfn];
		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
	}
}

static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
{
	unsigned long page;

	for (page = 0; page < free->npages; page++) {
		unsigned long rmap, *rmapp = &free->arch.rmap[page];
		struct rmap_nested *cursor;
		struct llist_node *entry;

		entry = llist_del_all((struct llist_head *) rmapp);
		for_each_nest_rmap_safe(cursor, entry, &rmap)
			kfree(cursor);
	}
}

static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}
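
/*
 * Helpers to extract the fields of a tlbie instruction and of the
 * RS/RB operand values passed in by the H_TLB_INVALIDATE hcall:
 * RIC, PRS and R from the instruction image, and the LPID, IS, AP
 * and EPN fields from the register values.
 */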
static inline int get_r(unsigned int instr)
{
	return (instr >> 16) & 0x1;
}

static inline int get_lpid(unsigned long r_val)
{
	return r_val & 0xffffffff;
}

static inline int get_is(unsigned long r_val)
{
	return (r_val >> 10) & 0x3;
}

static inline int get_ap(unsigned long r_val)
{
	return (r_val >> 5) & 0x7;
}

static inline long get_epn(unsigned long r_val)
{
	return r_val >> 12;
}

static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
					int ap, long epn)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	long npages;
	int shift, shadow_shift;
	unsigned long addr;

	shift = ap_to_shift(ap);
	addr = epn << 12;
	if (shift < 0)
		/* Invalid ap encoding */
		return -EINVAL;

	addr &= ~((1UL << shift) - 1);
	npages = 1UL << (shift - PAGE_SHIFT);

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (!gp) /* No such guest -> nothing to do */
		return 0;
	mutex_lock(&gp->tlb_lock);

	/* There may be more than one host page backing this single guest pte */
	do {
		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);

		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
		addr += 1UL << shadow_shift;
	} while (npages > 0);

	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
	return 0;
}

static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
				     struct kvm_nested_guest *gp, int ric)
{
	struct kvm *kvm = vcpu->kvm;

	mutex_lock(&gp->tlb_lock);
	switch (ric) {
	case 0:
		/* Invalidate TLB */
		spin_lock(&kvm->mmu_lock);
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		kvmhv_flush_lpid(gp->shadow_lpid);
		spin_unlock(&kvm->mmu_lock);
		break;
	case 1:
		/*
		 * Invalidate PWC
		 * We don't cache this -> nothing to do
		 */
		break;
	case 2:
		/* Invalidate TLB, PWC and caching of partition table entries */
		kvmhv_flush_nested(gp);
		break;
	default:
		break;
	}
	mutex_unlock(&gp->tlb_lock);
}

static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int i;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (gp) {
			spin_unlock(&kvm->mmu_lock);
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			spin_lock(&kvm->mmu_lock);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}
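
/*
 * Emulate a hypervisor-privileged (partition-scoped) tlbie on behalf
 * of the L1 guest by invalidating the corresponding shadow mappings.
 * Only radix, non-process-scoped forms are accepted; anything else
 * returns -EINVAL so the caller can fail the hcall.
 */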
static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
				    unsigned long rsval, unsigned long rbval)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int r, ric, prs, is, ap;
	int lpid;
	long epn;
	int ret = 0;

	ric = get_ric(instr);
	prs = get_prs(instr);
	r = get_r(instr);
	lpid = get_lpid(rsval);
	is = get_is(rbval);

	/*
	 * These cases are invalid and are not handled:
	 * r   != 1 -> Only radix supported
	 * prs == 1 -> Not HV privileged
	 * ric == 3 -> No cluster bombs for radix
	 * is  == 1 -> Partition scoped translations not associated with pid
	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
	 */
	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
	    ((!is) && (ric == 1 || ric == 2)))
		return -EINVAL;

	switch (is) {
	case 0:
		/*
		 * We know ric == 0
		 * Invalidate TLB for a given target address
		 */
		epn = get_epn(rbval);
		ap = get_ap(rbval);
		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
		break;
	case 2:
		/* Invalidate matching LPID */
		gp = kvmhv_get_nested(kvm, lpid, false);
		if (gp) {
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			kvmhv_put_nested(gp);
		}
		break;
	case 3:
		/* Invalidate ALL LPIDs */
		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * This handles the H_TLB_INVALIDATE hcall.
 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
 * (r6) rB contents.
 */
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
	if (ret)
		return H_PARAMETER;
	return H_SUCCESS;
}

/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);

	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_OR_G;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= ~0x783f0000ul;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}
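
/*
 * Handle a fault that only requires the reference/change bits to be
 * set.  The R/C bits must already be set in the L1 partition-scoped
 * pte (otherwise the fault is forwarded to L1); then set them in our
 * own (L0) pte for the L1 guest page and in the shadow pte for the
 * nested guest page.
 */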
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	bool ret;

	/* Are the rc bits set in the L1 partition scoped pte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
				      gpte.raddr, kvm->arch.lpid);
	spin_unlock(&kvm->mmu_lock);
	if (!ret)
		return -EINVAL;

	/* Set the rc bit in the shadow_pgtable pte for the nested guest */
	ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
				      gp->shadow_lpid);
	if (!ret)
		return -EINVAL;
	return 0;
}

static inline int kvmppc_radix_level_to_shift(int level)
{
	switch (level) {
	case 2:
		return PUD_SHIFT;
	case 1:
		return PMD_SHIFT;
	default:
		return PAGE_SHIFT;
	}
}

static inline int kvmppc_radix_shift_to_level(int shift)
{
	if (shift == PUD_SHIFT)
		return 2;
	if (shift == PMD_SHIFT)
		return 1;
	if (shift == PAGE_SHIFT)
		return 0;
	WARN_ON_ONCE(1);
	return 0;
}
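
/*
 * Handle a page fault taken while running a nested (L2) guest, i.e.
 * the shadow page table has no usable translation for the faulting
 * nested guest real address.  Translate the address through the L1
 * partition-scoped table, find or create the corresponding host
 * mapping, and insert a shadow pte combining the two sets of
 * permissions, recording a reverse-map entry so the shadow pte can be
 * invalidated later.  Faults that L1 must resolve are reflected back
 * to it.
 */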
/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */

	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}
		/* passthrough of emulated MMIO case... */
		pr_err("emulated MMIO passthrough?\n");
		return -EINVAL;
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions are the combination of the host and L1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;
		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	if (n_rmap)
		kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST; /* Let the guest try again */

	return ret;

inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}
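
/*
 * Entry point for nested page faults: serialise against tlbie
 * emulation and flushes for this nested guest using gp->tlb_lock,
 * then do the real work in __kvmhv_nested_page_fault().
 */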
long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp = vcpu->arch.nested;
	long int ret;

	mutex_lock(&gp->tlb_lock);
	ret = __kvmhv_nested_page_fault(vcpu, gp);
	mutex_unlock(&gp->tlb_lock);
	return ret;
}

int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
{
	int ret = -1;

	spin_lock(&kvm->mmu_lock);
	while (++lpid <= kvm->arch.max_nested_lpid) {
		if (kvm->arch.nested_guests[lpid]) {
			ret = lpid;
			break;
		}
	}
	spin_unlock(&kvm->mmu_lock);
	return ret;
}