/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	local_paca->kvm_hstate.kvm_vcpu = vcpu;
	local_paca->kvm_hstate.kvm_vcore = vcpu->arch.vcore;
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

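/*
 * Find the vcpu with the given vcpu_id in this VM, or NULL if there
 * is none.  Takes and releases kvm->lock while searching.
 */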
struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err = H_PARAMETER;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	flags >>= 63 - 18;
	flags &= 7;
	if (flags == 0 || flags == 4)
		return H_PARAMETER;
	if (flags < 4) {
		if (vpa & 0x7f)
			return H_PARAMETER;
		if (flags >= 2 && !tvcpu->arch.vpa)
			return H_RESOURCE;
		/* registering new area; convert logical addr to real */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (flags <= 1)
			len = *(unsigned short *)(va + 4);
		else
			len = *(unsigned int *)(va + 4);
		if (len > nb)
			goto out_unpin;
		switch (flags) {
		case 1:		/* register VPA */
			if (len < 640)
				goto out_unpin;
			if (tvcpu->arch.vpa)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
			tvcpu->arch.vpa = va;
			init_vpa(vcpu, va);
			break;
		case 2:		/* register DTL */
			if (len < 48)
				goto out_unpin;
			len -= len % 48;
			if (tvcpu->arch.dtl)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
			tvcpu->arch.dtl = va;
			tvcpu->arch.dtl_end = va + len;
			break;
		case 3:		/* register SLB shadow buffer */
			if (len < 16)
				goto out_unpin;
			if (tvcpu->arch.slb_shadow)
				kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
			tvcpu->arch.slb_shadow = va;
			break;
		}
	} else {
		switch (flags) {
		case 5:		/* unregister VPA */
			if (tvcpu->arch.slb_shadow || tvcpu->arch.dtl)
				return H_RESOURCE;
			if (!tvcpu->arch.vpa)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.vpa);
			tvcpu->arch.vpa = NULL;
			break;
		case 6:		/* unregister DTL */
			if (!tvcpu->arch.dtl)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.dtl);
			tvcpu->arch.dtl = NULL;
			break;
		case 7:		/* unregister SLB shadow buffer */
			if (!tvcpu->arch.slb_shadow)
				break;
			kvmppc_unpin_guest_page(kvm, tvcpu->arch.slb_shadow);
			tvcpu->arch.slb_shadow = NULL;
			break;
		}
	}
	return H_SUCCESS;

 out_unpin:
	kvmppc_unpin_guest_page(kvm, va);
	return err;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;

	switch (req) {
	case H_ENTER:
		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
					      kvmppc_get_gpr(vcpu, 5),
					      kvmppc_get_gpr(vcpu, 6),
					      kvmppc_get_gpr(vcpu, 7));
		break;
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
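	/* H_CONFER is a no-op for now; we simply return H_SUCCESS. */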
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = kvmppc_book3s_hv_page_fault(run, vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		r = kvmppc_book3s_hv_page_fault(run, vcpu,
				kvmppc_get_pc(vcpu), 0);
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	/* clear the whole structure before filling in pvr and the SLB */
	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_PPC_HIOR:
		r = put_user(0, (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}

int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_PPC_HIOR:
	{
		u64 hior;
		/* Only allow this to be set to zero */
		r = get_user(hior, (u64 __user *)reg->addr);
		if (!r && (hior != 0))
			r = -EINVAL;
		break;
	}
	default:
		break;
	}

	return r;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_core;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.last_cpu = -1;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);

	kvmppc_mmu_book3s_hv_init(vcpu);

	/*
	 * We consider the vcpu stopped until we see the first run ioctl
	 * for it.
	 */
	vcpu->arch.state = KVMPPC_VCPU_STOPPED;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
		if (vcore) {
			INIT_LIST_HEAD(&vcore->runnable_threads);
			spin_lock_init(&vcore->lock);
			init_waitqueue_head(&vcore->wq);
		}
		kvm->arch.vcores[core] = vcore;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.dtl)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl);
	if (vcpu->arch.slb_shadow)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow);
	if (vcpu->arch.vpa)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu *v;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	--vc->n_runnable;
	++vc->n_busy;
	/* decrement the physical thread id of each following vcpu */
	v = vcpu;
	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
		--v->arch.ptid;
	list_del(&vcpu->arch.run_list);
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
	cpu = vc->pcpu + vcpu->arch.ptid;
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.kvm_vcore = vc;
	tpaca->kvm_hstate.napping = 0;
	vcpu->cpu = vc->pcpu;
	smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (vcpu->arch.ptid) {
		tpaca->cpu_start = 0x80;
		wmb();
		xics_wake_cpu(cpu);
		++vc->n_woken;
	}
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
	int i;

	HMT_low();
	i = 0;
	while (vc->nap_count < vc->n_woken) {
		if (++i >= 1000000) {
			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
			       vc->nap_count, vc->n_woken);
			break;
		}
		cpu_relax();
	}
	HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.
 */
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr = cpu_thread_in_core(cpu);

	if (thr)
		return 0;
	while (++thr < threads_per_core)
		if (cpu_online(cpu + thr))
			return 0;
	return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static int kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
	long ret;
	u64 now;
	int ptid;

	/* don't start if any threads have a signal pending */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (signal_pending(vcpu->arch.run_task))
			return 0;

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 * XXX we should also block attempts to bring any
	 * secondary threads online.
	 */
	if (threads_per_core > 1 && !on_primary_thread()) {
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			vcpu->arch.ret = -EBUSY;
		goto out;
	}

	/*
	 * Assign physical thread IDs, first to non-ceded vcpus
	 * and then to ceded ones.
	 */
	ptid = 0;
	vcpu0 = NULL;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (!vcpu->arch.ceded) {
			if (!ptid)
				vcpu0 = vcpu;
			vcpu->arch.ptid = ptid++;
		}
	}
	if (!vcpu0)
		return 0;		/* nothing to run */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (vcpu->arch.ceded)
			vcpu->arch.ptid = ptid++;

	vc->n_woken = 0;
	vc->nap_count = 0;
	vc->entry_exit_count = 0;
	vc->vcore_state = VCORE_RUNNING;
	vc->in_guest = 0;
	vc->pcpu = smp_processor_id();
	vc->napping_threads = 0;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		kvmppc_start_thread(vcpu);

	preempt_disable();
	spin_unlock(&vc->lock);

	kvm_guest_enter();
	__kvmppc_vcore_entry(NULL, vcpu0);

	spin_lock(&vc->lock);
	/* disable sending of IPIs on virtual external irqs */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		vcpu->cpu = -1;
	/* wait for secondary threads to finish writing their state to memory */
	if (vc->nap_count < vc->n_woken)
		kvmppc_wait_for_nap(vc);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;
	spin_unlock(&vc->lock);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	now = get_tb();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
						 vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (vcpu->arch.ceded) {
			if (ret != RESUME_GUEST)
				kvmppc_end_cede(vcpu);
			else
				kvmppc_set_timer(vcpu);
		}
	}

	spin_lock(&vc->lock);
 out:
	vc->vcore_state = VCORE_INACTIVE;
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (vcpu->arch.ret != RESUME_GUEST) {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}

	return 1;
}

/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);
	struct kvm_vcpu *v;
	int all_idle = 1;

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
		if (!v->arch.ceded || v->arch.pending_exceptions) {
			all_idle = 0;
			break;
		}
	}
	if (all_idle)
		schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	int prev_state;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	prev_state = vcpu->arch.state;
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (prev_state == KVMPPC_VCPU_STOPPED) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_start_thread(vcpu);
		}

	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
		--vc->n_busy;

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
			n_ceded += v->arch.ceded;
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);

		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
	}

	if (signal_pending(current)) {
		if (vc->vcore_state == VCORE_RUNNING ||
		    vc->vcore_state == VCORE_EXITING) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
			spin_lock(&vc->lock);
		}
		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
			kvmppc_remove_runnable(vc, vcpu);
			vcpu->stat.signal_exits++;
			kvm_run->exit_reason = KVM_EXIT_INTR;
			vcpu->arch.ret = -EINTR;
		}
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}

int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	/* On the first time here, set up VRMA or RMA */
	if (!vcpu->kvm->arch.rma_setup_done) {
		r = kvmppc_hv_setup_rma(vcpu);
		if (r)
			return r;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		}
	} while (r == RESUME_GUEST);
	return r;
}

static long kvmppc_stt_npages(unsigned long window_size)
{
	return ALIGN((window_size >> SPAPR_TCE_SHIFT)
		     * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}

static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
{
	struct kvm *kvm = stt->kvm;
	int i;

	mutex_lock(&kvm->lock);
	list_del(&stt->list);
	for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
		__free_page(stt->pages[i]);
	kfree(stt);
	mutex_unlock(&kvm->lock);

	kvm_put_kvm(kvm);
}

/*
 * Userspace maps the TCE table by calling mmap() on the file descriptor
 * returned by kvm_vm_ioctl_create_spapr_tce(); faults are satisfied from
 * the pages allocated for the table there.
 */
static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
		return VM_FAULT_SIGBUS;

	page = stt->pages[vmf->pgoff];
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_spapr_tce_vm_ops = {
	.fault = kvm_spapr_tce_fault,
};

static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_ops = &kvm_spapr_tce_vm_ops;
	return 0;
}

static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_spapr_tce_table *stt = filp->private_data;

	release_spapr_tce_table(stt);
	return 0;
}

static struct file_operations kvm_spapr_tce_fops = {
	.mmap		= kvm_spapr_tce_mmap,
	.release	= kvm_spapr_tce_release,
};

long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
				   struct kvm_create_spapr_tce *args)
{
	struct kvmppc_spapr_tce_table *stt = NULL;
	long npages;
	int ret = -ENOMEM;
	int i;

	/* Check this LIOBN hasn't been previously allocated */
	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
		if (stt->liobn == args->liobn)
			return -EBUSY;
	}

	npages = kvmppc_stt_npages(args->window_size);

	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
		      GFP_KERNEL);
	if (!stt)
		goto fail;

	stt->liobn = args->liobn;
	stt->window_size = args->window_size;
	stt->kvm = kvm;

	for (i = 0; i < npages; i++) {
		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!stt->pages[i])
			goto fail;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);
	list_add(&stt->list, &kvm->arch.spapr_tce_tables);

	mutex_unlock(&kvm->lock);

	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
				stt, O_RDWR);

fail:
	if (stt) {
		for (i = 0; i < npages; i++)
			if (stt->pages[i])
				__free_page(stt->pages[i]);

		kfree(stt);
	}
	return ret;
}

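/*
 * Real Mode Area (RMA) support: lpcr_rmls() works out the RMLS field
 * encoding for a given RMA size, and the kvm_rma_* file operations
 * below let userspace mmap an RMA obtained from
 * kvm_vm_ioctl_allocate_rma().
 */
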
/* Work out RMLS (real mode limit selector) field value for a given
   RMA size.  Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= ri->npages)
		return VM_FAULT_SIGBUS;

	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_RESERVED;
	vma->vm_ops = &kvm_rma_vm_ops;
	return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_linear_info *ri = filp->private_data;

	kvm_release_rma(ri);
	return 0;
}

static struct file_operations kvm_rma_fops = {
	.mmap		= kvm_rma_mmap,
	.release	= kvm_rma_release,
};

long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
	struct kvmppc_linear_info *ri;
	long fd;

	ri = kvm_alloc_rma();
	if (!ri)
		return -ENOMEM;

	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
	if (fd < 0)
		kvm_release_rma(ri);

	ret->rma_size = ri->npages << PAGE_SHIFT;
	return fd;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_MEMORY_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	r = kvmppc_hv_get_dirty_log(kvm, memslot);
	if (r)
		goto out;

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static unsigned long slb_pgsize_encoding(unsigned long psize)
{
	unsigned long senc = 0;

	if (psize > 0x1000) {
		senc = SLB_VSID_L;
		if (psize == 0x10000)
			senc |= SLB_VSID_LP_01;
	}
	return senc;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
	unsigned long npages;
	unsigned long *phys;

	/* Allocate a slot_phys array */
	phys = kvm->arch.slot_phys[mem->slot];
	if (!kvm->arch.using_mmu_notifiers && !phys) {
		npages = mem->memory_size >> PAGE_SHIFT;
		phys = vzalloc(npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
		kvm->arch.slot_phys[mem->slot] = phys;
		kvm->arch.slot_npages[mem->slot] = npages;
	}

	return 0;
}

static void unpin_slot(struct kvm *kvm, int slot_id)
{
	unsigned long *physp;
	unsigned long j, npages, pfn;
	struct page *page;

	physp = kvm->arch.slot_phys[slot_id];
	npages = kvm->arch.slot_npages[slot_id];
	if (physp) {
		spin_lock(&kvm->arch.slot_phys_lock);
		for (j = 0; j < npages; j++) {
			if (!(physp[j] & KVMPPC_GOT_PAGE))
				continue;
			pfn = physp[j] >> PAGE_SHIFT;
			page = pfn_to_page(pfn);
			SetPageDirty(page);
			put_page(page);
		}
		kvm->arch.slot_phys[slot_id] = NULL;
		spin_unlock(&kvm->arch.slot_phys_lock);
		vfree(physp);
	}
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem)
{
}

static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
{
	int err = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_linear_info *ri = NULL;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
	struct vm_area_struct *vma;
	unsigned long lpcr, senc;
	unsigned long psize, porder;
	unsigned long rma_size;
	unsigned long rmls;
	unsigned long *physp;
	unsigned long i, npages;

	mutex_lock(&kvm->lock);
	if (kvm->arch.rma_setup_done)
		goto out;	/* another vcpu beat us to it */

	/* Look up the memslot for guest physical address 0 */
	memslot = gfn_to_memslot(kvm, 0);

	/* We must have some memory at 0 by now */
	err = -EINVAL;
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto out;

	/* Look up the VMA for the start of this memory slot */
	hva = memslot->userspace_addr;
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, hva);
	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
		goto up_out;

	psize = vma_kernel_pagesize(vma);
	porder = __ilog2(psize);

	/* Is this one of our preallocated RMAs? */
	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
	    hva == vma->vm_start)
		ri = vma->vm_file->private_data;

	up_read(&current->mm->mmap_sem);

	if (!ri) {
		/* On POWER7, use VRMA; on PPC970, give up */
		err = -EPERM;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			pr_err("KVM: CPU requires an RMO\n");
			goto out;
		}

		/* We can handle 4k, 64k or 16M pages in the VRMA */
		err = -EINVAL;
		if (!(psize == 0x1000 || psize == 0x10000 ||
		      psize == 0x1000000))
			goto out;

		/* Update VRMASD field in the LPCR */
		senc = slb_pgsize_encoding(psize);
		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
		lpcr |= senc << (LPCR_VRMASD_SH - 4);
		kvm->arch.lpcr = lpcr;

		/* Create HPTEs in the hash page table for the VRMA */
		kvmppc_map_vrma(vcpu, memslot, porder);

	} else {
		/* Set up to use an RMO region */
		rma_size = ri->npages;
		if (rma_size > memslot->npages)
			rma_size = memslot->npages;
		rma_size <<= PAGE_SHIFT;
		rmls = lpcr_rmls(rma_size);
		err = -EINVAL;
		if ((long)rmls < 0) {
			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
			goto out;
		}
		atomic_inc(&ri->use_count);
		kvm->arch.rma = ri;

		/* Update LPCR and RMOR */
		lpcr = kvm->arch.lpcr;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			/* PPC970; insert RMLS value (split field) in HID4 */
			lpcr &= ~((1ul << HID4_RMLS0_SH) |
				  (3ul << HID4_RMLS2_SH));
			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
				((rmls & 3) << HID4_RMLS2_SH);
			/* RMOR is also in HID4 */
			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
				<< HID4_RMOR_SH;
		} else {
			/* POWER7 */
			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
			lpcr |= rmls << LPCR_RMLS_SH;
			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
		}
		kvm->arch.lpcr = lpcr;
		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);

		/* Initialize phys addrs of pages in RMO */
		npages = ri->npages;
		porder = __ilog2(npages);
		physp = kvm->arch.slot_phys[memslot->id];
		spin_lock(&kvm->arch.slot_phys_lock);
		for (i = 0; i < npages; ++i)
			physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
		spin_unlock(&kvm->arch.slot_phys_lock);
	}

	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
	smp_wmb();
	kvm->arch.rma_setup_done = 1;
	err = 0;
 out:
	mutex_unlock(&kvm->lock);
	return err;

 up_out:
	up_read(&current->mm->mmap_sem);
	goto out;
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	long r;
	unsigned long lpcr;

	/* Allocate hashed page table */
	r = kvmppc_alloc_hpt(kvm);
	if (r)
		return r;

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

	kvm->arch.rma = NULL;

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970; HID4 is effectively the LPCR */
		unsigned long lpid = kvm->arch.lpid;
		kvm->arch.host_lpid = 0;
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
			((lpid & 0xf) << HID4_LPID5_SH);
	} else {
		/* POWER7; init LPCR for virtual RMA mode */
		kvm->arch.host_lpid = mfspr(SPRN_LPID);
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
		lpcr &= LPCR_PECE | LPCR_LPES;
		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
			LPCR_VPM0 | LPCR_VPM1;
		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
	}
	kvm->arch.lpcr = lpcr;

	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
	spin_lock_init(&kvm->arch.slot_phys_lock);
	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	unsigned long i;

	if (!kvm->arch.using_mmu_notifiers)
		for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
			unpin_slot(kvm, i);

	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
		kvm->arch.rma = NULL;
	}

	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);