/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
{
	int me;
	int cpu = vcpu->cpu;
	wait_queue_head_t *wqp;

	wqp = kvm_arch_vcpu_wq(vcpu);
	if (waitqueue_active(wqp)) {
		wake_up_interruptible(wqp);
		++vcpu->stat.halt_wakeup;
	}

	me = get_cpu();

	/* CPU points to the first thread of the core */
	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
		int real_cpu = cpu + vcpu->arch.ptid;
		if (paca[real_cpu].kvm_hstate.xics_phys)
			xics_wake_cpu(real_cpu);
		else if (cpu_online(cpu))
			smp_send_reschedule(cpu);
	}
	put_cpu();
}

/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping.  Those two things have to be counted
 * separately, since one of the vcpu tasks will take on the job
 * of running the core, and the other vcpu tasks in the vcore will
 * sleep waiting for it to do that, but that sleep shouldn't count
 * as stolen time.
 *
 * Hence we accumulate stolen time when the vcpu can run as part of
 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
 * needs its task to do other things in the kernel (for example,
 * service a page fault) in busy_stolen.  We don't accumulate
 * stolen time for a vcore when it is inactive, or for a vcpu
 * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
 * a misnomer; it means that the vcpu task is not executing in
 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
 * the kernel.  We don't have any way of dividing up that time
 * between time that the vcpu is genuinely stopped, time that
 * the task is actively working on behalf of the vcpu, and time
 * that the task is preempted, so we don't count any of it as
 * stolen.
 *
 * Updates to busy_stolen are protected by arch.tbacct_lock;
 * updates to vc->stolen_tb are protected by the arch.tbacct_lock
 * of the vcpu that has taken responsibility for running the vcore
 * (i.e. vc->runner).  The stolen times are measured in units of
 * timebase ticks.  (Note that the != TB_NIL checks below are
 * purely defensive; they should never fail.)
 */

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	spin_lock(&vcpu->arch.tbacct_lock);
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
	    vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += mftb() - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock(&vcpu->arch.tbacct_lock);
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	spin_lock(&vcpu->arch.tbacct_lock);
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
		vc->preempt_tb = mftb();
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock(&vcpu->arch.tbacct_lock);
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err(" ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
		   unsigned long addr, unsigned long len)
{
	/* check address is cacheline aligned */
	if (addr & (L1_CACHE_BYTES - 1))
		return -EINVAL;
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (v->next_gpa != addr || v->len != len) {
		v->next_gpa = addr;
		v->len = addr ? len : 0;
		v->update_pending = 1;
	}
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return 0;
}

/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
	u32 dummy;
	union {
		u16 hword;
		u32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = ((struct reg_vpa *)va)->length.hword;
		else
			len = ((struct reg_vpa *)va)->length.word;
		kvmppc_unpin_guest_page(kvm, va, vpa, false);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		if (len < sizeof(struct lppaca))
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}

static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that one and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va, gpa, false);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va, gpa, false);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
					vpap->dirty);
	vpap->gpa = gpa;
	vpap->pinned_addr = va;
	vpap->dirty = false;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.vpa.update_pending ||
	      vcpu->arch.slb_shadow.update_pending ||
	      vcpu->arch.dtl.update_pending))
		return;

	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		if (vcpu->arch.vpa.pinned_addr)
			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}

/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
	u64 p;

	/*
	 * If we are the task running the vcore, then since we hold
	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
	 * can't be updated, so we don't need the tbacct_lock.
	 * If the vcore is inactive, it can't become active (since we
	 * hold the vcore lock), so the vcpu load/put functions won't
	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
	 */
	if (vc->vcore_state != VCORE_INACTIVE &&
	    vc->runner->arch.run_task != current) {
		spin_lock(&vc->runner->arch.tbacct_lock);
		p = vc->stolen_tb;
		if (vc->preempt_tb != TB_NIL)
			p += now - vc->preempt_tb;
		spin_unlock(&vc->runner->arch.tbacct_lock);
	} else {
		p = vc->stolen_tb;
	}
	return p;
}

static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long stolen;
	unsigned long core_stolen;
	u64 now;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	now = mftb();
	core_stolen = vcore_stolen_time(vc, now);
	stolen = core_stolen - vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = core_stolen;
	spin_lock(&vcpu->arch.tbacct_lock);
	stolen += vcpu->arch.busy_stolen;
	vcpu->arch.busy_stolen = 0;
	spin_unlock(&vcpu->arch.tbacct_lock);
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
	dt->timebase = now;
	dt->enqueue_to_dispatch_time = stolen;
	dt->srr0 = kvmppc_get_pc(vcpu);
	dt->srr1 = vcpu->arch.shregs.msr;
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = ++vcpu->arch.dtl_index;
	vcpu->arch.dtl.dirty = true;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;
	int idx, rc;

	switch (req) {
	case H_ENTER:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
					      kvmppc_get_gpr(vcpu, 5),
					      kvmppc_get_gpr(vcpu, 6),
					      kvmppc_get_gpr(vcpu, 7));
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	case H_RTAS:
		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
			return RESUME_HOST;

		rc = kvmppc_rtas_hcall(vcpu);

		if (rc == -ENOENT)
			return RESUME_HOST;
		else if (rc == 0)
			break;

		/* Send the error out to userspace via KVM_RUN */
		return rc;

	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
		if (kvmppc_xics_enabled(vcpu)) {
			ret = kvmppc_xics_hcall(vcpu, req);
			break;
		} /* fallthrough */
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

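	/*
	 * Anything not explicitly handled in the cases below keeps the
	 * default of RESUME_HOST, i.e. the exit is passed back to the
	 * caller.
	 */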
	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/*
		 * Deliver a machine check interrupt to the guest.
		 * We have to do this, even if the host has handled the
		 * machine check, because machine checks use SRR0/1 and
		 * the interrupt might have trashed guest state in them.
		 */
		kvmppc_book3s_queue_irqprio(vcpu,
					    BOOK3S_INTERRUPT_MACHINE_CHECK);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, 0);
		break;
	case KVM_REG_PPC_DABR:
		*val = get_reg_val(id, vcpu->arch.dabr);
		break;
	case KVM_REG_PPC_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr);
		break;
	case KVM_REG_PPC_PURR:
		*val = get_reg_val(id, vcpu->arch.purr);
		break;
	case KVM_REG_PPC_SPURR:
		*val = get_reg_val(id, vcpu->arch.spurr);
		break;
	case KVM_REG_PPC_AMR:
		*val = get_reg_val(id, vcpu->arch.amr);
		break;
	case KVM_REG_PPC_UAMOR:
		*val = get_reg_val(id, vcpu->arch.uamor);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
		i = id - KVM_REG_PPC_MMCR0;
		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		*val = get_reg_val(id, vcpu->arch.pmc[i]);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			/* VSX => FP reg i is stored in arch.vsr[2*i] */
			long int i = id - KVM_REG_PPC_FPR0;
			*val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
		} else {
			/* let generic code handle it */
			r = -EINVAL;
		}
		break;
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			long int i = id - KVM_REG_PPC_VSR0;
			val->vsxval[0] = vcpu->arch.vsr[2 * i];
			val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
	case KVM_REG_PPC_VPA_ADDR:
		spin_lock(&vcpu->arch.vpa_update_lock);
		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_SLB:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
		val->vpaval.length = vcpu->arch.slb_shadow.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_DTL:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
		val->vpaval.length = vcpu->arch.dtl.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;
	unsigned long addr, len;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
		if (set_reg_val(id, *val))
			r = -EINVAL;
		break;
	case KVM_REG_PPC_DABR:
		vcpu->arch.dabr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DSCR:
		vcpu->arch.dscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PURR:
		vcpu->arch.purr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPURR:
		vcpu->arch.spurr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_AMR:
		vcpu->arch.amr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_UAMOR:
		vcpu->arch.uamor = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
		i = id - KVM_REG_PPC_MMCR0;
		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		vcpu->arch.pmc[i] = set_reg_val(id, *val);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			/* VSX => FP reg i is stored in arch.vsr[2*i] */
			long int i = id - KVM_REG_PPC_FPR0;
			vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
		} else {
			/* let generic code handle it */
			r = -EINVAL;
		}
		break;
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			long int i = id - KVM_REG_PPC_VSR0;
			vcpu->arch.vsr[2 * i] = val->vsxval[0];
			vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
	case KVM_REG_PPC_VPA_ADDR:
		addr = set_reg_val(id, *val);
		r = -EINVAL;
		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
			      vcpu->arch.dtl.next_gpa))
			break;
		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
		break;
	case KVM_REG_PPC_VPA_SLB:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && !vcpu->arch.vpa.next_gpa)
			break;
		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
		break;
	case KVM_REG_PPC_VPA_DTL:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && (len < sizeof(struct dtl_entry) ||
			     !vcpu->arch.vpa.next_gpa))
			break;
		len -= len % sizeof(struct dtl_entry);
		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_core;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
	spin_lock_init(&vcpu->arch.vpa_update_lock);
	spin_lock_init(&vcpu->arch.tbacct_lock);
	vcpu->arch.busy_preempt = TB_NIL;

	kvmppc_mmu_book3s_hv_init(vcpu);

	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
		if (vcore) {
			INIT_LIST_HEAD(&vcore->runnable_threads);
			spin_lock_init(&vcore->lock);
			init_waitqueue_head(&vcore->wq);
			vcore->preempt_tb = TB_NIL;
		}
		kvm->arch.vcores[core] = vcore;
		kvm->arch.online_vcores++;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
	if (vpa->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
					vpa->dirty);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
	unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
	unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		/ tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	u64 now;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	spin_lock(&vcpu->arch.tbacct_lock);
	now = mftb();
	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
		vcpu->arch.stolen_logged;
	vcpu->arch.busy_preempt = now;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	spin_unlock(&vcpu->arch.tbacct_lock);
	--vc->n_runnable;
	list_del(&vcpu->arch.run_list);
}

static int kvmppc_grab_hwthread(int cpu)
{
	struct paca_struct *tpaca;
	long timeout = 1000;

	tpaca = &paca[cpu];

	/* Ensure the thread won't go into the kernel if it wakes */
	tpaca->kvm_hstate.hwthread_req = 1;
	tpaca->kvm_hstate.kvm_vcpu = NULL;

	/*
	 * If the thread is already executing in the kernel (e.g. handling
	 * a stray interrupt), wait for it to get back to nap mode.
	 * The smp_mb() is to ensure that our setting of hwthread_req
	 * is visible before we look at hwthread_state, so if this
	 * races with the code at system_reset_pSeries and the thread
	 * misses our setting of hwthread_req, we are sure to see its
	 * setting of hwthread_state, and vice versa.
	 */
	smp_mb();
	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
		if (--timeout <= 0) {
			pr_err("KVM: couldn't grab cpu %d\n", cpu);
			return -EBUSY;
		}
		udelay(1);
	}
	return 0;
}

static void kvmppc_release_hwthread(int cpu)
{
	struct paca_struct *tpaca;

	tpaca = &paca[cpu];
	tpaca->kvm_hstate.hwthread_req = 0;
	tpaca->kvm_hstate.kvm_vcpu = NULL;
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
	cpu = vc->pcpu + vcpu->arch.ptid;
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.kvm_vcore = vc;
	tpaca->kvm_hstate.napping = 0;
	vcpu->cpu = vc->pcpu;
	smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (vcpu->arch.ptid) {
		xics_wake_cpu(cpu);
		++vc->n_woken;
	}
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
	int i;

	HMT_low();
	i = 0;
	while (vc->nap_count < vc->n_woken) {
		if (++i >= 1000000) {
			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
			       vc->nap_count, vc->n_woken);
			break;
		}
		cpu_relax();
	}
	HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.  Then grab the threads so they can't
 * enter the kernel.
 */
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr = cpu_thread_in_core(cpu);

	if (thr)
		return 0;
	while (++thr < threads_per_core)
		if (cpu_online(cpu + thr))
			return 0;

	/* Grab all hw threads so they can't go into the kernel */
	for (thr = 1; thr < threads_per_core; ++thr) {
		if (kvmppc_grab_hwthread(cpu + thr)) {
			/* Couldn't grab one; let the others go */
			do {
				kvmppc_release_hwthread(cpu + thr);
			} while (--thr > 0);
			return 0;
		}
	}
	return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static void kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
	long ret;
	u64 now;
	int ptid, i, need_vpa_update;
	int srcu_idx;
	struct kvm_vcpu *vcpus_to_update[threads_per_core];

	/* don't start if any threads have a signal pending */
	need_vpa_update = 0;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (signal_pending(vcpu->arch.run_task))
			return;
		if (vcpu->arch.vpa.update_pending ||
		    vcpu->arch.slb_shadow.update_pending ||
		    vcpu->arch.dtl.update_pending)
			vcpus_to_update[need_vpa_update++] = vcpu;
	}

	/*
	 * Initialize *vc, in particular vc->vcore_state, so we can
	 * drop the vcore lock if necessary.
	 */
	vc->n_woken = 0;
	vc->nap_count = 0;
	vc->entry_exit_count = 0;
	vc->vcore_state = VCORE_STARTING;
	vc->in_guest = 0;
	vc->napping_threads = 0;

	/*
	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
	 * which can't be called with any spinlocks held.
	 */
	if (need_vpa_update) {
		spin_unlock(&vc->lock);
		for (i = 0; i < need_vpa_update; ++i)
			kvmppc_update_vpas(vcpus_to_update[i]);
		spin_lock(&vc->lock);
	}

	/*
	 * Assign physical thread IDs, first to non-ceded vcpus
	 * and then to ceded ones.
	 */
	ptid = 0;
	vcpu0 = NULL;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (!vcpu->arch.ceded) {
			if (!ptid)
				vcpu0 = vcpu;
			vcpu->arch.ptid = ptid++;
		}
	}
	if (!vcpu0)
		goto out;	/* nothing to run; should never happen */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (vcpu->arch.ceded)
			vcpu->arch.ptid = ptid++;

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 */
	if (threads_per_core > 1 && !on_primary_thread()) {
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			vcpu->arch.ret = -EBUSY;
		goto out;
	}

	vc->pcpu = smp_processor_id();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		kvmppc_start_thread(vcpu);
		kvmppc_create_dtl_entry(vcpu, vc);
	}

	vc->vcore_state = VCORE_RUNNING;
	preempt_disable();
	spin_unlock(&vc->lock);

	kvm_guest_enter();

	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);

	__kvmppc_vcore_entry(NULL, vcpu0);

	spin_lock(&vc->lock);
	/* disable sending of IPIs on virtual external irqs */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		vcpu->cpu = -1;
	/* wait for secondary threads to finish writing their state to memory */
	if (vc->nap_count < vc->n_woken)
		kvmppc_wait_for_nap(vc);
	for (i = 0; i < threads_per_core; ++i)
		kvmppc_release_hwthread(vc->pcpu + i);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;
	spin_unlock(&vc->lock);

	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	spin_lock(&vc->lock);
	now = get_tb();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
						 vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (vcpu->arch.ceded) {
			if (ret != RESUME_GUEST)
				kvmppc_end_cede(vcpu);
			else
				kvmppc_set_timer(vcpu);
		}
	}

out:
	vc->vcore_state = VCORE_INACTIVE;
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (vcpu->arch.ret != RESUME_GUEST) {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}
}

/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	kvmppc_update_vpas(vcpu);

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	vcpu->arch.busy_preempt = TB_NIL;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (!signal_pending(current)) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_create_dtl_entry(vcpu, vc);
			kvmppc_start_thread(vcpu);
		} else if (vc->vcore_state == VCORE_SLEEPING) {
			wake_up(&vc->wq);
		}

	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
			break;
		vc->runner = vcpu;
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
			if (!v->arch.pending_exceptions)
				n_ceded += v->arch.ceded;
			else
				v->arch.ceded = 0;
		}
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);
		vc->runner = NULL;
	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       (vc->vcore_state == VCORE_RUNNING ||
		vc->vcore_state == VCORE_EXITING)) {
		spin_unlock(&vc->lock);
		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
		spin_lock(&vc->lock);
	}

	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
		kvmppc_remove_runnable(vc, vcpu);
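		/*
		 * We can only still be runnable here because a signal is
		 * pending, so report the interruption to userspace.
		 */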
		vcpu->stat.signal_exits++;
		kvm_run->exit_reason = KVM_EXIT_INTR;
		vcpu->arch.ret = -EINTR;
	}

	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
		/* Wake up some vcpu to run the core */
		v = list_first_entry(&vc->runnable_threads,
				     struct kvm_vcpu, arch.run_list);
		wake_up(&v->arch.cpu_run);
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}

int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;
	int srcu_idx;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	atomic_inc(&vcpu->kvm->arch.vcpus_running);
	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
	smp_mb();

	/* On the first time here, set up HTAB and VRMA or RMA */
	if (!vcpu->kvm->arch.rma_setup_done) {
		r = kvmppc_hv_setup_htab_rma(vcpu);
		if (r)
			goto out;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		} else if (r == RESUME_PAGE_FAULT) {
			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_book3s_hv_page_fault(run, vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
		}
	} while (r == RESUME_GUEST);

out:
	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
	atomic_dec(&vcpu->kvm->arch.vcpus_running);
	return r;
}


/*
 * Work out RMLS (real mode limit selector) field value for a given
 * RMA size.  Assumes POWER7 or PPC970.
 */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= ri->npages)
		return VM_FAULT_SIGBUS;

	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &kvm_rma_vm_ops;
	return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_linear_info *ri = filp->private_data;

	kvm_release_rma(ri);
	return 0;
}

static const struct file_operations kvm_rma_fops = {
	.mmap		= kvm_rma_mmap,
	.release	= kvm_rma_release,
};

long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
	struct kvmppc_linear_info *ri;
	long fd;

	ri = kvm_alloc_rma();
	if (!ri)
		return -ENOMEM;

	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
	if (fd < 0)
		kvm_release_rma(ri);

	ret->rma_size = ri->npages << PAGE_SHIFT;
	return fd;
}

static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
				     int linux_psize)
{
	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];

	if (!def->shift)
		return;
	(*sps)->page_shift = def->shift;
	(*sps)->slb_enc = def->sllp;
	(*sps)->enc[0].page_shift = def->shift;
	/*
	 * Only return the base page encoding.  We don't want to return
	 * all the supported pte_enc values, because our H_ENTER doesn't
	 * support MPSS yet.  Once it does, we can start passing them all
	 * here.
	 */
	(*sps)->enc[0].pte_enc = def->penc[linux_psize];
	(*sps)++;
}

int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
{
	struct kvm_ppc_one_seg_page_size *sps;

	info->flags = KVM_PPC_PAGE_SIZES_REAL;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		info->flags |= KVM_PPC_1T_SEGMENTS;
	info->slb_size = mmu_slb_size;

	/* We only support these sizes for now, and no multi-size segments */
	sps = &info->sps[0];
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

	return 0;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
	if (r)
		goto out;

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void unpin_slot(struct kvm_memory_slot *memslot)
{
	unsigned long *physp;
	unsigned long j, npages, pfn;
	struct page *page;

	physp = memslot->arch.slot_phys;
	npages = memslot->npages;
	if (!physp)
		return;
	for (j = 0; j < npages; j++) {
		if (!(physp[j] & KVMPPC_GOT_PAGE))
			continue;
		pfn = physp[j] >> PAGE_SHIFT;
		page = pfn_to_page(pfn);
		SetPageDirty(page);
		put_page(page);
	}
}

void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
			      struct kvm_memory_slot *dont)
{
	if (!dont || free->arch.rmap != dont->arch.rmap) {
		vfree(free->arch.rmap);
		free->arch.rmap = NULL;
	}
	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
		unpin_slot(free);
		vfree(free->arch.slot_phys);
		free->arch.slot_phys = NULL;
	}
}

int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
			       unsigned long npages)
{
	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
	if (!slot->arch.rmap)
		return -ENOMEM;
	slot->arch.slot_phys = NULL;

	return 0;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_memory_slot *memslot,
				      struct kvm_userspace_memory_region *mem)
{
	unsigned long *phys;

	/* Allocate a slot_phys array if needed */
	phys = memslot->arch.slot_phys;
	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
		phys = vzalloc(memslot->npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
		memslot->arch.slot_phys = phys;
	}

	return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem,
				      const struct kvm_memory_slot *old)
{
	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
	struct kvm_memory_slot *memslot;

	if (npages && old->npages) {
		/*
		 * If modifying a memslot, reset all the rmap dirty bits.
		 * If this is a new memslot, we don't need to do anything
		 * since the rmap array starts out as all zeroes,
		 * i.e. no pages are dirty.
		 */
		memslot = id_to_memslot(kvm->memslots, mem->slot);
		kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
	}
}

static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
	int err = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_linear_info *ri = NULL;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
	struct vm_area_struct *vma;
	unsigned long lpcr, senc;
	unsigned long psize, porder;
	unsigned long rma_size;
	unsigned long rmls;
	unsigned long *physp;
	unsigned long i, npages;
	int srcu_idx;

	mutex_lock(&kvm->lock);
	if (kvm->arch.rma_setup_done)
		goto out;	/* another vcpu beat us to it */

	/* Allocate hashed page table (if not done already) and reset it */
	if (!kvm->arch.hpt_virt) {
		err = kvmppc_alloc_hpt(kvm, NULL);
		if (err) {
			pr_err("KVM: Couldn't alloc HPT\n");
			goto out;
		}
	}

	/* Look up the memslot for guest physical address 0 */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, 0);

	/* We must have some memory at 0 by now */
	err = -EINVAL;
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto out_srcu;

	/* Look up the VMA for the start of this memory slot */
	hva = memslot->userspace_addr;
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, hva);
	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
		goto up_out;

	psize = vma_kernel_pagesize(vma);
	porder = __ilog2(psize);

	/* Is this one of our preallocated RMAs? */
	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
	    hva == vma->vm_start)
		ri = vma->vm_file->private_data;

	up_read(&current->mm->mmap_sem);

	if (!ri) {
		/* On POWER7, use VRMA; on PPC970, give up */
		err = -EPERM;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			pr_err("KVM: CPU requires an RMO\n");
			goto out_srcu;
		}

		/* We can handle 4k, 64k or 16M pages in the VRMA */
		err = -EINVAL;
		if (!(psize == 0x1000 || psize == 0x10000 ||
		      psize == 0x1000000))
			goto out_srcu;

		/* Update VRMASD field in the LPCR */
		senc = slb_pgsize_encoding(psize);
		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
		lpcr |= senc << (LPCR_VRMASD_SH - 4);
		kvm->arch.lpcr = lpcr;

		/* Create HPTEs in the hash page table for the VRMA */
		kvmppc_map_vrma(vcpu, memslot, porder);

	} else {
		/* Set up to use an RMO region */
		rma_size = ri->npages;
		if (rma_size > memslot->npages)
			rma_size = memslot->npages;
		rma_size <<= PAGE_SHIFT;
		rmls = lpcr_rmls(rma_size);
		err = -EINVAL;
		if ((long)rmls < 0) {
			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
			goto out_srcu;
		}
		atomic_inc(&ri->use_count);
		kvm->arch.rma = ri;

		/* Update LPCR and RMOR */
		lpcr = kvm->arch.lpcr;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			/* PPC970; insert RMLS value (split field) in HID4 */
			lpcr &= ~((1ul << HID4_RMLS0_SH) |
				  (3ul << HID4_RMLS2_SH));
			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
				((rmls & 3) << HID4_RMLS2_SH);
			/* RMOR is also in HID4 */
			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
				<< HID4_RMOR_SH;
		} else {
			/* POWER7 */
			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
			lpcr |= rmls << LPCR_RMLS_SH;
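			/*
			 * On POWER7 the real-mode offset lives in the RMOR
			 * register; keep the byte address of the RMA here so
			 * the guest entry code can load it.
			 */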
			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
		}
		kvm->arch.lpcr = lpcr;
		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);

		/* Initialize phys addrs of pages in RMO */
		npages = ri->npages;
		porder = __ilog2(npages);
		physp = memslot->arch.slot_phys;
		if (physp) {
			if (npages > memslot->npages)
				npages = memslot->npages;
			spin_lock(&kvm->arch.slot_phys_lock);
			for (i = 0; i < npages; ++i)
				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
					porder;
			spin_unlock(&kvm->arch.slot_phys_lock);
		}
	}

	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
	smp_wmb();
	kvm->arch.rma_setup_done = 1;
	err = 0;
out_srcu:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
out:
	mutex_unlock(&kvm->lock);
	return err;

up_out:
	up_read(&current->mm->mmap_sem);
	goto out;
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	unsigned long lpcr, lpid;

	/* Allocate the guest's logical partition ID */

	lpid = kvmppc_alloc_lpid();
	if ((long)lpid < 0)
		return -ENOMEM;
	kvm->arch.lpid = lpid;

	/*
	 * Since we don't flush the TLB when tearing down a VM,
	 * and this lpid might have previously been used,
	 * make sure we flush on each core before running the new VM.
	 */
	cpumask_setall(&kvm->arch.need_tlb_flush);

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);

	kvm->arch.rma = NULL;

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970; HID4 is effectively the LPCR */
		kvm->arch.host_lpid = 0;
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
			((lpid & 0xf) << HID4_LPID5_SH);
	} else {
		/* POWER7; init LPCR for virtual RMA mode */
		kvm->arch.host_lpid = mfspr(SPRN_LPID);
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
		lpcr &= LPCR_PECE | LPCR_LPES;
		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
			LPCR_VPM0 | LPCR_VPM1;
		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
	}
	kvm->arch.lpcr = lpcr;

	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
	spin_lock_init(&kvm->arch.slot_phys_lock);

	/*
	 * Don't allow secondary CPU threads to come online
	 * while any KVM VMs exist.
	 */
	inhibit_secondary_onlining();

	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	uninhibit_secondary_onlining();

	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
		kvm->arch.rma = NULL;
	}

	kvmppc_rtas_tokens_free(kvm);

	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);
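/*
 * The module_init/module_exit above make this loadable as a module, which
 * requires a license declaration; "GPL" matches the GPLv2 notice in the
 * header comment.
 */
MODULE_LICENSE("GPL");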