/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping.  Those two things have to be counted
 * separately, since one of the vcpu tasks will take on the job
 * of running the core, and the other vcpu tasks in the vcore will
 * sleep waiting for it to do that, but that sleep shouldn't count
 * as stolen time.
 *
 * Hence we accumulate stolen time when the vcpu can run as part of
 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
 * needs its task to do other things in the kernel (for example,
 * service a page fault) in busy_stolen.  We don't accumulate
 * stolen time for a vcore when it is inactive, or for a vcpu
 * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
 * a misnomer; it means that the vcpu task is not executing in
 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
 * the kernel.  We don't have any way of dividing up that time
 * between time that the vcpu is genuinely stopped, time that
 * the task is actively working on behalf of the vcpu, and time
 * that the task is preempted, so we don't count any of it as
 * stolen.
 *
 * Updates to busy_stolen are protected by arch.tbacct_lock;
 * updates to vc->stolen_tb are protected by the arch.tbacct_lock
 * of the vcpu that has taken responsibility for running the vcore
 * (i.e. vc->runner).  The stolen times are measured in units of
 * timebase ticks.  (Note that the != TB_NIL checks below are
 * purely defensive; they should never fail.)
 */

void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	spin_lock(&vcpu->arch.tbacct_lock);
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
	    vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += mftb() - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock(&vcpu->arch.tbacct_lock);
}

void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	spin_lock(&vcpu->arch.tbacct_lock);
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
		vc->preempt_tb = mftb();
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock(&vcpu->arch.tbacct_lock);
}

void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err(" ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	int r;
	struct kvm_vcpu *v, *ret = NULL;

	mutex_lock(&kvm->lock);
	kvm_for_each_vcpu(r, v, kvm) {
		if (v->vcpu_id == id) {
			ret = v;
			break;
		}
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->shared_proc = 1;
	vpa->yield_count = 1;
}

static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
		   unsigned long addr, unsigned long len)
{
	/* check address is cacheline aligned */
	if (addr & (L1_CACHE_BYTES - 1))
		return -EINVAL;
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (v->next_gpa != addr || v->len != len) {
		v->next_gpa = addr;
		v->len = addr ? len : 0;
		v->update_pending = 1;
	}
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return 0;
}

/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
	u32 dummy;
	union {
		u16 hword;
		u32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = ((struct reg_vpa *)va)->length.hword;
		else
			len = ((struct reg_vpa *)va)->length.word;
		kvmppc_unpin_guest_page(kvm, va);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		if (len < sizeof(struct lppaca))
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}
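/*
 * Illustrative only (not part of the hcall implementation above): from the
 * guest side, registering a VPA for a virtual processor is roughly
 *
 *	flags = H_VPA_REG_VPA << H_VPA_FUNC_SHIFT;
 *	plpar_hcall_norets(H_REGISTER_VPA, flags, vcpu_id, __pa(vpa_buf));
 *
 * where vpa_buf must be cache-line aligned and carry its length at offset 4,
 * exactly as do_h_register_vpa() checks above.  The guest-side helper names
 * here are assumptions; only the hcall ABI checked above is authoritative.
 */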
static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that one and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
	vpap->pinned_addr = va;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.vpa.update_pending ||
	      vcpu->arch.slb_shadow.update_pending ||
	      vcpu->arch.dtl.update_pending))
		return;

	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		if (vcpu->arch.vpa.pinned_addr)
			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}

/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
	u64 p;

	/*
	 * If we are the task running the vcore, then since we hold
	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
	 * can't be updated, so we don't need the tbacct_lock.
	 * If the vcore is inactive, it can't become active (since we
	 * hold the vcore lock), so the vcpu load/put functions won't
	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
	 */
	if (vc->vcore_state != VCORE_INACTIVE &&
	    vc->runner->arch.run_task != current) {
		spin_lock(&vc->runner->arch.tbacct_lock);
		p = vc->stolen_tb;
		if (vc->preempt_tb != TB_NIL)
			p += now - vc->preempt_tb;
		spin_unlock(&vc->runner->arch.tbacct_lock);
	} else {
		p = vc->stolen_tb;
	}
	return p;
}

static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long stolen;
	unsigned long core_stolen;
	u64 now;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	now = mftb();
	core_stolen = vcore_stolen_time(vc, now);
	stolen = core_stolen - vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = core_stolen;
	spin_lock(&vcpu->arch.tbacct_lock);
	stolen += vcpu->arch.busy_stolen;
	vcpu->arch.busy_stolen = 0;
	spin_unlock(&vcpu->arch.tbacct_lock);
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
	dt->timebase = now;
	dt->enqueue_to_dispatch_time = stolen;
	dt->srr0 = kvmppc_get_pc(vcpu);
	dt->srr1 = vcpu->arch.shregs.msr;
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = ++vcpu->arch.dtl_index;
}

int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	struct kvm_vcpu *tvcpu;
	int idx;

	switch (req) {
	case H_ENTER:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
					      kvmppc_get_gpr(vcpu, 5),
					      kvmppc_get_gpr(vcpu, 6),
					      kvmppc_get_gpr(vcpu, 7));
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (vcpu->arch.ceded) {
			if (waitqueue_active(&vcpu->wq)) {
				wake_up_interruptible(&vcpu->wq);
				vcpu->stat.halt_wakeup++;
			}
		}
		break;
	case H_CONFER:
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
			      struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/*
		 * Deliver a machine check interrupt to the guest.
		 * We have to do this, even if the host has handled the
		 * machine check, because machine checks use SRR0/1 and
		 * the interrupt might have trashed guest state in them.
		 */
		kvmppc_book3s_queue_irqprio(vcpu,
					    BOOK3S_INTERRUPT_MACHINE_CHECK);
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		if (vcpu->arch.shregs.msr & MSR_PR) {
			/* sc 1 from userspace - reflect to guest syscall */
			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
			r = RESUME_GUEST;
			break;
		}
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * We just generate a program interrupt to the guest, since
	 * we don't emulate any guest instructions at this stage.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		kvmppc_core_queue_program(vcpu, 0x80000);
		r = RESUME_GUEST;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		r = RESUME_HOST;
		BUG();
		break;
	}

	return r;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i;

	/* zero the whole structure first so the pvr isn't wiped afterwards */
	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	int i, j;

	kvmppc_set_pvr(vcpu, sregs->pvr);

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, 0);
		break;
	case KVM_REG_PPC_DABR:
		*val = get_reg_val(id, vcpu->arch.dabr);
		break;
	case KVM_REG_PPC_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr);
		break;
	case KVM_REG_PPC_PURR:
		*val = get_reg_val(id, vcpu->arch.purr);
		break;
	case KVM_REG_PPC_SPURR:
		*val = get_reg_val(id, vcpu->arch.spurr);
		break;
	case KVM_REG_PPC_AMR:
		*val = get_reg_val(id, vcpu->arch.amr);
		break;
	case KVM_REG_PPC_UAMOR:
		*val = get_reg_val(id, vcpu->arch.uamor);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
		i = id - KVM_REG_PPC_MMCR0;
		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		*val = get_reg_val(id, vcpu->arch.pmc[i]);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			/* VSX => FP reg i is stored in arch.vsr[2*i] */
			long int i = id - KVM_REG_PPC_FPR0;
			*val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
		} else {
			/* let generic code handle it */
			r = -EINVAL;
		}
		break;
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			long int i = id - KVM_REG_PPC_VSR0;
			val->vsxval[0] = vcpu->arch.vsr[2 * i];
			val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
	case KVM_REG_PPC_VPA_ADDR:
		spin_lock(&vcpu->arch.vpa_update_lock);
		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_SLB:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
		val->vpaval.length = vcpu->arch.slb_shadow.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_DTL:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
		val->vpaval.length = vcpu->arch.dtl.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;
	unsigned long addr, len;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		/* Only allow this to be set to zero */
		if (set_reg_val(id, *val))
			r = -EINVAL;
		break;
	case KVM_REG_PPC_DABR:
		vcpu->arch.dabr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_DSCR:
		vcpu->arch.dscr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PURR:
		vcpu->arch.purr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_SPURR:
		vcpu->arch.spurr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_AMR:
		vcpu->arch.amr = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_UAMOR:
		vcpu->arch.uamor = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
		i = id - KVM_REG_PPC_MMCR0;
		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		vcpu->arch.pmc[i] = set_reg_val(id, *val);
		break;
#ifdef CONFIG_VSX
	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			/* VSX => FP reg i is stored in arch.vsr[2*i] */
			long int i = id - KVM_REG_PPC_FPR0;
			vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
		} else {
			/* let generic code handle it */
			r = -EINVAL;
		}
		break;
	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
		if (cpu_has_feature(CPU_FTR_VSX)) {
			long int i = id - KVM_REG_PPC_VSR0;
			vcpu->arch.vsr[2 * i] = val->vsxval[0];
			vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
		} else {
			r = -ENXIO;
		}
		break;
#endif /* CONFIG_VSX */
	case KVM_REG_PPC_VPA_ADDR:
		addr = set_reg_val(id, *val);
		r = -EINVAL;
		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
			      vcpu->arch.dtl.next_gpa))
			break;
		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
		break;
	case KVM_REG_PPC_VPA_SLB:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && !vcpu->arch.vpa.next_gpa)
			break;
		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
		break;
	case KVM_REG_PPC_VPA_DTL:
		addr = val->vpaval.addr;
		len = val->vpaval.length;
		r = -EINVAL;
		if (addr && (len < sizeof(struct dtl_entry) ||
			     !vcpu->arch.vpa.next_gpa))
			break;
		len -= len % sizeof(struct dtl_entry);
		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}
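/*
 * Illustrative only: userspace drives the two functions above through the
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG vcpu ioctls, roughly
 *
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_PPC_DABR,
 *		.addr = (__u64)(unsigned long)&value,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 *
 * This sketch assumes the usual struct kvm_one_reg layout from
 * <linux/kvm.h>; it is not part of this file.
 */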
int kvmppc_core_check_processor_compat(void)
{
	if (cpu_has_feature(CPU_FTR_HVMODE))
		return 0;
	return -EIO;
}

struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
{
	struct kvm_vcpu *vcpu;
	int err = -EINVAL;
	int core;
	struct kvmppc_vcore *vcore;

	core = id / threads_per_core;
	if (core >= KVM_MAX_VCORES)
		goto out;

	err = -ENOMEM;
	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_vcpu;

	vcpu->arch.shared = &vcpu->arch.shregs;
	vcpu->arch.mmcr[0] = MMCR0_FC;
	vcpu->arch.ctrl = CTRL_RUNLATCH;
	/* default to host PVR, since we can't spoof it */
	vcpu->arch.pvr = mfspr(SPRN_PVR);
	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
	spin_lock_init(&vcpu->arch.vpa_update_lock);
	spin_lock_init(&vcpu->arch.tbacct_lock);
	vcpu->arch.busy_preempt = TB_NIL;

	kvmppc_mmu_book3s_hv_init(vcpu);

	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;

	init_waitqueue_head(&vcpu->arch.cpu_run);

	mutex_lock(&kvm->lock);
	vcore = kvm->arch.vcores[core];
	if (!vcore) {
		vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
		if (vcore) {
			INIT_LIST_HEAD(&vcore->runnable_threads);
			spin_lock_init(&vcore->lock);
			init_waitqueue_head(&vcore->wq);
			vcore->preempt_tb = TB_NIL;
		}
		kvm->arch.vcores[core] = vcore;
		kvm->arch.online_vcores++;
	}
	mutex_unlock(&kvm->lock);

	if (!vcore)
		goto free_vcpu;

	spin_lock(&vcore->lock);
	++vcore->num_threads;
	spin_unlock(&vcore->lock);
	vcpu->arch.vcore = vcore;

	vcpu->arch.cpu_type = KVM_CPU_3S_64;
	kvmppc_sanity_check(vcpu);

	return vcpu;

free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.dtl.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
	if (vcpu->arch.slb_shadow.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
	if (vcpu->arch.vpa.pinned_addr)
		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
{
	unsigned long dec_nsec, now;

	now = get_tb();
	if (now > vcpu->arch.dec_expires) {
		/* decrementer has already gone negative */
		kvmppc_core_queue_dec(vcpu);
		kvmppc_core_prepare_to_enter(vcpu);
		return;
	}
	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
		   / tb_ticks_per_sec;
	hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
		      HRTIMER_MODE_REL);
	vcpu->arch.timer_running = 1;
}

static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
{
	vcpu->arch.ceded = 0;
	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
}

extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern void xics_wake_cpu(int cpu);

static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
				   struct kvm_vcpu *vcpu)
{
	u64 now;

	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;
	spin_lock(&vcpu->arch.tbacct_lock);
	now = mftb();
	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
		vcpu->arch.stolen_logged;
	vcpu->arch.busy_preempt = now;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
	spin_unlock(&vcpu->arch.tbacct_lock);
	--vc->n_runnable;
	list_del(&vcpu->arch.run_list);
}

static int kvmppc_grab_hwthread(int cpu)
{
	struct paca_struct *tpaca;
	long timeout = 1000;

	tpaca = &paca[cpu];

	/* Ensure the thread won't go into the kernel if it wakes */
	tpaca->kvm_hstate.hwthread_req = 1;
	tpaca->kvm_hstate.kvm_vcpu = NULL;

	/*
	 * If the thread is already executing in the kernel (e.g. handling
	 * a stray interrupt), wait for it to get back to nap mode.
	 * The smp_mb() is to ensure that our setting of hwthread_req
	 * is visible before we look at hwthread_state, so if this
	 * races with the code at system_reset_pSeries and the thread
	 * misses our setting of hwthread_req, we are sure to see its
	 * setting of hwthread_state, and vice versa.
	 */
	smp_mb();
	while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
		if (--timeout <= 0) {
			pr_err("KVM: couldn't grab cpu %d\n", cpu);
			return -EBUSY;
		}
		udelay(1);
	}
	return 0;
}
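/*
 * kvmppc_grab_hwthread() and kvmppc_release_hwthread() are used as a pair:
 * on_primary_thread() below grabs the secondary hardware threads of the core
 * before a vcore runs, and kvmppc_run_core() releases them once all threads
 * have exited the guest and gone back to nap.
 */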
static void kvmppc_release_hwthread(int cpu)
{
	struct paca_struct *tpaca;

	tpaca = &paca[cpu];
	tpaca->kvm_hstate.hwthread_req = 0;
	tpaca->kvm_hstate.kvm_vcpu = NULL;
}

static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct paca_struct *tpaca;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (vcpu->arch.timer_running) {
		hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
		vcpu->arch.timer_running = 0;
	}
	cpu = vc->pcpu + vcpu->arch.ptid;
	tpaca = &paca[cpu];
	tpaca->kvm_hstate.kvm_vcpu = vcpu;
	tpaca->kvm_hstate.kvm_vcore = vc;
	tpaca->kvm_hstate.napping = 0;
	vcpu->cpu = vc->pcpu;
	smp_wmb();
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (vcpu->arch.ptid) {
		xics_wake_cpu(cpu);
		++vc->n_woken;
	}
#endif
}

static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
{
	int i;

	HMT_low();
	i = 0;
	while (vc->nap_count < vc->n_woken) {
		if (++i >= 1000000) {
			pr_err("kvmppc_wait_for_nap timeout %d %d\n",
			       vc->nap_count, vc->n_woken);
			break;
		}
		cpu_relax();
	}
	HMT_medium();
}

/*
 * Check that we are on thread 0 and that any other threads in
 * this core are off-line.  Then grab the threads so they can't
 * enter the kernel.
 */
static int on_primary_thread(void)
{
	int cpu = smp_processor_id();
	int thr = cpu_thread_in_core(cpu);

	if (thr)
		return 0;
	while (++thr < threads_per_core)
		if (cpu_online(cpu + thr))
			return 0;

	/* Grab all hw threads so they can't go into the kernel */
	for (thr = 1; thr < threads_per_core; ++thr) {
		if (kvmppc_grab_hwthread(cpu + thr)) {
			/* Couldn't grab one; let the others go */
			do {
				kvmppc_release_hwthread(cpu + thr);
			} while (--thr > 0);
			return 0;
		}
	}
	return 1;
}

/*
 * Run a set of guest threads on a physical core.
 * Called with vc->lock held.
 */
static void kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
	long ret;
	u64 now;
	int ptid, i, need_vpa_update;
	int srcu_idx;
	struct kvm_vcpu *vcpus_to_update[threads_per_core];

	/* don't start if any threads have a signal pending */
	need_vpa_update = 0;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (signal_pending(vcpu->arch.run_task))
			return;
		if (vcpu->arch.vpa.update_pending ||
		    vcpu->arch.slb_shadow.update_pending ||
		    vcpu->arch.dtl.update_pending)
			vcpus_to_update[need_vpa_update++] = vcpu;
	}

	/*
	 * Initialize *vc, in particular vc->vcore_state, so we can
	 * drop the vcore lock if necessary.
	 */
	vc->n_woken = 0;
	vc->nap_count = 0;
	vc->entry_exit_count = 0;
	vc->vcore_state = VCORE_STARTING;
	vc->in_guest = 0;
	vc->napping_threads = 0;

	/*
	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
	 * which can't be called with any spinlocks held.
	 */
	if (need_vpa_update) {
		spin_unlock(&vc->lock);
		for (i = 0; i < need_vpa_update; ++i)
			kvmppc_update_vpas(vcpus_to_update[i]);
		spin_lock(&vc->lock);
	}

	/*
	 * Assign physical thread IDs, first to non-ceded vcpus
	 * and then to ceded ones.
	 */
	ptid = 0;
	vcpu0 = NULL;
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		if (!vcpu->arch.ceded) {
			if (!ptid)
				vcpu0 = vcpu;
			vcpu->arch.ptid = ptid++;
		}
	}
	if (!vcpu0)
		goto out;	/* nothing to run; should never happen */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		if (vcpu->arch.ceded)
			vcpu->arch.ptid = ptid++;

	/*
	 * Make sure we are running on thread 0, and that
	 * secondary threads are offline.
	 */
	if (threads_per_core > 1 && !on_primary_thread()) {
		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
			vcpu->arch.ret = -EBUSY;
		goto out;
	}

	vc->pcpu = smp_processor_id();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		kvmppc_start_thread(vcpu);
		kvmppc_create_dtl_entry(vcpu, vc);
	}

	vc->vcore_state = VCORE_RUNNING;
	preempt_disable();
	spin_unlock(&vc->lock);

	kvm_guest_enter();

	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);

	__kvmppc_vcore_entry(NULL, vcpu0);

	spin_lock(&vc->lock);
	/* disable sending of IPIs on virtual external irqs */
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
		vcpu->cpu = -1;
	/* wait for secondary threads to finish writing their state to memory */
	if (vc->nap_count < vc->n_woken)
		kvmppc_wait_for_nap(vc);
	for (i = 0; i < threads_per_core; ++i)
		kvmppc_release_hwthread(vc->pcpu + i);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;
	spin_unlock(&vc->lock);

	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();
	kvm_guest_exit();

	preempt_enable();
	kvm_resched(vcpu);

	spin_lock(&vc->lock);
	now = get_tb();
	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
		/* cancel pending dec exception if dec is positive */
		if (now < vcpu->arch.dec_expires &&
		    kvmppc_core_pending_dec(vcpu))
			kvmppc_core_dequeue_dec(vcpu);

		ret = RESUME_GUEST;
		if (vcpu->arch.trap)
			ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
						 vcpu->arch.run_task);

		vcpu->arch.ret = ret;
		vcpu->arch.trap = 0;

		if (vcpu->arch.ceded) {
			if (ret != RESUME_GUEST)
				kvmppc_end_cede(vcpu);
			else
				kvmppc_set_timer(vcpu);
		}
	}

 out:
	vc->vcore_state = VCORE_INACTIVE;
	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
				 arch.run_list) {
		if (vcpu->arch.ret != RESUME_GUEST) {
			kvmppc_remove_runnable(vc, vcpu);
			wake_up(&vcpu->arch.cpu_run);
		}
	}
}
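/*
 * Each vcpu has its own task in the KVM_RUN ioctl, but only one of those
 * tasks (vc->runner) actually calls kvmppc_run_core() for the whole vcore;
 * the other tasks either sleep in kvmppc_wait_for_exec() below until the
 * runner has started their vcpu, or (when every runnable vcpu has ceded)
 * the vcore as a whole sleeps in kvmppc_vcore_blocked().
 */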
/*
 * Wait for some other vcpu thread to execute us, and
 * wake us up when we need to handle something in the host.
 */
static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
		schedule();
	finish_wait(&vcpu->arch.cpu_run, &wait);
}

/*
 * All the vcpus in this vcore are idle, so wait for a decrementer
 * or external interrupt to one of the vcpus.  vc->lock is held.
 */
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
	vc->vcore_state = VCORE_SLEEPING;
	spin_unlock(&vc->lock);
	schedule();
	finish_wait(&vc->wq, &wait);
	spin_lock(&vc->lock);
	vc->vcore_state = VCORE_INACTIVE;
}

static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int n_ceded;
	struct kvmppc_vcore *vc;
	struct kvm_vcpu *v, *vn;

	kvm_run->exit_reason = 0;
	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	kvmppc_update_vpas(vcpu);

	/*
	 * Synchronize with other threads in this virtual core
	 */
	vc = vcpu->arch.vcore;
	spin_lock(&vc->lock);
	vcpu->arch.ceded = 0;
	vcpu->arch.run_task = current;
	vcpu->arch.kvm_run = kvm_run;
	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
	vcpu->arch.busy_preempt = TB_NIL;
	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
	++vc->n_runnable;

	/*
	 * This happens the first time this is called for a vcpu.
	 * If the vcore is already running, we may be able to start
	 * this thread straight away and have it join in.
	 */
	if (!signal_pending(current)) {
		if (vc->vcore_state == VCORE_RUNNING &&
		    VCORE_EXIT_COUNT(vc) == 0) {
			vcpu->arch.ptid = vc->n_runnable - 1;
			kvmppc_create_dtl_entry(vcpu, vc);
			kvmppc_start_thread(vcpu);
		} else if (vc->vcore_state == VCORE_SLEEPING) {
			wake_up(&vc->wq);
		}

	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       !signal_pending(current)) {
		if (vc->vcore_state != VCORE_INACTIVE) {
			spin_unlock(&vc->lock);
			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
			spin_lock(&vc->lock);
			continue;
		}
		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
					 arch.run_list) {
			kvmppc_core_prepare_to_enter(v);
			if (signal_pending(v->arch.run_task)) {
				kvmppc_remove_runnable(vc, v);
				v->stat.signal_exits++;
				v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
				v->arch.ret = -EINTR;
				wake_up(&v->arch.cpu_run);
			}
		}
		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
			break;
		vc->runner = vcpu;
		n_ceded = 0;
		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
			if (!v->arch.pending_exceptions)
				n_ceded += v->arch.ceded;
		if (n_ceded == vc->n_runnable)
			kvmppc_vcore_blocked(vc);
		else
			kvmppc_run_core(vc);
		vc->runner = NULL;
	}

	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
	       (vc->vcore_state == VCORE_RUNNING ||
		vc->vcore_state == VCORE_EXITING)) {
		spin_unlock(&vc->lock);
		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
		spin_lock(&vc->lock);
	}

	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
		kvmppc_remove_runnable(vc, vcpu);
		vcpu->stat.signal_exits++;
		kvm_run->exit_reason = KVM_EXIT_INTR;
		vcpu->arch.ret = -EINTR;
	}

	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
		/* Wake up some vcpu to run the core */
		v = list_first_entry(&vc->runnable_threads,
				     struct kvm_vcpu, arch.run_list);
		wake_up(&v->arch.cpu_run);
	}

	spin_unlock(&vc->lock);
	return vcpu->arch.ret;
}
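/*
 * Illustrative only: from userspace (e.g. QEMU), kvmppc_vcpu_run() below is
 * reached through the KVM_RUN ioctl, and hcalls that we pass up to userspace
 * appear as KVM_EXIT_PAPR_HCALL exits, roughly
 *
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 *	if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
 *		// handle run->papr_hcall.nr / run->papr_hcall.args[]
 *		run->papr_hcall.ret = H_SUCCESS;
 *	}
 *
 * The field names follow struct kvm_run in <linux/kvm.h>; this sketch is
 * not part of this file.
 */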
int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
	int r;
	int srcu_idx;

	if (!vcpu->arch.sane) {
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		return -EINVAL;
	}

	kvmppc_core_prepare_to_enter(vcpu);

	/* No need to go into the guest when all we'll do is come back out */
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		return -EINTR;
	}

	atomic_inc(&vcpu->kvm->arch.vcpus_running);
	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
	smp_mb();

	/* On the first time here, set up HTAB and VRMA or RMA */
	if (!vcpu->kvm->arch.rma_setup_done) {
		r = kvmppc_hv_setup_htab_rma(vcpu);
		if (r)
			goto out;
	}

	flush_fp_to_thread(current);
	flush_altivec_to_thread(current);
	flush_vsx_to_thread(current);
	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
	vcpu->arch.pgdir = current->mm->pgd;
	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

	do {
		r = kvmppc_run_vcpu(run, vcpu);

		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
		    !(vcpu->arch.shregs.msr & MSR_PR)) {
			r = kvmppc_pseries_do_hcall(vcpu);
			kvmppc_core_prepare_to_enter(vcpu);
		} else if (r == RESUME_PAGE_FAULT) {
			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_book3s_hv_page_fault(run, vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
		}
	} while (r == RESUME_GUEST);

 out:
	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
	atomic_dec(&vcpu->kvm->arch.vcpus_running);
	return r;
}


/* Work out RMLS (real mode limit selector) field value for a given RMA size.
   Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
	switch (rma_size) {
	case 32ul << 20:	/* 32 MB */
		if (cpu_has_feature(CPU_FTR_ARCH_206))
			return 8;	/* only supported on POWER7 */
		return -1;
	case 64ul << 20:	/* 64 MB */
		return 3;
	case 128ul << 20:	/* 128 MB */
		return 7;
	case 256ul << 20:	/* 256 MB */
		return 4;
	case 1ul << 30:		/* 1 GB */
		return 2;
	case 16ul << 30:	/* 16 GB */
		return 1;
	case 256ul << 30:	/* 256 GB */
		return 0;
	default:
		return -1;
	}
}

static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct kvmppc_linear_info *ri = vma->vm_file->private_data;
	struct page *page;

	if (vmf->pgoff >= ri->npages)
		return VM_FAULT_SIGBUS;

	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
	get_page(page);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct kvm_rma_vm_ops = {
	.fault = kvm_rma_fault,
};

static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
{
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &kvm_rma_vm_ops;
	return 0;
}

static int kvm_rma_release(struct inode *inode, struct file *filp)
{
	struct kvmppc_linear_info *ri = filp->private_data;

	kvm_release_rma(ri);
	return 0;
}

static struct file_operations kvm_rma_fops = {
	.mmap		= kvm_rma_mmap,
	.release	= kvm_rma_release,
};

long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
{
	struct kvmppc_linear_info *ri;
	long fd;

	ri = kvm_alloc_rma();
	if (!ri)
		return -ENOMEM;

	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
	if (fd < 0)
		kvm_release_rma(ri);

	ret->rma_size = ri->npages << PAGE_SHIFT;
	return fd;
}

static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
				     int linux_psize)
{
	struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];

	if (!def->shift)
		return;
	(*sps)->page_shift = def->shift;
	(*sps)->slb_enc = def->sllp;
	(*sps)->enc[0].page_shift = def->shift;
	(*sps)->enc[0].pte_enc = def->penc;
	(*sps)++;
}

int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
{
	struct kvm_ppc_one_seg_page_size *sps;

	info->flags = KVM_PPC_PAGE_SIZES_REAL;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		info->flags |= KVM_PPC_1T_SEGMENTS;
	info->slb_size = mmu_slb_size;

	/* We only support these sizes for now, and no multi-size segments */
	sps = &info->sps[0];
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
	kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

	return 0;
}
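/*
 * Illustrative only: userspace discovers the segment/page sizes reported by
 * kvm_vm_ioctl_get_smmu_info() above with the VM ioctl
 *
 *	struct kvm_ppc_smmu_info info;
 *	ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info);
 *
 * and then walks info.sps[].  This sketch is not part of this file.
 */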
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	memslot = id_to_memslot(kvm->memslots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	n = kvm_dirty_bitmap_bytes(memslot);
	memset(memslot->dirty_bitmap, 0, n);

	r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
	if (r)
		goto out;

	r = -EFAULT;
	if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
		goto out;

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void unpin_slot(struct kvm_memory_slot *memslot)
{
	unsigned long *physp;
	unsigned long j, npages, pfn;
	struct page *page;

	physp = memslot->arch.slot_phys;
	npages = memslot->npages;
	if (!physp)
		return;
	for (j = 0; j < npages; j++) {
		if (!(physp[j] & KVMPPC_GOT_PAGE))
			continue;
		pfn = physp[j] >> PAGE_SHIFT;
		page = pfn_to_page(pfn);
		SetPageDirty(page);
		put_page(page);
	}
}

void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
			      struct kvm_memory_slot *dont)
{
	if (!dont || free->arch.rmap != dont->arch.rmap) {
		vfree(free->arch.rmap);
		free->arch.rmap = NULL;
	}
	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
		unpin_slot(free);
		vfree(free->arch.slot_phys);
		free->arch.slot_phys = NULL;
	}
}

int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
			       unsigned long npages)
{
	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
	if (!slot->arch.rmap)
		return -ENOMEM;
	slot->arch.slot_phys = NULL;

	return 0;
}

int kvmppc_core_prepare_memory_region(struct kvm *kvm,
				      struct kvm_memory_slot *memslot,
				      struct kvm_userspace_memory_region *mem)
{
	unsigned long *phys;

	/* Allocate a slot_phys array if needed */
	phys = memslot->arch.slot_phys;
	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
		phys = vzalloc(memslot->npages * sizeof(unsigned long));
		if (!phys)
			return -ENOMEM;
		memslot->arch.slot_phys = phys;
	}

	return 0;
}

void kvmppc_core_commit_memory_region(struct kvm *kvm,
				      struct kvm_userspace_memory_region *mem,
				      struct kvm_memory_slot old)
{
	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
	struct kvm_memory_slot *memslot;

	if (npages && old.npages) {
		/*
		 * If modifying a memslot, reset all the rmap dirty bits.
		 * If this is a new memslot, we don't need to do anything
		 * since the rmap array starts out as all zeroes,
		 * i.e. no pages are dirty.
		 */
		memslot = id_to_memslot(kvm->memslots, mem->slot);
		kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
	}
}

static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
	int err = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_linear_info *ri = NULL;
	unsigned long hva;
	struct kvm_memory_slot *memslot;
	struct vm_area_struct *vma;
	unsigned long lpcr, senc;
	unsigned long psize, porder;
	unsigned long rma_size;
	long rmls;			/* signed so the < 0 check below works */
	unsigned long *physp;
	unsigned long i, npages;
	int srcu_idx;

	mutex_lock(&kvm->lock);
	if (kvm->arch.rma_setup_done)
		goto out;	/* another vcpu beat us to it */

	/* Allocate hashed page table (if not done already) and reset it */
	if (!kvm->arch.hpt_virt) {
		err = kvmppc_alloc_hpt(kvm, NULL);
		if (err) {
			pr_err("KVM: Couldn't alloc HPT\n");
			goto out;
		}
	}

	/* Look up the memslot for guest physical address 0 */
	srcu_idx = srcu_read_lock(&kvm->srcu);
	memslot = gfn_to_memslot(kvm, 0);

	/* We must have some memory at 0 by now */
	err = -EINVAL;
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
		goto out_srcu;

	/* Look up the VMA for the start of this memory slot */
	hva = memslot->userspace_addr;
	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, hva);
	if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
		goto up_out;

	psize = vma_kernel_pagesize(vma);
	porder = __ilog2(psize);

	/* Is this one of our preallocated RMAs? */
	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
	    hva == vma->vm_start)
		ri = vma->vm_file->private_data;

	up_read(&current->mm->mmap_sem);

	if (!ri) {
		/* On POWER7, use VRMA; on PPC970, give up */
		err = -EPERM;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			pr_err("KVM: CPU requires an RMO\n");
			goto out_srcu;
		}

		/* We can handle 4k, 64k or 16M pages in the VRMA */
		err = -EINVAL;
		if (!(psize == 0x1000 || psize == 0x10000 ||
		      psize == 0x1000000))
			goto out_srcu;

		/* Update VRMASD field in the LPCR */
		senc = slb_pgsize_encoding(psize);
		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
		lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
		lpcr |= senc << (LPCR_VRMASD_SH - 4);
		kvm->arch.lpcr = lpcr;

		/* Create HPTEs in the hash page table for the VRMA */
		kvmppc_map_vrma(vcpu, memslot, porder);

	} else {
		/* Set up to use an RMO region */
		rma_size = ri->npages;
		if (rma_size > memslot->npages)
			rma_size = memslot->npages;
		rma_size <<= PAGE_SHIFT;
		rmls = lpcr_rmls(rma_size);
		err = -EINVAL;
		if (rmls < 0) {
			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
			goto out_srcu;
		}
		atomic_inc(&ri->use_count);
		kvm->arch.rma = ri;

		/* Update LPCR and RMOR */
		lpcr = kvm->arch.lpcr;
		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
			/* PPC970; insert RMLS value (split field) in HID4 */
			lpcr &= ~((1ul << HID4_RMLS0_SH) |
				  (3ul << HID4_RMLS2_SH));
			lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
				((rmls & 3) << HID4_RMLS2_SH);
			/* RMOR is also in HID4 */
			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
				<< HID4_RMOR_SH;
		} else {
			/* POWER7 */
			lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
			lpcr |= rmls << LPCR_RMLS_SH;
			kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
		}
		kvm->arch.lpcr = lpcr;
		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);

		/* Initialize phys addrs of pages in RMO */
		npages = ri->npages;
		porder = __ilog2(npages);
		physp = memslot->arch.slot_phys;
		if (physp) {
			if (npages > memslot->npages)
				npages = memslot->npages;
			spin_lock(&kvm->arch.slot_phys_lock);
			for (i = 0; i < npages; ++i)
				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
					porder;
			spin_unlock(&kvm->arch.slot_phys_lock);
		}
	}

	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
	smp_wmb();
	kvm->arch.rma_setup_done = 1;
	err = 0;
 out_srcu:
	srcu_read_unlock(&kvm->srcu, srcu_idx);
 out:
	mutex_unlock(&kvm->lock);
	return err;

 up_out:
	up_read(&current->mm->mmap_sem);
	goto out;
}

int kvmppc_core_init_vm(struct kvm *kvm)
{
	unsigned long lpcr;
	long lpid;			/* signed so the < 0 check below works */

	/* Allocate the guest's logical partition ID */

	lpid = kvmppc_alloc_lpid();
	if (lpid < 0)
		return -ENOMEM;
	kvm->arch.lpid = lpid;

	/*
	 * Since we don't flush the TLB when tearing down a VM,
	 * and this lpid might have previously been used,
	 * make sure we flush on each core before running the new VM.
	 */
	cpumask_setall(&kvm->arch.need_tlb_flush);

	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);

	kvm->arch.rma = NULL;

	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
		/* PPC970; HID4 is effectively the LPCR */
		kvm->arch.host_lpid = 0;
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
			((lpid & 0xf) << HID4_LPID5_SH);
	} else {
		/* POWER7; init LPCR for virtual RMA mode */
		kvm->arch.host_lpid = mfspr(SPRN_LPID);
		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
		lpcr &= LPCR_PECE | LPCR_LPES;
		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
			LPCR_VPM0 | LPCR_VPM1;
		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
			(VRMA_VSID << SLB_VSID_SHIFT_1T);
	}
	kvm->arch.lpcr = lpcr;

	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
	spin_lock_init(&kvm->arch.slot_phys_lock);

	/*
	 * Don't allow secondary CPU threads to come online
	 * while any KVM VMs exist.
	 */
	inhibit_secondary_onlining();

	return 0;
}

void kvmppc_core_destroy_vm(struct kvm *kvm)
{
	uninhibit_secondary_onlining();

	if (kvm->arch.rma) {
		kvm_release_rma(kvm->arch.rma);
		kvm->arch.rma = NULL;
	}

	kvmppc_free_hpt(kvm);
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}

/* These are stubs for now */
void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
{
}

/* We don't need to emulate any privileged instructions or dcbz */
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
			   unsigned int inst, int *advance)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
	return EMULATE_FAIL;
}

int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
{
	return EMULATE_FAIL;
}

static int kvmppc_book3s_hv_init(void)
{
	int r;

	r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);

	if (r)
		return r;

	r = kvmppc_mmu_hv_init();

	return r;
}

static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}

module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);