/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/module.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_hv.h"

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);

#if defined(CONFIG_PPC_64K_PAGES)
#define MPP_BUFFER_ORDER	0
#elif defined(CONFIG_PPC_4K_PAGES)
#define MPP_BUFFER_ORDER	3
#endif


static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);

static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
	int me;
	int cpu = vcpu->cpu;
	wait_queue_head_t *wqp;

	wqp = kvm_arch_vcpu_wq(vcpu);
	if (waitqueue_active(wqp)) {
		wake_up_interruptible(wqp);
		++vcpu->stat.halt_wakeup;
	}

	me = get_cpu();

	/* CPU points to the first thread of the core */
	if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
#ifdef CONFIG_PPC_ICP_NATIVE
		int real_cpu = cpu + vcpu->arch.ptid;
		if (paca[real_cpu].kvm_hstate.xics_phys)
			xics_wake_cpu(real_cpu);
		else
#endif
		if (cpu_online(cpu))
			smp_send_reschedule(cpu);
	}
	put_cpu();
}

/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping.  Those two things have to be counted
 * separately, since one of the vcpu tasks will take on the job
 * of running the core, and the other vcpu tasks in the vcore will
 * sleep waiting for it to do that, but that sleep shouldn't count
 * as stolen time.
 *
 * Hence we accumulate stolen time when the vcpu can run as part of
 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
 * needs its task to do other things in the kernel (for example,
 * service a page fault) in busy_stolen.  We don't accumulate
 * stolen time for a vcore when it is inactive, or for a vcpu
 * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
 * a misnomer; it means that the vcpu task is not executing in
 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
 * the kernel.  We don't have any way of dividing up that time
 * between time that the vcpu is genuinely stopped, time that
 * the task is actively working on behalf of the vcpu, and time
 * that the task is preempted, so we don't count any of it as
 * stolen.
 *
 * Updates to busy_stolen are protected by arch.tbacct_lock;
 * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
 * lock.  The stolen times are measured in units of timebase ticks.
 * (Note that the != TB_NIL checks below are purely defensive;
 * they should never fail.)
 */

static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
		spin_lock_irqsave(&vc->stoltb_lock, flags);
		if (vc->preempt_tb != TB_NIL) {
			vc->stolen_tb += mftb() - vc->preempt_tb;
			vc->preempt_tb = TB_NIL;
		}
		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	}
	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
		spin_lock_irqsave(&vc->stoltb_lock, flags);
		vc->preempt_tb = mftb();
		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	}
	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
	unsigned long pcr = 0;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	if (arch_compat) {
		switch (arch_compat) {
		case PVR_ARCH_205:
			/*
			 * If an arch bit is set in PCR, all the defined
			 * higher-order arch bits also have to be set.
213 */ 214 pcr = PCR_ARCH_206 | PCR_ARCH_205; 215 break; 216 case PVR_ARCH_206: 217 case PVR_ARCH_206p: 218 pcr = PCR_ARCH_206; 219 break; 220 case PVR_ARCH_207: 221 break; 222 default: 223 return -EINVAL; 224 } 225 226 if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { 227 /* POWER7 can't emulate POWER8 */ 228 if (!(pcr & PCR_ARCH_206)) 229 return -EINVAL; 230 pcr &= ~PCR_ARCH_206; 231 } 232 } 233 234 spin_lock(&vc->lock); 235 vc->arch_compat = arch_compat; 236 vc->pcr = pcr; 237 spin_unlock(&vc->lock); 238 239 return 0; 240 } 241 242 void kvmppc_dump_regs(struct kvm_vcpu *vcpu) 243 { 244 int r; 245 246 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id); 247 pr_err("pc = %.16lx msr = %.16llx trap = %x\n", 248 vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap); 249 for (r = 0; r < 16; ++r) 250 pr_err("r%2d = %.16lx r%d = %.16lx\n", 251 r, kvmppc_get_gpr(vcpu, r), 252 r+16, kvmppc_get_gpr(vcpu, r+16)); 253 pr_err("ctr = %.16lx lr = %.16lx\n", 254 vcpu->arch.ctr, vcpu->arch.lr); 255 pr_err("srr0 = %.16llx srr1 = %.16llx\n", 256 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1); 257 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n", 258 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1); 259 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n", 260 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3); 261 pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n", 262 vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr); 263 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar); 264 pr_err("fault dar = %.16lx dsisr = %.8x\n", 265 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 266 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max); 267 for (r = 0; r < vcpu->arch.slb_max; ++r) 268 pr_err(" ESID = %.16llx VSID = %.16llx\n", 269 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv); 270 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n", 271 vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1, 272 vcpu->arch.last_inst); 273 } 274 275 struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) 276 { 277 int r; 278 struct kvm_vcpu *v, *ret = NULL; 279 280 mutex_lock(&kvm->lock); 281 kvm_for_each_vcpu(r, v, kvm) { 282 if (v->vcpu_id == id) { 283 ret = v; 284 break; 285 } 286 } 287 mutex_unlock(&kvm->lock); 288 return ret; 289 } 290 291 static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) 292 { 293 vpa->__old_status |= LPPACA_OLD_SHARED_PROC; 294 vpa->yield_count = cpu_to_be32(1); 295 } 296 297 static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, 298 unsigned long addr, unsigned long len) 299 { 300 /* check address is cacheline aligned */ 301 if (addr & (L1_CACHE_BYTES - 1)) 302 return -EINVAL; 303 spin_lock(&vcpu->arch.vpa_update_lock); 304 if (v->next_gpa != addr || v->len != len) { 305 v->next_gpa = addr; 306 v->len = addr ? 
len : 0; 307 v->update_pending = 1; 308 } 309 spin_unlock(&vcpu->arch.vpa_update_lock); 310 return 0; 311 } 312 313 /* Length for a per-processor buffer is passed in at offset 4 in the buffer */ 314 struct reg_vpa { 315 u32 dummy; 316 union { 317 __be16 hword; 318 __be32 word; 319 } length; 320 }; 321 322 static int vpa_is_registered(struct kvmppc_vpa *vpap) 323 { 324 if (vpap->update_pending) 325 return vpap->next_gpa != 0; 326 return vpap->pinned_addr != NULL; 327 } 328 329 static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, 330 unsigned long flags, 331 unsigned long vcpuid, unsigned long vpa) 332 { 333 struct kvm *kvm = vcpu->kvm; 334 unsigned long len, nb; 335 void *va; 336 struct kvm_vcpu *tvcpu; 337 int err; 338 int subfunc; 339 struct kvmppc_vpa *vpap; 340 341 tvcpu = kvmppc_find_vcpu(kvm, vcpuid); 342 if (!tvcpu) 343 return H_PARAMETER; 344 345 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK; 346 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL || 347 subfunc == H_VPA_REG_SLB) { 348 /* Registering new area - address must be cache-line aligned */ 349 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa) 350 return H_PARAMETER; 351 352 /* convert logical addr to kernel addr and read length */ 353 va = kvmppc_pin_guest_page(kvm, vpa, &nb); 354 if (va == NULL) 355 return H_PARAMETER; 356 if (subfunc == H_VPA_REG_VPA) 357 len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); 358 else 359 len = be32_to_cpu(((struct reg_vpa *)va)->length.word); 360 kvmppc_unpin_guest_page(kvm, va, vpa, false); 361 362 /* Check length */ 363 if (len > nb || len < sizeof(struct reg_vpa)) 364 return H_PARAMETER; 365 } else { 366 vpa = 0; 367 len = 0; 368 } 369 370 err = H_PARAMETER; 371 vpap = NULL; 372 spin_lock(&tvcpu->arch.vpa_update_lock); 373 374 switch (subfunc) { 375 case H_VPA_REG_VPA: /* register VPA */ 376 if (len < sizeof(struct lppaca)) 377 break; 378 vpap = &tvcpu->arch.vpa; 379 err = 0; 380 break; 381 382 case H_VPA_REG_DTL: /* register DTL */ 383 if (len < sizeof(struct dtl_entry)) 384 break; 385 len -= len % sizeof(struct dtl_entry); 386 387 /* Check that they have previously registered a VPA */ 388 err = H_RESOURCE; 389 if (!vpa_is_registered(&tvcpu->arch.vpa)) 390 break; 391 392 vpap = &tvcpu->arch.dtl; 393 err = 0; 394 break; 395 396 case H_VPA_REG_SLB: /* register SLB shadow buffer */ 397 /* Check that they have previously registered a VPA */ 398 err = H_RESOURCE; 399 if (!vpa_is_registered(&tvcpu->arch.vpa)) 400 break; 401 402 vpap = &tvcpu->arch.slb_shadow; 403 err = 0; 404 break; 405 406 case H_VPA_DEREG_VPA: /* deregister VPA */ 407 /* Check they don't still have a DTL or SLB buf registered */ 408 err = H_RESOURCE; 409 if (vpa_is_registered(&tvcpu->arch.dtl) || 410 vpa_is_registered(&tvcpu->arch.slb_shadow)) 411 break; 412 413 vpap = &tvcpu->arch.vpa; 414 err = 0; 415 break; 416 417 case H_VPA_DEREG_DTL: /* deregister DTL */ 418 vpap = &tvcpu->arch.dtl; 419 err = 0; 420 break; 421 422 case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */ 423 vpap = &tvcpu->arch.slb_shadow; 424 err = 0; 425 break; 426 } 427 428 if (vpap) { 429 vpap->next_gpa = vpa; 430 vpap->len = len; 431 vpap->update_pending = 1; 432 } 433 434 spin_unlock(&tvcpu->arch.vpa_update_lock); 435 436 return err; 437 } 438 439 static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) 440 { 441 struct kvm *kvm = vcpu->kvm; 442 void *va; 443 unsigned long nb; 444 unsigned long gpa; 445 446 /* 447 * We need to pin the page pointed to by vpap->next_gpa, 448 * but we can't call 
kvmppc_pin_guest_page under the lock 449 * as it does get_user_pages() and down_read(). So we 450 * have to drop the lock, pin the page, then get the lock 451 * again and check that a new area didn't get registered 452 * in the meantime. 453 */ 454 for (;;) { 455 gpa = vpap->next_gpa; 456 spin_unlock(&vcpu->arch.vpa_update_lock); 457 va = NULL; 458 nb = 0; 459 if (gpa) 460 va = kvmppc_pin_guest_page(kvm, gpa, &nb); 461 spin_lock(&vcpu->arch.vpa_update_lock); 462 if (gpa == vpap->next_gpa) 463 break; 464 /* sigh... unpin that one and try again */ 465 if (va) 466 kvmppc_unpin_guest_page(kvm, va, gpa, false); 467 } 468 469 vpap->update_pending = 0; 470 if (va && nb < vpap->len) { 471 /* 472 * If it's now too short, it must be that userspace 473 * has changed the mappings underlying guest memory, 474 * so unregister the region. 475 */ 476 kvmppc_unpin_guest_page(kvm, va, gpa, false); 477 va = NULL; 478 } 479 if (vpap->pinned_addr) 480 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa, 481 vpap->dirty); 482 vpap->gpa = gpa; 483 vpap->pinned_addr = va; 484 vpap->dirty = false; 485 if (va) 486 vpap->pinned_end = va + vpap->len; 487 } 488 489 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) 490 { 491 if (!(vcpu->arch.vpa.update_pending || 492 vcpu->arch.slb_shadow.update_pending || 493 vcpu->arch.dtl.update_pending)) 494 return; 495 496 spin_lock(&vcpu->arch.vpa_update_lock); 497 if (vcpu->arch.vpa.update_pending) { 498 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); 499 if (vcpu->arch.vpa.pinned_addr) 500 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); 501 } 502 if (vcpu->arch.dtl.update_pending) { 503 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); 504 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr; 505 vcpu->arch.dtl_index = 0; 506 } 507 if (vcpu->arch.slb_shadow.update_pending) 508 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow); 509 spin_unlock(&vcpu->arch.vpa_update_lock); 510 } 511 512 /* 513 * Return the accumulated stolen time for the vcore up until `now'. 514 * The caller should hold the vcore lock. 515 */ 516 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) 517 { 518 u64 p; 519 unsigned long flags; 520 521 spin_lock_irqsave(&vc->stoltb_lock, flags); 522 p = vc->stolen_tb; 523 if (vc->vcore_state != VCORE_INACTIVE && 524 vc->preempt_tb != TB_NIL) 525 p += now - vc->preempt_tb; 526 spin_unlock_irqrestore(&vc->stoltb_lock, flags); 527 return p; 528 } 529 530 static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, 531 struct kvmppc_vcore *vc) 532 { 533 struct dtl_entry *dt; 534 struct lppaca *vpa; 535 unsigned long stolen; 536 unsigned long core_stolen; 537 u64 now; 538 539 dt = vcpu->arch.dtl_ptr; 540 vpa = vcpu->arch.vpa.pinned_addr; 541 now = mftb(); 542 core_stolen = vcore_stolen_time(vc, now); 543 stolen = core_stolen - vcpu->arch.stolen_logged; 544 vcpu->arch.stolen_logged = core_stolen; 545 spin_lock_irq(&vcpu->arch.tbacct_lock); 546 stolen += vcpu->arch.busy_stolen; 547 vcpu->arch.busy_stolen = 0; 548 spin_unlock_irq(&vcpu->arch.tbacct_lock); 549 if (!dt || !vpa) 550 return; 551 memset(dt, 0, sizeof(struct dtl_entry)); 552 dt->dispatch_reason = 7; 553 dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); 554 dt->timebase = cpu_to_be64(now + vc->tb_offset); 555 dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); 556 dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); 557 dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); 558 ++dt; 559 if (dt == vcpu->arch.dtl.pinned_end) 560 dt = vcpu->arch.dtl.pinned_addr; 561 vcpu->arch.dtl_ptr = dt; 562 /* order writing *dt vs. 
writing vpa->dtl_idx */ 563 smp_wmb(); 564 vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); 565 vcpu->arch.dtl.dirty = true; 566 } 567 568 static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) 569 { 570 if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) 571 return true; 572 if ((!vcpu->arch.vcore->arch_compat) && 573 cpu_has_feature(CPU_FTR_ARCH_207S)) 574 return true; 575 return false; 576 } 577 578 static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, 579 unsigned long resource, unsigned long value1, 580 unsigned long value2) 581 { 582 switch (resource) { 583 case H_SET_MODE_RESOURCE_SET_CIABR: 584 if (!kvmppc_power8_compatible(vcpu)) 585 return H_P2; 586 if (value2) 587 return H_P4; 588 if (mflags) 589 return H_UNSUPPORTED_FLAG_START; 590 /* Guests can't breakpoint the hypervisor */ 591 if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) 592 return H_P3; 593 vcpu->arch.ciabr = value1; 594 return H_SUCCESS; 595 case H_SET_MODE_RESOURCE_SET_DAWR: 596 if (!kvmppc_power8_compatible(vcpu)) 597 return H_P2; 598 if (mflags) 599 return H_UNSUPPORTED_FLAG_START; 600 if (value2 & DABRX_HYP) 601 return H_P4; 602 vcpu->arch.dawr = value1; 603 vcpu->arch.dawrx = value2; 604 return H_SUCCESS; 605 default: 606 return H_TOO_HARD; 607 } 608 } 609 610 static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target) 611 { 612 struct kvmppc_vcore *vcore = target->arch.vcore; 613 614 /* 615 * We expect to have been called by the real mode handler 616 * (kvmppc_rm_h_confer()) which would have directly returned 617 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may 618 * have useful work to do and should not confer) so we don't 619 * recheck that here. 620 */ 621 622 spin_lock(&vcore->lock); 623 if (target->arch.state == KVMPPC_VCPU_RUNNABLE && 624 vcore->vcore_state != VCORE_INACTIVE) 625 target = vcore->runner; 626 spin_unlock(&vcore->lock); 627 628 return kvm_vcpu_yield_to(target); 629 } 630 631 static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu) 632 { 633 int yield_count = 0; 634 struct lppaca *lppaca; 635 636 spin_lock(&vcpu->arch.vpa_update_lock); 637 lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr; 638 if (lppaca) 639 yield_count = lppaca->yield_count; 640 spin_unlock(&vcpu->arch.vpa_update_lock); 641 return yield_count; 642 } 643 644 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) 645 { 646 unsigned long req = kvmppc_get_gpr(vcpu, 3); 647 unsigned long target, ret = H_SUCCESS; 648 int yield_count; 649 struct kvm_vcpu *tvcpu; 650 int idx, rc; 651 652 if (req <= MAX_HCALL_OPCODE && 653 !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) 654 return RESUME_HOST; 655 656 switch (req) { 657 case H_CEDE: 658 break; 659 case H_PROD: 660 target = kvmppc_get_gpr(vcpu, 4); 661 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); 662 if (!tvcpu) { 663 ret = H_PARAMETER; 664 break; 665 } 666 tvcpu->arch.prodded = 1; 667 smp_mb(); 668 if (vcpu->arch.ceded) { 669 if (waitqueue_active(&vcpu->wq)) { 670 wake_up_interruptible(&vcpu->wq); 671 vcpu->stat.halt_wakeup++; 672 } 673 } 674 break; 675 case H_CONFER: 676 target = kvmppc_get_gpr(vcpu, 4); 677 if (target == -1) 678 break; 679 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target); 680 if (!tvcpu) { 681 ret = H_PARAMETER; 682 break; 683 } 684 yield_count = kvmppc_get_gpr(vcpu, 5); 685 if (kvmppc_get_yield_count(tvcpu) != yield_count) 686 break; 687 kvm_arch_vcpu_yield_to(tvcpu); 688 break; 689 case H_REGISTER_VPA: 690 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4), 691 kvmppc_get_gpr(vcpu, 5), 692 kvmppc_get_gpr(vcpu, 6)); 693 break; 694 
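	/*
	 * RTAS calls made via the H_RTAS hcall are forwarded to the
	 * in-kernel RTAS token handlers; if no tokens have been
	 * registered, or the requested token is unknown, the call is
	 * punted out to userspace instead.
	 */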
case H_RTAS: 695 if (list_empty(&vcpu->kvm->arch.rtas_tokens)) 696 return RESUME_HOST; 697 698 idx = srcu_read_lock(&vcpu->kvm->srcu); 699 rc = kvmppc_rtas_hcall(vcpu); 700 srcu_read_unlock(&vcpu->kvm->srcu, idx); 701 702 if (rc == -ENOENT) 703 return RESUME_HOST; 704 else if (rc == 0) 705 break; 706 707 /* Send the error out to userspace via KVM_RUN */ 708 return rc; 709 case H_SET_MODE: 710 ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), 711 kvmppc_get_gpr(vcpu, 5), 712 kvmppc_get_gpr(vcpu, 6), 713 kvmppc_get_gpr(vcpu, 7)); 714 if (ret == H_TOO_HARD) 715 return RESUME_HOST; 716 break; 717 case H_XIRR: 718 case H_CPPR: 719 case H_EOI: 720 case H_IPI: 721 case H_IPOLL: 722 case H_XIRR_X: 723 if (kvmppc_xics_enabled(vcpu)) { 724 ret = kvmppc_xics_hcall(vcpu, req); 725 break; 726 } /* fallthrough */ 727 default: 728 return RESUME_HOST; 729 } 730 kvmppc_set_gpr(vcpu, 3, ret); 731 vcpu->arch.hcall_needed = 0; 732 return RESUME_GUEST; 733 } 734 735 static int kvmppc_hcall_impl_hv(unsigned long cmd) 736 { 737 switch (cmd) { 738 case H_CEDE: 739 case H_PROD: 740 case H_CONFER: 741 case H_REGISTER_VPA: 742 case H_SET_MODE: 743 #ifdef CONFIG_KVM_XICS 744 case H_XIRR: 745 case H_CPPR: 746 case H_EOI: 747 case H_IPI: 748 case H_IPOLL: 749 case H_XIRR_X: 750 #endif 751 return 1; 752 } 753 754 /* See if it's in the real-mode table */ 755 return kvmppc_hcall_impl_hv_realmode(cmd); 756 } 757 758 static int kvmppc_emulate_debug_inst(struct kvm_run *run, 759 struct kvm_vcpu *vcpu) 760 { 761 u32 last_inst; 762 763 if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != 764 EMULATE_DONE) { 765 /* 766 * Fetch failed, so return to guest and 767 * try executing it again. 768 */ 769 return RESUME_GUEST; 770 } 771 772 if (last_inst == KVMPPC_INST_SW_BREAKPOINT) { 773 run->exit_reason = KVM_EXIT_DEBUG; 774 run->debug.arch.address = kvmppc_get_pc(vcpu); 775 return RESUME_HOST; 776 } else { 777 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 778 return RESUME_GUEST; 779 } 780 } 781 782 static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, 783 struct task_struct *tsk) 784 { 785 int r = RESUME_HOST; 786 787 vcpu->stat.sum_exits++; 788 789 run->exit_reason = KVM_EXIT_UNKNOWN; 790 run->ready_for_interrupt_injection = 1; 791 switch (vcpu->arch.trap) { 792 /* We're good on these - the host merely wanted to get our attention */ 793 case BOOK3S_INTERRUPT_HV_DECREMENTER: 794 vcpu->stat.dec_exits++; 795 r = RESUME_GUEST; 796 break; 797 case BOOK3S_INTERRUPT_EXTERNAL: 798 case BOOK3S_INTERRUPT_H_DOORBELL: 799 vcpu->stat.ext_intr_exits++; 800 r = RESUME_GUEST; 801 break; 802 /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/ 803 case BOOK3S_INTERRUPT_HMI: 804 case BOOK3S_INTERRUPT_PERFMON: 805 r = RESUME_GUEST; 806 break; 807 case BOOK3S_INTERRUPT_MACHINE_CHECK: 808 /* 809 * Deliver a machine check interrupt to the guest. 810 * We have to do this, even if the host has handled the 811 * machine check, because machine checks use SRR0/1 and 812 * the interrupt might have trashed guest state in them. 813 */ 814 kvmppc_book3s_queue_irqprio(vcpu, 815 BOOK3S_INTERRUPT_MACHINE_CHECK); 816 r = RESUME_GUEST; 817 break; 818 case BOOK3S_INTERRUPT_PROGRAM: 819 { 820 ulong flags; 821 /* 822 * Normally program interrupts are delivered directly 823 * to the guest by the hardware, but we can get here 824 * as a result of a hypervisor emulation interrupt 825 * (e40) getting turned into a 700 by BML RTAS. 
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		/* hypercall with MSR_PR has already been handled in rmode,
		 * and never reaches here.
		 */

		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If the guest debug is disabled, generate a program interrupt
	 * to the guest.  If guest debug is enabled, we need to check
	 * whether the instruction is a software breakpoint instruction.
	 * Accordingly return to Guest or Host.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
				swab32(vcpu->arch.emul_inst) :
				vcpu->arch.emul_inst;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
			r = kvmppc_emulate_debug_inst(run, vcpu);
		} else {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	/*
	 * This occurs if the guest (kernel or userspace) does something that
	 * is prohibited by HFSCR.  We just generate a program interrupt to
	 * the guest.
887 */ 888 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: 889 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 890 r = RESUME_GUEST; 891 break; 892 default: 893 kvmppc_dump_regs(vcpu); 894 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n", 895 vcpu->arch.trap, kvmppc_get_pc(vcpu), 896 vcpu->arch.shregs.msr); 897 run->hw.hardware_exit_reason = vcpu->arch.trap; 898 r = RESUME_HOST; 899 break; 900 } 901 902 return r; 903 } 904 905 static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu, 906 struct kvm_sregs *sregs) 907 { 908 int i; 909 910 memset(sregs, 0, sizeof(struct kvm_sregs)); 911 sregs->pvr = vcpu->arch.pvr; 912 for (i = 0; i < vcpu->arch.slb_max; i++) { 913 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige; 914 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; 915 } 916 917 return 0; 918 } 919 920 static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, 921 struct kvm_sregs *sregs) 922 { 923 int i, j; 924 925 /* Only accept the same PVR as the host's, since we can't spoof it */ 926 if (sregs->pvr != vcpu->arch.pvr) 927 return -EINVAL; 928 929 j = 0; 930 for (i = 0; i < vcpu->arch.slb_nr; i++) { 931 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) { 932 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe; 933 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv; 934 ++j; 935 } 936 } 937 vcpu->arch.slb_max = j; 938 939 return 0; 940 } 941 942 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, 943 bool preserve_top32) 944 { 945 struct kvmppc_vcore *vc = vcpu->arch.vcore; 946 u64 mask; 947 948 spin_lock(&vc->lock); 949 /* 950 * If ILE (interrupt little-endian) has changed, update the 951 * MSR_LE bit in the intr_msr for each vcpu in this vcore. 952 */ 953 if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) { 954 struct kvm *kvm = vcpu->kvm; 955 struct kvm_vcpu *vcpu; 956 int i; 957 958 mutex_lock(&kvm->lock); 959 kvm_for_each_vcpu(i, vcpu, kvm) { 960 if (vcpu->arch.vcore != vc) 961 continue; 962 if (new_lpcr & LPCR_ILE) 963 vcpu->arch.intr_msr |= MSR_LE; 964 else 965 vcpu->arch.intr_msr &= ~MSR_LE; 966 } 967 mutex_unlock(&kvm->lock); 968 } 969 970 /* 971 * Userspace can only modify DPFD (default prefetch depth), 972 * ILE (interrupt little-endian) and TC (translation control). 973 * On POWER8 userspace can also modify AIL (alt. interrupt loc.) 
974 */ 975 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; 976 if (cpu_has_feature(CPU_FTR_ARCH_207S)) 977 mask |= LPCR_AIL; 978 979 /* Broken 32-bit version of LPCR must not clear top bits */ 980 if (preserve_top32) 981 mask &= 0xFFFFFFFF; 982 vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); 983 spin_unlock(&vc->lock); 984 } 985 986 static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 987 union kvmppc_one_reg *val) 988 { 989 int r = 0; 990 long int i; 991 992 switch (id) { 993 case KVM_REG_PPC_DEBUG_INST: 994 *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT); 995 break; 996 case KVM_REG_PPC_HIOR: 997 *val = get_reg_val(id, 0); 998 break; 999 case KVM_REG_PPC_DABR: 1000 *val = get_reg_val(id, vcpu->arch.dabr); 1001 break; 1002 case KVM_REG_PPC_DABRX: 1003 *val = get_reg_val(id, vcpu->arch.dabrx); 1004 break; 1005 case KVM_REG_PPC_DSCR: 1006 *val = get_reg_val(id, vcpu->arch.dscr); 1007 break; 1008 case KVM_REG_PPC_PURR: 1009 *val = get_reg_val(id, vcpu->arch.purr); 1010 break; 1011 case KVM_REG_PPC_SPURR: 1012 *val = get_reg_val(id, vcpu->arch.spurr); 1013 break; 1014 case KVM_REG_PPC_AMR: 1015 *val = get_reg_val(id, vcpu->arch.amr); 1016 break; 1017 case KVM_REG_PPC_UAMOR: 1018 *val = get_reg_val(id, vcpu->arch.uamor); 1019 break; 1020 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS: 1021 i = id - KVM_REG_PPC_MMCR0; 1022 *val = get_reg_val(id, vcpu->arch.mmcr[i]); 1023 break; 1024 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 1025 i = id - KVM_REG_PPC_PMC1; 1026 *val = get_reg_val(id, vcpu->arch.pmc[i]); 1027 break; 1028 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: 1029 i = id - KVM_REG_PPC_SPMC1; 1030 *val = get_reg_val(id, vcpu->arch.spmc[i]); 1031 break; 1032 case KVM_REG_PPC_SIAR: 1033 *val = get_reg_val(id, vcpu->arch.siar); 1034 break; 1035 case KVM_REG_PPC_SDAR: 1036 *val = get_reg_val(id, vcpu->arch.sdar); 1037 break; 1038 case KVM_REG_PPC_SIER: 1039 *val = get_reg_val(id, vcpu->arch.sier); 1040 break; 1041 case KVM_REG_PPC_IAMR: 1042 *val = get_reg_val(id, vcpu->arch.iamr); 1043 break; 1044 case KVM_REG_PPC_PSPB: 1045 *val = get_reg_val(id, vcpu->arch.pspb); 1046 break; 1047 case KVM_REG_PPC_DPDES: 1048 *val = get_reg_val(id, vcpu->arch.vcore->dpdes); 1049 break; 1050 case KVM_REG_PPC_DAWR: 1051 *val = get_reg_val(id, vcpu->arch.dawr); 1052 break; 1053 case KVM_REG_PPC_DAWRX: 1054 *val = get_reg_val(id, vcpu->arch.dawrx); 1055 break; 1056 case KVM_REG_PPC_CIABR: 1057 *val = get_reg_val(id, vcpu->arch.ciabr); 1058 break; 1059 case KVM_REG_PPC_CSIGR: 1060 *val = get_reg_val(id, vcpu->arch.csigr); 1061 break; 1062 case KVM_REG_PPC_TACR: 1063 *val = get_reg_val(id, vcpu->arch.tacr); 1064 break; 1065 case KVM_REG_PPC_TCSCR: 1066 *val = get_reg_val(id, vcpu->arch.tcscr); 1067 break; 1068 case KVM_REG_PPC_PID: 1069 *val = get_reg_val(id, vcpu->arch.pid); 1070 break; 1071 case KVM_REG_PPC_ACOP: 1072 *val = get_reg_val(id, vcpu->arch.acop); 1073 break; 1074 case KVM_REG_PPC_WORT: 1075 *val = get_reg_val(id, vcpu->arch.wort); 1076 break; 1077 case KVM_REG_PPC_VPA_ADDR: 1078 spin_lock(&vcpu->arch.vpa_update_lock); 1079 *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); 1080 spin_unlock(&vcpu->arch.vpa_update_lock); 1081 break; 1082 case KVM_REG_PPC_VPA_SLB: 1083 spin_lock(&vcpu->arch.vpa_update_lock); 1084 val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa; 1085 val->vpaval.length = vcpu->arch.slb_shadow.len; 1086 spin_unlock(&vcpu->arch.vpa_update_lock); 1087 break; 1088 case KVM_REG_PPC_VPA_DTL: 1089 spin_lock(&vcpu->arch.vpa_update_lock); 1090 val->vpaval.addr = vcpu->arch.dtl.next_gpa; 
1091 val->vpaval.length = vcpu->arch.dtl.len; 1092 spin_unlock(&vcpu->arch.vpa_update_lock); 1093 break; 1094 case KVM_REG_PPC_TB_OFFSET: 1095 *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); 1096 break; 1097 case KVM_REG_PPC_LPCR: 1098 case KVM_REG_PPC_LPCR_64: 1099 *val = get_reg_val(id, vcpu->arch.vcore->lpcr); 1100 break; 1101 case KVM_REG_PPC_PPR: 1102 *val = get_reg_val(id, vcpu->arch.ppr); 1103 break; 1104 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1105 case KVM_REG_PPC_TFHAR: 1106 *val = get_reg_val(id, vcpu->arch.tfhar); 1107 break; 1108 case KVM_REG_PPC_TFIAR: 1109 *val = get_reg_val(id, vcpu->arch.tfiar); 1110 break; 1111 case KVM_REG_PPC_TEXASR: 1112 *val = get_reg_val(id, vcpu->arch.texasr); 1113 break; 1114 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31: 1115 i = id - KVM_REG_PPC_TM_GPR0; 1116 *val = get_reg_val(id, vcpu->arch.gpr_tm[i]); 1117 break; 1118 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: 1119 { 1120 int j; 1121 i = id - KVM_REG_PPC_TM_VSR0; 1122 if (i < 32) 1123 for (j = 0; j < TS_FPRWIDTH; j++) 1124 val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j]; 1125 else { 1126 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 1127 val->vval = vcpu->arch.vr_tm.vr[i-32]; 1128 else 1129 r = -ENXIO; 1130 } 1131 break; 1132 } 1133 case KVM_REG_PPC_TM_CR: 1134 *val = get_reg_val(id, vcpu->arch.cr_tm); 1135 break; 1136 case KVM_REG_PPC_TM_LR: 1137 *val = get_reg_val(id, vcpu->arch.lr_tm); 1138 break; 1139 case KVM_REG_PPC_TM_CTR: 1140 *val = get_reg_val(id, vcpu->arch.ctr_tm); 1141 break; 1142 case KVM_REG_PPC_TM_FPSCR: 1143 *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr); 1144 break; 1145 case KVM_REG_PPC_TM_AMR: 1146 *val = get_reg_val(id, vcpu->arch.amr_tm); 1147 break; 1148 case KVM_REG_PPC_TM_PPR: 1149 *val = get_reg_val(id, vcpu->arch.ppr_tm); 1150 break; 1151 case KVM_REG_PPC_TM_VRSAVE: 1152 *val = get_reg_val(id, vcpu->arch.vrsave_tm); 1153 break; 1154 case KVM_REG_PPC_TM_VSCR: 1155 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 1156 *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]); 1157 else 1158 r = -ENXIO; 1159 break; 1160 case KVM_REG_PPC_TM_DSCR: 1161 *val = get_reg_val(id, vcpu->arch.dscr_tm); 1162 break; 1163 case KVM_REG_PPC_TM_TAR: 1164 *val = get_reg_val(id, vcpu->arch.tar_tm); 1165 break; 1166 #endif 1167 case KVM_REG_PPC_ARCH_COMPAT: 1168 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 1169 break; 1170 default: 1171 r = -EINVAL; 1172 break; 1173 } 1174 1175 return r; 1176 } 1177 1178 static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, 1179 union kvmppc_one_reg *val) 1180 { 1181 int r = 0; 1182 long int i; 1183 unsigned long addr, len; 1184 1185 switch (id) { 1186 case KVM_REG_PPC_HIOR: 1187 /* Only allow this to be set to zero */ 1188 if (set_reg_val(id, *val)) 1189 r = -EINVAL; 1190 break; 1191 case KVM_REG_PPC_DABR: 1192 vcpu->arch.dabr = set_reg_val(id, *val); 1193 break; 1194 case KVM_REG_PPC_DABRX: 1195 vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP; 1196 break; 1197 case KVM_REG_PPC_DSCR: 1198 vcpu->arch.dscr = set_reg_val(id, *val); 1199 break; 1200 case KVM_REG_PPC_PURR: 1201 vcpu->arch.purr = set_reg_val(id, *val); 1202 break; 1203 case KVM_REG_PPC_SPURR: 1204 vcpu->arch.spurr = set_reg_val(id, *val); 1205 break; 1206 case KVM_REG_PPC_AMR: 1207 vcpu->arch.amr = set_reg_val(id, *val); 1208 break; 1209 case KVM_REG_PPC_UAMOR: 1210 vcpu->arch.uamor = set_reg_val(id, *val); 1211 break; 1212 case KVM_REG_PPC_MMCR0 ... 
KVM_REG_PPC_MMCRS: 1213 i = id - KVM_REG_PPC_MMCR0; 1214 vcpu->arch.mmcr[i] = set_reg_val(id, *val); 1215 break; 1216 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: 1217 i = id - KVM_REG_PPC_PMC1; 1218 vcpu->arch.pmc[i] = set_reg_val(id, *val); 1219 break; 1220 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2: 1221 i = id - KVM_REG_PPC_SPMC1; 1222 vcpu->arch.spmc[i] = set_reg_val(id, *val); 1223 break; 1224 case KVM_REG_PPC_SIAR: 1225 vcpu->arch.siar = set_reg_val(id, *val); 1226 break; 1227 case KVM_REG_PPC_SDAR: 1228 vcpu->arch.sdar = set_reg_val(id, *val); 1229 break; 1230 case KVM_REG_PPC_SIER: 1231 vcpu->arch.sier = set_reg_val(id, *val); 1232 break; 1233 case KVM_REG_PPC_IAMR: 1234 vcpu->arch.iamr = set_reg_val(id, *val); 1235 break; 1236 case KVM_REG_PPC_PSPB: 1237 vcpu->arch.pspb = set_reg_val(id, *val); 1238 break; 1239 case KVM_REG_PPC_DPDES: 1240 vcpu->arch.vcore->dpdes = set_reg_val(id, *val); 1241 break; 1242 case KVM_REG_PPC_DAWR: 1243 vcpu->arch.dawr = set_reg_val(id, *val); 1244 break; 1245 case KVM_REG_PPC_DAWRX: 1246 vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP; 1247 break; 1248 case KVM_REG_PPC_CIABR: 1249 vcpu->arch.ciabr = set_reg_val(id, *val); 1250 /* Don't allow setting breakpoints in hypervisor code */ 1251 if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) 1252 vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ 1253 break; 1254 case KVM_REG_PPC_CSIGR: 1255 vcpu->arch.csigr = set_reg_val(id, *val); 1256 break; 1257 case KVM_REG_PPC_TACR: 1258 vcpu->arch.tacr = set_reg_val(id, *val); 1259 break; 1260 case KVM_REG_PPC_TCSCR: 1261 vcpu->arch.tcscr = set_reg_val(id, *val); 1262 break; 1263 case KVM_REG_PPC_PID: 1264 vcpu->arch.pid = set_reg_val(id, *val); 1265 break; 1266 case KVM_REG_PPC_ACOP: 1267 vcpu->arch.acop = set_reg_val(id, *val); 1268 break; 1269 case KVM_REG_PPC_WORT: 1270 vcpu->arch.wort = set_reg_val(id, *val); 1271 break; 1272 case KVM_REG_PPC_VPA_ADDR: 1273 addr = set_reg_val(id, *val); 1274 r = -EINVAL; 1275 if (!addr && (vcpu->arch.slb_shadow.next_gpa || 1276 vcpu->arch.dtl.next_gpa)) 1277 break; 1278 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca)); 1279 break; 1280 case KVM_REG_PPC_VPA_SLB: 1281 addr = val->vpaval.addr; 1282 len = val->vpaval.length; 1283 r = -EINVAL; 1284 if (addr && !vcpu->arch.vpa.next_gpa) 1285 break; 1286 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len); 1287 break; 1288 case KVM_REG_PPC_VPA_DTL: 1289 addr = val->vpaval.addr; 1290 len = val->vpaval.length; 1291 r = -EINVAL; 1292 if (addr && (len < sizeof(struct dtl_entry) || 1293 !vcpu->arch.vpa.next_gpa)) 1294 break; 1295 len -= len % sizeof(struct dtl_entry); 1296 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); 1297 break; 1298 case KVM_REG_PPC_TB_OFFSET: 1299 /* round up to multiple of 2^24 */ 1300 vcpu->arch.vcore->tb_offset = 1301 ALIGN(set_reg_val(id, *val), 1UL << 24); 1302 break; 1303 case KVM_REG_PPC_LPCR: 1304 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); 1305 break; 1306 case KVM_REG_PPC_LPCR_64: 1307 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); 1308 break; 1309 case KVM_REG_PPC_PPR: 1310 vcpu->arch.ppr = set_reg_val(id, *val); 1311 break; 1312 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1313 case KVM_REG_PPC_TFHAR: 1314 vcpu->arch.tfhar = set_reg_val(id, *val); 1315 break; 1316 case KVM_REG_PPC_TFIAR: 1317 vcpu->arch.tfiar = set_reg_val(id, *val); 1318 break; 1319 case KVM_REG_PPC_TEXASR: 1320 vcpu->arch.texasr = set_reg_val(id, *val); 1321 break; 1322 case KVM_REG_PPC_TM_GPR0 ... 
KVM_REG_PPC_TM_GPR31: 1323 i = id - KVM_REG_PPC_TM_GPR0; 1324 vcpu->arch.gpr_tm[i] = set_reg_val(id, *val); 1325 break; 1326 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63: 1327 { 1328 int j; 1329 i = id - KVM_REG_PPC_TM_VSR0; 1330 if (i < 32) 1331 for (j = 0; j < TS_FPRWIDTH; j++) 1332 vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j]; 1333 else 1334 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 1335 vcpu->arch.vr_tm.vr[i-32] = val->vval; 1336 else 1337 r = -ENXIO; 1338 break; 1339 } 1340 case KVM_REG_PPC_TM_CR: 1341 vcpu->arch.cr_tm = set_reg_val(id, *val); 1342 break; 1343 case KVM_REG_PPC_TM_LR: 1344 vcpu->arch.lr_tm = set_reg_val(id, *val); 1345 break; 1346 case KVM_REG_PPC_TM_CTR: 1347 vcpu->arch.ctr_tm = set_reg_val(id, *val); 1348 break; 1349 case KVM_REG_PPC_TM_FPSCR: 1350 vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val); 1351 break; 1352 case KVM_REG_PPC_TM_AMR: 1353 vcpu->arch.amr_tm = set_reg_val(id, *val); 1354 break; 1355 case KVM_REG_PPC_TM_PPR: 1356 vcpu->arch.ppr_tm = set_reg_val(id, *val); 1357 break; 1358 case KVM_REG_PPC_TM_VRSAVE: 1359 vcpu->arch.vrsave_tm = set_reg_val(id, *val); 1360 break; 1361 case KVM_REG_PPC_TM_VSCR: 1362 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 1363 vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val); 1364 else 1365 r = - ENXIO; 1366 break; 1367 case KVM_REG_PPC_TM_DSCR: 1368 vcpu->arch.dscr_tm = set_reg_val(id, *val); 1369 break; 1370 case KVM_REG_PPC_TM_TAR: 1371 vcpu->arch.tar_tm = set_reg_val(id, *val); 1372 break; 1373 #endif 1374 case KVM_REG_PPC_ARCH_COMPAT: 1375 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 1376 break; 1377 default: 1378 r = -EINVAL; 1379 break; 1380 } 1381 1382 return r; 1383 } 1384 1385 static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) 1386 { 1387 struct kvmppc_vcore *vcore; 1388 1389 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); 1390 1391 if (vcore == NULL) 1392 return NULL; 1393 1394 INIT_LIST_HEAD(&vcore->runnable_threads); 1395 spin_lock_init(&vcore->lock); 1396 spin_lock_init(&vcore->stoltb_lock); 1397 init_waitqueue_head(&vcore->wq); 1398 vcore->preempt_tb = TB_NIL; 1399 vcore->lpcr = kvm->arch.lpcr; 1400 vcore->first_vcpuid = core * threads_per_subcore; 1401 vcore->kvm = kvm; 1402 1403 vcore->mpp_buffer_is_valid = false; 1404 1405 if (cpu_has_feature(CPU_FTR_ARCH_207S)) 1406 vcore->mpp_buffer = (void *)__get_free_pages( 1407 GFP_KERNEL|__GFP_ZERO, 1408 MPP_BUFFER_ORDER); 1409 1410 return vcore; 1411 } 1412 1413 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, 1414 unsigned int id) 1415 { 1416 struct kvm_vcpu *vcpu; 1417 int err = -EINVAL; 1418 int core; 1419 struct kvmppc_vcore *vcore; 1420 1421 core = id / threads_per_subcore; 1422 if (core >= KVM_MAX_VCORES) 1423 goto out; 1424 1425 err = -ENOMEM; 1426 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1427 if (!vcpu) 1428 goto out; 1429 1430 err = kvm_vcpu_init(vcpu, kvm, id); 1431 if (err) 1432 goto free_vcpu; 1433 1434 vcpu->arch.shared = &vcpu->arch.shregs; 1435 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 1436 /* 1437 * The shared struct is never shared on HV, 1438 * so we can always use host endianness 1439 */ 1440 #ifdef __BIG_ENDIAN__ 1441 vcpu->arch.shared_big_endian = true; 1442 #else 1443 vcpu->arch.shared_big_endian = false; 1444 #endif 1445 #endif 1446 vcpu->arch.mmcr[0] = MMCR0_FC; 1447 vcpu->arch.ctrl = CTRL_RUNLATCH; 1448 /* default to host PVR, since we can't spoof it */ 1449 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR)); 1450 spin_lock_init(&vcpu->arch.vpa_update_lock); 1451 
spin_lock_init(&vcpu->arch.tbacct_lock); 1452 vcpu->arch.busy_preempt = TB_NIL; 1453 vcpu->arch.intr_msr = MSR_SF | MSR_ME; 1454 1455 kvmppc_mmu_book3s_hv_init(vcpu); 1456 1457 vcpu->arch.state = KVMPPC_VCPU_NOTREADY; 1458 1459 init_waitqueue_head(&vcpu->arch.cpu_run); 1460 1461 mutex_lock(&kvm->lock); 1462 vcore = kvm->arch.vcores[core]; 1463 if (!vcore) { 1464 vcore = kvmppc_vcore_create(kvm, core); 1465 kvm->arch.vcores[core] = vcore; 1466 kvm->arch.online_vcores++; 1467 } 1468 mutex_unlock(&kvm->lock); 1469 1470 if (!vcore) 1471 goto free_vcpu; 1472 1473 spin_lock(&vcore->lock); 1474 ++vcore->num_threads; 1475 spin_unlock(&vcore->lock); 1476 vcpu->arch.vcore = vcore; 1477 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid; 1478 1479 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1480 kvmppc_sanity_check(vcpu); 1481 1482 return vcpu; 1483 1484 free_vcpu: 1485 kmem_cache_free(kvm_vcpu_cache, vcpu); 1486 out: 1487 return ERR_PTR(err); 1488 } 1489 1490 static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa) 1491 { 1492 if (vpa->pinned_addr) 1493 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa, 1494 vpa->dirty); 1495 } 1496 1497 static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu) 1498 { 1499 spin_lock(&vcpu->arch.vpa_update_lock); 1500 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl); 1501 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow); 1502 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa); 1503 spin_unlock(&vcpu->arch.vpa_update_lock); 1504 kvm_vcpu_uninit(vcpu); 1505 kmem_cache_free(kvm_vcpu_cache, vcpu); 1506 } 1507 1508 static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu) 1509 { 1510 /* Indicate we want to get back into the guest */ 1511 return 1; 1512 } 1513 1514 static void kvmppc_set_timer(struct kvm_vcpu *vcpu) 1515 { 1516 unsigned long dec_nsec, now; 1517 1518 now = get_tb(); 1519 if (now > vcpu->arch.dec_expires) { 1520 /* decrementer has already gone negative */ 1521 kvmppc_core_queue_dec(vcpu); 1522 kvmppc_core_prepare_to_enter(vcpu); 1523 return; 1524 } 1525 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC 1526 / tb_ticks_per_sec; 1527 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec), 1528 HRTIMER_MODE_REL); 1529 vcpu->arch.timer_running = 1; 1530 } 1531 1532 static void kvmppc_end_cede(struct kvm_vcpu *vcpu) 1533 { 1534 vcpu->arch.ceded = 0; 1535 if (vcpu->arch.timer_running) { 1536 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 1537 vcpu->arch.timer_running = 0; 1538 } 1539 } 1540 1541 extern void __kvmppc_vcore_entry(void); 1542 1543 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, 1544 struct kvm_vcpu *vcpu) 1545 { 1546 u64 now; 1547 1548 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 1549 return; 1550 spin_lock_irq(&vcpu->arch.tbacct_lock); 1551 now = mftb(); 1552 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - 1553 vcpu->arch.stolen_logged; 1554 vcpu->arch.busy_preempt = now; 1555 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; 1556 spin_unlock_irq(&vcpu->arch.tbacct_lock); 1557 --vc->n_runnable; 1558 list_del(&vcpu->arch.run_list); 1559 } 1560 1561 static int kvmppc_grab_hwthread(int cpu) 1562 { 1563 struct paca_struct *tpaca; 1564 long timeout = 10000; 1565 1566 tpaca = &paca[cpu]; 1567 1568 /* Ensure the thread won't go into the kernel if it wakes */ 1569 tpaca->kvm_hstate.hwthread_req = 1; 1570 tpaca->kvm_hstate.kvm_vcpu = NULL; 1571 1572 /* 1573 * If the thread is already executing in the kernel (e.g. handling 1574 * a stray interrupt), wait for it to get back to nap mode. 
1575 * The smp_mb() is to ensure that our setting of hwthread_req 1576 * is visible before we look at hwthread_state, so if this 1577 * races with the code at system_reset_pSeries and the thread 1578 * misses our setting of hwthread_req, we are sure to see its 1579 * setting of hwthread_state, and vice versa. 1580 */ 1581 smp_mb(); 1582 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) { 1583 if (--timeout <= 0) { 1584 pr_err("KVM: couldn't grab cpu %d\n", cpu); 1585 return -EBUSY; 1586 } 1587 udelay(1); 1588 } 1589 return 0; 1590 } 1591 1592 static void kvmppc_release_hwthread(int cpu) 1593 { 1594 struct paca_struct *tpaca; 1595 1596 tpaca = &paca[cpu]; 1597 tpaca->kvm_hstate.hwthread_req = 0; 1598 tpaca->kvm_hstate.kvm_vcpu = NULL; 1599 } 1600 1601 static void kvmppc_start_thread(struct kvm_vcpu *vcpu) 1602 { 1603 int cpu; 1604 struct paca_struct *tpaca; 1605 struct kvmppc_vcore *vc = vcpu->arch.vcore; 1606 1607 if (vcpu->arch.timer_running) { 1608 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 1609 vcpu->arch.timer_running = 0; 1610 } 1611 cpu = vc->pcpu + vcpu->arch.ptid; 1612 tpaca = &paca[cpu]; 1613 tpaca->kvm_hstate.kvm_vcpu = vcpu; 1614 tpaca->kvm_hstate.kvm_vcore = vc; 1615 tpaca->kvm_hstate.ptid = vcpu->arch.ptid; 1616 vcpu->cpu = vc->pcpu; 1617 smp_wmb(); 1618 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 1619 if (cpu != smp_processor_id()) { 1620 xics_wake_cpu(cpu); 1621 if (vcpu->arch.ptid) 1622 ++vc->n_woken; 1623 } 1624 #endif 1625 } 1626 1627 static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc) 1628 { 1629 int i; 1630 1631 HMT_low(); 1632 i = 0; 1633 while (vc->nap_count < vc->n_woken) { 1634 if (++i >= 1000000) { 1635 pr_err("kvmppc_wait_for_nap timeout %d %d\n", 1636 vc->nap_count, vc->n_woken); 1637 break; 1638 } 1639 cpu_relax(); 1640 } 1641 HMT_medium(); 1642 } 1643 1644 /* 1645 * Check that we are on thread 0 and that any other threads in 1646 * this core are off-line. Then grab the threads so they can't 1647 * enter the kernel. 1648 */ 1649 static int on_primary_thread(void) 1650 { 1651 int cpu = smp_processor_id(); 1652 int thr; 1653 1654 /* Are we on a primary subcore? */ 1655 if (cpu_thread_in_subcore(cpu)) 1656 return 0; 1657 1658 thr = 0; 1659 while (++thr < threads_per_subcore) 1660 if (cpu_online(cpu + thr)) 1661 return 0; 1662 1663 /* Grab all hw threads so they can't go into the kernel */ 1664 for (thr = 1; thr < threads_per_subcore; ++thr) { 1665 if (kvmppc_grab_hwthread(cpu + thr)) { 1666 /* Couldn't grab one; let the others go */ 1667 do { 1668 kvmppc_release_hwthread(cpu + thr); 1669 } while (--thr > 0); 1670 return 0; 1671 } 1672 } 1673 return 1; 1674 } 1675 1676 static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) 1677 { 1678 phys_addr_t phy_addr, mpp_addr; 1679 1680 phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); 1681 mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; 1682 1683 mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); 1684 logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); 1685 1686 vc->mpp_buffer_is_valid = true; 1687 } 1688 1689 static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) 1690 { 1691 phys_addr_t phy_addr, mpp_addr; 1692 1693 phy_addr = virt_to_phys(vc->mpp_buffer); 1694 mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; 1695 1696 /* We must abort any in-progress save operations to ensure 1697 * the table is valid so that prefetch engine knows when to 1698 * stop prefetching. 
*/ 1699 logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); 1700 mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); 1701 } 1702 1703 /* 1704 * Run a set of guest threads on a physical core. 1705 * Called with vc->lock held. 1706 */ 1707 static void kvmppc_run_core(struct kvmppc_vcore *vc) 1708 { 1709 struct kvm_vcpu *vcpu, *vnext; 1710 long ret; 1711 u64 now; 1712 int i, need_vpa_update; 1713 int srcu_idx; 1714 struct kvm_vcpu *vcpus_to_update[threads_per_core]; 1715 1716 /* don't start if any threads have a signal pending */ 1717 need_vpa_update = 0; 1718 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1719 if (signal_pending(vcpu->arch.run_task)) 1720 return; 1721 if (vcpu->arch.vpa.update_pending || 1722 vcpu->arch.slb_shadow.update_pending || 1723 vcpu->arch.dtl.update_pending) 1724 vcpus_to_update[need_vpa_update++] = vcpu; 1725 } 1726 1727 /* 1728 * Initialize *vc, in particular vc->vcore_state, so we can 1729 * drop the vcore lock if necessary. 1730 */ 1731 vc->n_woken = 0; 1732 vc->nap_count = 0; 1733 vc->entry_exit_count = 0; 1734 vc->preempt_tb = TB_NIL; 1735 vc->vcore_state = VCORE_STARTING; 1736 vc->in_guest = 0; 1737 vc->napping_threads = 0; 1738 vc->conferring_threads = 0; 1739 1740 /* 1741 * Updating any of the vpas requires calling kvmppc_pin_guest_page, 1742 * which can't be called with any spinlocks held. 1743 */ 1744 if (need_vpa_update) { 1745 spin_unlock(&vc->lock); 1746 for (i = 0; i < need_vpa_update; ++i) 1747 kvmppc_update_vpas(vcpus_to_update[i]); 1748 spin_lock(&vc->lock); 1749 } 1750 1751 /* 1752 * Make sure we are running on primary threads, and that secondary 1753 * threads are offline. Also check if the number of threads in this 1754 * guest are greater than the current system threads per guest. 1755 */ 1756 if ((threads_per_core > 1) && 1757 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { 1758 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 1759 vcpu->arch.ret = -EBUSY; 1760 goto out; 1761 } 1762 1763 1764 vc->pcpu = smp_processor_id(); 1765 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1766 kvmppc_start_thread(vcpu); 1767 kvmppc_create_dtl_entry(vcpu, vc); 1768 trace_kvm_guest_enter(vcpu); 1769 } 1770 1771 /* Set this explicitly in case thread 0 doesn't have a vcpu */ 1772 get_paca()->kvm_hstate.kvm_vcore = vc; 1773 get_paca()->kvm_hstate.ptid = 0; 1774 1775 vc->vcore_state = VCORE_RUNNING; 1776 preempt_disable(); 1777 1778 trace_kvmppc_run_core(vc, 0); 1779 1780 spin_unlock(&vc->lock); 1781 1782 kvm_guest_enter(); 1783 1784 srcu_idx = srcu_read_lock(&vc->kvm->srcu); 1785 1786 if (vc->mpp_buffer_is_valid) 1787 kvmppc_start_restoring_l2_cache(vc); 1788 1789 __kvmppc_vcore_entry(); 1790 1791 spin_lock(&vc->lock); 1792 1793 if (vc->mpp_buffer) 1794 kvmppc_start_saving_l2_cache(vc); 1795 1796 /* disable sending of IPIs on virtual external irqs */ 1797 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) 1798 vcpu->cpu = -1; 1799 /* wait for secondary threads to finish writing their state to memory */ 1800 if (vc->nap_count < vc->n_woken) 1801 kvmppc_wait_for_nap(vc); 1802 for (i = 0; i < threads_per_subcore; ++i) 1803 kvmppc_release_hwthread(vc->pcpu + i); 1804 /* prevent other vcpu threads from doing kvmppc_start_thread() now */ 1805 vc->vcore_state = VCORE_EXITING; 1806 spin_unlock(&vc->lock); 1807 1808 srcu_read_unlock(&vc->kvm->srcu, srcu_idx); 1809 1810 /* make sure updates to secondary vcpu structs are visible now */ 1811 smp_mb(); 1812 kvm_guest_exit(); 1813 1814 
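	/*
	 * Re-enable preemption and give the scheduler a chance to run
	 * before we go through the exit reasons for each runnable vcpu.
	 */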
preempt_enable(); 1815 cond_resched(); 1816 1817 spin_lock(&vc->lock); 1818 now = get_tb(); 1819 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1820 /* cancel pending dec exception if dec is positive */ 1821 if (now < vcpu->arch.dec_expires && 1822 kvmppc_core_pending_dec(vcpu)) 1823 kvmppc_core_dequeue_dec(vcpu); 1824 1825 trace_kvm_guest_exit(vcpu); 1826 1827 ret = RESUME_GUEST; 1828 if (vcpu->arch.trap) 1829 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu, 1830 vcpu->arch.run_task); 1831 1832 vcpu->arch.ret = ret; 1833 vcpu->arch.trap = 0; 1834 1835 if (vcpu->arch.ceded) { 1836 if (!is_kvmppc_resume_guest(ret)) 1837 kvmppc_end_cede(vcpu); 1838 else 1839 kvmppc_set_timer(vcpu); 1840 } 1841 } 1842 1843 out: 1844 vc->vcore_state = VCORE_INACTIVE; 1845 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 1846 arch.run_list) { 1847 if (!is_kvmppc_resume_guest(vcpu->arch.ret)) { 1848 kvmppc_remove_runnable(vc, vcpu); 1849 wake_up(&vcpu->arch.cpu_run); 1850 } 1851 } 1852 1853 trace_kvmppc_run_core(vc, 1); 1854 } 1855 1856 /* 1857 * Wait for some other vcpu thread to execute us, and 1858 * wake us up when we need to handle something in the host. 1859 */ 1860 static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) 1861 { 1862 DEFINE_WAIT(wait); 1863 1864 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state); 1865 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) 1866 schedule(); 1867 finish_wait(&vcpu->arch.cpu_run, &wait); 1868 } 1869 1870 /* 1871 * All the vcpus in this vcore are idle, so wait for a decrementer 1872 * or external interrupt to one of the vcpus. vc->lock is held. 1873 */ 1874 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) 1875 { 1876 struct kvm_vcpu *vcpu; 1877 int do_sleep = 1; 1878 1879 DEFINE_WAIT(wait); 1880 1881 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); 1882 1883 /* 1884 * Check one last time for pending exceptions and ceded state after 1885 * we put ourselves on the wait queue 1886 */ 1887 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { 1888 if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) { 1889 do_sleep = 0; 1890 break; 1891 } 1892 } 1893 1894 if (!do_sleep) { 1895 finish_wait(&vc->wq, &wait); 1896 return; 1897 } 1898 1899 vc->vcore_state = VCORE_SLEEPING; 1900 trace_kvmppc_vcore_blocked(vc, 0); 1901 spin_unlock(&vc->lock); 1902 schedule(); 1903 finish_wait(&vc->wq, &wait); 1904 spin_lock(&vc->lock); 1905 vc->vcore_state = VCORE_INACTIVE; 1906 trace_kvmppc_vcore_blocked(vc, 1); 1907 } 1908 1909 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1910 { 1911 int n_ceded; 1912 struct kvmppc_vcore *vc; 1913 struct kvm_vcpu *v, *vn; 1914 1915 trace_kvmppc_run_vcpu_enter(vcpu); 1916 1917 kvm_run->exit_reason = 0; 1918 vcpu->arch.ret = RESUME_GUEST; 1919 vcpu->arch.trap = 0; 1920 kvmppc_update_vpas(vcpu); 1921 1922 /* 1923 * Synchronize with other threads in this virtual core 1924 */ 1925 vc = vcpu->arch.vcore; 1926 spin_lock(&vc->lock); 1927 vcpu->arch.ceded = 0; 1928 vcpu->arch.run_task = current; 1929 vcpu->arch.kvm_run = kvm_run; 1930 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); 1931 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; 1932 vcpu->arch.busy_preempt = TB_NIL; 1933 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); 1934 ++vc->n_runnable; 1935 1936 /* 1937 * This happens the first time this is called for a vcpu. 1938 * If the vcore is already running, we may be able to start 1939 * this thread straight away and have it join in. 
1940 */ 1941 if (!signal_pending(current)) { 1942 if (vc->vcore_state == VCORE_RUNNING && 1943 VCORE_EXIT_COUNT(vc) == 0) { 1944 kvmppc_create_dtl_entry(vcpu, vc); 1945 kvmppc_start_thread(vcpu); 1946 trace_kvm_guest_enter(vcpu); 1947 } else if (vc->vcore_state == VCORE_SLEEPING) { 1948 wake_up(&vc->wq); 1949 } 1950 1951 } 1952 1953 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 1954 !signal_pending(current)) { 1955 if (vc->vcore_state != VCORE_INACTIVE) { 1956 spin_unlock(&vc->lock); 1957 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); 1958 spin_lock(&vc->lock); 1959 continue; 1960 } 1961 list_for_each_entry_safe(v, vn, &vc->runnable_threads, 1962 arch.run_list) { 1963 kvmppc_core_prepare_to_enter(v); 1964 if (signal_pending(v->arch.run_task)) { 1965 kvmppc_remove_runnable(vc, v); 1966 v->stat.signal_exits++; 1967 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR; 1968 v->arch.ret = -EINTR; 1969 wake_up(&v->arch.cpu_run); 1970 } 1971 } 1972 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) 1973 break; 1974 vc->runner = vcpu; 1975 n_ceded = 0; 1976 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { 1977 if (!v->arch.pending_exceptions) 1978 n_ceded += v->arch.ceded; 1979 else 1980 v->arch.ceded = 0; 1981 } 1982 if (n_ceded == vc->n_runnable) 1983 kvmppc_vcore_blocked(vc); 1984 else 1985 kvmppc_run_core(vc); 1986 vc->runner = NULL; 1987 } 1988 1989 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && 1990 (vc->vcore_state == VCORE_RUNNING || 1991 vc->vcore_state == VCORE_EXITING)) { 1992 spin_unlock(&vc->lock); 1993 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); 1994 spin_lock(&vc->lock); 1995 } 1996 1997 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { 1998 kvmppc_remove_runnable(vc, vcpu); 1999 vcpu->stat.signal_exits++; 2000 kvm_run->exit_reason = KVM_EXIT_INTR; 2001 vcpu->arch.ret = -EINTR; 2002 } 2003 2004 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { 2005 /* Wake up some vcpu to run the core */ 2006 v = list_first_entry(&vc->runnable_threads, 2007 struct kvm_vcpu, arch.run_list); 2008 wake_up(&v->arch.cpu_run); 2009 } 2010 2011 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run); 2012 spin_unlock(&vc->lock); 2013 return vcpu->arch.ret; 2014 } 2015 2016 static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) 2017 { 2018 int r; 2019 int srcu_idx; 2020 2021 if (!vcpu->arch.sane) { 2022 run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2023 return -EINVAL; 2024 } 2025 2026 kvmppc_core_prepare_to_enter(vcpu); 2027 2028 /* No need to go into the guest when all we'll do is come back out */ 2029 if (signal_pending(current)) { 2030 run->exit_reason = KVM_EXIT_INTR; 2031 return -EINTR; 2032 } 2033 2034 atomic_inc(&vcpu->kvm->arch.vcpus_running); 2035 /* Order vcpus_running vs. 

static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
        int r;
        int srcu_idx;

        if (!vcpu->arch.sane) {
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINVAL;
        }

        kvmppc_core_prepare_to_enter(vcpu);

        /* No need to go into the guest when all we'll do is come back out */
        if (signal_pending(current)) {
                run->exit_reason = KVM_EXIT_INTR;
                return -EINTR;
        }

        atomic_inc(&vcpu->kvm->arch.vcpus_running);
        /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
        smp_mb();

        /* On the first time here, set up HTAB and VRMA */
        if (!vcpu->kvm->arch.rma_setup_done) {
                r = kvmppc_hv_setup_htab_rma(vcpu);
                if (r)
                        goto out;
        }

        flush_fp_to_thread(current);
        flush_altivec_to_thread(current);
        flush_vsx_to_thread(current);
        vcpu->arch.wqp = &vcpu->arch.vcore->wq;
        vcpu->arch.pgdir = current->mm->pgd;
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

        do {
                r = kvmppc_run_vcpu(run, vcpu);

                if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        trace_kvm_hcall_enter(vcpu);
                        r = kvmppc_pseries_do_hcall(vcpu);
                        trace_kvm_hcall_exit(vcpu, r);
                        kvmppc_core_prepare_to_enter(vcpu);
                } else if (r == RESUME_PAGE_FAULT) {
                        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                        r = kvmppc_book3s_hv_page_fault(run, vcpu,
                                vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
                        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
                }
        } while (is_kvmppc_resume_guest(r));

 out:
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        atomic_dec(&vcpu->kvm->arch.vcpus_running);
        return r;
}

static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
                                     int linux_psize)
{
        struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];

        if (!def->shift)
                return;
        (*sps)->page_shift = def->shift;
        (*sps)->slb_enc = def->sllp;
        (*sps)->enc[0].page_shift = def->shift;
        (*sps)->enc[0].pte_enc = def->penc[linux_psize];
        /*
         * Add 16MB MPSS support if host supports it
         */
        if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
                (*sps)->enc[1].page_shift = 24;
                (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
        }
        (*sps)++;
}

static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
                                         struct kvm_ppc_smmu_info *info)
{
        struct kvm_ppc_one_seg_page_size *sps;

        info->flags = KVM_PPC_PAGE_SIZES_REAL;
        if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
                info->flags |= KVM_PPC_1T_SEGMENTS;
        info->slb_size = mmu_slb_size;

        /* We only support these sizes for now, and no multi-size segments */
        sps = &info->sps[0];
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
        kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);

        return 0;
}
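
/*
 * Illustrative userspace sketch (not part of this file): querying the
 * segment page sizes that kvm_vm_ioctl_get_smmu_info_hv() above reports,
 * via the KVM_PPC_GET_SMMU_INFO vm ioctl from <linux/kvm.h>.  vm_fd is a
 * placeholder for an open VM file descriptor; error handling is omitted.
 *
 *      #include <stdio.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      struct kvm_ppc_smmu_info info;
 *      int i;
 *
 *      if (ioctl(vm_fd, KVM_PPC_GET_SMMU_INFO, &info) == 0)
 *              for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++)
 *                      if (info.sps[i].page_shift)
 *                              printf("seg page shift %u, slb_enc 0x%x\n",
 *                                     info.sps[i].page_shift,
 *                                     info.sps[i].slb_enc);
 */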

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
                                         struct kvm_dirty_log *log)
{
        struct kvm_memory_slot *memslot;
        int r;
        unsigned long n;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        memslot = id_to_memslot(kvm->memslots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        n = kvm_dirty_bitmap_bytes(memslot);
        memset(memslot->dirty_bitmap, 0, n);

        r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
        if (r)
                goto out;

        r = -EFAULT;
        if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
                goto out;

        r = 0;
 out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}
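
/*
 * Illustrative userspace sketch (not part of this file): retrieving the
 * bitmap filled in by kvm_vm_ioctl_get_dirty_log_hv() above through the
 * generic KVM_GET_DIRTY_LOG vm ioctl.  vm_fd, slot_id, slot_npages and
 * scan_bitmap() are placeholders supplied by the caller; one bit is
 * reported per guest page, rounded up to 64-bit words on a 64-bit host,
 * and error handling is omitted.
 *
 *      #include <stdlib.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      size_t len = ((slot_npages + 63) / 64) * sizeof(__u64);
 *      void *bitmap = calloc(1, len);
 *      struct kvm_dirty_log log = {
 *              .slot = slot_id,
 *              .dirty_bitmap = bitmap,
 *      };
 *
 *      if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) == 0)
 *              scan_bitmap(bitmap, slot_npages);
 */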

static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
                                        struct kvm_memory_slot *dont)
{
        if (!dont || free->arch.rmap != dont->arch.rmap) {
                vfree(free->arch.rmap);
                free->arch.rmap = NULL;
        }
}

static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
                                         unsigned long npages)
{
        slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
        if (!slot->arch.rmap)
                return -ENOMEM;

        return 0;
}

static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot,
                                        struct kvm_userspace_memory_region *mem)
{
        return 0;
}

static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
                                struct kvm_userspace_memory_region *mem,
                                const struct kvm_memory_slot *old)
{
        unsigned long npages = mem->memory_size >> PAGE_SHIFT;
        struct kvm_memory_slot *memslot;

        if (npages && old->npages) {
                /*
                 * If modifying a memslot, reset all the rmap dirty bits.
                 * If this is a new memslot, we don't need to do anything
                 * since the rmap array starts out as all zeroes,
                 * i.e. no pages are dirty.
                 */
                memslot = id_to_memslot(kvm->memslots, mem->slot);
                kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
        }
}

/*
 * Update LPCR values in kvm->arch and in vcores.
 * Caller must hold kvm->lock.
 */
void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
{
        long int i;
        u32 cores_done = 0;

        if ((kvm->arch.lpcr & mask) == lpcr)
                return;

        kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;

        for (i = 0; i < KVM_MAX_VCORES; ++i) {
                struct kvmppc_vcore *vc = kvm->arch.vcores[i];
                if (!vc)
                        continue;
                spin_lock(&vc->lock);
                vc->lpcr = (vc->lpcr & ~mask) | lpcr;
                spin_unlock(&vc->lock);
                if (++cores_done >= kvm->arch.online_vcores)
                        break;
        }
}

static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
{
        return;
}

static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
        int err = 0;
        struct kvm *kvm = vcpu->kvm;
        unsigned long hva;
        struct kvm_memory_slot *memslot;
        struct vm_area_struct *vma;
        unsigned long lpcr = 0, senc;
        unsigned long psize, porder;
        int srcu_idx;

        mutex_lock(&kvm->lock);
        if (kvm->arch.rma_setup_done)
                goto out;       /* another vcpu beat us to it */

        /* Allocate hashed page table (if not done already) and reset it */
        if (!kvm->arch.hpt_virt) {
                err = kvmppc_alloc_hpt(kvm, NULL);
                if (err) {
                        pr_err("KVM: Couldn't alloc HPT\n");
                        goto out;
                }
        }

        /* Look up the memslot for guest physical address 0 */
        srcu_idx = srcu_read_lock(&kvm->srcu);
        memslot = gfn_to_memslot(kvm, 0);

        /* We must have some memory at 0 by now */
        err = -EINVAL;
        if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
                goto out_srcu;

        /* Look up the VMA for the start of this memory slot */
        hva = memslot->userspace_addr;
        down_read(&current->mm->mmap_sem);
        vma = find_vma(current->mm, hva);
        if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
                goto up_out;

        psize = vma_kernel_pagesize(vma);
        porder = __ilog2(psize);

        up_read(&current->mm->mmap_sem);

        /* We can handle 4k, 64k or 16M pages in the VRMA */
        err = -EINVAL;
        if (!(psize == 0x1000 || psize == 0x10000 ||
              psize == 0x1000000))
                goto out_srcu;

        /* Update VRMASD field in the LPCR */
        senc = slb_pgsize_encoding(psize);
        kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
                (VRMA_VSID << SLB_VSID_SHIFT_1T);
        /* the -4 is to account for senc values starting at 0x10 */
        lpcr = senc << (LPCR_VRMASD_SH - 4);

        /* Create HPTEs in the hash page table for the VRMA */
        kvmppc_map_vrma(vcpu, memslot, porder);

        kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);

        /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
        smp_wmb();
        kvm->arch.rma_setup_done = 1;
        err = 0;
 out_srcu:
        srcu_read_unlock(&kvm->srcu, srcu_idx);
 out:
        mutex_unlock(&kvm->lock);
        return err;

 up_out:
        up_read(&current->mm->mmap_sem);
        goto out_srcu;
}
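
/*
 * VM-level initialization for HV mode: allocate an LPID for the guest,
 * mark every core as needing a TLB flush before it first runs this LPID
 * (since the TLB is not flushed when a VM is torn down), enable the
 * default set of hypercalls, and derive the guest LPCR from the host
 * value for virtual RMA mode before marking an HV VM as active.
 */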

static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
        unsigned long lpcr, lpid;

        /* Allocate the guest's logical partition ID */

        lpid = kvmppc_alloc_lpid();
        if ((long)lpid < 0)
                return -ENOMEM;
        kvm->arch.lpid = lpid;

        /*
         * Since we don't flush the TLB when tearing down a VM,
         * and this lpid might have previously been used,
         * make sure we flush on each core before running the new VM.
         */
        cpumask_setall(&kvm->arch.need_tlb_flush);

        /* Start out with the default set of hcalls enabled */
        memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
               sizeof(kvm->arch.enabled_hcalls));

        kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);

        /* Init LPCR for virtual RMA mode */
        kvm->arch.host_lpid = mfspr(SPRN_LPID);
        kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
        lpcr &= LPCR_PECE | LPCR_LPES;
        lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
                LPCR_VPM0 | LPCR_VPM1;
        kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
                (VRMA_VSID << SLB_VSID_SHIFT_1T);
        /* On POWER8 turn on online bit to enable PURR/SPURR */
        if (cpu_has_feature(CPU_FTR_ARCH_207S))
                lpcr |= LPCR_ONL;
        kvm->arch.lpcr = lpcr;

        /*
         * Track that we now have a HV mode VM active. This blocks secondary
         * CPU threads from coming online.
         */
        kvm_hv_vm_activated();

        return 0;
}

static void kvmppc_free_vcores(struct kvm *kvm)
{
        long int i;

        for (i = 0; i < KVM_MAX_VCORES; ++i) {
                if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) {
                        struct kvmppc_vcore *vc = kvm->arch.vcores[i];
                        free_pages((unsigned long)vc->mpp_buffer,
                                   MPP_BUFFER_ORDER);
                }
                kfree(kvm->arch.vcores[i]);
        }
        kvm->arch.online_vcores = 0;
}

static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
        kvm_hv_vm_deactivated();

        kvmppc_free_vcores(kvm);

        kvmppc_free_hpt(kvm);
}

/* We don't need to emulate any privileged instructions or dcbz */
static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                     unsigned int inst, int *advance)
{
        return EMULATE_FAIL;
}

static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
                                        ulong spr_val)
{
        return EMULATE_FAIL;
}

static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
                                        ulong *spr_val)
{
        return EMULATE_FAIL;
}

static int kvmppc_core_check_processor_compat_hv(void)
{
        if (!cpu_has_feature(CPU_FTR_HVMODE) ||
            !cpu_has_feature(CPU_FTR_ARCH_206))
                return -EIO;
        return 0;
}

static long kvm_arch_vm_ioctl_hv(struct file *filp,
                                 unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm __maybe_unused = filp->private_data;
        void __user *argp = (void __user *)arg;
        long r;

        switch (ioctl) {

        case KVM_PPC_ALLOCATE_HTAB: {
                u32 htab_order;

                r = -EFAULT;
                if (get_user(htab_order, (u32 __user *)argp))
                        break;
                r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
                if (r)
                        break;
                r = -EFAULT;
                if (put_user(htab_order, (u32 __user *)argp))
                        break;
                r = 0;
                break;
        }

        case KVM_PPC_GET_HTAB_FD: {
                struct kvm_get_htab_fd ghf;

                r = -EFAULT;
                if (copy_from_user(&ghf, argp, sizeof(ghf)))
                        break;
                r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
                break;
        }

        default:
                r = -ENOTTY;
        }

        return r;
}
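
/*
 * Illustrative userspace sketch (not part of this file): allocating or
 * resetting the guest HPT through the KVM_PPC_ALLOCATE_HTAB vm ioctl
 * handled above.  The argument is a pointer to a __u32 giving the
 * requested order (log2 of the HPT size in bytes); on return it holds
 * the order actually used.  vm_fd and the value 24 are example
 * placeholders; error handling is omitted.
 *
 *      #include <stdio.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      __u32 htab_order = 24;
 *      if (ioctl(vm_fd, KVM_PPC_ALLOCATE_HTAB, &htab_order) == 0)
 *              printf("HPT order is now %u\n", htab_order);
 */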

/*
 * List of hcall numbers to enable by default.
 * For compatibility with old userspace, we enable by default
 * all hcalls that were implemented before the hcall-enabling
 * facility was added.  Note this list should not include H_RTAS.
 */
static unsigned int default_hcall_list[] = {
        H_REMOVE,
        H_ENTER,
        H_READ,
        H_PROTECT,
        H_BULK_REMOVE,
        H_GET_TCE,
        H_PUT_TCE,
        H_SET_DABR,
        H_SET_XDABR,
        H_CEDE,
        H_PROD,
        H_CONFER,
        H_REGISTER_VPA,
#ifdef CONFIG_KVM_XICS
        H_EOI,
        H_CPPR,
        H_IPI,
        H_IPOLL,
        H_XIRR,
        H_XIRR_X,
#endif
        0
};

static void init_default_hcalls(void)
{
        int i;
        unsigned int hcall;

        for (i = 0; default_hcall_list[i]; ++i) {
                hcall = default_hcall_list[i];
                WARN_ON(!kvmppc_hcall_impl_hv(hcall));
                __set_bit(hcall / 4, default_enabled_hcalls);
        }
}

static struct kvmppc_ops kvm_ops_hv = {
        .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
        .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
        .get_one_reg = kvmppc_get_one_reg_hv,
        .set_one_reg = kvmppc_set_one_reg_hv,
        .vcpu_load = kvmppc_core_vcpu_load_hv,
        .vcpu_put = kvmppc_core_vcpu_put_hv,
        .set_msr = kvmppc_set_msr_hv,
        .vcpu_run = kvmppc_vcpu_run_hv,
        .vcpu_create = kvmppc_core_vcpu_create_hv,
        .vcpu_free = kvmppc_core_vcpu_free_hv,
        .check_requests = kvmppc_core_check_requests_hv,
        .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
        .flush_memslot = kvmppc_core_flush_memslot_hv,
        .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
        .commit_memory_region = kvmppc_core_commit_memory_region_hv,
        .unmap_hva = kvm_unmap_hva_hv,
        .unmap_hva_range = kvm_unmap_hva_range_hv,
        .age_hva = kvm_age_hva_hv,
        .test_age_hva = kvm_test_age_hva_hv,
        .set_spte_hva = kvm_set_spte_hva_hv,
        .mmu_destroy = kvmppc_mmu_destroy_hv,
        .free_memslot = kvmppc_core_free_memslot_hv,
        .create_memslot = kvmppc_core_create_memslot_hv,
        .init_vm = kvmppc_core_init_vm_hv,
        .destroy_vm = kvmppc_core_destroy_vm_hv,
        .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
        .emulate_op = kvmppc_core_emulate_op_hv,
        .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
        .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
        .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
        .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
        .hcall_implemented = kvmppc_hcall_impl_hv,
};

static int kvmppc_book3s_init_hv(void)
{
        int r;
        /*
         * FIXME!! Do we need to check on all cpus ?
         */
        r = kvmppc_core_check_processor_compat_hv();
        if (r < 0)
                return -ENODEV;

        kvm_ops_hv.owner = THIS_MODULE;
        kvmppc_hv_ops = &kvm_ops_hv;

        init_default_hcalls();

        r = kvmppc_mmu_hv_init();
        return r;
}

static void kvmppc_book3s_exit_hv(void)
{
        kvmppc_hv_ops = NULL;
}

module_init(kvmppc_book3s_init_hv);
module_exit(kvmppc_book3s_exit_hv);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
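
/*
 * Illustrative userspace sketch (not part of this file): hcalls that are
 * not in default_hcall_list above can be turned on or off per VM.  This
 * assumes the KVM_CAP_PPC_ENABLE_HCALL capability, enabled through the
 * KVM_ENABLE_CAP vm ioctl with the hcall number in args[0] and an
 * enable/disable flag in args[1]; see the KVM API documentation for the
 * authoritative interface.  vm_fd and hcall_nr are placeholders, and
 * error handling is omitted.
 *
 *      #include <sys/ioctl.h>
 *      #include <linux/kvm.h>
 *
 *      struct kvm_enable_cap cap = {
 *              .cap = KVM_CAP_PPC_ENABLE_HCALL,
 *              .args = { hcall_nr, 1 },
 *      };
 *      ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */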