/*
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *    Paul Mackerras <paulus@samba.org>
 *
 * Description:
 * Functions relating to running KVM on Book 3S processors where
 * we don't have access to hypervisor mode, and we run the guest
 * in problem state (user mode).
 *
 * This file is derived from arch/powerpc/kvm/44x.c,
 * by Hollis Blanchard <hollisb@us.ibm.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/slab.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/miscdevice.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_pr.h"

/* #define EXIT_DEBUG */
/* #define DEBUG_EXT */

static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
			     ulong msr);

/* Some compatibility defines */
#ifdef CONFIG_PPC_BOOK3S_32
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
#endif

static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
	svcpu->in_use = 0;
	svcpu_put(svcpu);
#endif
	vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
#endif
}

static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
	if (svcpu->in_use) {
		kvmppc_copy_from_svcpu(vcpu, svcpu);
	}
	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
	svcpu_put(svcpu);
#endif

	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
	vcpu->cpu = -1;
}

/* Copy data needed by real-mode code from vcpu to shadow vcpu */
void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
			  struct kvm_vcpu *vcpu)
{
	svcpu->gpr[0] = vcpu->arch.gpr[0];
	svcpu->gpr[1] = vcpu->arch.gpr[1];
	svcpu->gpr[2] = vcpu->arch.gpr[2];
	svcpu->gpr[3] = vcpu->arch.gpr[3];
	svcpu->gpr[4] = vcpu->arch.gpr[4];
	svcpu->gpr[5] = vcpu->arch.gpr[5];
	svcpu->gpr[6] = vcpu->arch.gpr[6];
	svcpu->gpr[7] = vcpu->arch.gpr[7];
	svcpu->gpr[8] = vcpu->arch.gpr[8];
	svcpu->gpr[9] = vcpu->arch.gpr[9];
	svcpu->gpr[10] = vcpu->arch.gpr[10];
	svcpu->gpr[11] = vcpu->arch.gpr[11];
	svcpu->gpr[12] = vcpu->arch.gpr[12];
	svcpu->gpr[13] = vcpu->arch.gpr[13];
	svcpu->cr  = vcpu->arch.cr;
	svcpu->xer = vcpu->arch.xer;
	svcpu->ctr = vcpu->arch.ctr;
	svcpu->lr  = vcpu->arch.lr;
	svcpu->pc  = vcpu->arch.pc;
	svcpu->in_use = true;
}

/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
			    struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	/*
	 * vcpu_put would just call us again because in_use hasn't
	 * been updated yet.
	 */
	preempt_disable();

	/*
	 * Maybe we were already preempted and synced the svcpu from
	 * our preempt notifiers. Don't bother touching this svcpu then.
	 */
	if (!svcpu->in_use)
		goto out;

	vcpu->arch.gpr[0] = svcpu->gpr[0];
	vcpu->arch.gpr[1] = svcpu->gpr[1];
	vcpu->arch.gpr[2] = svcpu->gpr[2];
	vcpu->arch.gpr[3] = svcpu->gpr[3];
	vcpu->arch.gpr[4] = svcpu->gpr[4];
	vcpu->arch.gpr[5] = svcpu->gpr[5];
	vcpu->arch.gpr[6] = svcpu->gpr[6];
	vcpu->arch.gpr[7] = svcpu->gpr[7];
	vcpu->arch.gpr[8] = svcpu->gpr[8];
	vcpu->arch.gpr[9] = svcpu->gpr[9];
	vcpu->arch.gpr[10] = svcpu->gpr[10];
	vcpu->arch.gpr[11] = svcpu->gpr[11];
	vcpu->arch.gpr[12] = svcpu->gpr[12];
	vcpu->arch.gpr[13] = svcpu->gpr[13];
	vcpu->arch.cr  = svcpu->cr;
	vcpu->arch.xer = svcpu->xer;
	vcpu->arch.ctr = svcpu->ctr;
	vcpu->arch.lr  = svcpu->lr;
	vcpu->arch.pc  = svcpu->pc;
	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
	vcpu->arch.fault_dar   = svcpu->fault_dar;
	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
	vcpu->arch.last_inst   = svcpu->last_inst;
	svcpu->in_use = false;

out:
	preempt_enable();
}

static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
{
	int r = 1; /* Indicate we want to get back into the guest */

	/* We misuse TLB_FLUSH to indicate that we want to clear
	   all shadow cache entries */
	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
		kvmppc_mmu_pte_flush(vcpu, 0, 0);

	return r;
}

/************* MMU Notifiers *************/
static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
			     unsigned long end)
{
	long i;
	struct kvm_vcpu *vcpu;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;
		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn, gfn+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
		kvm_for_each_vcpu(i, vcpu, kvm)
			kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
					      gfn_end << PAGE_SHIFT);
	}
}

static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_unmap_hva(hva);

	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);

	return 0;
}

static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
				  unsigned long end)
{
	do_kvm_unmap_hva(kvm, start, end);

	return 0;
}

static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	/* The page will get remapped properly on its next fault */
	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
}

/*****************************************/

static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
	ulong smsr = vcpu->arch.shared->msr;

	/* Guest MSR values */
	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
	/* Process MSR values */
	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
	/* External providers the guest reserved */
	smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
	/* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
	smsr |= MSR_ISF | MSR_HV;
#endif
	vcpu->arch.shadow_msr = smsr;
}

static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
{
	ulong old_msr = vcpu->arch.shared->msr;

#ifdef EXIT_DEBUG
	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif

	msr &= to_book3s(vcpu)->msr_mask;
	vcpu->arch.shared->msr = msr;
	kvmppc_recalc_shadow_msr(vcpu);

	if (msr & MSR_POW) {
		if (!vcpu->arch.pending_exceptions) {
			kvm_vcpu_block(vcpu);
			clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
			vcpu->stat.halt_wakeup++;

			/* Unset POW bit after we woke up */
			msr &= ~MSR_POW;
			vcpu->arch.shared->msr = msr;
		}
	}

	if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
		kvmppc_mmu_flush_segments(vcpu);
		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));

		/* Preload magic page segment when in kernel mode */
		if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
			struct kvm_vcpu_arch *a = &vcpu->arch;

			if (msr & MSR_DR)
				kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
			else
				kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
		}
	}

	/*
	 * When switching from 32 to 64-bit, we may have a stale 32-bit
	 * magic page around, we need to flush it. Typically the 32-bit magic
	 * page will be instantiated when calling into RTAS. Note: We
	 * assume that such a transition only happens while in kernel mode,
	 * ie, we never transition from user 32-bit to kernel 64-bit with
	 * a 32-bit magic page around.
	 */
	if (vcpu->arch.magic_page_pa &&
	    !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
		/* going from RTAS to normal kernel code */
		kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
				     ~0xFFFUL);
	}

	/* Preload FPU if it's enabled */
	if (vcpu->arch.shared->msr & MSR_FP)
		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
}

void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
{
	u32 host_pvr;

	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
	vcpu->arch.pvr = pvr;
#ifdef CONFIG_PPC_BOOK3S_64
	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
		kvmppc_mmu_book3s_64_init(vcpu);
		if (!to_book3s(vcpu)->hior_explicit)
			to_book3s(vcpu)->hior = 0xfff00000;
		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
		vcpu->arch.cpu_type = KVM_CPU_3S_64;
	} else
#endif
	{
		kvmppc_mmu_book3s_32_init(vcpu);
		if (!to_book3s(vcpu)->hior_explicit)
			to_book3s(vcpu)->hior = 0;
		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
		vcpu->arch.cpu_type = KVM_CPU_3S_32;
	}

	kvmppc_sanity_check(vcpu);

	/* If we are in hypervisor level on 970, we can tell the CPU to
	 * treat DCBZ as a 32-byte store */
	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
	if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
	    !strcmp(cur_cpu_spec->platform, "ppc970"))
		vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;

	/* Cell performs badly if MSR_FEx are set. So let's hope nobody
	   really needs them in a VM on Cell and force disable them. */
	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);

	/*
	 * If they're asking for POWER6 or later, set the flag
	 * indicating that we can do multiple large page sizes
	 * and 1TB segments.
	 * Also set the flag that indicates that tlbie has the large
	 * page bit in the RB operand instead of the instruction.
	 */
	switch (PVR_VER(pvr)) {
	case PVR_POWER6:
	case PVR_POWER7:
	case PVR_POWER7p:
	case PVR_POWER8:
		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
			BOOK3S_HFLAG_NEW_TLBIE;
		break;
	}

#ifdef CONFIG_PPC_BOOK3S_32
	/* 32 bit Book3S always has 32 byte dcbz */
	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
#endif

	/* On some CPUs we can execute paired single operations natively */
	asm ( "mfpvr %0" : "=r"(host_pvr));
	switch (host_pvr) {
	case 0x00080200:	/* lonestar 2.0 */
	case 0x00088202:	/* lonestar 2.2 */
	case 0x70000100:	/* gekko 1.0 */
	case 0x00080100:	/* gekko 2.0 */
	case 0x00083203:	/* gekko 2.3a */
	case 0x00083213:	/* gekko 2.3b */
	case 0x00083204:	/* gekko 2.4 */
	case 0x00083214:	/* gekko 2.4e (8SE) - retail HW2 */
	case 0x00087200:	/* broadway */
		vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
		/* Enable HID2.PSE - in case we need it later */
		mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
	}
}

/* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes. To
 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to
 * emulate a 32-byte dcbz length.
 *
 * The Book3s_64 inventors also realized this case and implemented a special bit
 * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
 *
 * My approach here is to patch the dcbz instruction on executing pages.
 */
static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
{
	struct page *hpage;
	u64 hpage_offset;
	u32 *page;
	int i;

	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
	if (is_error_page(hpage))
		return;

	hpage_offset = pte->raddr & ~PAGE_MASK;
	hpage_offset &= ~0xFFFULL;
	hpage_offset /= 4;

	get_page(hpage);
	page = kmap_atomic(hpage);

	/* patch dcbz into reserved instruction, so we trap */
	for (i = hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
		if ((page[i] & 0xff0007ff) == INS_DCBZ)
			page[i] &= 0xfffffff7;

	kunmap_atomic(page);
	put_page(hpage);
}

static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	ulong mp_pa = vcpu->arch.magic_page_pa;

	if (!(vcpu->arch.shared->msr & MSR_SF))
		mp_pa = (uint32_t)mp_pa;

	if (unlikely(mp_pa) &&
	    unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
		return 1;
	}

	return kvm_is_visible_gfn(vcpu->kvm, gfn);
}

int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
			    ulong eaddr, int vec)
{
	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
	bool iswrite = false;
	int r = RESUME_GUEST;
	int relocated;
	int page_found = 0;
	struct kvmppc_pte pte;
	bool is_mmio = false;
	bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
	bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
	u64 vsid;

	relocated = data ? dr : ir;
	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
		iswrite = true;

	/* Resolve real address if translation turned on */
	if (relocated) {
		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
	} else {
		pte.may_execute = true;
		pte.may_read = true;
		pte.may_write = true;
		pte.raddr = eaddr & KVM_PAM;
		pte.eaddr = eaddr;
		pte.vpage = eaddr >> 12;
		pte.page_size = MMU_PAGE_64K;
	}

	switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
	case 0:
		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
		break;
	case MSR_DR:
	case MSR_IR:
		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);

		if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
		else
			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
		pte.vpage |= vsid;

		if (vsid == -1)
			page_found = -EINVAL;
		break;
	}

	if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
	   (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
		/*
		 * If we do the dcbz hack, we have to NX on every execution,
		 * so we can patch the executing code. This renders our guest
		 * NX-less.
		 */
		pte.may_execute = !data;
	}

	if (page_found == -ENOENT) {
		/* Page not found in guest PTE entries */
		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
		vcpu->arch.shared->msr |=
			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
		kvmppc_book3s_queue_irqprio(vcpu, vec);
	} else if (page_found == -EPERM) {
		/* Storage protection */
		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
		vcpu->arch.shared->msr |=
			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
		kvmppc_book3s_queue_irqprio(vcpu, vec);
	} else if (page_found == -EINVAL) {
		/* Page not found in guest SLB */
		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
	} else if (!is_mmio &&
		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
			/*
			 * There is already a host HPTE there, presumably
			 * a read-only one for a page the guest thinks
			 * is writable, so get rid of it first.
			 */
			kvmppc_mmu_unmap_page(vcpu, &pte);
		}
		/* The guest's PTE is not mapped yet. Map on the host */
		kvmppc_mmu_map_page(vcpu, &pte, iswrite);
		if (data)
			vcpu->stat.sp_storage++;
		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
			kvmppc_patch_dcbz(vcpu, &pte);
	} else {
		/* MMIO */
		vcpu->stat.mmio_exits++;
		vcpu->arch.paddr_accessed = pte.raddr;
		vcpu->arch.vaddr_accessed = pte.eaddr;
		r = kvmppc_emulate_mmio(run, vcpu);
		if ( r == RESUME_HOST_NV )
			r = RESUME_HOST;
	}

	return r;
}

static inline int get_fpr_index(int i)
{
	return i * TS_FPRWIDTH;
}

/* Give up external provider (FPU, Altivec, VSX) */
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
	struct thread_struct *t = &current->thread;

	/*
	 * VSX instructions can access FP and vector registers, so if
	 * we are giving up VSX, make sure we give up FP and VMX as well.
	 */
	if (msr & MSR_VSX)
		msr |= MSR_FP | MSR_VEC;

	msr &= vcpu->arch.guest_owned_ext;
	if (!msr)
		return;

#ifdef DEBUG_EXT
	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif

	if (msr & MSR_FP) {
		/*
		 * Note that on CPUs with VSX, giveup_fpu stores
		 * both the traditional FP registers and the added VSX
		 * registers into thread.fp_state.fpr[].
		 */
		if (t->regs->msr & MSR_FP)
			giveup_fpu(current);
		t->fp_save_area = NULL;
	}

#ifdef CONFIG_ALTIVEC
	if (msr & MSR_VEC) {
		if (current->thread.regs->msr & MSR_VEC)
			giveup_altivec(current);
		t->vr_save_area = NULL;
	}
#endif

	vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
	kvmppc_recalc_shadow_msr(vcpu);
}

static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
{
	ulong srr0 = kvmppc_get_pc(vcpu);
	u32 last_inst = kvmppc_get_last_inst(vcpu);
	int ret;

	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
	if (ret == -ENOENT) {
		ulong msr = vcpu->arch.shared->msr;

		msr = kvmppc_set_field(msr, 33, 33, 1);
		msr = kvmppc_set_field(msr, 34, 36, 0);
		vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
		return EMULATE_AGAIN;
	}

	return EMULATE_DONE;
}

static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{

	/* Need to do paired single emulation? */
	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
		return EMULATE_DONE;

	/* Read out the instruction */
	if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
		/* Need to emulate */
		return EMULATE_FAIL;

	return EMULATE_AGAIN;
}

/* Handle external providers (FPU, Altivec, VSX) */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
			     ulong msr)
{
	struct thread_struct *t = &current->thread;

	/* When we have paired singles, we emulate in software */
	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
		return RESUME_GUEST;

	if (!(vcpu->arch.shared->msr & msr)) {
		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		return RESUME_GUEST;
	}

	if (msr == MSR_VSX) {
		/* No VSX? Give an illegal instruction interrupt */
#ifdef CONFIG_VSX
		if (!cpu_has_feature(CPU_FTR_VSX))
#endif
		{
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			return RESUME_GUEST;
		}

		/*
		 * We have to load up all the FP and VMX registers before
		 * we can let the guest use VSX instructions.
		 */
		msr = MSR_FP | MSR_VEC | MSR_VSX;
	}

	/* See if we already own all the ext(s) needed */
	msr &= ~vcpu->arch.guest_owned_ext;
	if (!msr)
		return RESUME_GUEST;

#ifdef DEBUG_EXT
	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif

	if (msr & MSR_FP) {
		enable_kernel_fp();
		load_fp_state(&vcpu->arch.fp);
		t->fp_save_area = &vcpu->arch.fp;
	}

	if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
		enable_kernel_altivec();
		load_vr_state(&vcpu->arch.vr);
		t->vr_save_area = &vcpu->arch.vr;
#endif
	}

	t->regs->msr |= msr;
	vcpu->arch.guest_owned_ext |= msr;
	kvmppc_recalc_shadow_msr(vcpu);

	return RESUME_GUEST;
}

/*
 * Kernel code using FP or VMX could have flushed guest state to
 * the thread_struct; if so, get it back now.
 */
static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
{
	unsigned long lost_ext;

	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
	if (!lost_ext)
		return;

	if (lost_ext & MSR_FP) {
		enable_kernel_fp();
		load_fp_state(&vcpu->arch.fp);
	}
#ifdef CONFIG_ALTIVEC
	if (lost_ext & MSR_VEC) {
		enable_kernel_altivec();
		load_vr_state(&vcpu->arch.vr);
	}
#endif
	current->thread.regs->msr |= lost_ext;
}

int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
			  unsigned int exit_nr)
{
	int r = RESUME_HOST;
	int s;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;

	/* We get here with MSR.EE=1 */

	trace_kvm_exit(exit_nr, vcpu);
	kvm_guest_exit();

	switch (exit_nr) {
	case BOOK3S_INTERRUPT_INST_STORAGE:
	{
		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
		vcpu->stat.pf_instruc++;

#ifdef CONFIG_PPC_BOOK3S_32
		/* We set segments as unused segments when invalidating them. So
		 * treat the respective fault as a segment fault. */
		{
			struct kvmppc_book3s_shadow_vcpu *svcpu;
			u32 sr;

			svcpu = svcpu_get(vcpu);
			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
			svcpu_put(svcpu);
			if (sr == SR_INVALID) {
				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
				r = RESUME_GUEST;
				break;
			}
		}
#endif

		/* only care about PTEG not found errors, but leave NX alone */
		if (shadow_srr1 & 0x40000000) {
			int idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
			srcu_read_unlock(&vcpu->kvm->srcu, idx);
			vcpu->stat.sp_instruc++;
		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
			/*
			 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
			 *     so we can't use the NX bit inside the guest. Let's cross our fingers,
			 *     that no guest that needs the dcbz hack does NX.
			 */
			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
			r = RESUME_GUEST;
		} else {
			vcpu->arch.shared->msr |= shadow_srr1 & 0x58000000;
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	}
	case BOOK3S_INTERRUPT_DATA_STORAGE:
	{
		ulong dar = kvmppc_get_fault_dar(vcpu);
		u32 fault_dsisr = vcpu->arch.fault_dsisr;
		vcpu->stat.pf_storage++;

#ifdef CONFIG_PPC_BOOK3S_32
		/* We set segments as unused segments when invalidating them. So
		 * treat the respective fault as a segment fault. */
		{
			struct kvmppc_book3s_shadow_vcpu *svcpu;
			u32 sr;

			svcpu = svcpu_get(vcpu);
			sr = svcpu->sr[dar >> SID_SHIFT];
			svcpu_put(svcpu);
			if (sr == SR_INVALID) {
				kvmppc_mmu_map_segment(vcpu, dar);
				r = RESUME_GUEST;
				break;
			}
		}
#endif

		/*
		 * We need to handle missing shadow PTEs, and
		 * protection faults due to us mapping a page read-only
		 * when the guest thinks it is writable.
		 */
		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
			int idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
			srcu_read_unlock(&vcpu->kvm->srcu, idx);
		} else {
			vcpu->arch.shared->dar = dar;
			vcpu->arch.shared->dsisr = fault_dsisr;
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	}
	case BOOK3S_INTERRUPT_DATA_SEGMENT:
		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
			vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
			kvmppc_book3s_queue_irqprio(vcpu,
				BOOK3S_INTERRUPT_DATA_SEGMENT);
		}
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_INST_SEGMENT:
		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
			kvmppc_book3s_queue_irqprio(vcpu,
				BOOK3S_INTERRUPT_INST_SEGMENT);
		}
		r = RESUME_GUEST;
		break;
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_DECREMENTER:
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
	case BOOK3S_INTERRUPT_DOORBELL:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
	case BOOK3S_INTERRUPT_EXTERNAL_HV:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
	{
		enum emulation_result er;
		ulong flags;

program_interrupt:
		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;

		if (vcpu->arch.shared->msr & MSR_PR) {
#ifdef EXIT_DEBUG
			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
#endif
			if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
			    (INS_DCBZ & 0xfffffff7)) {
				kvmppc_core_queue_program(vcpu, flags);
				r = RESUME_GUEST;
				break;
			}
		}

		vcpu->stat.emulated_inst_exits++;
		er = kvmppc_emulate_instruction(run, vcpu);
		switch (er) {
		case EMULATE_DONE:
			r = RESUME_GUEST_NV;
			break;
		case EMULATE_AGAIN:
			r = RESUME_GUEST;
			break;
		case EMULATE_FAIL:
			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
			       __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
			kvmppc_core_queue_program(vcpu, flags);
			r = RESUME_GUEST;
			break;
		case EMULATE_DO_MMIO:
			run->exit_reason = KVM_EXIT_MMIO;
			r = RESUME_HOST_NV;
			break;
		case EMULATE_EXIT_USER:
			r = RESUME_HOST_NV;
			break;
		default:
			BUG();
		}
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
		if (vcpu->arch.papr_enabled &&
		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
		    !(vcpu->arch.shared->msr & MSR_PR)) {
			/* SC 1 papr hypercalls */
			ulong cmd = kvmppc_get_gpr(vcpu, 3);
			int i;

#ifdef CONFIG_PPC_BOOK3S_64
			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
				r = RESUME_GUEST;
				break;
			}
#endif

			run->papr_hcall.nr = cmd;
			for (i = 0; i < 9; ++i) {
				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
				run->papr_hcall.args[i] = gpr;
			}
			run->exit_reason = KVM_EXIT_PAPR_HCALL;
			vcpu->arch.hcall_needed = 1;
			r = RESUME_HOST;
		} else if (vcpu->arch.osi_enabled &&
		    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
		    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
			/* MOL hypercalls */
			u64 *gprs = run->osi.gprs;
			int i;

			run->exit_reason = KVM_EXIT_OSI;
			for (i = 0; i < 32; i++)
				gprs[i] = kvmppc_get_gpr(vcpu, i);
			vcpu->arch.osi_needed = 1;
			r = RESUME_HOST_NV;
		} else if (!(vcpu->arch.shared->msr & MSR_PR) &&
		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
			/* KVM PV hypercalls */
			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
			r = RESUME_GUEST;
		} else {
			/* Guest syscalls */
			vcpu->stat.syscall_exits++;
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	case BOOK3S_INTERRUPT_FP_UNAVAIL:
	case BOOK3S_INTERRUPT_ALTIVEC:
	case BOOK3S_INTERRUPT_VSX:
	{
		int ext_msr = 0;

		switch (exit_nr) {
		case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP;  break;
		case BOOK3S_INTERRUPT_ALTIVEC:    ext_msr = MSR_VEC; break;
		case BOOK3S_INTERRUPT_VSX:        ext_msr = MSR_VSX; break;
		}

		switch (kvmppc_check_ext(vcpu, exit_nr)) {
		case EMULATE_DONE:
			/* everything ok - let's enable the ext */
			r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
			break;
		case EMULATE_FAIL:
			/* we need to emulate this instruction */
			goto program_interrupt;
			break;
		default:
			/* nothing to worry about - go again */
			break;
		}
		break;
	}
	case BOOK3S_INTERRUPT_ALIGNMENT:
		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
			vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
				kvmppc_get_last_inst(vcpu));
			vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
				kvmppc_get_last_inst(vcpu));
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		}
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
	case BOOK3S_INTERRUPT_TRACE:
		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		r = RESUME_GUEST;
		break;
	default:
	{
		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
		/* Ugh - bork here! What did we get? */
		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
		r = RESUME_HOST;
		BUG();
		break;
	}
	}

	if (!(r & RESUME_HOST)) {
		/* To avoid clobbering exit_reason, only check for signals if
		 * we aren't already exiting to userspace for some other
		 * reason. */

		/*
		 * Interrupts could be timers for the guest which we have to
		 * inject again, so let's postpone them until we're in the guest
		 * and if we really did time things so badly, then we just exit
		 * again due to a host external interrupt.
		 */
		s = kvmppc_prepare_to_enter(vcpu);
		if (s <= 0)
			r = s;
		else {
			/* interrupts now hard-disabled */
			kvmppc_fix_ee_before_entry();
		}

		kvmppc_handle_lost_ext(vcpu);
	}

	trace_kvm_book3s_reenter(r, vcpu);

	return r;
}

static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
	int i;

	sregs->pvr = vcpu->arch.pvr;

	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
		for (i = 0; i < 64; i++) {
			sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
			sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
		}
	} else {
		for (i = 0; i < 16; i++)
			sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];

		for (i = 0; i < 8; i++) {
			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
			sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
		}
	}

	return 0;
}

static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
	int i;

	kvmppc_set_pvr_pr(vcpu, sregs->pvr);

	vcpu3s->sdr1 = sregs->u.s.sdr1;
	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
		for (i = 0; i < 64; i++) {
			vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
						    sregs->u.s.ppc64.slb[i].slbe);
		}
	} else {
		for (i = 0; i < 16; i++) {
			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
		}
		for (i = 0; i < 8; i++) {
			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
				       (u32)sregs->u.s.ppc32.ibat[i]);
			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
				       (u32)sregs->u.s.ppc32.dbat[i]);
			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
		}
	}

	/* Flush the MMU after messing with the segments */
	kvmppc_mmu_pte_flush(vcpu, 0, 0);

	return 0;
}

static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, to_book3s(vcpu)->hior);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		to_book3s(vcpu)->hior = set_reg_val(id, *val);
		to_book3s(vcpu)->hior_explicit = true;
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
						   unsigned int id)
{
	struct kvmppc_vcpu_book3s *vcpu_book3s;
	struct kvm_vcpu *vcpu;
	int err = -ENOMEM;
	unsigned long p;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
	if (!vcpu_book3s)
		goto free_vcpu;
	vcpu->arch.book3s = vcpu_book3s;

#ifdef CONFIG_KVM_BOOK3S_32
	vcpu->arch.shadow_vcpu =
		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
	if (!vcpu->arch.shadow_vcpu)
		goto free_vcpu3s;
#endif

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_shadow_vcpu;

	err = -ENOMEM;
	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
	if (!p)
		goto uninit_vcpu;
	/* the real shared page fills the last 4k of our page */
	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);

#ifdef CONFIG_PPC_BOOK3S_64
	/*
	 * Default to the same as the host if we're on sufficiently
	 * recent machine that we have 1TB segments;
	 * otherwise default to PPC970FX.
	 */
	vcpu->arch.pvr = 0x3C0301;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		vcpu->arch.pvr = mfspr(SPRN_PVR);
#else
	/* default to book3s_32 (750) */
	vcpu->arch.pvr = 0x84202;
#endif
	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
	vcpu->arch.slb_nr = 64;

	vcpu->arch.shadow_msr = MSR_USER64;

	err = kvmppc_mmu_init(vcpu);
	if (err < 0)
		goto uninit_vcpu;

	return vcpu;

uninit_vcpu:
	kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32
	kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
	vfree(vcpu_book3s);
free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);

	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
	kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32
	kfree(vcpu->arch.shadow_vcpu);
#endif
	vfree(vcpu_book3s);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int ret;
#ifdef CONFIG_ALTIVEC
	unsigned long uninitialized_var(vrsave);
#endif

	/* Check if we can run the vcpu at all */
	if (!vcpu->arch.sane) {
		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Interrupts could be timers for the guest which we have to inject
	 * again, so let's postpone them until we're in the guest and if we
	 * really did time things so badly, then we just exit again due to
	 * a host external interrupt.
	 */
	ret = kvmppc_prepare_to_enter(vcpu);
	if (ret <= 0)
		goto out;
	/* interrupts now hard-disabled */

	/* Save FPU state in thread_struct */
	if (current->thread.regs->msr & MSR_FP)
		giveup_fpu(current);

#ifdef CONFIG_ALTIVEC
	/* Save Altivec state in thread_struct */
	if (current->thread.regs->msr & MSR_VEC)
		giveup_altivec(current);
#endif

#ifdef CONFIG_VSX
	/* Save VSX state in thread_struct */
	if (current->thread.regs->msr & MSR_VSX)
		__giveup_vsx(current);
#endif

	/* Preload FPU if it's enabled */
	if (vcpu->arch.shared->msr & MSR_FP)
		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);

	kvmppc_fix_ee_before_entry();

	ret = __kvmppc_vcpu_run(kvm_run, vcpu);

	/* No need for kvm_guest_exit. It's done in handle_exit.
	   We also get here with interrupts enabled. */

	/* Make sure we save the guest FPU/Altivec/VSX state */
	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);

out:
	vcpu->mode = OUTSIDE_GUEST_MODE;
	return ret;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
					 struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	struct kvm_vcpu *vcpu;
	ulong ga, ga_end;
	int is_dirty = 0;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* If nothing is dirty, don't bother messing with page tables. */
	if (is_dirty) {
		memslot = id_to_memslot(kvm->memslots, log->slot);

		ga = memslot->base_gfn << PAGE_SHIFT;
		ga_end = ga + (memslot->npages << PAGE_SHIFT);

		kvm_for_each_vcpu(n, vcpu, kvm)
			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);

		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
					 struct kvm_memory_slot *memslot)
{
	return;
}

static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
					struct kvm_memory_slot *memslot,
					struct kvm_userspace_memory_region *mem)
{
	return 0;
}

static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old)
{
	return;
}

static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
					struct kvm_memory_slot *dont)
{
	return;
}

static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
					 unsigned long npages)
{
	return 0;
}


#ifdef CONFIG_PPC64
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
					 struct kvm_ppc_smmu_info *info)
{
	long int i;
	struct kvm_vcpu *vcpu;

	info->flags = 0;

	/* SLB is always 64 entries */
	info->slb_size = 64;

	/* Standard 4k base page size segment */
	info->sps[0].page_shift = 12;
	info->sps[0].slb_enc = 0;
	info->sps[0].enc[0].page_shift = 12;
	info->sps[0].enc[0].pte_enc = 0;

	/*
	 * 64k large page size.
	 * We only want to put this in if the CPUs we're emulating
	 * support it, but unfortunately we don't have a vcpu easily
	 * to hand here to test. Just pick the first vcpu, and if
	 * that doesn't exist yet, report the minimum capability,
	 * i.e., no 64k pages.
	 * 1T segment support goes along with 64k pages.
	 */
	i = 1;
	vcpu = kvm_get_vcpu(kvm, 0);
	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
		info->flags = KVM_PPC_1T_SEGMENTS;
		info->sps[i].page_shift = 16;
		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
		info->sps[i].enc[0].page_shift = 16;
		info->sps[i].enc[0].pte_enc = 1;
		++i;
	}

	/* Standard 16M large page size segment */
	info->sps[i].page_shift = 24;
	info->sps[i].slb_enc = SLB_VSID_L;
	info->sps[i].enc[0].page_shift = 24;
	info->sps[i].enc[0].pte_enc = 0;

	return 0;
}
#else
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
					 struct kvm_ppc_smmu_info *info)
{
	/* We should not get called */
	BUG();
}
#endif /* CONFIG_PPC64 */

static unsigned int kvm_global_user_count = 0;
static DEFINE_SPINLOCK(kvm_global_user_count_lock);

static int kvmppc_core_init_vm_pr(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hpt_mutex);

	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		spin_lock(&kvm_global_user_count_lock);
		if (++kvm_global_user_count == 1)
			pSeries_disable_reloc_on_exc();
		spin_unlock(&kvm_global_user_count_lock);
	}
	return 0;
}

static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif

	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		spin_lock(&kvm_global_user_count_lock);
		BUG_ON(kvm_global_user_count == 0);
		if (--kvm_global_user_count == 0)
			pSeries_enable_reloc_on_exc();
		spin_unlock(&kvm_global_user_count_lock);
	}
}

static int kvmppc_core_check_processor_compat_pr(void)
{
	/* we are always compatible */
	return 0;
}

static long kvm_arch_vm_ioctl_pr(struct file *filp,
				 unsigned int ioctl, unsigned long arg)
{
	return -ENOTTY;
}

static struct kvmppc_ops kvm_ops_pr = {
	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
	.get_one_reg = kvmppc_get_one_reg_pr,
	.set_one_reg = kvmppc_set_one_reg_pr,
	.vcpu_load   = kvmppc_core_vcpu_load_pr,
	.vcpu_put    = kvmppc_core_vcpu_put_pr,
	.set_msr     = kvmppc_set_msr_pr,
	.vcpu_run    = kvmppc_vcpu_run_pr,
	.vcpu_create = kvmppc_core_vcpu_create_pr,
	.vcpu_free   = kvmppc_core_vcpu_free_pr,
	.check_requests = kvmppc_core_check_requests_pr,
	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
	.flush_memslot = kvmppc_core_flush_memslot_pr,
	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
	.unmap_hva = kvm_unmap_hva_pr,
	.unmap_hva_range = kvm_unmap_hva_range_pr,
	.age_hva  = kvm_age_hva_pr,
	.test_age_hva = kvm_test_age_hva_pr,
	.set_spte_hva = kvm_set_spte_hva_pr,
	.mmu_destroy  = kvmppc_mmu_destroy_pr,
	.free_memslot = kvmppc_core_free_memslot_pr,
	.create_memslot = kvmppc_core_create_memslot_pr,
	.init_vm = kvmppc_core_init_vm_pr,
	.destroy_vm = kvmppc_core_destroy_vm_pr,
	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
	.emulate_op = kvmppc_core_emulate_op_pr,
	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
	.fast_vcpu_kick = kvm_vcpu_kick,
	.arch_vm_ioctl = kvm_arch_vm_ioctl_pr,
};


int kvmppc_book3s_init_pr(void)
{
	int r;

	r = kvmppc_core_check_processor_compat_pr();
	if (r < 0)
		return r;

	kvm_ops_pr.owner = THIS_MODULE;
	kvmppc_pr_ops = &kvm_ops_pr;

	r = kvmppc_mmu_hpte_sysinit();
	return r;
}

void kvmppc_book3s_exit_pr(void)
{
	kvmppc_pr_ops = NULL;
	kvmppc_mmu_hpte_sysexit();
}

/*
 * We only support separate modules for book3s 64
 */
#ifdef CONFIG_PPC_BOOK3S_64

module_init(kvmppc_book3s_init_pr);
module_exit(kvmppc_book3s_exit_pr);

MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
#endif