1 /* 2 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved. 3 * 4 * Authors: 5 * Alexander Graf <agraf@suse.de> 6 * Kevin Wolf <mail@kevin-wolf.de> 7 * Paul Mackerras <paulus@samba.org> 8 * 9 * Description: 10 * Functions relating to running KVM on Book 3S processors where 11 * we don't have access to hypervisor mode, and we run the guest 12 * in problem state (user mode). 13 * 14 * This file is derived from arch/powerpc/kvm/44x.c, 15 * by Hollis Blanchard <hollisb@us.ibm.com>. 16 * 17 * This program is free software; you can redistribute it and/or modify 18 * it under the terms of the GNU General Public License, version 2, as 19 * published by the Free Software Foundation. 20 */ 21 22 #include <linux/kvm_host.h> 23 #include <linux/export.h> 24 #include <linux/err.h> 25 #include <linux/slab.h> 26 27 #include <asm/reg.h> 28 #include <asm/cputable.h> 29 #include <asm/cacheflush.h> 30 #include <asm/tlbflush.h> 31 #include <asm/uaccess.h> 32 #include <asm/io.h> 33 #include <asm/kvm_ppc.h> 34 #include <asm/kvm_book3s.h> 35 #include <asm/mmu_context.h> 36 #include <asm/switch_to.h> 37 #include <asm/firmware.h> 38 #include <asm/hvcall.h> 39 #include <linux/gfp.h> 40 #include <linux/sched.h> 41 #include <linux/vmalloc.h> 42 #include <linux/highmem.h> 43 #include <linux/module.h> 44 #include <linux/miscdevice.h> 45 46 #include "book3s.h" 47 48 #define CREATE_TRACE_POINTS 49 #include "trace_pr.h" 50 51 /* #define EXIT_DEBUG */ 52 /* #define DEBUG_EXT */ 53 54 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 55 ulong msr); 56 static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); 57 58 /* Some compatibility defines */ 59 #ifdef CONFIG_PPC_BOOK3S_32 60 #define MSR_USER32 MSR_USER 61 #define MSR_USER64 MSR_USER 62 #define HW_PAGE_SIZE PAGE_SIZE 63 #endif 64 65 static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) 66 { 67 #ifdef CONFIG_PPC_BOOK3S_64 68 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 69 memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb)); 70 svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; 71 svcpu->in_use = 0; 72 svcpu_put(svcpu); 73 #endif 74 vcpu->cpu = smp_processor_id(); 75 #ifdef CONFIG_PPC_BOOK3S_32 76 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; 77 #endif 78 } 79 80 static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) 81 { 82 #ifdef CONFIG_PPC_BOOK3S_64 83 struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu); 84 if (svcpu->in_use) { 85 kvmppc_copy_from_svcpu(vcpu, svcpu); 86 } 87 memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); 88 to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; 89 svcpu_put(svcpu); 90 #endif 91 92 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); 93 kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); 94 vcpu->cpu = -1; 95 } 96 97 /* Copy data needed by real-mode code from vcpu to shadow vcpu */ 98 void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, 99 struct kvm_vcpu *vcpu) 100 { 101 svcpu->gpr[0] = vcpu->arch.gpr[0]; 102 svcpu->gpr[1] = vcpu->arch.gpr[1]; 103 svcpu->gpr[2] = vcpu->arch.gpr[2]; 104 svcpu->gpr[3] = vcpu->arch.gpr[3]; 105 svcpu->gpr[4] = vcpu->arch.gpr[4]; 106 svcpu->gpr[5] = vcpu->arch.gpr[5]; 107 svcpu->gpr[6] = vcpu->arch.gpr[6]; 108 svcpu->gpr[7] = vcpu->arch.gpr[7]; 109 svcpu->gpr[8] = vcpu->arch.gpr[8]; 110 svcpu->gpr[9] = vcpu->arch.gpr[9]; 111 svcpu->gpr[10] = vcpu->arch.gpr[10]; 112 svcpu->gpr[11] = vcpu->arch.gpr[11]; 113 svcpu->gpr[12] = vcpu->arch.gpr[12]; 114 svcpu->gpr[13] = 
vcpu->arch.gpr[13]; 115 svcpu->cr = vcpu->arch.cr; 116 svcpu->xer = vcpu->arch.xer; 117 svcpu->ctr = vcpu->arch.ctr; 118 svcpu->lr = vcpu->arch.lr; 119 svcpu->pc = vcpu->arch.pc; 120 #ifdef CONFIG_PPC_BOOK3S_64 121 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; 122 #endif 123 svcpu->in_use = true; 124 } 125 126 /* Copy data touched by real-mode code from shadow vcpu back to vcpu */ 127 void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, 128 struct kvmppc_book3s_shadow_vcpu *svcpu) 129 { 130 /* 131 * vcpu_put would just call us again because in_use hasn't 132 * been updated yet. 133 */ 134 preempt_disable(); 135 136 /* 137 * Maybe we were already preempted and synced the svcpu from 138 * our preempt notifiers. Don't bother touching this svcpu then. 139 */ 140 if (!svcpu->in_use) 141 goto out; 142 143 vcpu->arch.gpr[0] = svcpu->gpr[0]; 144 vcpu->arch.gpr[1] = svcpu->gpr[1]; 145 vcpu->arch.gpr[2] = svcpu->gpr[2]; 146 vcpu->arch.gpr[3] = svcpu->gpr[3]; 147 vcpu->arch.gpr[4] = svcpu->gpr[4]; 148 vcpu->arch.gpr[5] = svcpu->gpr[5]; 149 vcpu->arch.gpr[6] = svcpu->gpr[6]; 150 vcpu->arch.gpr[7] = svcpu->gpr[7]; 151 vcpu->arch.gpr[8] = svcpu->gpr[8]; 152 vcpu->arch.gpr[9] = svcpu->gpr[9]; 153 vcpu->arch.gpr[10] = svcpu->gpr[10]; 154 vcpu->arch.gpr[11] = svcpu->gpr[11]; 155 vcpu->arch.gpr[12] = svcpu->gpr[12]; 156 vcpu->arch.gpr[13] = svcpu->gpr[13]; 157 vcpu->arch.cr = svcpu->cr; 158 vcpu->arch.xer = svcpu->xer; 159 vcpu->arch.ctr = svcpu->ctr; 160 vcpu->arch.lr = svcpu->lr; 161 vcpu->arch.pc = svcpu->pc; 162 vcpu->arch.shadow_srr1 = svcpu->shadow_srr1; 163 vcpu->arch.fault_dar = svcpu->fault_dar; 164 vcpu->arch.fault_dsisr = svcpu->fault_dsisr; 165 vcpu->arch.last_inst = svcpu->last_inst; 166 #ifdef CONFIG_PPC_BOOK3S_64 167 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; 168 #endif 169 svcpu->in_use = false; 170 171 out: 172 preempt_enable(); 173 } 174 175 static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu) 176 { 177 int r = 1; /* Indicate we want to get back into the guest */ 178 179 /* We misuse TLB_FLUSH to indicate that we want to clear 180 all shadow cache entries */ 181 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) 182 kvmppc_mmu_pte_flush(vcpu, 0, 0); 183 184 return r; 185 } 186 187 /************* MMU Notifiers *************/ 188 static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start, 189 unsigned long end) 190 { 191 long i; 192 struct kvm_vcpu *vcpu; 193 struct kvm_memslots *slots; 194 struct kvm_memory_slot *memslot; 195 196 slots = kvm_memslots(kvm); 197 kvm_for_each_memslot(memslot, slots) { 198 unsigned long hva_start, hva_end; 199 gfn_t gfn, gfn_end; 200 201 hva_start = max(start, memslot->userspace_addr); 202 hva_end = min(end, memslot->userspace_addr + 203 (memslot->npages << PAGE_SHIFT)); 204 if (hva_start >= hva_end) 205 continue; 206 /* 207 * {gfn(page) | page intersects with [hva_start, hva_end)} = 208 * {gfn, gfn+1, ..., gfn_end-1}. 
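 * For example, assuming 4K pages: unmapping hvas [slot start + 0x1800, slot start + 0x2800) gives gfn = base_gfn + 1 and gfn_end = base_gfn + 3, i.e. exactly the two pages that intersect the range.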
209 */ 210 gfn = hva_to_gfn_memslot(hva_start, memslot); 211 gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); 212 kvm_for_each_vcpu(i, vcpu, kvm) 213 kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT, 214 gfn_end << PAGE_SHIFT); 215 } 216 } 217 218 static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva) 219 { 220 trace_kvm_unmap_hva(hva); 221 222 do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 223 224 return 0; 225 } 226 227 static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start, 228 unsigned long end) 229 { 230 do_kvm_unmap_hva(kvm, start, end); 231 232 return 0; 233 } 234 235 static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva) 236 { 237 /* XXX could be more clever ;) */ 238 return 0; 239 } 240 241 static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva) 242 { 243 /* XXX could be more clever ;) */ 244 return 0; 245 } 246 247 static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte) 248 { 249 /* The page will get remapped properly on its next fault */ 250 do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE); 251 } 252 253 /*****************************************/ 254 255 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) 256 { 257 ulong guest_msr = kvmppc_get_msr(vcpu); 258 ulong smsr = guest_msr; 259 260 /* Guest MSR values */ 261 smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE; 262 /* Process MSR values */ 263 smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; 264 /* External providers the guest reserved */ 265 smsr |= (guest_msr & vcpu->arch.guest_owned_ext); 266 /* 64-bit Process MSR values */ 267 #ifdef CONFIG_PPC_BOOK3S_64 268 smsr |= MSR_ISF | MSR_HV; 269 #endif 270 vcpu->arch.shadow_msr = smsr; 271 } 272 273 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) 274 { 275 ulong old_msr = kvmppc_get_msr(vcpu); 276 277 #ifdef EXIT_DEBUG 278 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 279 #endif 280 281 msr &= to_book3s(vcpu)->msr_mask; 282 kvmppc_set_msr_fast(vcpu, msr); 283 kvmppc_recalc_shadow_msr(vcpu); 284 285 if (msr & MSR_POW) { 286 if (!vcpu->arch.pending_exceptions) { 287 kvm_vcpu_block(vcpu); 288 clear_bit(KVM_REQ_UNHALT, &vcpu->requests); 289 vcpu->stat.halt_wakeup++; 290 291 /* Unset POW bit after we woke up */ 292 msr &= ~MSR_POW; 293 kvmppc_set_msr_fast(vcpu, msr); 294 } 295 } 296 297 if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != 298 (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { 299 kvmppc_mmu_flush_segments(vcpu); 300 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 301 302 /* Preload magic page segment when in kernel mode */ 303 if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { 304 struct kvm_vcpu_arch *a = &vcpu->arch; 305 306 if (msr & MSR_DR) 307 kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); 308 else 309 kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); 310 } 311 } 312 313 /* 314 * When switching from 32 to 64-bit, we may have a stale 32-bit 315 * magic page around, which we need to flush. Typically the 32-bit 316 * magic page will be instantiated when calling into RTAS. Note: We 317 * assume that such a transition only happens while in kernel mode, 318 * i.e., we never transition from user 32-bit to kernel 64-bit with 319 * a 32-bit magic page around.
320 */ 321 if (vcpu->arch.magic_page_pa && 322 !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) { 323 /* going from RTAS to normal kernel code */ 324 kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa, 325 ~0xFFFUL); 326 } 327 328 /* Preload FPU if it's enabled */ 329 if (kvmppc_get_msr(vcpu) & MSR_FP) 330 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 331 } 332 333 void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) 334 { 335 u32 host_pvr; 336 337 vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB; 338 vcpu->arch.pvr = pvr; 339 #ifdef CONFIG_PPC_BOOK3S_64 340 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { 341 kvmppc_mmu_book3s_64_init(vcpu); 342 if (!to_book3s(vcpu)->hior_explicit) 343 to_book3s(vcpu)->hior = 0xfff00000; 344 to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; 345 vcpu->arch.cpu_type = KVM_CPU_3S_64; 346 } else 347 #endif 348 { 349 kvmppc_mmu_book3s_32_init(vcpu); 350 if (!to_book3s(vcpu)->hior_explicit) 351 to_book3s(vcpu)->hior = 0; 352 to_book3s(vcpu)->msr_mask = 0xffffffffULL; 353 vcpu->arch.cpu_type = KVM_CPU_3S_32; 354 } 355 356 kvmppc_sanity_check(vcpu); 357 358 /* If we are at hypervisor level on a 970, we can tell the CPU to 359 * treat DCBZ as a 32-byte store */ 360 vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32; 361 if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) && 362 !strcmp(cur_cpu_spec->platform, "ppc970")) 363 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 364 365 /* Cell performs badly if MSR_FEx are set. So let's hope nobody 366 really needs them in a VM on Cell and force disable them. */ 367 if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) 368 to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); 369 370 /* 371 * If they're asking for POWER6 or later, set the flag 372 * indicating that we can do multiple large page sizes 373 * and 1TB segments. 374 * Also set the flag that indicates that tlbie has the large 375 * page bit in the RB operand instead of the instruction. 376 */ 377 switch (PVR_VER(pvr)) { 378 case PVR_POWER6: 379 case PVR_POWER7: 380 case PVR_POWER7p: 381 case PVR_POWER8: 382 vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | 383 BOOK3S_HFLAG_NEW_TLBIE; 384 break; 385 } 386 387 #ifdef CONFIG_PPC_BOOK3S_32 388 /* 32-bit Book3S always has 32-byte dcbz */ 389 vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; 390 #endif 391 392 /* On some CPUs we can execute paired single operations natively */ 393 asm ( "mfpvr %0" : "=r"(host_pvr)); 394 switch (host_pvr) { 395 case 0x00080200: /* lonestar 2.0 */ 396 case 0x00088202: /* lonestar 2.2 */ 397 case 0x70000100: /* gekko 1.0 */ 398 case 0x00080100: /* gekko 2.0 */ 399 case 0x00083203: /* gekko 2.3a */ 400 case 0x00083213: /* gekko 2.3b */ 401 case 0x00083204: /* gekko 2.4 */ 402 case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ 403 case 0x00087200: /* broadway */ 404 vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS; 405 /* Enable HID2.PSE - in case we need it later */ 406 mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29)); 407 } 408 } 409 410 /* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes. To 411 * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to 412 * emulate the 32-byte dcbz length. 413 * 414 * The Book3s_64 inventors also realized this case and implemented a special bit 415 * in the HID5 register, which is a hypervisor resource. Thus we can't use it. 416 * 417 * My approach here is to patch the dcbz instruction on executing pages.
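 * The patch clears one opcode bit (the &= 0xfffffff7 below), turning dcbz into a reserved form that raises a program interrupt; the program interrupt handler recognises that form and emulates the 32-byte dcbz.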
418 */ 419 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) 420 { 421 struct page *hpage; 422 u64 hpage_offset; 423 u32 *page; 424 int i; 425 426 hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); 427 if (is_error_page(hpage)) 428 return; 429 430 hpage_offset = pte->raddr & ~PAGE_MASK; 431 hpage_offset &= ~0xFFFULL; 432 hpage_offset /= 4; 433 434 get_page(hpage); 435 page = kmap_atomic(hpage); 436 437 /* patch dcbz into reserved instruction, so we trap */ 438 for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) 439 if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ) 440 page[i] &= cpu_to_be32(0xfffffff7); 441 442 kunmap_atomic(page); 443 put_page(hpage); 444 } 445 446 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) 447 { 448 ulong mp_pa = vcpu->arch.magic_page_pa; 449 450 if (!(kvmppc_get_msr(vcpu) & MSR_SF)) 451 mp_pa = (uint32_t)mp_pa; 452 453 if (unlikely(mp_pa) && 454 unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { 455 return 1; 456 } 457 458 return kvm_is_visible_gfn(vcpu->kvm, gfn); 459 } 460 461 int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, 462 ulong eaddr, int vec) 463 { 464 bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE); 465 bool iswrite = false; 466 int r = RESUME_GUEST; 467 int relocated; 468 int page_found = 0; 469 struct kvmppc_pte pte; 470 bool is_mmio = false; 471 bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; 472 bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false; 473 u64 vsid; 474 475 relocated = data ? dr : ir; 476 if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE)) 477 iswrite = true; 478 479 /* Resolve real address if translation turned on */ 480 if (relocated) { 481 page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite); 482 } else { 483 pte.may_execute = true; 484 pte.may_read = true; 485 pte.may_write = true; 486 pte.raddr = eaddr & KVM_PAM; 487 pte.eaddr = eaddr; 488 pte.vpage = eaddr >> 12; 489 pte.page_size = MMU_PAGE_64K; 490 } 491 492 switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { 493 case 0: 494 pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); 495 break; 496 case MSR_DR: 497 case MSR_IR: 498 vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); 499 500 if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR) 501 pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); 502 else 503 pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); 504 pte.vpage |= vsid; 505 506 if (vsid == -1) 507 page_found = -EINVAL; 508 break; 509 } 510 511 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 512 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 513 /* 514 * If we do the dcbz hack, we have to NX on every execution, 515 * so we can patch the executing code. This renders our guest 516 * NX-less. 
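 * (Concretely, may_execute is cleared below for data faults, so the first instruction fetch from such a page faults again and gives us a chance to patch it before it runs.)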
517 */ 518 pte.may_execute = !data; 519 } 520 521 if (page_found == -ENOENT) { 522 /* Page not found in guest PTE entries */ 523 u64 ssrr1 = vcpu->arch.shadow_srr1; 524 u64 msr = kvmppc_get_msr(vcpu); 525 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); 526 kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr); 527 kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); 528 kvmppc_book3s_queue_irqprio(vcpu, vec); 529 } else if (page_found == -EPERM) { 530 /* Storage protection */ 531 u32 dsisr = vcpu->arch.fault_dsisr; 532 u64 ssrr1 = vcpu->arch.shadow_srr1; 533 u64 msr = kvmppc_get_msr(vcpu); 534 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); 535 dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT; 536 kvmppc_set_dsisr(vcpu, dsisr); 537 kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL)); 538 kvmppc_book3s_queue_irqprio(vcpu, vec); 539 } else if (page_found == -EINVAL) { 540 /* Page not found in guest SLB */ 541 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); 542 kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); 543 } else if (!is_mmio && 544 kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { 545 if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { 546 /* 547 * There is already a host HPTE there, presumably 548 * a read-only one for a page the guest thinks 549 * is writable, so get rid of it first. 550 */ 551 kvmppc_mmu_unmap_page(vcpu, &pte); 552 } 553 /* The guest's PTE is not mapped yet. Map on the host */ 554 kvmppc_mmu_map_page(vcpu, &pte, iswrite); 555 if (data) 556 vcpu->stat.sp_storage++; 557 else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 558 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) 559 kvmppc_patch_dcbz(vcpu, &pte); 560 } else { 561 /* MMIO */ 562 vcpu->stat.mmio_exits++; 563 vcpu->arch.paddr_accessed = pte.raddr; 564 vcpu->arch.vaddr_accessed = pte.eaddr; 565 r = kvmppc_emulate_mmio(run, vcpu); 566 if ( r == RESUME_HOST_NV ) 567 r = RESUME_HOST; 568 } 569 570 return r; 571 } 572 573 static inline int get_fpr_index(int i) 574 { 575 return i * TS_FPRWIDTH; 576 } 577 578 /* Give up external provider (FPU, Altivec, VSX) */ 579 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) 580 { 581 struct thread_struct *t = &current->thread; 582 583 /* 584 * VSX instructions can access FP and vector registers, so if 585 * we are giving up VSX, make sure we give up FP and VMX as well. 586 */ 587 if (msr & MSR_VSX) 588 msr |= MSR_FP | MSR_VEC; 589 590 msr &= vcpu->arch.guest_owned_ext; 591 if (!msr) 592 return; 593 594 #ifdef DEBUG_EXT 595 printk(KERN_INFO "Giving up ext 0x%lx\n", msr); 596 #endif 597 598 if (msr & MSR_FP) { 599 /* 600 * Note that on CPUs with VSX, giveup_fpu stores 601 * both the traditional FP registers and the added VSX 602 * registers into thread.fp_state.fpr[].
603 */ 604 if (t->regs->msr & MSR_FP) 605 giveup_fpu(current); 606 t->fp_save_area = NULL; 607 } 608 609 #ifdef CONFIG_ALTIVEC 610 if (msr & MSR_VEC) { 611 if (current->thread.regs->msr & MSR_VEC) 612 giveup_altivec(current); 613 t->vr_save_area = NULL; 614 } 615 #endif 616 617 vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX); 618 kvmppc_recalc_shadow_msr(vcpu); 619 } 620 621 /* Give up facility (TAR / EBB / DSCR) */ 622 static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) 623 { 624 #ifdef CONFIG_PPC_BOOK3S_64 625 if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) { 626 /* Facility not available to the guest, ignore giveup request */ 627 return; 628 } 629 630 switch (fac) { 631 case FSCR_TAR_LG: 632 vcpu->arch.tar = mfspr(SPRN_TAR); 633 mtspr(SPRN_TAR, current->thread.tar); 634 vcpu->arch.shadow_fscr &= ~FSCR_TAR; 635 break; 636 } 637 #endif 638 } 639 640 static int kvmppc_read_inst(struct kvm_vcpu *vcpu) 641 { 642 ulong srr0 = kvmppc_get_pc(vcpu); 643 u32 last_inst = kvmppc_get_last_inst(vcpu); 644 int ret; 645 646 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 647 if (ret == -ENOENT) { 648 ulong msr = kvmppc_get_msr(vcpu); 649 650 msr = kvmppc_set_field(msr, 33, 33, 1); 651 msr = kvmppc_set_field(msr, 34, 36, 0); 652 msr = kvmppc_set_field(msr, 42, 47, 0); 653 kvmppc_set_msr_fast(vcpu, msr); 654 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); 655 return EMULATE_AGAIN; 656 } 657 658 return EMULATE_DONE; 659 } 660 661 static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) 662 { 663 664 /* Need to do paired single emulation? */ 665 if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) 666 return EMULATE_DONE; 667 668 /* Read out the instruction */ 669 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) 670 /* Need to emulate */ 671 return EMULATE_FAIL; 672 673 return EMULATE_AGAIN; 674 } 675 676 /* Handle external providers (FPU, Altivec, VSX) */ 677 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, 678 ulong msr) 679 { 680 struct thread_struct *t = &current->thread; 681 682 /* When we have paired singles, we emulate in software */ 683 if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) 684 return RESUME_GUEST; 685 686 if (!(kvmppc_get_msr(vcpu) & msr)) { 687 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 688 return RESUME_GUEST; 689 } 690 691 if (msr == MSR_VSX) { 692 /* No VSX? Give an illegal instruction interrupt */ 693 #ifdef CONFIG_VSX 694 if (!cpu_has_feature(CPU_FTR_VSX)) 695 #endif 696 { 697 kvmppc_core_queue_program(vcpu, SRR1_PROGILL); 698 return RESUME_GUEST; 699 } 700 701 /* 702 * We have to load up all the FP and VMX registers before 703 * we can let the guest use VSX instructions.
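 * (The VSX registers overlap the FP and VMX register files, so all three have to be owned together.)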
704 */ 705 msr = MSR_FP | MSR_VEC | MSR_VSX; 706 } 707 708 /* See if we already own all the ext(s) needed */ 709 msr &= ~vcpu->arch.guest_owned_ext; 710 if (!msr) 711 return RESUME_GUEST; 712 713 #ifdef DEBUG_EXT 714 printk(KERN_INFO "Loading up ext 0x%lx\n", msr); 715 #endif 716 717 if (msr & MSR_FP) { 718 preempt_disable(); 719 enable_kernel_fp(); 720 load_fp_state(&vcpu->arch.fp); 721 t->fp_save_area = &vcpu->arch.fp; 722 preempt_enable(); 723 } 724 725 if (msr & MSR_VEC) { 726 #ifdef CONFIG_ALTIVEC 727 preempt_disable(); 728 enable_kernel_altivec(); 729 load_vr_state(&vcpu->arch.vr); 730 t->vr_save_area = &vcpu->arch.vr; 731 preempt_enable(); 732 #endif 733 } 734 735 t->regs->msr |= msr; 736 vcpu->arch.guest_owned_ext |= msr; 737 kvmppc_recalc_shadow_msr(vcpu); 738 739 return RESUME_GUEST; 740 } 741 742 /* 743 * Kernel code using FP or VMX could have flushed guest state to 744 * the thread_struct; if so, get it back now. 745 */ 746 static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu) 747 { 748 unsigned long lost_ext; 749 750 lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr; 751 if (!lost_ext) 752 return; 753 754 if (lost_ext & MSR_FP) { 755 preempt_disable(); 756 enable_kernel_fp(); 757 load_fp_state(&vcpu->arch.fp); 758 preempt_enable(); 759 } 760 #ifdef CONFIG_ALTIVEC 761 if (lost_ext & MSR_VEC) { 762 preempt_disable(); 763 enable_kernel_altivec(); 764 load_vr_state(&vcpu->arch.vr); 765 preempt_enable(); 766 } 767 #endif 768 current->thread.regs->msr |= lost_ext; 769 } 770 771 #ifdef CONFIG_PPC_BOOK3S_64 772 773 static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac) 774 { 775 /* Inject the Interrupt Cause field and trigger a guest interrupt */ 776 vcpu->arch.fscr &= ~(0xffULL << 56); 777 vcpu->arch.fscr |= (fac << 56); 778 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL); 779 } 780 781 static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac) 782 { 783 enum emulation_result er = EMULATE_FAIL; 784 785 if (!(kvmppc_get_msr(vcpu) & MSR_PR)) 786 er = kvmppc_emulate_instruction(vcpu->run, vcpu); 787 788 if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) { 789 /* Couldn't emulate, trigger interrupt in guest */ 790 kvmppc_trigger_fac_interrupt(vcpu, fac); 791 } 792 } 793 794 /* Enable facilities (TAR, EBB, DSCR) for the guest */ 795 static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) 796 { 797 bool guest_fac_enabled; 798 BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S)); 799 800 /* 801 * Not every facility is enabled by FSCR bits, check whether the 802 * guest has this facility enabled at all. 
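 * TAR and EBB are gated by FSCR bits, while for TM this code checks MSR_TM instead (see the switch below).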
803 */ 804 switch (fac) { 805 case FSCR_TAR_LG: 806 case FSCR_EBB_LG: 807 guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac)); 808 break; 809 case FSCR_TM_LG: 810 guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM; 811 break; 812 default: 813 guest_fac_enabled = false; 814 break; 815 } 816 817 if (!guest_fac_enabled) { 818 /* Facility not enabled by the guest */ 819 kvmppc_trigger_fac_interrupt(vcpu, fac); 820 return RESUME_GUEST; 821 } 822 823 switch (fac) { 824 case FSCR_TAR_LG: 825 /* TAR switching isn't lazy in Linux yet */ 826 current->thread.tar = mfspr(SPRN_TAR); 827 mtspr(SPRN_TAR, vcpu->arch.tar); 828 vcpu->arch.shadow_fscr |= FSCR_TAR; 829 break; 830 default: 831 kvmppc_emulate_fac(vcpu, fac); 832 break; 833 } 834 835 return RESUME_GUEST; 836 } 837 #endif 838 839 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 840 unsigned int exit_nr) 841 { 842 int r = RESUME_HOST; 843 int s; 844 845 vcpu->stat.sum_exits++; 846 847 run->exit_reason = KVM_EXIT_UNKNOWN; 848 run->ready_for_interrupt_injection = 1; 849 850 /* We get here with MSR.EE=1 */ 851 852 trace_kvm_exit(exit_nr, vcpu); 853 kvm_guest_exit(); 854 855 switch (exit_nr) { 856 case BOOK3S_INTERRUPT_INST_STORAGE: 857 { 858 ulong shadow_srr1 = vcpu->arch.shadow_srr1; 859 vcpu->stat.pf_instruc++; 860 861 #ifdef CONFIG_PPC_BOOK3S_32 862 /* We set segments as unused segments when invalidating them. So 863 * treat the respective fault as segment fault. */ 864 { 865 struct kvmppc_book3s_shadow_vcpu *svcpu; 866 u32 sr; 867 868 svcpu = svcpu_get(vcpu); 869 sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]; 870 svcpu_put(svcpu); 871 if (sr == SR_INVALID) { 872 kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); 873 r = RESUME_GUEST; 874 break; 875 } 876 } 877 #endif 878 879 /* only care about PTEG not found errors, but leave NX alone */ 880 if (shadow_srr1 & 0x40000000) { 881 int idx = srcu_read_lock(&vcpu->kvm->srcu); 882 r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr); 883 srcu_read_unlock(&vcpu->kvm->srcu, idx); 884 vcpu->stat.sp_instruc++; 885 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 886 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) { 887 /* 888 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page, 889 * so we can't use the NX bit inside the guest. Let's cross our fingers, 890 * that no guest that needs the dcbz hack does NX. 891 */ 892 kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); 893 r = RESUME_GUEST; 894 } else { 895 u64 msr = kvmppc_get_msr(vcpu); 896 msr |= shadow_srr1 & 0x58000000; 897 kvmppc_set_msr_fast(vcpu, msr); 898 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 899 r = RESUME_GUEST; 900 } 901 break; 902 } 903 case BOOK3S_INTERRUPT_DATA_STORAGE: 904 { 905 ulong dar = kvmppc_get_fault_dar(vcpu); 906 u32 fault_dsisr = vcpu->arch.fault_dsisr; 907 vcpu->stat.pf_storage++; 908 909 #ifdef CONFIG_PPC_BOOK3S_32 910 /* We set segments as unused segments when invalidating them. So 911 * treat the respective fault as segment fault. */ 912 { 913 struct kvmppc_book3s_shadow_vcpu *svcpu; 914 u32 sr; 915 916 svcpu = svcpu_get(vcpu); 917 sr = svcpu->sr[dar >> SID_SHIFT]; 918 svcpu_put(svcpu); 919 if (sr == SR_INVALID) { 920 kvmppc_mmu_map_segment(vcpu, dar); 921 r = RESUME_GUEST; 922 break; 923 } 924 } 925 #endif 926 927 /* 928 * We need to handle missing shadow PTEs, and 929 * protection faults due to us mapping a page read-only 930 * when the guest thinks it is writable. 
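 * The first case shows up as DSISR_NOHPTE and the second as DSISR_PROTFAULT; anything else is reflected straight back to the guest as a DSI.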
931 */ 932 if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) { 933 int idx = srcu_read_lock(&vcpu->kvm->srcu); 934 r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); 935 srcu_read_unlock(&vcpu->kvm->srcu, idx); 936 } else { 937 kvmppc_set_dar(vcpu, dar); 938 kvmppc_set_dsisr(vcpu, fault_dsisr); 939 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 940 r = RESUME_GUEST; 941 } 942 break; 943 } 944 case BOOK3S_INTERRUPT_DATA_SEGMENT: 945 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { 946 kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); 947 kvmppc_book3s_queue_irqprio(vcpu, 948 BOOK3S_INTERRUPT_DATA_SEGMENT); 949 } 950 r = RESUME_GUEST; 951 break; 952 case BOOK3S_INTERRUPT_INST_SEGMENT: 953 if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) { 954 kvmppc_book3s_queue_irqprio(vcpu, 955 BOOK3S_INTERRUPT_INST_SEGMENT); 956 } 957 r = RESUME_GUEST; 958 break; 959 /* We're good on these - the host merely wanted to get our attention */ 960 case BOOK3S_INTERRUPT_DECREMENTER: 961 case BOOK3S_INTERRUPT_HV_DECREMENTER: 962 case BOOK3S_INTERRUPT_DOORBELL: 963 vcpu->stat.dec_exits++; 964 r = RESUME_GUEST; 965 break; 966 case BOOK3S_INTERRUPT_EXTERNAL: 967 case BOOK3S_INTERRUPT_EXTERNAL_LEVEL: 968 case BOOK3S_INTERRUPT_EXTERNAL_HV: 969 vcpu->stat.ext_intr_exits++; 970 r = RESUME_GUEST; 971 break; 972 case BOOK3S_INTERRUPT_PERFMON: 973 r = RESUME_GUEST; 974 break; 975 case BOOK3S_INTERRUPT_PROGRAM: 976 case BOOK3S_INTERRUPT_H_EMUL_ASSIST: 977 { 978 enum emulation_result er; 979 ulong flags; 980 981 program_interrupt: 982 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; 983 984 if (kvmppc_get_msr(vcpu) & MSR_PR) { 985 #ifdef EXIT_DEBUG 986 printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 987 #endif 988 if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != 989 (INS_DCBZ & 0xfffffff7)) { 990 kvmppc_core_queue_program(vcpu, flags); 991 r = RESUME_GUEST; 992 break; 993 } 994 } 995 996 vcpu->stat.emulated_inst_exits++; 997 er = kvmppc_emulate_instruction(run, vcpu); 998 switch (er) { 999 case EMULATE_DONE: 1000 r = RESUME_GUEST_NV; 1001 break; 1002 case EMULATE_AGAIN: 1003 r = RESUME_GUEST; 1004 break; 1005 case EMULATE_FAIL: 1006 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 1007 __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); 1008 kvmppc_core_queue_program(vcpu, flags); 1009 r = RESUME_GUEST; 1010 break; 1011 case EMULATE_DO_MMIO: 1012 run->exit_reason = KVM_EXIT_MMIO; 1013 r = RESUME_HOST_NV; 1014 break; 1015 case EMULATE_EXIT_USER: 1016 r = RESUME_HOST_NV; 1017 break; 1018 default: 1019 BUG(); 1020 } 1021 break; 1022 } 1023 case BOOK3S_INTERRUPT_SYSCALL: 1024 if (vcpu->arch.papr_enabled && 1025 (kvmppc_get_last_sc(vcpu) == 0x44000022) && 1026 !(kvmppc_get_msr(vcpu) & MSR_PR)) { 1027 /* SC 1 papr hypercalls */ 1028 ulong cmd = kvmppc_get_gpr(vcpu, 3); 1029 int i; 1030 1031 #ifdef CONFIG_PPC_BOOK3S_64 1032 if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) { 1033 r = RESUME_GUEST; 1034 break; 1035 } 1036 #endif 1037 1038 run->papr_hcall.nr = cmd; 1039 for (i = 0; i < 9; ++i) { 1040 ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); 1041 run->papr_hcall.args[i] = gpr; 1042 } 1043 run->exit_reason = KVM_EXIT_PAPR_HCALL; 1044 vcpu->arch.hcall_needed = 1; 1045 r = RESUME_HOST; 1046 } else if (vcpu->arch.osi_enabled && 1047 (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && 1048 (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { 1049 /* MOL hypercalls */ 1050 u64 *gprs = run->osi.gprs; 1051 int i; 1052 1053 
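/* Hand the hypercall to userspace with all 32 GPRs so it can service it and resume the guest. */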
run->exit_reason = KVM_EXIT_OSI; 1054 for (i = 0; i < 32; i++) 1055 gprs[i] = kvmppc_get_gpr(vcpu, i); 1056 vcpu->arch.osi_needed = 1; 1057 r = RESUME_HOST_NV; 1058 } else if (!(kvmppc_get_msr(vcpu) & MSR_PR) && 1059 (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { 1060 /* KVM PV hypercalls */ 1061 kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); 1062 r = RESUME_GUEST; 1063 } else { 1064 /* Guest syscalls */ 1065 vcpu->stat.syscall_exits++; 1066 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1067 r = RESUME_GUEST; 1068 } 1069 break; 1070 case BOOK3S_INTERRUPT_FP_UNAVAIL: 1071 case BOOK3S_INTERRUPT_ALTIVEC: 1072 case BOOK3S_INTERRUPT_VSX: 1073 { 1074 int ext_msr = 0; 1075 1076 switch (exit_nr) { 1077 case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; 1078 case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; 1079 case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; 1080 } 1081 1082 switch (kvmppc_check_ext(vcpu, exit_nr)) { 1083 case EMULATE_DONE: 1084 /* everything ok - let's enable the ext */ 1085 r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); 1086 break; 1087 case EMULATE_FAIL: 1088 /* we need to emulate this instruction */ 1089 goto program_interrupt; 1090 break; 1091 default: 1092 /* nothing to worry about - go again */ 1093 break; 1094 } 1095 break; 1096 } 1097 case BOOK3S_INTERRUPT_ALIGNMENT: 1098 if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { 1099 u32 last_inst = kvmppc_get_last_inst(vcpu); 1100 u32 dsisr; 1101 u64 dar; 1102 1103 dsisr = kvmppc_alignment_dsisr(vcpu, last_inst); 1104 dar = kvmppc_alignment_dar(vcpu, last_inst); 1105 1106 kvmppc_set_dsisr(vcpu, dsisr); 1107 kvmppc_set_dar(vcpu, dar); 1108 1109 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1110 } 1111 r = RESUME_GUEST; 1112 break; 1113 #ifdef CONFIG_PPC_BOOK3S_64 1114 case BOOK3S_INTERRUPT_FAC_UNAVAIL: 1115 kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); 1116 r = RESUME_GUEST; 1117 break; 1118 #endif 1119 case BOOK3S_INTERRUPT_MACHINE_CHECK: 1120 case BOOK3S_INTERRUPT_TRACE: 1121 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1122 r = RESUME_GUEST; 1123 break; 1124 default: 1125 { 1126 ulong shadow_srr1 = vcpu->arch.shadow_srr1; 1127 /* Ugh - bork here! What did we get? */ 1128 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", 1129 exit_nr, kvmppc_get_pc(vcpu), shadow_srr1); 1130 r = RESUME_HOST; 1131 BUG(); 1132 break; 1133 } 1134 } 1135 1136 if (!(r & RESUME_HOST)) { 1137 /* To avoid clobbering exit_reason, only check for signals if 1138 * we aren't already exiting to userspace for some other 1139 * reason. */ 1140 1141 /* 1142 * Interrupts could be timers for the guest which we have to 1143 * inject again, so let's postpone them until we're in the guest 1144 * and if we really did time things so badly, then we just exit 1145 * again due to a host external interrupt. 
1146 */ 1147 s = kvmppc_prepare_to_enter(vcpu); 1148 if (s <= 0) 1149 r = s; 1150 else { 1151 /* interrupts now hard-disabled */ 1152 kvmppc_fix_ee_before_entry(); 1153 } 1154 1155 kvmppc_handle_lost_ext(vcpu); 1156 } 1157 1158 trace_kvm_book3s_reenter(r, vcpu); 1159 1160 return r; 1161 } 1162 1163 static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu, 1164 struct kvm_sregs *sregs) 1165 { 1166 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1167 int i; 1168 1169 sregs->pvr = vcpu->arch.pvr; 1170 1171 sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1; 1172 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1173 for (i = 0; i < 64; i++) { 1174 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i; 1175 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv; 1176 } 1177 } else { 1178 for (i = 0; i < 16; i++) 1179 sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i); 1180 1181 for (i = 0; i < 8; i++) { 1182 sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; 1183 sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; 1184 } 1185 } 1186 1187 return 0; 1188 } 1189 1190 static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu, 1191 struct kvm_sregs *sregs) 1192 { 1193 struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); 1194 int i; 1195 1196 kvmppc_set_pvr_pr(vcpu, sregs->pvr); 1197 1198 vcpu3s->sdr1 = sregs->u.s.sdr1; 1199 if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) { 1200 for (i = 0; i < 64; i++) { 1201 vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv, 1202 sregs->u.s.ppc64.slb[i].slbe); 1203 } 1204 } else { 1205 for (i = 0; i < 16; i++) { 1206 vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]); 1207 } 1208 for (i = 0; i < 8; i++) { 1209 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false, 1210 (u32)sregs->u.s.ppc32.ibat[i]); 1211 kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true, 1212 (u32)(sregs->u.s.ppc32.ibat[i] >> 32)); 1213 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false, 1214 (u32)sregs->u.s.ppc32.dbat[i]); 1215 kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true, 1216 (u32)(sregs->u.s.ppc32.dbat[i] >> 32)); 1217 } 1218 } 1219 1220 /* Flush the MMU after messing with the segments */ 1221 kvmppc_mmu_pte_flush(vcpu, 0, 0); 1222 1223 return 0; 1224 } 1225 1226 static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 1227 union kvmppc_one_reg *val) 1228 { 1229 int r = 0; 1230 1231 switch (id) { 1232 case KVM_REG_PPC_HIOR: 1233 *val = get_reg_val(id, to_book3s(vcpu)->hior); 1234 break; 1235 case KVM_REG_PPC_LPCR: 1236 /* 1237 * We are only interested in the LPCR_ILE bit 1238 */ 1239 if (vcpu->arch.intr_msr & MSR_LE) 1240 *val = get_reg_val(id, LPCR_ILE); 1241 else 1242 *val = get_reg_val(id, 0); 1243 break; 1244 default: 1245 r = -EINVAL; 1246 break; 1247 } 1248 1249 return r; 1250 } 1251 1252 static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr) 1253 { 1254 if (new_lpcr & LPCR_ILE) 1255 vcpu->arch.intr_msr |= MSR_LE; 1256 else 1257 vcpu->arch.intr_msr &= ~MSR_LE; 1258 } 1259 1260 static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, 1261 union kvmppc_one_reg *val) 1262 { 1263 int r = 0; 1264 1265 switch (id) { 1266 case KVM_REG_PPC_HIOR: 1267 to_book3s(vcpu)->hior = set_reg_val(id, *val); 1268 to_book3s(vcpu)->hior_explicit = true; 1269 break; 1270 case KVM_REG_PPC_LPCR: 1271 kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); 1272 break; 1273 default: 1274 r = -EINVAL; 1275 break; 1276 } 1277 1278 return r; 1279 } 1280 1281 static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, 1282 unsigned int id) 1283 { 1284 struct kvmppc_vcpu_book3s *vcpu_book3s; 1285 
struct kvm_vcpu *vcpu; 1286 int err = -ENOMEM; 1287 unsigned long p; 1288 1289 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1290 if (!vcpu) 1291 goto out; 1292 1293 vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s)); 1294 if (!vcpu_book3s) 1295 goto free_vcpu; 1296 vcpu->arch.book3s = vcpu_book3s; 1297 1298 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1299 vcpu->arch.shadow_vcpu = 1300 kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL); 1301 if (!vcpu->arch.shadow_vcpu) 1302 goto free_vcpu3s; 1303 #endif 1304 1305 err = kvm_vcpu_init(vcpu, kvm, id); 1306 if (err) 1307 goto free_shadow_vcpu; 1308 1309 err = -ENOMEM; 1310 p = __get_free_page(GFP_KERNEL|__GFP_ZERO); 1311 if (!p) 1312 goto uninit_vcpu; 1313 /* the real shared page fills the last 4k of our page */ 1314 vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); 1315 #ifdef CONFIG_PPC_BOOK3S_64 1316 /* Always start the shared struct in native endian mode */ 1317 #ifdef __BIG_ENDIAN__ 1318 vcpu->arch.shared_big_endian = true; 1319 #else 1320 vcpu->arch.shared_big_endian = false; 1321 #endif 1322 1323 /* 1324 * Default to the same as the host if we're on sufficiently 1325 * recent machine that we have 1TB segments; 1326 * otherwise default to PPC970FX. 1327 */ 1328 vcpu->arch.pvr = 0x3C0301; 1329 if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) 1330 vcpu->arch.pvr = mfspr(SPRN_PVR); 1331 vcpu->arch.intr_msr = MSR_SF; 1332 #else 1333 /* default to book3s_32 (750) */ 1334 vcpu->arch.pvr = 0x84202; 1335 #endif 1336 kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr); 1337 vcpu->arch.slb_nr = 64; 1338 1339 vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE; 1340 1341 err = kvmppc_mmu_init(vcpu); 1342 if (err < 0) 1343 goto uninit_vcpu; 1344 1345 return vcpu; 1346 1347 uninit_vcpu: 1348 kvm_vcpu_uninit(vcpu); 1349 free_shadow_vcpu: 1350 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1351 kfree(vcpu->arch.shadow_vcpu); 1352 free_vcpu3s: 1353 #endif 1354 vfree(vcpu_book3s); 1355 free_vcpu: 1356 kmem_cache_free(kvm_vcpu_cache, vcpu); 1357 out: 1358 return ERR_PTR(err); 1359 } 1360 1361 static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) 1362 { 1363 struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); 1364 1365 free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); 1366 kvm_vcpu_uninit(vcpu); 1367 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER 1368 kfree(vcpu->arch.shadow_vcpu); 1369 #endif 1370 vfree(vcpu_book3s); 1371 kmem_cache_free(kvm_vcpu_cache, vcpu); 1372 } 1373 1374 static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1375 { 1376 int ret; 1377 #ifdef CONFIG_ALTIVEC 1378 unsigned long uninitialized_var(vrsave); 1379 #endif 1380 1381 /* Check if we can run the vcpu at all */ 1382 if (!vcpu->arch.sane) { 1383 kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1384 ret = -EINVAL; 1385 goto out; 1386 } 1387 1388 /* 1389 * Interrupts could be timers for the guest which we have to inject 1390 * again, so let's postpone them until we're in the guest and if we 1391 * really did time things so badly, then we just exit again due to 1392 * a host external interrupt. 
1393 */ 1394 ret = kvmppc_prepare_to_enter(vcpu); 1395 if (ret <= 0) 1396 goto out; 1397 /* interrupts now hard-disabled */ 1398 1399 /* Save FPU state in thread_struct */ 1400 if (current->thread.regs->msr & MSR_FP) 1401 giveup_fpu(current); 1402 1403 #ifdef CONFIG_ALTIVEC 1404 /* Save Altivec state in thread_struct */ 1405 if (current->thread.regs->msr & MSR_VEC) 1406 giveup_altivec(current); 1407 #endif 1408 1409 #ifdef CONFIG_VSX 1410 /* Save VSX state in thread_struct */ 1411 if (current->thread.regs->msr & MSR_VSX) 1412 __giveup_vsx(current); 1413 #endif 1414 1415 /* Preload FPU if it's enabled */ 1416 if (kvmppc_get_msr(vcpu) & MSR_FP) 1417 kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); 1418 1419 kvmppc_fix_ee_before_entry(); 1420 1421 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 1422 1423 /* No need for kvm_guest_exit. It's done in handle_exit. 1424 We also get here with interrupts enabled. */ 1425 1426 /* Make sure we save the guest FPU/Altivec/VSX state */ 1427 kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); 1428 1429 /* Make sure we save the guest TAR/EBB/DSCR state */ 1430 kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); 1431 1432 out: 1433 vcpu->mode = OUTSIDE_GUEST_MODE; 1434 return ret; 1435 } 1436 1437 /* 1438 * Get (and clear) the dirty memory log for a memory slot. 1439 */ 1440 static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm, 1441 struct kvm_dirty_log *log) 1442 { 1443 struct kvm_memory_slot *memslot; 1444 struct kvm_vcpu *vcpu; 1445 ulong ga, ga_end; 1446 int is_dirty = 0; 1447 int r; 1448 unsigned long n; 1449 1450 mutex_lock(&kvm->slots_lock); 1451 1452 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1453 if (r) 1454 goto out; 1455 1456 /* If nothing is dirty, don't bother messing with page tables. */ 1457 if (is_dirty) { 1458 memslot = id_to_memslot(kvm->memslots, log->slot); 1459 1460 ga = memslot->base_gfn << PAGE_SHIFT; 1461 ga_end = ga + (memslot->npages << PAGE_SHIFT); 1462 1463 kvm_for_each_vcpu(n, vcpu, kvm) 1464 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); 1465 1466 n = kvm_dirty_bitmap_bytes(memslot); 1467 memset(memslot->dirty_bitmap, 0, n); 1468 } 1469 1470 r = 0; 1471 out: 1472 mutex_unlock(&kvm->slots_lock); 1473 return r; 1474 } 1475 1476 static void kvmppc_core_flush_memslot_pr(struct kvm *kvm, 1477 struct kvm_memory_slot *memslot) 1478 { 1479 return; 1480 } 1481 1482 static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, 1483 struct kvm_memory_slot *memslot, 1484 struct kvm_userspace_memory_region *mem) 1485 { 1486 return 0; 1487 } 1488 1489 static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, 1490 struct kvm_userspace_memory_region *mem, 1491 const struct kvm_memory_slot *old) 1492 { 1493 return; 1494 } 1495 1496 static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free, 1497 struct kvm_memory_slot *dont) 1498 { 1499 return; 1500 } 1501 1502 static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot, 1503 unsigned long npages) 1504 { 1505 return 0; 1506 } 1507 1508 1509 #ifdef CONFIG_PPC64 1510 static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1511 struct kvm_ppc_smmu_info *info) 1512 { 1513 long int i; 1514 struct kvm_vcpu *vcpu; 1515 1516 info->flags = 0; 1517 1518 /* SLB is always 64 entries */ 1519 info->slb_size = 64; 1520 1521 /* Standard 4k base page size segment */ 1522 info->sps[0].page_shift = 12; 1523 info->sps[0].slb_enc = 0; 1524 info->sps[0].enc[0].page_shift = 12; 1525 info->sps[0].enc[0].pte_enc = 0; 1526 1527 /* 1528 * 64k large page size. 
1529 * We only want to put this in if the CPUs we're emulating 1530 * support it, but unfortunately we don't have a vcpu easily 1531 * to hand here to test. Just pick the first vcpu, and if 1532 * that doesn't exist yet, report the minimum capability, 1533 * i.e., no 64k pages. 1534 * 1T segment support goes along with 64k pages. 1535 */ 1536 i = 1; 1537 vcpu = kvm_get_vcpu(kvm, 0); 1538 if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) { 1539 info->flags = KVM_PPC_1T_SEGMENTS; 1540 info->sps[i].page_shift = 16; 1541 info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01; 1542 info->sps[i].enc[0].page_shift = 16; 1543 info->sps[i].enc[0].pte_enc = 1; 1544 ++i; 1545 } 1546 1547 /* Standard 16M large page size segment */ 1548 info->sps[i].page_shift = 24; 1549 info->sps[i].slb_enc = SLB_VSID_L; 1550 info->sps[i].enc[0].page_shift = 24; 1551 info->sps[i].enc[0].pte_enc = 0; 1552 1553 return 0; 1554 } 1555 #else 1556 static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, 1557 struct kvm_ppc_smmu_info *info) 1558 { 1559 /* We should not get called */ 1560 BUG(); 1561 } 1562 #endif /* CONFIG_PPC64 */ 1563 1564 static unsigned int kvm_global_user_count = 0; 1565 static DEFINE_SPINLOCK(kvm_global_user_count_lock); 1566 1567 static int kvmppc_core_init_vm_pr(struct kvm *kvm) 1568 { 1569 mutex_init(&kvm->arch.hpt_mutex); 1570 1571 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1572 spin_lock(&kvm_global_user_count_lock); 1573 if (++kvm_global_user_count == 1) 1574 pSeries_disable_reloc_on_exc(); 1575 spin_unlock(&kvm_global_user_count_lock); 1576 } 1577 return 0; 1578 } 1579 1580 static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) 1581 { 1582 #ifdef CONFIG_PPC64 1583 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables)); 1584 #endif 1585 1586 if (firmware_has_feature(FW_FEATURE_SET_MODE)) { 1587 spin_lock(&kvm_global_user_count_lock); 1588 BUG_ON(kvm_global_user_count == 0); 1589 if (--kvm_global_user_count == 0) 1590 pSeries_enable_reloc_on_exc(); 1591 spin_unlock(&kvm_global_user_count_lock); 1592 } 1593 } 1594 1595 static int kvmppc_core_check_processor_compat_pr(void) 1596 { 1597 /* we are always compatible */ 1598 return 0; 1599 } 1600 1601 static long kvm_arch_vm_ioctl_pr(struct file *filp, 1602 unsigned int ioctl, unsigned long arg) 1603 { 1604 return -ENOTTY; 1605 } 1606 1607 static struct kvmppc_ops kvm_ops_pr = { 1608 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, 1609 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, 1610 .get_one_reg = kvmppc_get_one_reg_pr, 1611 .set_one_reg = kvmppc_set_one_reg_pr, 1612 .vcpu_load = kvmppc_core_vcpu_load_pr, 1613 .vcpu_put = kvmppc_core_vcpu_put_pr, 1614 .set_msr = kvmppc_set_msr_pr, 1615 .vcpu_run = kvmppc_vcpu_run_pr, 1616 .vcpu_create = kvmppc_core_vcpu_create_pr, 1617 .vcpu_free = kvmppc_core_vcpu_free_pr, 1618 .check_requests = kvmppc_core_check_requests_pr, 1619 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr, 1620 .flush_memslot = kvmppc_core_flush_memslot_pr, 1621 .prepare_memory_region = kvmppc_core_prepare_memory_region_pr, 1622 .commit_memory_region = kvmppc_core_commit_memory_region_pr, 1623 .unmap_hva = kvm_unmap_hva_pr, 1624 .unmap_hva_range = kvm_unmap_hva_range_pr, 1625 .age_hva = kvm_age_hva_pr, 1626 .test_age_hva = kvm_test_age_hva_pr, 1627 .set_spte_hva = kvm_set_spte_hva_pr, 1628 .mmu_destroy = kvmppc_mmu_destroy_pr, 1629 .free_memslot = kvmppc_core_free_memslot_pr, 1630 .create_memslot = kvmppc_core_create_memslot_pr, 1631 .init_vm = kvmppc_core_init_vm_pr, 1632 .destroy_vm = kvmppc_core_destroy_vm_pr, 1633 .get_smmu_info = 
kvm_vm_ioctl_get_smmu_info_pr, 1634 .emulate_op = kvmppc_core_emulate_op_pr, 1635 .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, 1636 .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, 1637 .fast_vcpu_kick = kvm_vcpu_kick, 1638 .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, 1639 }; 1640 1641 1642 int kvmppc_book3s_init_pr(void) 1643 { 1644 int r; 1645 1646 r = kvmppc_core_check_processor_compat_pr(); 1647 if (r < 0) 1648 return r; 1649 1650 kvm_ops_pr.owner = THIS_MODULE; 1651 kvmppc_pr_ops = &kvm_ops_pr; 1652 1653 r = kvmppc_mmu_hpte_sysinit(); 1654 return r; 1655 } 1656 1657 void kvmppc_book3s_exit_pr(void) 1658 { 1659 kvmppc_pr_ops = NULL; 1660 kvmppc_mmu_hpte_sysexit(); 1661 } 1662 1663 /* 1664 * We only support separate modules for book3s 64 1665 */ 1666 #ifdef CONFIG_PPC_BOOK3S_64 1667 1668 module_init(kvmppc_book3s_init_pr); 1669 module_exit(kvmppc_book3s_exit_pr); 1670 1671 MODULE_LICENSE("GPL"); 1672 MODULE_ALIAS_MISCDEV(KVM_MINOR); 1673 MODULE_ALIAS("devname:kvm"); 1674 #endif 1675