/*
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *    Paul Mackerras <paulus@samba.org>
 *
 * Description:
 * Functions relating to running KVM on Book 3S processors where
 * we don't have access to hypervisor mode, and we run the guest
 * in problem state (user mode).
 *
 * This file is derived from arch/powerpc/kvm/44x.c,
 * by Hollis Blanchard <hollisb@us.ibm.com>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/slab.h>

#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/miscdevice.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_pr.h"

/* #define EXIT_DEBUG */
/* #define DEBUG_EXT */

static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
			     ulong msr);
static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);

/* Some compatibility defines */
#ifdef CONFIG_PPC_BOOK3S_32
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
#endif

static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
	svcpu->in_use = 0;
	svcpu_put(svcpu);
#endif
	vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
	current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu;
#endif
}

static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PPC_BOOK3S_64
	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
	if (svcpu->in_use) {
		kvmppc_copy_from_svcpu(vcpu, svcpu);
	}
	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
	svcpu_put(svcpu);
#endif

	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
	vcpu->cpu = -1;
}

/* Copy data needed by real-mode code from vcpu to shadow vcpu */
void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
			  struct kvm_vcpu *vcpu)
{
	svcpu->gpr[0] = vcpu->arch.gpr[0];
	svcpu->gpr[1] = vcpu->arch.gpr[1];
	svcpu->gpr[2] = vcpu->arch.gpr[2];
	svcpu->gpr[3] = vcpu->arch.gpr[3];
	svcpu->gpr[4] = vcpu->arch.gpr[4];
	svcpu->gpr[5] = vcpu->arch.gpr[5];
	svcpu->gpr[6] = vcpu->arch.gpr[6];
	svcpu->gpr[7] = vcpu->arch.gpr[7];
	svcpu->gpr[8] = vcpu->arch.gpr[8];
	svcpu->gpr[9] = vcpu->arch.gpr[9];
	svcpu->gpr[10] = vcpu->arch.gpr[10];
	svcpu->gpr[11] = vcpu->arch.gpr[11];
	svcpu->gpr[12] = vcpu->arch.gpr[12];
	svcpu->gpr[13] = vcpu->arch.gpr[13];
	svcpu->cr  = vcpu->arch.cr;
	svcpu->xer = vcpu->arch.xer;
	svcpu->ctr = vcpu->arch.ctr;
	svcpu->lr  = vcpu->arch.lr;
	svcpu->pc  = vcpu->arch.pc;
#ifdef CONFIG_PPC_BOOK3S_64
	svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
#endif
	svcpu->in_use = true;
}

/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
			    struct kvmppc_book3s_shadow_vcpu *svcpu)
{
	/*
	 * vcpu_put would just call us again because in_use hasn't
	 * been updated yet.
	 */
	preempt_disable();

	/*
	 * Maybe we were already preempted and synced the svcpu from
	 * our preempt notifiers. Don't bother touching this svcpu then.
	 */
	if (!svcpu->in_use)
		goto out;

	vcpu->arch.gpr[0] = svcpu->gpr[0];
	vcpu->arch.gpr[1] = svcpu->gpr[1];
	vcpu->arch.gpr[2] = svcpu->gpr[2];
	vcpu->arch.gpr[3] = svcpu->gpr[3];
	vcpu->arch.gpr[4] = svcpu->gpr[4];
	vcpu->arch.gpr[5] = svcpu->gpr[5];
	vcpu->arch.gpr[6] = svcpu->gpr[6];
	vcpu->arch.gpr[7] = svcpu->gpr[7];
	vcpu->arch.gpr[8] = svcpu->gpr[8];
	vcpu->arch.gpr[9] = svcpu->gpr[9];
	vcpu->arch.gpr[10] = svcpu->gpr[10];
	vcpu->arch.gpr[11] = svcpu->gpr[11];
	vcpu->arch.gpr[12] = svcpu->gpr[12];
	vcpu->arch.gpr[13] = svcpu->gpr[13];
	vcpu->arch.cr  = svcpu->cr;
	vcpu->arch.xer = svcpu->xer;
	vcpu->arch.ctr = svcpu->ctr;
	vcpu->arch.lr  = svcpu->lr;
	vcpu->arch.pc  = svcpu->pc;
	vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
	vcpu->arch.fault_dar   = svcpu->fault_dar;
	vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
	vcpu->arch.last_inst   = svcpu->last_inst;
#ifdef CONFIG_PPC_BOOK3S_64
	vcpu->arch.shadow_fscr = svcpu->shadow_fscr;
#endif
	svcpu->in_use = false;

out:
	preempt_enable();
}
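
/*
 * A note on the ordering above (illustrative, not from the original
 * comments): kvmppc_core_vcpu_put_pr() only syncs the svcpu back when
 * in_use is still set, and this function clears in_use with preemption
 * disabled, so a preempt notifier firing between guest exit and the
 * explicit copy cannot write the same state back twice.
 */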

static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
{
	int r = 1; /* Indicate we want to get back into the guest */

	/* We misuse TLB_FLUSH to indicate that we want to clear
	   all shadow cache entries */
	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
		kvmppc_mmu_pte_flush(vcpu, 0, 0);

	return r;
}

/************* MMU Notifiers *************/
static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
			     unsigned long end)
{
	long i;
	struct kvm_vcpu *vcpu;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;

	slots = kvm_memslots(kvm);
	kvm_for_each_memslot(memslot, slots) {
		unsigned long hva_start, hva_end;
		gfn_t gfn, gfn_end;

		hva_start = max(start, memslot->userspace_addr);
		hva_end = min(end, memslot->userspace_addr +
					(memslot->npages << PAGE_SHIFT));
		if (hva_start >= hva_end)
			continue;
		/*
		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
		 * {gfn, gfn+1, ..., gfn_end-1}.
		 */
		gfn = hva_to_gfn_memslot(hva_start, memslot);
		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
		kvm_for_each_vcpu(i, vcpu, kvm)
			kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
					      gfn_end << PAGE_SHIFT);
	}
}

static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
{
	trace_kvm_unmap_hva(hva);

	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);

	return 0;
}

static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
				  unsigned long end)
{
	do_kvm_unmap_hva(kvm, start, end);

	return 0;
}

static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
{
	/* XXX could be more clever ;) */
	return 0;
}

static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
{
	/* The page will get remapped properly on its next fault */
	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
}

/*****************************************/

static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
	ulong guest_msr = kvmppc_get_msr(vcpu);
	ulong smsr = guest_msr;

	/* Guest MSR values */
	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
	/* Process MSR values */
	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
	/* External provider bits (FP/VEC/VSX) the guest has enabled */
	smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
	/* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
	smsr |= MSR_ISF | MSR_HV;
#endif
	vcpu->arch.shadow_msr = smsr;
}
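
/*
 * Worked example (illustrative): a 64-bit guest running with
 * MSR_SF | MSR_FP that owns the FPU (MSR_FP in guest_owned_ext) gets a
 * shadow MSR of MSR_SF | MSR_FP | MSR_ME | MSR_RI | MSR_IR | MSR_DR |
 * MSR_PR | MSR_EE (plus MSR_ISF | MSR_HV on 64-bit hosts), i.e.
 * whatever the guest thinks, it really executes in problem state with
 * translation enabled.
 */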

static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
{
	ulong old_msr = kvmppc_get_msr(vcpu);

#ifdef EXIT_DEBUG
	printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif

	msr &= to_book3s(vcpu)->msr_mask;
	kvmppc_set_msr_fast(vcpu, msr);
	kvmppc_recalc_shadow_msr(vcpu);

	if (msr & MSR_POW) {
		if (!vcpu->arch.pending_exceptions) {
			kvm_vcpu_block(vcpu);
			clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
			vcpu->stat.halt_wakeup++;

			/* Unset POW bit after we woke up */
			msr &= ~MSR_POW;
			kvmppc_set_msr_fast(vcpu, msr);
		}
	}

	if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) !=
		   (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
		kvmppc_mmu_flush_segments(vcpu);
		kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));

		/* Preload magic page segment when in kernel mode */
		if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
			struct kvm_vcpu_arch *a = &vcpu->arch;

			if (msr & MSR_DR)
				kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
			else
				kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
		}
	}

	/*
	 * When switching from 32 to 64-bit, we may have a stale 32-bit
	 * magic page around; we need to flush it. Typically the 32-bit
	 * magic page will be instantiated when calling into RTAS. Note:
	 * we assume that such a transition only happens while in kernel
	 * mode, i.e., we never transition from user 32-bit to kernel
	 * 64-bit with a 32-bit magic page around.
	 */
	if (vcpu->arch.magic_page_pa &&
	    !(old_msr & MSR_PR) && !(old_msr & MSR_SF) && (msr & MSR_SF)) {
		/* going from RTAS to normal kernel code */
		kvmppc_mmu_pte_flush(vcpu, (uint32_t)vcpu->arch.magic_page_pa,
				     ~0xFFFUL);
	}

	/* Preload FPU if it's enabled */
	if (kvmppc_get_msr(vcpu) & MSR_FP)
		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
}

void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
{
	u32 host_pvr;

	vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
	vcpu->arch.pvr = pvr;
#ifdef CONFIG_PPC_BOOK3S_64
	if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
		kvmppc_mmu_book3s_64_init(vcpu);
		if (!to_book3s(vcpu)->hior_explicit)
			to_book3s(vcpu)->hior = 0xfff00000;
		to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
		vcpu->arch.cpu_type = KVM_CPU_3S_64;
	} else
#endif
	{
		kvmppc_mmu_book3s_32_init(vcpu);
		if (!to_book3s(vcpu)->hior_explicit)
			to_book3s(vcpu)->hior = 0;
		to_book3s(vcpu)->msr_mask = 0xffffffffULL;
		vcpu->arch.cpu_type = KVM_CPU_3S_32;
	}

	kvmppc_sanity_check(vcpu);

	/* If we are at hypervisor level on a 970, we can tell the CPU to
	 * treat DCBZ as a 32-byte store */
	vcpu->arch.hflags &= ~BOOK3S_HFLAG_DCBZ32;
	if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV) &&
	    !strcmp(cur_cpu_spec->platform, "ppc970"))
		vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;

	/* Cell performs badly if MSR_FEx are set. So let's hope nobody
	   really needs them in a VM on Cell and force-disable them. */
	if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
		to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);

	/*
	 * If they're asking for POWER6 or later, set the flag
	 * indicating that we can do multiple large page sizes
	 * and 1TB segments.
	 * Also set the flag that indicates that tlbie has the large
	 * page bit in the RB operand instead of the instruction.
	 */
	switch (PVR_VER(pvr)) {
	case PVR_POWER6:
	case PVR_POWER7:
	case PVR_POWER7p:
	case PVR_POWER8:
		vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
			BOOK3S_HFLAG_NEW_TLBIE;
		break;
	}

#ifdef CONFIG_PPC_BOOK3S_32
	/* 32-bit Book3s always has a 32-byte dcbz */
	vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
#endif

	/* On some CPUs we can execute paired single operations natively */
	asm("mfpvr %0" : "=r"(host_pvr));
	switch (host_pvr) {
	case 0x00080200:	/* lonestar 2.0 */
	case 0x00088202:	/* lonestar 2.2 */
	case 0x70000100:	/* gekko 1.0 */
	case 0x00080100:	/* gekko 2.0 */
	case 0x00083203:	/* gekko 2.3a */
	case 0x00083213:	/* gekko 2.3b */
	case 0x00083204:	/* gekko 2.4 */
	case 0x00083214:	/* gekko 2.4e (8SE) - retail HW2 */
	case 0x00087200:	/* broadway */
		vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
		/* Enable HID2.PSE - in case we need it later */
		mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
	}
}

/* Book3s_32 CPUs always have a 32-byte cache line size, which Linux assumes.
 * To make Book3s_32 Linux work on Book3s_64, we have to make sure we trap
 * dcbz and emulate a 32-byte dcbz length.
 *
 * The Book3s_64 designers also anticipated this case and implemented a
 * special bit in the HID5 register, which is a hypervisor resource.
 * Thus we can't use it.
 *
 * My approach here is to patch the dcbz instruction on executing pages.
 */
static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
{
	struct page *hpage;
	u64 hpage_offset;
	u32 *page;
	int i;

	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
	if (is_error_page(hpage))
		return;

	hpage_offset = pte->raddr & ~PAGE_MASK;
	hpage_offset &= ~0xFFFULL;
	hpage_offset /= 4;

	get_page(hpage);
	page = kmap_atomic(hpage);

	/* patch dcbz into reserved instruction, so we trap */
	for (i = hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
		if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
			page[i] &= cpu_to_be32(0xfffffff7);

	kunmap_atomic(page);
	put_page(hpage);
}
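
/*
 * Example of the transformation above (illustrative): with the usual
 * encoding INS_DCBZ == 0x7c0007ec, clearing bit 0x8 turns it into
 * 0x7c0007e4, which is not a defined instruction. Executing it raises
 * a program interrupt, which kvmppc_handle_exit_pr() recognizes via the
 * same masked comparison and then emulates as a 32-byte dcbz.
 */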

static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	ulong mp_pa = vcpu->arch.magic_page_pa;

	if (!(kvmppc_get_msr(vcpu) & MSR_SF))
		mp_pa = (uint32_t)mp_pa;

	if (unlikely(mp_pa) &&
	    unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
		return 1;
	}

	return kvm_is_visible_gfn(vcpu->kvm, gfn);
}

int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
			    ulong eaddr, int vec)
{
	bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
	bool iswrite = false;
	int r = RESUME_GUEST;
	int relocated;
	int page_found = 0;
	struct kvmppc_pte pte;
	bool is_mmio = false;
	bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
	bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
	u64 vsid;

	relocated = data ? dr : ir;
	if (data && (vcpu->arch.fault_dsisr & DSISR_ISSTORE))
		iswrite = true;

	/* Resolve real address if translation turned on */
	if (relocated) {
		page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data, iswrite);
	} else {
		pte.may_execute = true;
		pte.may_read = true;
		pte.may_write = true;
		pte.raddr = eaddr & KVM_PAM;
		pte.eaddr = eaddr;
		pte.vpage = eaddr >> 12;
		pte.page_size = MMU_PAGE_64K;
	}

	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
	case 0:
		pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
		break;
	case MSR_DR:
	case MSR_IR:
		vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);

		if ((kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) == MSR_DR)
			pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
		else
			pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
		pte.vpage |= vsid;

		if (vsid == -1)
			page_found = -EINVAL;
		break;
	}

	if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
	   (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
		/*
		 * If we do the dcbz hack, we have to NX on every execution,
		 * so we can patch the executing code. This renders our guest
		 * NX-less.
		 */
		pte.may_execute = !data;
	}

	if (page_found == -ENOENT) {
		/* Page not found in guest PTE entries */
		u64 ssrr1 = vcpu->arch.shadow_srr1;
		u64 msr = kvmppc_get_msr(vcpu);
		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
		kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
		kvmppc_book3s_queue_irqprio(vcpu, vec);
	} else if (page_found == -EPERM) {
		/* Storage protection */
		u32 dsisr = vcpu->arch.fault_dsisr;
		u64 ssrr1 = vcpu->arch.shadow_srr1;
		u64 msr = kvmppc_get_msr(vcpu);
		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
		dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
		kvmppc_set_dsisr(vcpu, dsisr);
		kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
		kvmppc_book3s_queue_irqprio(vcpu, vec);
	} else if (page_found == -EINVAL) {
		/* Page not found in guest SLB */
		kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
		kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
	} else if (!is_mmio &&
		   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
		if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
			/*
			 * There is already a host HPTE there, presumably
			 * a read-only one for a page the guest thinks
			 * is writable, so get rid of it first.
			 */
			kvmppc_mmu_unmap_page(vcpu, &pte);
		}
		/* The guest's PTE is not mapped yet. Map on the host */
		kvmppc_mmu_map_page(vcpu, &pte, iswrite);
		if (data)
			vcpu->stat.sp_storage++;
		else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
			 (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32)))
			kvmppc_patch_dcbz(vcpu, &pte);
	} else {
		/* MMIO */
		vcpu->stat.mmio_exits++;
		vcpu->arch.paddr_accessed = pte.raddr;
		vcpu->arch.vaddr_accessed = pte.eaddr;
		r = kvmppc_emulate_mmio(run, vcpu);
		if (r == RESUME_HOST_NV)
			r = RESUME_HOST;
	}

	return r;
}

static inline int get_fpr_index(int i)
{
	return i * TS_FPRWIDTH;
}

/* Give up external provider (FPU, Altivec, VSX) */
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
	struct thread_struct *t = &current->thread;

	/*
	 * VSX instructions can access FP and vector registers, so if
	 * we are giving up VSX, make sure we give up FP and VMX as well.
	 */
	if (msr & MSR_VSX)
		msr |= MSR_FP | MSR_VEC;

	msr &= vcpu->arch.guest_owned_ext;
	if (!msr)
		return;

#ifdef DEBUG_EXT
	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif

	if (msr & MSR_FP) {
		/*
		 * Note that on CPUs with VSX, giveup_fpu stores
		 * both the traditional FP registers and the added VSX
		 * registers into thread.fp_state.fpr[].
		 */
		if (t->regs->msr & MSR_FP)
			giveup_fpu(current);
		t->fp_save_area = NULL;
	}

#ifdef CONFIG_ALTIVEC
	if (msr & MSR_VEC) {
		if (current->thread.regs->msr & MSR_VEC)
			giveup_altivec(current);
		t->vr_save_area = NULL;
	}
#endif

	vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
	kvmppc_recalc_shadow_msr(vcpu);
}

/* Give up facility (TAR / EBB / DSCR) */
static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
{
#ifdef CONFIG_PPC_BOOK3S_64
	if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
		/* Facility not available to the guest, ignore giveup request */
		return;
	}

	switch (fac) {
	case FSCR_TAR_LG:
		vcpu->arch.tar = mfspr(SPRN_TAR);
		mtspr(SPRN_TAR, current->thread.tar);
		vcpu->arch.shadow_fscr &= ~FSCR_TAR;
		break;
	}
#endif
}

static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
{
	ulong srr0 = kvmppc_get_pc(vcpu);
	u32 last_inst = kvmppc_get_last_inst(vcpu);
	int ret;

	ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
	if (ret == -ENOENT) {
		ulong msr = kvmppc_get_msr(vcpu);

		msr = kvmppc_set_field(msr, 33, 33, 1);
		msr = kvmppc_set_field(msr, 34, 36, 0);
		msr = kvmppc_set_field(msr, 42, 47, 0);
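		/*
		 * Illustrative note: the field positions above are IBM bit
		 * numbers in the 64-bit MSR/SRR1 image, so setting bit 33
		 * corresponds to 0x40000000 ("translation not found") while
		 * bits 34:36 and 42:47 are cleared, mimicking the SRR1
		 * status an instruction storage interrupt would report for
		 * a hash-table miss.
		 */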
		kvmppc_set_msr_fast(vcpu, msr);
		kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
		return EMULATE_AGAIN;
	}

	return EMULATE_DONE;
}

static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
	/* Need to do paired single emulation? */
	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
		return EMULATE_DONE;

	/* Read out the instruction */
	if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
		/* Need to emulate */
		return EMULATE_FAIL;

	return EMULATE_AGAIN;
}

/* Handle external providers (FPU, Altivec, VSX) */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
			     ulong msr)
{
	struct thread_struct *t = &current->thread;

	/* When we have paired singles, we emulate in software */
	if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
		return RESUME_GUEST;

	if (!(kvmppc_get_msr(vcpu) & msr)) {
		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		return RESUME_GUEST;
	}

	if (msr == MSR_VSX) {
		/* No VSX?  Give an illegal instruction interrupt */
#ifdef CONFIG_VSX
		if (!cpu_has_feature(CPU_FTR_VSX))
#endif
		{
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			return RESUME_GUEST;
		}

		/*
		 * We have to load up all the FP and VMX registers before
		 * we can let the guest use VSX instructions.
		 */
		msr = MSR_FP | MSR_VEC | MSR_VSX;
	}

	/* See if we already own all the ext(s) needed */
	msr &= ~vcpu->arch.guest_owned_ext;
	if (!msr)
		return RESUME_GUEST;

#ifdef DEBUG_EXT
	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif

	if (msr & MSR_FP) {
		preempt_disable();
		enable_kernel_fp();
		load_fp_state(&vcpu->arch.fp);
		t->fp_save_area = &vcpu->arch.fp;
		preempt_enable();
	}

	if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
		preempt_disable();
		enable_kernel_altivec();
		load_vr_state(&vcpu->arch.vr);
		t->vr_save_area = &vcpu->arch.vr;
		preempt_enable();
#endif
	}

	t->regs->msr |= msr;
	vcpu->arch.guest_owned_ext |= msr;
	kvmppc_recalc_shadow_msr(vcpu);

	return RESUME_GUEST;
}

/*
 * Kernel code using FP or VMX could have flushed guest state to
 * the thread_struct; if so, get it back now.
 */
static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
{
	unsigned long lost_ext;

	lost_ext = vcpu->arch.guest_owned_ext & ~current->thread.regs->msr;
	if (!lost_ext)
		return;

	if (lost_ext & MSR_FP) {
		preempt_disable();
		enable_kernel_fp();
		load_fp_state(&vcpu->arch.fp);
		preempt_enable();
	}
#ifdef CONFIG_ALTIVEC
	if (lost_ext & MSR_VEC) {
		preempt_disable();
		enable_kernel_altivec();
		load_vr_state(&vcpu->arch.vr);
		preempt_enable();
	}
#endif
	current->thread.regs->msr |= lost_ext;
}
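
/*
 * Illustrative scenario for the above: if the guest owns the FPU and the
 * host kernel then claims it (e.g. enable_kernel_fp() in a softirq calls
 * giveup_fpu(current)), MSR_FP disappears from current->thread.regs->msr
 * while guest_owned_ext still records it; the mismatch is detected here
 * and the guest FP state is reloaded before re-entry.
 */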

#ifdef CONFIG_PPC_BOOK3S_64

static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
{
	/* Inject the Interrupt Cause field and trigger a guest interrupt */
	vcpu->arch.fscr &= ~(0xffULL << 56);
	vcpu->arch.fscr |= (fac << 56);
	kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
}
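
/*
 * For illustration: fac is the FSCR bit number (e.g. FSCR_TAR_LG == 8),
 * which doubles as the Interrupt Cause value placed in the top byte of
 * the FSCR above, so the guest's facility-unavailable handler can tell
 * which facility triggered the interrupt.
 */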

static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
{
	enum emulation_result er = EMULATE_FAIL;

	if (!(kvmppc_get_msr(vcpu) & MSR_PR))
		er = kvmppc_emulate_instruction(vcpu->run, vcpu);

	if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
		/* Couldn't emulate, trigger interrupt in guest */
		kvmppc_trigger_fac_interrupt(vcpu, fac);
	}
}

/* Enable facilities (TAR, EBB, DSCR) for the guest */
static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
{
	bool guest_fac_enabled;
	BUG_ON(!cpu_has_feature(CPU_FTR_ARCH_207S));

	/*
	 * Not every facility is enabled by FSCR bits, check whether the
	 * guest has this facility enabled at all.
	 */
	switch (fac) {
	case FSCR_TAR_LG:
	case FSCR_EBB_LG:
		guest_fac_enabled = (vcpu->arch.fscr & (1ULL << fac));
		break;
	case FSCR_TM_LG:
		guest_fac_enabled = kvmppc_get_msr(vcpu) & MSR_TM;
		break;
	default:
		guest_fac_enabled = false;
		break;
	}

	if (!guest_fac_enabled) {
		/* Facility not enabled by the guest */
		kvmppc_trigger_fac_interrupt(vcpu, fac);
		return RESUME_GUEST;
	}

	switch (fac) {
	case FSCR_TAR_LG:
		/* TAR switching isn't lazy in Linux yet */
		current->thread.tar = mfspr(SPRN_TAR);
		mtspr(SPRN_TAR, vcpu->arch.tar);
		vcpu->arch.shadow_fscr |= FSCR_TAR;
		break;
	default:
		kvmppc_emulate_fac(vcpu, fac);
		break;
	}

	return RESUME_GUEST;
}
#endif

int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
			  unsigned int exit_nr)
{
	int r = RESUME_HOST;
	int s;

	vcpu->stat.sum_exits++;

	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;

	/* We get here with MSR.EE=1 */

	trace_kvm_exit(exit_nr, vcpu);
	kvm_guest_exit();

	switch (exit_nr) {
	case BOOK3S_INTERRUPT_INST_STORAGE:
	{
		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
		vcpu->stat.pf_instruc++;

#ifdef CONFIG_PPC_BOOK3S_32
		/* We mark segments as unused when invalidating them, so
		 * treat the respective fault as a segment fault. */
		{
			struct kvmppc_book3s_shadow_vcpu *svcpu;
			u32 sr;

			svcpu = svcpu_get(vcpu);
			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
			svcpu_put(svcpu);
			if (sr == SR_INVALID) {
				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
				r = RESUME_GUEST;
				break;
			}
		}
#endif

		/* only care about PTEG not found errors, but leave NX alone */
		if (shadow_srr1 & 0x40000000) {
			int idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
			srcu_read_unlock(&vcpu->kvm->srcu, idx);
			vcpu->stat.sp_instruc++;
		} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
			  (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
			/*
			 * XXX If we do the dcbz hack we use the NX bit to flush&patch the page,
			 *     so we can't use the NX bit inside the guest. Let's cross our
			 *     fingers that no guest that needs the dcbz hack also uses NX.
			 */
			kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
			r = RESUME_GUEST;
		} else {
			u64 msr = kvmppc_get_msr(vcpu);
			msr |= shadow_srr1 & 0x58000000;
			kvmppc_set_msr_fast(vcpu, msr);
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	}
	case BOOK3S_INTERRUPT_DATA_STORAGE:
	{
		ulong dar = kvmppc_get_fault_dar(vcpu);
		u32 fault_dsisr = vcpu->arch.fault_dsisr;
		vcpu->stat.pf_storage++;

#ifdef CONFIG_PPC_BOOK3S_32
		/* We mark segments as unused when invalidating them, so
		 * treat the respective fault as a segment fault. */
		{
			struct kvmppc_book3s_shadow_vcpu *svcpu;
			u32 sr;

			svcpu = svcpu_get(vcpu);
			sr = svcpu->sr[dar >> SID_SHIFT];
			svcpu_put(svcpu);
			if (sr == SR_INVALID) {
				kvmppc_mmu_map_segment(vcpu, dar);
				r = RESUME_GUEST;
				break;
			}
		}
#endif

		/*
		 * We need to handle missing shadow PTEs, and
		 * protection faults due to us mapping a page read-only
		 * when the guest thinks it is writable.
		 */
		if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
			int idx = srcu_read_lock(&vcpu->kvm->srcu);
			r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
			srcu_read_unlock(&vcpu->kvm->srcu, idx);
		} else {
			kvmppc_set_dar(vcpu, dar);
			kvmppc_set_dsisr(vcpu, fault_dsisr);
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	}
	case BOOK3S_INTERRUPT_DATA_SEGMENT:
		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
			kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
			kvmppc_book3s_queue_irqprio(vcpu,
				BOOK3S_INTERRUPT_DATA_SEGMENT);
		}
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_INST_SEGMENT:
		if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
			kvmppc_book3s_queue_irqprio(vcpu,
				BOOK3S_INTERRUPT_INST_SEGMENT);
		}
		r = RESUME_GUEST;
		break;
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_DECREMENTER:
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
	case BOOK3S_INTERRUPT_DOORBELL:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
	case BOOK3S_INTERRUPT_EXTERNAL_HV:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
	{
		enum emulation_result er;
		ulong flags;

program_interrupt:
		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;

		if (kvmppc_get_msr(vcpu) & MSR_PR) {
#ifdef EXIT_DEBUG
			printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
#endif
			if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
			    (INS_DCBZ & 0xfffffff7)) {
				kvmppc_core_queue_program(vcpu, flags);
				r = RESUME_GUEST;
				break;
			}
		}

		vcpu->stat.emulated_inst_exits++;
		er = kvmppc_emulate_instruction(run, vcpu);
		switch (er) {
		case EMULATE_DONE:
			r = RESUME_GUEST_NV;
			break;
		case EMULATE_AGAIN:
			r = RESUME_GUEST;
			break;
		case EMULATE_FAIL:
			printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
			       __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
			kvmppc_core_queue_program(vcpu, flags);
			r = RESUME_GUEST;
			break;
		case EMULATE_DO_MMIO:
			run->exit_reason = KVM_EXIT_MMIO;
			r = RESUME_HOST_NV;
			break;
		case EMULATE_EXIT_USER:
			r = RESUME_HOST_NV;
			break;
		default:
			BUG();
		}
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
		if (vcpu->arch.papr_enabled &&
		    (kvmppc_get_last_sc(vcpu) == 0x44000022) &&
		    !(kvmppc_get_msr(vcpu) & MSR_PR)) {
			/* SC 1 papr hypercalls */
			ulong cmd = kvmppc_get_gpr(vcpu, 3);
			int i;

#ifdef CONFIG_PPC_BOOK3S_64
			if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) {
				r = RESUME_GUEST;
				break;
			}
#endif

			run->papr_hcall.nr = cmd;
			for (i = 0; i < 9; ++i) {
				ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
				run->papr_hcall.args[i] = gpr;
			}
			run->exit_reason = KVM_EXIT_PAPR_HCALL;
			vcpu->arch.hcall_needed = 1;
			r = RESUME_HOST;
		} else if (vcpu->arch.osi_enabled &&
		    (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
		    (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
			/* MOL hypercalls */
			u64 *gprs = run->osi.gprs;
			int i;

			run->exit_reason = KVM_EXIT_OSI;
			for (i = 0; i < 32; i++)
				gprs[i] = kvmppc_get_gpr(vcpu, i);
			vcpu->arch.osi_needed = 1;
			r = RESUME_HOST_NV;
		} else if (!(kvmppc_get_msr(vcpu) & MSR_PR) &&
		    (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
			/* KVM PV hypercalls */
			kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
			r = RESUME_GUEST;
		} else {
			/* Guest syscalls */
			vcpu->stat.syscall_exits++;
			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
			r = RESUME_GUEST;
		}
		break;
	case BOOK3S_INTERRUPT_FP_UNAVAIL:
	case BOOK3S_INTERRUPT_ALTIVEC:
	case BOOK3S_INTERRUPT_VSX:
	{
		int ext_msr = 0;

		switch (exit_nr) {
		case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP;  break;
		case BOOK3S_INTERRUPT_ALTIVEC:    ext_msr = MSR_VEC; break;
		case BOOK3S_INTERRUPT_VSX:        ext_msr = MSR_VSX; break;
		}

		switch (kvmppc_check_ext(vcpu, exit_nr)) {
		case EMULATE_DONE:
			/* everything ok - let's enable the ext */
			r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
			break;
		case EMULATE_FAIL:
			/* we need to emulate this instruction */
			goto program_interrupt;
			break;
		default:
			/* nothing to worry about - go again */
			break;
		}
		break;
	}
	case BOOK3S_INTERRUPT_ALIGNMENT:
		if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
			u32 last_inst = kvmppc_get_last_inst(vcpu);
			u32 dsisr;
			u64 dar;

			dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
			dar = kvmppc_alignment_dar(vcpu, last_inst);

			kvmppc_set_dsisr(vcpu, dsisr);
			kvmppc_set_dar(vcpu, dar);

			kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		}
		r = RESUME_GUEST;
		break;
#ifdef CONFIG_PPC_BOOK3S_64
	case BOOK3S_INTERRUPT_FAC_UNAVAIL:
		kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
		r = RESUME_GUEST;
		break;
#endif
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
	case BOOK3S_INTERRUPT_TRACE:
		kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
		r = RESUME_GUEST;
		break;
	default:
	{
		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
		/* Ugh - bork here! What did we get? */
		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
		r = RESUME_HOST;
		BUG();
		break;
	}
	}

	if (!(r & RESUME_HOST)) {
		/* To avoid clobbering exit_reason, only check for signals if
		 * we aren't already exiting to userspace for some other
		 * reason. */

		/*
		 * Interrupts could be timers for the guest which we have to
		 * inject again, so let's postpone them until we're in the guest
		 * and if we really did time things so badly, then we just exit
		 * again due to a host external interrupt.
		 */
		s = kvmppc_prepare_to_enter(vcpu);
		if (s <= 0)
			r = s;
		else {
			/* interrupts now hard-disabled */
			kvmppc_fix_ee_before_entry();
		}

		kvmppc_handle_lost_ext(vcpu);
	}

	trace_kvm_book3s_reenter(r, vcpu);

	return r;
}

static int kvm_arch_vcpu_ioctl_get_sregs_pr(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
	int i;

	sregs->pvr = vcpu->arch.pvr;

	sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
		for (i = 0; i < 64; i++) {
			sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
			sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
		}
	} else {
		for (i = 0; i < 16; i++)
			sregs->u.s.ppc32.sr[i] = kvmppc_get_sr(vcpu, i);

		for (i = 0; i < 8; i++) {
			sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
			sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
		}
	}

	return 0;
}

static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
	int i;

	kvmppc_set_pvr_pr(vcpu, sregs->pvr);

	vcpu3s->sdr1 = sregs->u.s.sdr1;
	if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
		for (i = 0; i < 64; i++) {
			vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
						    sregs->u.s.ppc64.slb[i].slbe);
		}
	} else {
		for (i = 0; i < 16; i++) {
			vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
		}
		for (i = 0; i < 8; i++) {
			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), false,
				       (u32)sregs->u.s.ppc32.ibat[i]);
			kvmppc_set_bat(vcpu, &(vcpu3s->ibat[i]), true,
				       (u32)(sregs->u.s.ppc32.ibat[i] >> 32));
			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), false,
				       (u32)sregs->u.s.ppc32.dbat[i]);
			kvmppc_set_bat(vcpu, &(vcpu3s->dbat[i]), true,
				       (u32)(sregs->u.s.ppc32.dbat[i] >> 32));
		}
	}

	/* Flush the MMU after messing with the segments */
	kvmppc_mmu_pte_flush(vcpu, 0, 0);

	return 0;
}

static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, to_book3s(vcpu)->hior);
		break;
	case KVM_REG_PPC_LPCR:
		/*
		 * We are only interested in the LPCR_ILE bit
		 */
		if (vcpu->arch.intr_msr & MSR_LE)
			*val = get_reg_val(id, LPCR_ILE);
		else
			*val = get_reg_val(id, 0);
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static void kvmppc_set_lpcr_pr(struct kvm_vcpu *vcpu, u64 new_lpcr)
{
	if (new_lpcr & LPCR_ILE)
		vcpu->arch.intr_msr |= MSR_LE;
	else
		vcpu->arch.intr_msr &= ~MSR_LE;
}

static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;

	switch (id) {
	case KVM_REG_PPC_HIOR:
		to_book3s(vcpu)->hior = set_reg_val(id, *val);
		to_book3s(vcpu)->hior_explicit = true;
		break;
	case KVM_REG_PPC_LPCR:
		kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
		break;
	default:
		r = -EINVAL;
		break;
	}

	return r;
}

static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
						   unsigned int id)
{
	struct kvmppc_vcpu_book3s *vcpu_book3s;
	struct kvm_vcpu *vcpu;
	int err = -ENOMEM;
	unsigned long p;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
	if (!vcpu_book3s)
		goto free_vcpu;
	vcpu->arch.book3s = vcpu_book3s;

#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
	vcpu->arch.shadow_vcpu =
		kzalloc(sizeof(*vcpu->arch.shadow_vcpu), GFP_KERNEL);
	if (!vcpu->arch.shadow_vcpu)
		goto free_vcpu3s;
#endif

	err = kvm_vcpu_init(vcpu, kvm, id);
	if (err)
		goto free_shadow_vcpu;

	err = -ENOMEM;
	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
	if (!p)
		goto uninit_vcpu;
	/* the real shared page fills the last 4k of our page */
	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
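	/*
	 * (Illustrative note) The shared struct has to fit in the 4k magic
	 * page the guest maps, so even on hosts with 64k pages only the
	 * last 4k of the allocation is handed to the guest; the layout is
	 * then identical regardless of host page size.
	 */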
#ifdef CONFIG_PPC_BOOK3S_64
	/* Always start the shared struct in native endian mode */
#ifdef __BIG_ENDIAN__
	vcpu->arch.shared_big_endian = true;
#else
	vcpu->arch.shared_big_endian = false;
#endif

	/*
	 * Default to the same as the host if we're on a sufficiently
	 * recent machine that has 1TB segments;
	 * otherwise default to PPC970FX.
	 */
	vcpu->arch.pvr = 0x3C0301;
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		vcpu->arch.pvr = mfspr(SPRN_PVR);
	vcpu->arch.intr_msr = MSR_SF;
#else
	/* default to book3s_32 (750) */
	vcpu->arch.pvr = 0x84202;
#endif
	kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
	vcpu->arch.slb_nr = 64;

	vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;

	err = kvmppc_mmu_init(vcpu);
	if (err < 0)
		goto uninit_vcpu;

	return vcpu;

uninit_vcpu:
	kvm_vcpu_uninit(vcpu);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
	kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
	vfree(vcpu_book3s);
free_vcpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(err);
}

static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);

	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
	kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
	kfree(vcpu->arch.shadow_vcpu);
#endif
	vfree(vcpu_book3s);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
	int ret;
#ifdef CONFIG_ALTIVEC
	unsigned long uninitialized_var(vrsave);
#endif

	/* Check if we can run the vcpu at all */
	if (!vcpu->arch.sane) {
		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Interrupts could be timers for the guest which we have to inject
	 * again, so let's postpone them until we're in the guest and if we
	 * really did time things so badly, then we just exit again due to
	 * a host external interrupt.
	 */
	ret = kvmppc_prepare_to_enter(vcpu);
	if (ret <= 0)
		goto out;
	/* interrupts now hard-disabled */

	/* Save FPU state in thread_struct */
	if (current->thread.regs->msr & MSR_FP)
		giveup_fpu(current);

#ifdef CONFIG_ALTIVEC
	/* Save Altivec state in thread_struct */
	if (current->thread.regs->msr & MSR_VEC)
		giveup_altivec(current);
#endif

#ifdef CONFIG_VSX
	/* Save VSX state in thread_struct */
	if (current->thread.regs->msr & MSR_VSX)
		__giveup_vsx(current);
#endif

	/* Preload FPU if it's enabled */
	if (kvmppc_get_msr(vcpu) & MSR_FP)
		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);

	kvmppc_fix_ee_before_entry();

	ret = __kvmppc_vcpu_run(kvm_run, vcpu);

	/* No need for kvm_guest_exit. It's done in handle_exit.
	   We also get here with interrupts enabled. */

	/* Make sure we save the guest FPU/Altivec/VSX state */
	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);

	/* Make sure we save the guest TAR/EBB/DSCR state */
	kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);

out:
	vcpu->mode = OUTSIDE_GUEST_MODE;
	return ret;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
					 struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	struct kvm_vcpu *vcpu;
	ulong ga, ga_end;
	int is_dirty = 0;
	int r;
	unsigned long n;

	mutex_lock(&kvm->slots_lock);

	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* If nothing is dirty, don't bother messing with page tables. */
	if (is_dirty) {
		memslot = id_to_memslot(kvm->memslots, log->slot);

		ga = memslot->base_gfn << PAGE_SHIFT;
		ga_end = ga + (memslot->npages << PAGE_SHIFT);

		kvm_for_each_vcpu(n, vcpu, kvm)
			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);

		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}

	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
					 struct kvm_memory_slot *memslot)
{
	return;
}

static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
					struct kvm_memory_slot *memslot,
					struct kvm_userspace_memory_region *mem)
{
	return 0;
}

static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
				struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old)
{
	return;
}

static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
					struct kvm_memory_slot *dont)
{
	return;
}

static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
					 unsigned long npages)
{
	return 0;
}


#ifdef CONFIG_PPC64
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
					 struct kvm_ppc_smmu_info *info)
{
	long int i;
	struct kvm_vcpu *vcpu;

	info->flags = 0;

	/* SLB is always 64 entries */
	info->slb_size = 64;

	/* Standard 4k base page size segment */
	info->sps[0].page_shift = 12;
	info->sps[0].slb_enc = 0;
	info->sps[0].enc[0].page_shift = 12;
	info->sps[0].enc[0].pte_enc = 0;

	/*
	 * 64k large page size.
	 * We only want to put this in if the CPUs we're emulating
	 * support it, but unfortunately we don't have a vcpu
	 * conveniently to hand here to test. Just pick the first vcpu,
	 * and if that doesn't exist yet, report the minimum capability,
	 * i.e., no 64k pages.
	 * 1T segment support goes along with 64k pages.
	 */
	i = 1;
	vcpu = kvm_get_vcpu(kvm, 0);
	if (vcpu && (vcpu->arch.hflags & BOOK3S_HFLAG_MULTI_PGSIZE)) {
		info->flags = KVM_PPC_1T_SEGMENTS;
		info->sps[i].page_shift = 16;
		info->sps[i].slb_enc = SLB_VSID_L | SLB_VSID_LP_01;
		info->sps[i].enc[0].page_shift = 16;
		info->sps[i].enc[0].pte_enc = 1;
		++i;
	}

	/* Standard 16M large page size segment */
	info->sps[i].page_shift = 24;
	info->sps[i].slb_enc = SLB_VSID_L;
	info->sps[i].enc[0].page_shift = 24;
	info->sps[i].enc[0].pte_enc = 0;

	return 0;
}
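
/*
 * Illustrative result of the above: for a POWER7-like guest the table
 * reports sps[0] = 4k, sps[1] = 64k (with KVM_PPC_1T_SEGMENTS set) and
 * sps[2] = 16M; without a vcpu to inspect, only the 4k and 16M entries
 * are filled in.
 */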
#else
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
					 struct kvm_ppc_smmu_info *info)
{
	/* We should not get called */
	BUG();
}
#endif /* CONFIG_PPC64 */

static unsigned int kvm_global_user_count = 0;
static DEFINE_SPINLOCK(kvm_global_user_count_lock);

static int kvmppc_core_init_vm_pr(struct kvm *kvm)
{
	mutex_init(&kvm->arch.hpt_mutex);

	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		spin_lock(&kvm_global_user_count_lock);
		if (++kvm_global_user_count == 1)
			pSeries_disable_reloc_on_exc();
		spin_unlock(&kvm_global_user_count_lock);
	}
	return 0;
}

static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif

	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		spin_lock(&kvm_global_user_count_lock);
		BUG_ON(kvm_global_user_count == 0);
		if (--kvm_global_user_count == 0)
			pSeries_enable_reloc_on_exc();
		spin_unlock(&kvm_global_user_count_lock);
	}
}

static int kvmppc_core_check_processor_compat_pr(void)
{
	/* we are always compatible */
	return 0;
}

static long kvm_arch_vm_ioctl_pr(struct file *filp,
				 unsigned int ioctl, unsigned long arg)
{
	return -ENOTTY;
}

static struct kvmppc_ops kvm_ops_pr = {
	.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr,
	.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr,
	.get_one_reg = kvmppc_get_one_reg_pr,
	.set_one_reg = kvmppc_set_one_reg_pr,
	.vcpu_load   = kvmppc_core_vcpu_load_pr,
	.vcpu_put    = kvmppc_core_vcpu_put_pr,
	.set_msr     = kvmppc_set_msr_pr,
	.vcpu_run    = kvmppc_vcpu_run_pr,
	.vcpu_create = kvmppc_core_vcpu_create_pr,
	.vcpu_free   = kvmppc_core_vcpu_free_pr,
	.check_requests = kvmppc_core_check_requests_pr,
	.get_dirty_log = kvm_vm_ioctl_get_dirty_log_pr,
	.flush_memslot = kvmppc_core_flush_memslot_pr,
	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
	.unmap_hva = kvm_unmap_hva_pr,
	.unmap_hva_range = kvm_unmap_hva_range_pr,
	.age_hva  = kvm_age_hva_pr,
	.test_age_hva = kvm_test_age_hva_pr,
	.set_spte_hva = kvm_set_spte_hva_pr,
	.mmu_destroy  = kvmppc_mmu_destroy_pr,
	.free_memslot = kvmppc_core_free_memslot_pr,
	.create_memslot = kvmppc_core_create_memslot_pr,
	.init_vm = kvmppc_core_init_vm_pr,
	.destroy_vm = kvmppc_core_destroy_vm_pr,
	.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
	.emulate_op = kvmppc_core_emulate_op_pr,
	.emulate_mtspr = kvmppc_core_emulate_mtspr_pr,
	.emulate_mfspr = kvmppc_core_emulate_mfspr_pr,
	.fast_vcpu_kick = kvm_vcpu_kick,
	.arch_vm_ioctl  = kvm_arch_vm_ioctl_pr,
};


int kvmppc_book3s_init_pr(void)
{
	int r;

	r = kvmppc_core_check_processor_compat_pr();
	if (r < 0)
		return r;

	kvm_ops_pr.owner = THIS_MODULE;
	kvmppc_pr_ops = &kvm_ops_pr;

	r = kvmppc_mmu_hpte_sysinit();
	return r;
}

void kvmppc_book3s_exit_pr(void)
{
	kvmppc_pr_ops = NULL;
	kvmppc_mmu_hpte_sysexit();
}

/*
 * We only support separate modules for book3s 64
 */
#ifdef CONFIG_PPC_BOOK3S_64

module_init(kvmppc_book3s_init_pr);
module_exit(kvmppc_book3s_exit_pr);

MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
#endif