/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 *
 * Derived from book3s_rmhandlers.S and other files, which are:
 *
 * Copyright SUSE Linux Products GmbH 2009
 *
 * Authors: Alexander Graf <agraf@suse.de>
 */

#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu-hash64.h>
#include <asm/tm.h>

#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)

#ifdef __LITTLE_ENDIAN__
#error Need to fix lppaca and SLB shadow accesses in little endian mode
#endif

/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE	1
#define NAPPING_NOVCPU	2

/*
 * Call kvmppc_hv_entry in real mode.
 * Must be called with interrupts hard-disabled.
 *
 * Input Registers:
 *
 * LR = return address to continue at after eventually re-enabling MMU
 */
_GLOBAL(kvmppc_hv_entry_trampoline)
	mflr	r0
	std	r0, PPC_LR_STKOFF(r1)
	stdu	r1, -112(r1)
	mfmsr	r10
	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
	li	r0,MSR_RI
	andc	r0,r10,r0
	li	r6,MSR_IR | MSR_DR
	andc	r6,r10,r6
	mtmsrd	r0,1		/* clear RI in MSR */
	mtsrr0	r5
	mtsrr1	r6
	RFI
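
	/*
	 * A hedged C-style sketch of what the trampoline above does
	 * (mtmsrd()/rfid() here are illustrative helpers, not real
	 * kernel APIs): drop to real mode by rfid-ing to
	 * kvmppc_call_hv_entry with RI, IR and DR clear.
	 *
	 *	unsigned long msr = mfmsr();
	 *	mtmsrd(msr & ~MSR_RI, 1);	// non-recoverable from here
	 *	srr0 = kvmppc_call_hv_entry;	// target PC
	 *	srr1 = msr & ~(MSR_IR | MSR_DR); // target MSR: no translation
	 *	rfid();				// jump with MMU off
	 */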

kvmppc_call_hv_entry:
	ld	r4, HSTATE_KVM_VCPU(r13)
	bl	kvmppc_hv_entry

	/* Back from guest - restore host state and return to caller */

BEGIN_FTR_SECTION
	/* Restore host DABR and DABRX */
	ld	r5,HSTATE_DABR(r13)
	li	r6,7
	mtspr	SPRN_DABR,r5
	mtspr	SPRN_DABRX,r6
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)

	/* Restore SPRG3 */
	ld	r3,PACA_SPRG_VDSO(r13)
	mtspr	SPRN_SPRG_VDSO_WRITE,r3

	/* Reload the host's PMU registers */
	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
	lbz	r4, LPPACA_PMCINUSE(r3)
	cmpwi	r4, 0
	beq	23f			/* skip if not */
BEGIN_FTR_SECTION
	ld	r3, HSTATE_MMCR(r13)
	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
	cmpwi	r4, MMCR0_PMAO
	beql	kvmppc_fix_pmao
END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
	lwz	r3, HSTATE_PMC(r13)
	lwz	r4, HSTATE_PMC + 4(r13)
	lwz	r5, HSTATE_PMC + 8(r13)
	lwz	r6, HSTATE_PMC + 12(r13)
	lwz	r8, HSTATE_PMC + 16(r13)
	lwz	r9, HSTATE_PMC + 20(r13)
BEGIN_FTR_SECTION
	lwz	r10, HSTATE_PMC + 24(r13)
	lwz	r11, HSTATE_PMC + 28(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	mtspr	SPRN_PMC1, r3
	mtspr	SPRN_PMC2, r4
	mtspr	SPRN_PMC3, r5
	mtspr	SPRN_PMC4, r6
	mtspr	SPRN_PMC5, r8
	mtspr	SPRN_PMC6, r9
BEGIN_FTR_SECTION
	mtspr	SPRN_PMC7, r10
	mtspr	SPRN_PMC8, r11
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	ld	r3, HSTATE_MMCR(r13)
	ld	r4, HSTATE_MMCR + 8(r13)
	ld	r5, HSTATE_MMCR + 16(r13)
	ld	r6, HSTATE_MMCR + 24(r13)
	ld	r7, HSTATE_MMCR + 32(r13)
	mtspr	SPRN_MMCR1, r4
	mtspr	SPRN_MMCRA, r5
	mtspr	SPRN_SIAR, r6
	mtspr	SPRN_SDAR, r7
BEGIN_FTR_SECTION
	ld	r8, HSTATE_MMCR + 40(r13)
	ld	r9, HSTATE_MMCR + 48(r13)
	mtspr	SPRN_MMCR2, r8
	mtspr	SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	mtspr	SPRN_MMCR0, r3
	isync
23:

	/*
	 * Reload DEC.  HDEC interrupts were disabled when
	 * we reloaded the host's LPCR value.
	 */
	ld	r3, HSTATE_DECEXP(r13)
	mftb	r4
	subf	r4, r4, r3
	mtspr	SPRN_DEC, r4
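
	/*
	 * The DEC arithmetic above, sketched in C (assuming, per the
	 * load above, that HSTATE_DECEXP holds the host timebase value
	 * at which the host decrementer should fire):
	 *
	 *	dec = hstate->dec_expires - mftb();	// may go negative
	 *	mtspr(SPRN_DEC, dec);			// rearm host DEC
	 */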

	/*
	 * For external and machine check interrupts, we need
	 * to call the Linux handler to process the interrupt.
	 * We do that by jumping to absolute address 0x500 for
	 * external interrupts, or the machine_check_fwnmi label
	 * for machine checks (since firmware might have patched
	 * the vector area at 0x200).  The [h]rfid at the end of the
	 * handler will return to the book3s_hv_interrupts.S code.
	 * For other interrupts we do the rfid to get back
	 * to the book3s_hv_interrupts.S code here.
	 */
	ld	r8, 112+PPC_LR_STKOFF(r1)
	addi	r1, r1, 112
	ld	r7, HSTATE_HOST_MSR(r13)

	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
BEGIN_FTR_SECTION
	beq	11f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

	/* RFI into the highmem handler, or branch to interrupt handler */
	mfmsr	r6
	li	r0, MSR_RI
	andc	r6, r6, r0
	mtmsrd	r6, 1			/* Clear RI in MSR */
	mtsrr0	r8
	mtsrr1	r7
	beqa	0x500			/* external interrupt (PPC970) */
	beq	cr1, 13f		/* machine check */
	RFI

	/* On POWER7, we have external interrupts set to use HSRR0/1 */
11:	mtspr	SPRN_HSRR0, r8
	mtspr	SPRN_HSRR1, r7
	ba	0x500

13:	b	machine_check_fwnmi

kvmppc_primary_no_guest:
	/* We handle this much like a ceded vcpu */
	/* set our bit in napping_threads */
	ld	r5, HSTATE_KVM_VCORE(r13)
	lbz	r7, HSTATE_PTID(r13)
	li	r0, 1
	sld	r0, r0, r7
	addi	r6, r5, VCORE_NAPPING_THREADS
1:	lwarx	r3, 0, r6
	or	r3, r3, r0
	stwcx.	r3, 0, r6
	bne	1b
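
	/*
	 * The lwarx/stwcx. loop above is an atomic fetch-or.  A C sketch
	 * using a compiler builtin (illustrative only; real-mode asm
	 * can't actually call out to C):
	 *
	 *	__atomic_fetch_or(&vcore->napping_threads, 1u << ptid,
	 *			  __ATOMIC_RELAXED);
	 *	// the isync below then orders this update before the
	 *	// entry_exit_count test
	 */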
	/* order napping_threads update vs testing entry_exit_count */
	isync
	li	r12, 0
	lwz	r7, VCORE_ENTRY_EXIT(r5)
	cmpwi	r7, 0x100
	bge	kvm_novcpu_exit	/* another thread already exiting */
	li	r3, NAPPING_NOVCPU
	stb	r3, HSTATE_NAPPING(r13)
	li	r3, 1
	stb	r3, HSTATE_HWTHREAD_REQ(r13)

	b	kvm_do_nap

kvm_novcpu_wakeup:
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r5, HSTATE_KVM_VCORE(r13)
	li	r0, 0
	stb	r0, HSTATE_NAPPING(r13)
	stb	r0, HSTATE_HWTHREAD_REQ(r13)

	/* check the wake reason */
	bl	kvmppc_check_wake_reason

	/* see if any other thread is already exiting */
	lwz	r0, VCORE_ENTRY_EXIT(r5)
	cmpwi	r0, 0x100
	bge	kvm_novcpu_exit

	/* clear our bit in napping_threads */
	lbz	r7, HSTATE_PTID(r13)
	li	r0, 1
	sld	r0, r0, r7
	addi	r6, r5, VCORE_NAPPING_THREADS
4:	lwarx	r7, 0, r6
	andc	r7, r7, r0
	stwcx.	r7, 0, r6
	bne	4b

	/* See if the wake reason means we need to exit */
	cmpdi	r3, 0
	bge	kvm_novcpu_exit

	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
	ld	r4, HSTATE_KVM_VCPU(r13)
	cmpdi	r4, 0
	bne	kvmppc_got_guest

kvm_novcpu_exit:
	b	hdec_soon

/*
 * We come in here when wakened from nap mode.
 * Relocation is off and most register values are lost.
 * r13 points to the PACA.
 */
	.globl	kvm_start_guest
kvm_start_guest:

	/* Set runlatch bit the minute you wake up from nap */
	mfspr	r1, SPRN_CTRLF
	ori	r1, r1, 1
	mtspr	SPRN_CTRLT, r1

	ld	r2,PACATOC(r13)

	li	r0,KVM_HWTHREAD_IN_KVM
	stb	r0,HSTATE_HWTHREAD_STATE(r13)

	/* NV GPR values from power7_idle() will no longer be valid */
	li	r0,1
	stb	r0,PACA_NAPSTATELOST(r13)

	/* were we napping due to cede? */
	lbz	r0,HSTATE_NAPPING(r13)
	cmpwi	r0,NAPPING_CEDE
	beq	kvm_end_cede
	cmpwi	r0,NAPPING_NOVCPU
	beq	kvm_novcpu_wakeup

	ld	r1,PACAEMERGSP(r13)
	subi	r1,r1,STACK_FRAME_OVERHEAD

	/*
	 * We weren't napping due to cede, so this must be a secondary
	 * thread being woken up to run a guest, or being woken up due
	 * to a stray IPI.  (Or due to some machine check or hypervisor
	 * maintenance interrupt while the core is in KVM.)
	 */

	/* Check the wake reason in SRR1 to see why we got here */
	bl	kvmppc_check_wake_reason
	cmpdi	r3, 0
	bge	kvm_no_guest

	/* get vcpu pointer, NULL if we have no vcpu to run */
	ld	r4,HSTATE_KVM_VCPU(r13)
	cmpdi	r4,0
	/* if we have no vcpu to run, go back to sleep */
	beq	kvm_no_guest

	/* Set HSTATE_DSCR(r13) to something sensible */
	ld	r6, PACA_DSCR(r13)
	std	r6, HSTATE_DSCR(r13)

	bl	kvmppc_hv_entry

	/* Back from the guest, go back to nap */
	/* Clear our vcpu pointer so we don't come back in early */
	li	r0, 0
	std	r0, HSTATE_KVM_VCPU(r13)
	/*
	 * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
	 * the nap_count, because once the increment to nap_count is
	 * visible we could be given another vcpu.
	 */
	lwsync

	/* increment the nap count and then go to nap mode */
	ld	r4, HSTATE_KVM_VCORE(r13)
	addi	r4, r4, VCORE_NAP_COUNT
51:	lwarx	r3, 0, r4
	addi	r3, r3, 1
	stwcx.	r3, 0, r4
	bne	51b

kvm_no_guest:
	li	r0, KVM_HWTHREAD_IN_NAP
	stb	r0, HSTATE_HWTHREAD_STATE(r13)
kvm_do_nap:
	/* Clear the runlatch bit before napping */
	mfspr	r2, SPRN_CTRLF
	clrrdi	r2, r2, 1
	mtspr	SPRN_CTRLT, r2

	li	r3, LPCR_PECE0
	mfspr	r4, SPRN_LPCR
	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
	mtspr	SPRN_LPCR, r4
	isync
	std	r0, HSTATE_SCRATCH0(r13)
	ptesync
	ld	r0, HSTATE_SCRATCH0(r13)
1:	cmpd	r0, r0
	bne	1b
	nap
	b	.
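
	/*
	 * C sketch of the LPCR update just done above: the rlwimi
	 * inserts PECE0 under the PECE0|PECE1 mask, i.e. PECE0 gets
	 * set and PECE1 cleared (so, by my reading, external events
	 * can wake the nap but the decrementer cannot):
	 *
	 *	lpcr = mfspr(SPRN_LPCR);
	 *	lpcr = (lpcr & ~(LPCR_PECE0 | LPCR_PECE1)) | LPCR_PECE0;
	 *	mtspr(SPRN_LPCR, lpcr);
	 *	// the std/ptesync/ld/cmpd sequence flushes outstanding
	 *	// stores before the nap instruction
	 */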

/******************************************************************************
 *                                                                            *
 *                               Entry code                                   *
 *                                                                            *
 *****************************************************************************/

.global kvmppc_hv_entry
kvmppc_hv_entry:

	/* Required state:
	 *
	 * R4 = vcpu pointer (or NULL)
	 * MSR = ~IR|DR
	 * R13 = PACA
	 * R1 = host R1
	 * all other volatile GPRS = free
	 */
	mflr	r0
	std	r0, PPC_LR_STKOFF(r1)
	stdu	r1, -112(r1)

	/* Save R1 in the PACA */
	std	r1, HSTATE_HOST_R1(r13)

	li	r6, KVM_GUEST_MODE_HOST_HV
	stb	r6, HSTATE_IN_GUEST(r13)

	/* Clear out SLB */
	li	r6,0
	slbmte	r6,r6
	slbia
	ptesync

BEGIN_FTR_SECTION
	b	30f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	/*
	 * POWER7 host -> guest partition switch code.
	 * We don't have to lock against concurrent tlbies,
	 * but we do have to coordinate across hardware threads.
	 */
	/* Increment entry count iff exit count is zero. */
	ld	r5,HSTATE_KVM_VCORE(r13)
	addi	r9,r5,VCORE_ENTRY_EXIT
21:	lwarx	r3,0,r9
	cmpwi	r3,0x100		/* any threads starting to exit? */
	bge	secondary_too_late	/* if so we're too late to the party */
	addi	r3,r3,1
	stwcx.	r3,0,r9
	bne	21b
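
	/*
	 * In C terms the loop above is a conditional atomic increment.
	 * A sketch with a CAS-style helper standing in for the LL/SC
	 * pair (names illustrative):
	 *
	 *	do {
	 *		count = vcore->entry_exit_count;
	 *		if (count >= 0x100)	// exit byte nonzero?
	 *			goto secondary_too_late;
	 *	} while (!try_cmpxchg(&vcore->entry_exit_count,
	 *			      &count, count + 1));
	 */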

	/* Primary thread switches to guest partition. */
	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
	lbz	r6,HSTATE_PTID(r13)
	cmpwi	r6,0
	bne	20f
	ld	r6,KVM_SDR1(r9)
	lwz	r7,KVM_LPID(r9)
	li	r0,LPID_RSVD		/* switch to reserved LPID */
	mtspr	SPRN_LPID,r0
	ptesync
	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
	mtspr	SPRN_LPID,r7
	isync

	/* See if we need to flush the TLB */
	lhz	r6,PACAPACAINDEX(r13)	/* test_bit(cpu, need_tlb_flush) */
	clrldi	r7,r6,64-6		/* extract bit number (6 bits) */
	srdi	r6,r6,6			/* doubleword number */
	sldi	r6,r6,3			/* address offset */
	add	r6,r6,r9
	addi	r6,r6,KVM_NEED_FLUSH	/* dword in kvm->arch.need_tlb_flush */
	li	r0,1
	sld	r0,r0,r7
	ld	r7,0(r6)
	and.	r7,r7,r0
	beq	22f
23:	ldarx	r7,0,r6			/* if set, clear the bit */
	andc	r7,r7,r0
	stdcx.	r7,0,r6
	bne	23b
	/* Flush the TLB of any entries for this LPID */
	/* use arch 2.07S as a proxy for POWER8 */
BEGIN_FTR_SECTION
	li	r6,512			/* POWER8 has 512 sets */
FTR_SECTION_ELSE
	li	r6,128			/* POWER7 has 128 sets */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
	mtctr	r6
	li	r7,0x800		/* IS field = 0b10 */
	ptesync
28:	tlbiel	r7
	addi	r7,r7,0x1000
	bdnz	28b
	ptesync
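
	/*
	 * The flush loop above, sketched in C: one tlbiel per TLB
	 * congruence class, with IS=0b10 in the RB operand and the set
	 * index in the bits above bit 12 (tlbiel() is illustrative):
	 *
	 *	for (set = 0; set < nr_sets; set++)	// 128 (P7) / 512 (P8)
	 *		tlbiel(0x800 | (set << 12));
	 *	ptesync();				// complete the flushes
	 */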

	/* Add timebase offset onto timebase */
22:	ld	r8,VCORE_TB_OFFSET(r5)
	cmpdi	r8,0
	beq	37f
	mftb	r6		/* current host timebase */
	add	r8,r8,r6
	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
	mftb	r7		/* check if lower 24 bits overflowed */
	clrldi	r6,r6,40
	clrldi	r7,r7,40
	cmpld	r7,r6
	bge	37f
	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
	mtspr	SPRN_TBU40,r8
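
	/*
	 * C sketch of the timebase adjustment above.  TBU40 writes only
	 * the upper 40 bits of the timebase, so if the low 24 bits
	 * wrapped while we were writing, bump the upper bits by one
	 * (0x100 from addis is a 0x1000000 carry into bit 24):
	 *
	 *	tb = mftb();
	 *	new_tb = tb + vc->tb_offset;
	 *	mtspr(SPRN_TBU40, new_tb);
	 *	if ((mftb() & 0xffffff) < (tb & 0xffffff))
	 *		mtspr(SPRN_TBU40, new_tb + 0x1000000);
	 */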

	/* Load guest PCR value to select appropriate compat mode */
37:	ld	r7, VCORE_PCR(r5)
	cmpdi	r7, 0
	beq	38f
	mtspr	SPRN_PCR, r7
38:

BEGIN_FTR_SECTION
	/* DPDES is shared between threads */
	ld	r8, VCORE_DPDES(r5)
	mtspr	SPRN_DPDES, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

	li	r0,1
	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
	b	10f

	/* Secondary threads wait for primary to have done partition switch */
20:	lbz	r0,VCORE_IN_GUEST(r5)
	cmpwi	r0,0
	beq	20b

	/* Set LPCR and RMOR. */
10:	ld	r8,VCORE_LPCR(r5)
	mtspr	SPRN_LPCR,r8
	ld	r8,KVM_RMOR(r9)
	mtspr	SPRN_RMOR,r8
	isync

	/* Check if HDEC expires soon */
	mfspr	r3,SPRN_HDEC
	cmpwi	r3,512		/* 1 microsecond */
	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
	blt	hdec_soon
	b	31f

	/*
	 * PPC970 host -> guest partition switch code.
	 * We have to lock against concurrent tlbies,
	 * using native_tlbie_lock to lock against host tlbies
	 * and kvm->arch.tlbie_lock to lock against guest tlbies.
	 * We also have to invalidate the TLB since its
	 * entries aren't tagged with the LPID.
	 */
30:	ld	r5,HSTATE_KVM_VCORE(r13)
	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */

	/* first take native_tlbie_lock */
	.section ".toc","aw"
toc_tlbie_lock:
	.tc	native_tlbie_lock[TC],native_tlbie_lock
	.previous
	ld	r3,toc_tlbie_lock@toc(2)
#ifdef __BIG_ENDIAN__
	lwz	r8,PACA_LOCK_TOKEN(r13)
#else
	lwz	r8,PACAPACAINDEX(r13)
#endif
24:	lwarx	r0,0,r3
	cmpwi	r0,0
	bne	24b
	stwcx.	r8,0,r3
	bne	24b
	isync

	ld	r5,HSTATE_KVM_VCORE(r13)
	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */
	li	r0,0x18f
	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
	or	r0,r7,r0
	ptesync
	sync
	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
	isync
	li	r0,0
	stw	r0,0(r3)		/* drop native_tlbie_lock */

	/* invalidate the whole TLB */
	li	r0,256
	mtctr	r0
	li	r6,0
25:	tlbiel	r6
	addi	r6,r6,0x1000
	bdnz	25b
	ptesync

	/* Take the guest's tlbie_lock */
	addi	r3,r9,KVM_TLBIE_LOCK
24:	lwarx	r0,0,r3
	cmpwi	r0,0
	bne	24b
	stwcx.	r8,0,r3
	bne	24b
	isync
	ld	r6,KVM_SDR1(r9)
	mtspr	SPRN_SDR1,r6		/* switch to partition page table */

	/* Set up HID4 with the guest's LPID etc. */
	sync
	mtspr	SPRN_HID4,r7
	isync

	/* drop the guest's tlbie_lock */
	li	r0,0
	stw	r0,0(r3)

	/* Check if HDEC expires soon */
	mfspr	r3,SPRN_HDEC
	cmpwi	r3,10
	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
	blt	hdec_soon

	/* Enable HDEC interrupts */
	mfspr	r0,SPRN_HID0
	li	r3,1
	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
	sync
	mtspr	SPRN_HID0,r0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
31:
	/* Do we have a guest vcpu to run? */
	cmpdi	r4, 0
	beq	kvmppc_primary_no_guest
kvmppc_got_guest:

	/* Load up guest SLB entries */
	lwz	r5,VCPU_SLB_MAX(r4)
	cmpwi	r5,0
	beq	9f
	mtctr	r5
	addi	r6,r4,VCPU_SLB
1:	ld	r8,VCPU_SLB_E(r6)
	ld	r9,VCPU_SLB_V(r6)
	slbmte	r9,r8
	addi	r6,r6,VCPU_SLB_SIZE
	bdnz	1b
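
	/*
	 * C sketch of the SLB reload loop above (the slb[] field names
	 * are assumed from the VCPU_SLB_E/V offsets, and slbmte() is an
	 * illustrative helper):
	 *
	 *	for (i = 0; i < vcpu->arch.slb_max; i++)
	 *		slbmte(vcpu->arch.slb[i].origv,
	 *		       vcpu->arch.slb[i].orige);
	 */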
9:
	/* Increment yield count if they have a VPA */
	ld	r3, VCPU_VPA(r4)
	cmpdi	r3, 0
	beq	25f
	lwz	r5, LPPACA_YIELDCOUNT(r3)
	addi	r5, r5, 1
	stw	r5, LPPACA_YIELDCOUNT(r3)
	li	r6, 1
	stb	r6, VCPU_VPA_DIRTY(r4)
25:

BEGIN_FTR_SECTION
	/* Save purr/spurr */
	mfspr	r5,SPRN_PURR
	mfspr	r6,SPRN_SPURR
	std	r5,HSTATE_PURR(r13)
	std	r6,HSTATE_SPURR(r13)
	ld	r7,VCPU_PURR(r4)
	ld	r8,VCPU_SPURR(r4)
	mtspr	SPRN_PURR,r7
	mtspr	SPRN_SPURR,r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

BEGIN_FTR_SECTION
	/* Set partition DABR */
	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
	lwz	r5,VCPU_DABRX(r4)
	ld	r6,VCPU_DABR(r4)
	mtspr	SPRN_DABRX,r5
	mtspr	SPRN_DABR,r6
 BEGIN_FTR_SECTION_NESTED(89)
	isync
 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	b	skip_tm
END_FTR_SECTION_IFCLR(CPU_FTR_TM)

	/* Turn on TM/FP/VSX/VMX so we can restore them. */
	mfmsr	r5
	li	r6, MSR_TM >> 32
	sldi	r6, r6, 32
	or	r5, r5, r6
	ori	r5, r5, MSR_FP
	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
	mtmsrd	r5

	/*
	 * The user may change these outside of a transaction, so they must
	 * always be context switched.
	 */
	ld	r5, VCPU_TFHAR(r4)
	ld	r6, VCPU_TFIAR(r4)
	ld	r7, VCPU_TEXASR(r4)
	mtspr	SPRN_TFHAR, r5
	mtspr	SPRN_TFIAR, r6
	mtspr	SPRN_TEXASR, r7

	ld	r5, VCPU_MSR(r4)
	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
	beq	skip_tm	/* TM not active in guest */

	/* Make sure the failure summary is set, otherwise we'll program check
	 * when we trechkpt.  It's possible that this might not have been set
	 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
	 * host.
	 */
	oris	r7, r7, (TEXASR_FS)@h
	mtspr	SPRN_TEXASR, r7

	/*
	 * We need to load up the checkpointed state for the guest.
	 * We need to do this early as it will blow away any GPRs, VSRs and
	 * some SPRs.
	 */

	mr	r31, r4
	addi	r3, r31, VCPU_FPRS_TM
	bl	.load_fp_state
	addi	r3, r31, VCPU_VRS_TM
	bl	.load_vr_state
	mr	r4, r31
	lwz	r7, VCPU_VRSAVE_TM(r4)
	mtspr	SPRN_VRSAVE, r7

	ld	r5, VCPU_LR_TM(r4)
	lwz	r6, VCPU_CR_TM(r4)
	ld	r7, VCPU_CTR_TM(r4)
	ld	r8, VCPU_AMR_TM(r4)
	ld	r9, VCPU_TAR_TM(r4)
	mtlr	r5
	mtcr	r6
	mtctr	r7
	mtspr	SPRN_AMR, r8
	mtspr	SPRN_TAR, r9

	/*
	 * Load up PPR and DSCR values but don't put them in the actual SPRs
	 * till the last moment to avoid running with userspace PPR and DSCR for
	 * too long.
	 */
	ld	r29, VCPU_DSCR_TM(r4)
	ld	r30, VCPU_PPR_TM(r4)

	std	r2, PACATMSCRATCH(r13) /* Save TOC */

	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
	li	r5, 0
	mtmsrd	r5, 1

	/* Load GPRs r0-r28 */
	reg = 0
	.rept	29
	ld	reg, VCPU_GPRS_TM(reg)(r31)
	reg = reg + 1
	.endr
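
	/*
	 * The .rept above expands to 29 loads; roughly, in C (indexing
	 * the checkpointed-GPR array by register number):
	 *
	 *	for (i = 0; i < 29; i++)
	 *		reg[i] = vcpu->arch.gpr_tm[i];
	 *	// r29-r31 still hold DSCR/PPR/vcpu, so they are
	 *	// reloaded last, just below
	 */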

	mtspr	SPRN_DSCR, r29
	mtspr	SPRN_PPR, r30

	/* Load final GPRs */
	ld	29, VCPU_GPRS_TM(29)(r31)
	ld	30, VCPU_GPRS_TM(30)(r31)
	ld	31, VCPU_GPRS_TM(31)(r31)

	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
	TRECHKPT

	/* Now let's get back the state we need. */
	HMT_MEDIUM
	GET_PACA(r13)
	ld	r29, HSTATE_DSCR(r13)
	mtspr	SPRN_DSCR, r29
	ld	r4, HSTATE_KVM_VCPU(r13)
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r2, PACATMSCRATCH(r13)

	/* Set the MSR RI since we have our registers back. */
	li	r5, MSR_RI
	mtmsrd	r5, 1
skip_tm:
#endif

	/* Load guest PMU registers */
	/* R4 is live here (vcpu pointer) */
	li	r3, 1
	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
	isync
BEGIN_FTR_SECTION
	ld	r3, VCPU_MMCR(r4)
	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
	cmpwi	r5, MMCR0_PMAO
	beql	kvmppc_fix_pmao
END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
	lwz	r6, VCPU_PMC + 8(r4)
	lwz	r7, VCPU_PMC + 12(r4)
	lwz	r8, VCPU_PMC + 16(r4)
	lwz	r9, VCPU_PMC + 20(r4)
BEGIN_FTR_SECTION
	lwz	r10, VCPU_PMC + 24(r4)
	lwz	r11, VCPU_PMC + 28(r4)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	mtspr	SPRN_PMC1, r3
	mtspr	SPRN_PMC2, r5
	mtspr	SPRN_PMC3, r6
	mtspr	SPRN_PMC4, r7
	mtspr	SPRN_PMC5, r8
	mtspr	SPRN_PMC6, r9
BEGIN_FTR_SECTION
	mtspr	SPRN_PMC7, r10
	mtspr	SPRN_PMC8, r11
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	ld	r3, VCPU_MMCR(r4)
	ld	r5, VCPU_MMCR + 8(r4)
	ld	r6, VCPU_MMCR + 16(r4)
	ld	r7, VCPU_SIAR(r4)
	ld	r8, VCPU_SDAR(r4)
	mtspr	SPRN_MMCR1, r5
	mtspr	SPRN_MMCRA, r6
	mtspr	SPRN_SIAR, r7
	mtspr	SPRN_SDAR, r8
BEGIN_FTR_SECTION
	ld	r5, VCPU_MMCR + 24(r4)
	ld	r6, VCPU_SIER(r4)
	lwz	r7, VCPU_PMC + 24(r4)
	lwz	r8, VCPU_PMC + 28(r4)
	ld	r9, VCPU_MMCR + 32(r4)
	mtspr	SPRN_MMCR2, r5
	mtspr	SPRN_SIER, r6
	mtspr	SPRN_SPMC1, r7
	mtspr	SPRN_SPMC2, r8
	mtspr	SPRN_MMCRS, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	mtspr	SPRN_MMCR0, r3
	isync

	/* Load up FP, VMX and VSX registers */
	bl	kvmppc_load_fp

	ld	r14, VCPU_GPR(R14)(r4)
	ld	r15, VCPU_GPR(R15)(r4)
	ld	r16, VCPU_GPR(R16)(r4)
	ld	r17, VCPU_GPR(R17)(r4)
	ld	r18, VCPU_GPR(R18)(r4)
	ld	r19, VCPU_GPR(R19)(r4)
	ld	r20, VCPU_GPR(R20)(r4)
	ld	r21, VCPU_GPR(R21)(r4)
	ld	r22, VCPU_GPR(R22)(r4)
	ld	r23, VCPU_GPR(R23)(r4)
	ld	r24, VCPU_GPR(R24)(r4)
	ld	r25, VCPU_GPR(R25)(r4)
	ld	r26, VCPU_GPR(R26)(r4)
	ld	r27, VCPU_GPR(R27)(r4)
	ld	r28, VCPU_GPR(R28)(r4)
	ld	r29, VCPU_GPR(R29)(r4)
	ld	r30, VCPU_GPR(R30)(r4)
	ld	r31, VCPU_GPR(R31)(r4)

BEGIN_FTR_SECTION
	/* Switch DSCR to guest value */
	ld	r5, VCPU_DSCR(r4)
	mtspr	SPRN_DSCR, r5
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

BEGIN_FTR_SECTION
	/* Skip next section on POWER7 or PPC970 */
	b	8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
	mfmsr	r8
	li	r0, 1
	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
	mtmsrd	r8

	/* Load up POWER8-specific registers */
	ld	r5, VCPU_IAMR(r4)
	lwz	r6, VCPU_PSPB(r4)
	ld	r7, VCPU_FSCR(r4)
	mtspr	SPRN_IAMR, r5
	mtspr	SPRN_PSPB, r6
	mtspr	SPRN_FSCR, r7
	ld	r5, VCPU_DAWR(r4)
	ld	r6, VCPU_DAWRX(r4)
	ld	r7, VCPU_CIABR(r4)
	ld	r8, VCPU_TAR(r4)
	mtspr	SPRN_DAWR, r5
	mtspr	SPRN_DAWRX, r6
	mtspr	SPRN_CIABR, r7
	mtspr	SPRN_TAR, r8
	ld	r5, VCPU_IC(r4)
	ld	r6, VCPU_VTB(r4)
	mtspr	SPRN_IC, r5
	mtspr	SPRN_VTB, r6
	ld	r8, VCPU_EBBHR(r4)
	mtspr	SPRN_EBBHR, r8
	ld	r5, VCPU_EBBRR(r4)
	ld	r6, VCPU_BESCR(r4)
	ld	r7, VCPU_CSIGR(r4)
	ld	r8, VCPU_TACR(r4)
	mtspr	SPRN_EBBRR, r5
	mtspr	SPRN_BESCR, r6
	mtspr	SPRN_CSIGR, r7
	mtspr	SPRN_TACR, r8
	ld	r5, VCPU_TCSCR(r4)
	ld	r6, VCPU_ACOP(r4)
	lwz	r7, VCPU_GUEST_PID(r4)
	ld	r8, VCPU_WORT(r4)
	mtspr	SPRN_TCSCR, r5
	mtspr	SPRN_ACOP, r6
	mtspr	SPRN_PID, r7
	mtspr	SPRN_WORT, r8
8:

	/*
	 * Set the decrementer to the guest decrementer.
	 */
	ld	r8,VCPU_DEC_EXPIRES(r4)
	/* r8 is a host timebase value here, convert to guest TB */
	ld	r5,HSTATE_KVM_VCORE(r13)
	ld	r6,VCORE_TB_OFFSET(r5)
	add	r8,r8,r6
	mftb	r7
	subf	r3,r7,r8
	mtspr	SPRN_DEC,r3
	stw	r3,VCPU_DEC(r4)
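
	/*
	 * The guest DEC computation above, in C (dec_expires is kept in
	 * host timebase units, hence the tb_offset conversion; mftb()
	 * already returns guest TB here since the partition switch
	 * applied the offset):
	 *
	 *	guest_expiry = vcpu->arch.dec_expires + vc->tb_offset;
	 *	dec = guest_expiry - mftb();
	 *	mtspr(SPRN_DEC, dec);
	 */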

	ld	r5, VCPU_SPRG0(r4)
	ld	r6, VCPU_SPRG1(r4)
	ld	r7, VCPU_SPRG2(r4)
	ld	r8, VCPU_SPRG3(r4)
	mtspr	SPRN_SPRG0, r5
	mtspr	SPRN_SPRG1, r6
	mtspr	SPRN_SPRG2, r7
	mtspr	SPRN_SPRG3, r8

	/* Load up DAR and DSISR */
	ld	r5, VCPU_DAR(r4)
	lwz	r6, VCPU_DSISR(r4)
	mtspr	SPRN_DAR, r5
	mtspr	SPRN_DSISR, r6

BEGIN_FTR_SECTION
	/* Restore AMR and UAMOR, set AMOR to all 1s */
	ld	r5,VCPU_AMR(r4)
	ld	r6,VCPU_UAMOR(r4)
	li	r7,-1
	mtspr	SPRN_AMR,r5
	mtspr	SPRN_UAMOR,r6
	mtspr	SPRN_AMOR,r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

	/* Restore state of CTRL run bit; assume 1 on entry */
	lwz	r5,VCPU_CTRL(r4)
	andi.	r5,r5,1
	bne	4f
	mfspr	r6,SPRN_CTRLF
	clrrdi	r6,r6,1
	mtspr	SPRN_CTRLT,r6
4:
	ld	r6, VCPU_CTR(r4)
	lwz	r7, VCPU_XER(r4)

	mtctr	r6
	mtxer	r7

kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
	ld	r10, VCPU_PC(r4)
	ld	r11, VCPU_MSR(r4)
	ld	r6, VCPU_SRR0(r4)
	ld	r7, VCPU_SRR1(r4)
	mtspr	SPRN_SRR0, r6
	mtspr	SPRN_SRR1, r7

deliver_guest_interrupt:
	/* r11 = vcpu->arch.msr & ~MSR_HV */
	rldicl	r11, r11, 63 - MSR_HV_LG, 1
	rotldi	r11, r11, 1 + MSR_HV_LG
	ori	r11, r11, MSR_ME

	/* Check if we can deliver an external or decrementer interrupt now */
	ld	r0, VCPU_PENDING_EXC(r4)
	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
	cmpdi	cr1, r0, 0
	andi.	r8, r11, MSR_EE
BEGIN_FTR_SECTION
	mfspr	r8, SPRN_LPCR
	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
	mtspr	SPRN_LPCR, r8
	isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
	beq	5f
	li	r0, BOOK3S_INTERRUPT_EXTERNAL
	bne	cr1, 12f
	mfspr	r0, SPRN_DEC
	cmpwi	r0, 0
	li	r0, BOOK3S_INTERRUPT_DECREMENTER
	bge	5f

12:	mtspr	SPRN_SRR0, r10
	mr	r10,r0
	mtspr	SPRN_SRR1, r11
	mr	r9, r4
	bl	kvmppc_msr_interrupt
5:

/*
 * Required state:
 * R4 = vcpu
 * R10: value for HSRR0
 * R11: value for HSRR1
 * R13 = PACA
 */
fast_guest_return:
	li	r0,0
	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
	mtspr	SPRN_HSRR0,r10
	mtspr	SPRN_HSRR1,r11

	/* Activate guest mode, so faults get handled by KVM */
	li	r9, KVM_GUEST_MODE_GUEST_HV
	stb	r9, HSTATE_IN_GUEST(r13)

	/* Enter guest */

BEGIN_FTR_SECTION
	ld	r5, VCPU_CFAR(r4)
	mtspr	SPRN_CFAR, r5
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
	ld	r0, VCPU_PPR(r4)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)

	ld	r5, VCPU_LR(r4)
	lwz	r6, VCPU_CR(r4)
	mtlr	r5
	mtcr	r6

	ld	r1, VCPU_GPR(R1)(r4)
	ld	r2, VCPU_GPR(R2)(r4)
	ld	r3, VCPU_GPR(R3)(r4)
	ld	r5, VCPU_GPR(R5)(r4)
	ld	r6, VCPU_GPR(R6)(r4)
	ld	r7, VCPU_GPR(R7)(r4)
	ld	r8, VCPU_GPR(R8)(r4)
	ld	r9, VCPU_GPR(R9)(r4)
	ld	r10, VCPU_GPR(R10)(r4)
	ld	r11, VCPU_GPR(R11)(r4)
	ld	r12, VCPU_GPR(R12)(r4)
	ld	r13, VCPU_GPR(R13)(r4)

BEGIN_FTR_SECTION
	mtspr	SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
	ld	r0, VCPU_GPR(R0)(r4)
	ld	r4, VCPU_GPR(R4)(r4)

	hrfid
	b	.

/******************************************************************************
 *                                                                            *
 *                               Exit code                                    *
 *                                                                            *
 *****************************************************************************/

/*
 * We come here from the first-level interrupt handlers.
 */
	.globl	kvmppc_interrupt_hv
kvmppc_interrupt_hv:
	/*
	 * Register contents:
	 * R12		= interrupt vector
	 * R13		= PACA
	 * guest CR, R12 saved in shadow VCPU SCRATCH1/0
	 * guest R13 saved in SPRN_SCRATCH0
	 */
	std	r9, HSTATE_SCRATCH2(r13)

	lbz	r9, HSTATE_IN_GUEST(r13)
	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
	beq	kvmppc_bad_host_intr
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	cmpwi	r9, KVM_GUEST_MODE_GUEST
	ld	r9, HSTATE_SCRATCH2(r13)
	beq	kvmppc_interrupt_pr
#endif
	/* We're now back in the host but in guest MMU context */
	li	r9, KVM_GUEST_MODE_HOST_HV
	stb	r9, HSTATE_IN_GUEST(r13)

	ld	r9, HSTATE_KVM_VCPU(r13)

	/* Save registers */

	std	r0, VCPU_GPR(R0)(r9)
	std	r1, VCPU_GPR(R1)(r9)
	std	r2, VCPU_GPR(R2)(r9)
	std	r3, VCPU_GPR(R3)(r9)
	std	r4, VCPU_GPR(R4)(r9)
	std	r5, VCPU_GPR(R5)(r9)
	std	r6, VCPU_GPR(R6)(r9)
	std	r7, VCPU_GPR(R7)(r9)
	std	r8, VCPU_GPR(R8)(r9)
	ld	r0, HSTATE_SCRATCH2(r13)
	std	r0, VCPU_GPR(R9)(r9)
	std	r10, VCPU_GPR(R10)(r9)
	std	r11, VCPU_GPR(R11)(r9)
	ld	r3, HSTATE_SCRATCH0(r13)
	lwz	r4, HSTATE_SCRATCH1(r13)
	std	r3, VCPU_GPR(R12)(r9)
	stw	r4, VCPU_CR(r9)
BEGIN_FTR_SECTION
	ld	r3, HSTATE_CFAR(r13)
	std	r3, VCPU_CFAR(r9)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
	ld	r4, HSTATE_PPR(r13)
	std	r4, VCPU_PPR(r9)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)

	/* Restore R1/R2 so we can handle faults */
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r2, PACATOC(r13)

	mfspr	r10, SPRN_SRR0
	mfspr	r11, SPRN_SRR1
	std	r10, VCPU_SRR0(r9)
	std	r11, VCPU_SRR1(r9)
	andi.	r0, r12, 2		/* need to read HSRR0/1? */
	beq	1f
	mfspr	r10, SPRN_HSRR0
	mfspr	r11, SPRN_HSRR1
	clrrdi	r12, r12, 2
1:	std	r10, VCPU_PC(r9)
	std	r11, VCPU_MSR(r9)

	GET_SCRATCH0(r3)
	mflr	r4
	std	r3, VCPU_GPR(R13)(r9)
	std	r4, VCPU_LR(r9)

	stw	r12,VCPU_TRAP(r9)

	/* Save HEIR (HV emulation assist reg) in last_inst
	   if this is an HEI (HV emulation interrupt, e40) */
	li	r3,KVM_INST_FETCH_FAILED
BEGIN_FTR_SECTION
	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
	bne	11f
	mfspr	r3,SPRN_HEIR
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
11:	stw	r3,VCPU_LAST_INST(r9)

	/* these are volatile across C function calls */
	mfctr	r3
	mfxer	r4
	std	r3, VCPU_CTR(r9)
	stw	r4, VCPU_XER(r9)

BEGIN_FTR_SECTION
	/* If this is a page table miss then see if it's theirs or ours */
	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
	beq	kvmppc_hdsi
	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
	beq	kvmppc_hisi
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

	/* See if this is a leftover HDEC interrupt */
	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
	bne	2f
	mfspr	r3,SPRN_HDEC
	cmpwi	r3,0
	bge	ignore_hdec
2:
	/* See if this is an hcall we can handle in real mode */
	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
	beq	hcall_try_real_mode

	/* Only handle external interrupts here on arch 206 and later */
BEGIN_FTR_SECTION
	b	ext_interrupt_to_host
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)

	/* External interrupt ? */
	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
	bne+	ext_interrupt_to_host

	/* External interrupt, first check for host_ipi. If this is
	 * set, we know the host wants us out so let's do it now
	 */
	bl	kvmppc_read_intr
	cmpdi	r3, 0
	bgt	ext_interrupt_to_host

	/* Check if any CPU is heading out to the host, if so head out too */
	ld	r5, HSTATE_KVM_VCORE(r13)
	lwz	r0, VCORE_ENTRY_EXIT(r5)
	cmpwi	r0, 0x100
	bge	ext_interrupt_to_host

	/* Return to guest after delivering any pending interrupt */
	mr	r4, r9
	b	deliver_guest_interrupt

ext_interrupt_to_host:

guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
	/* Save more register state  */
	mfdar	r6
	mfdsisr	r7
	std	r6, VCPU_DAR(r9)
	stw	r7, VCPU_DSISR(r9)
BEGIN_FTR_SECTION
	/* don't overwrite fault_dar/fault_dsisr if HDSI */
	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
	beq	6f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
	std	r6, VCPU_FAULT_DAR(r9)
	stw	r7, VCPU_FAULT_DSISR(r9)

	/* See if it is a machine check */
	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	beq	machine_check_realmode
mc_cont:

	/* Save guest CTRL register, set runlatch to 1 */
6:	mfspr	r6,SPRN_CTRLF
	stw	r6,VCPU_CTRL(r9)
	andi.	r0,r6,1
	bne	4f
	ori	r6,r6,1
	mtspr	SPRN_CTRLT,r6
4:
	/* Read the guest SLB and save it away */
	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
	mtctr	r0
	li	r6,0
	addi	r7,r9,VCPU_SLB
	li	r5,0
1:	slbmfee	r8,r6
	andis.	r0,r8,SLB_ESID_V@h
	beq	2f
	add	r8,r8,r6		/* put index in */
	slbmfev	r3,r6
	std	r8,VCPU_SLB_E(r7)
	std	r3,VCPU_SLB_V(r7)
	addi	r7,r7,VCPU_SLB_SIZE
	addi	r5,r5,1
2:	addi	r6,r6,1
	bdnz	1b
	stw	r5,VCPU_SLB_MAX(r9)
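
	/*
	 * The SLB save loop above, roughly in C (field names assumed
	 * from the VCPU_SLB_E/V offsets; slbmfee()/slbmfev() are
	 * illustrative helpers): walk every slot, keep only valid
	 * entries, and record how many were live.
	 *
	 *	n = 0;
	 *	for (i = 0; i < vcpu->arch.slb_nr; i++) {
	 *		e = slbmfee(i);
	 *		if (!(e & SLB_ESID_V))
	 *			continue;
	 *		vcpu->arch.slb[n].orige = e | i;  // put index in
	 *		vcpu->arch.slb[n].origv = slbmfev(i);
	 *		n++;
	 *	}
	 *	vcpu->arch.slb_max = n;
	 */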

	/*
	 * Save the guest PURR/SPURR
	 */
BEGIN_FTR_SECTION
	mfspr	r5,SPRN_PURR
	mfspr	r6,SPRN_SPURR
	ld	r7,VCPU_PURR(r9)
	ld	r8,VCPU_SPURR(r9)
	std	r5,VCPU_PURR(r9)
	std	r6,VCPU_SPURR(r9)
	subf	r5,r7,r5
	subf	r6,r8,r6

	/*
	 * Restore host PURR/SPURR and add guest times
	 * so that the time in the guest gets accounted.
	 */
	ld	r3,HSTATE_PURR(r13)
	ld	r4,HSTATE_SPURR(r13)
	add	r3,r3,r5
	add	r4,r4,r6
	mtspr	SPRN_PURR,r3
	mtspr	SPRN_SPURR,r4
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)

	/* Save DEC */
	mfspr	r5,SPRN_DEC
	mftb	r6
	extsw	r5,r5
	add	r5,r5,r6
	/* r5 is a guest timebase value here, convert to host TB */
	ld	r3,HSTATE_KVM_VCORE(r13)
	ld	r4,VCORE_TB_OFFSET(r3)
	subf	r5,r4,r5
	std	r5,VCPU_DEC_EXPIRES(r9)
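
	/*
	 * Saving the guest decrementer, sketched in C: reconstruct the
	 * expiry as a timebase value (DEC is a signed 32-bit countdown,
	 * hence the sign extension), then convert guest TB back to
	 * host TB:
	 *
	 *	expiry = (s64)(s32)mfspr(SPRN_DEC) + mftb();
	 *	vcpu->arch.dec_expires = expiry - vc->tb_offset;
	 */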

BEGIN_FTR_SECTION
	b	8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
	/* Save POWER8-specific registers */
	mfspr	r5, SPRN_IAMR
	mfspr	r6, SPRN_PSPB
	mfspr	r7, SPRN_FSCR
	std	r5, VCPU_IAMR(r9)
	stw	r6, VCPU_PSPB(r9)
	std	r7, VCPU_FSCR(r9)
	mfspr	r5, SPRN_IC
	mfspr	r6, SPRN_VTB
	mfspr	r7, SPRN_TAR
	std	r5, VCPU_IC(r9)
	std	r6, VCPU_VTB(r9)
	std	r7, VCPU_TAR(r9)
	mfspr	r8, SPRN_EBBHR
	std	r8, VCPU_EBBHR(r9)
	mfspr	r5, SPRN_EBBRR
	mfspr	r6, SPRN_BESCR
	mfspr	r7, SPRN_CSIGR
	mfspr	r8, SPRN_TACR
	std	r5, VCPU_EBBRR(r9)
	std	r6, VCPU_BESCR(r9)
	std	r7, VCPU_CSIGR(r9)
	std	r8, VCPU_TACR(r9)
	mfspr	r5, SPRN_TCSCR
	mfspr	r6, SPRN_ACOP
	mfspr	r7, SPRN_PID
	mfspr	r8, SPRN_WORT
	std	r5, VCPU_TCSCR(r9)
	std	r6, VCPU_ACOP(r9)
	stw	r7, VCPU_GUEST_PID(r9)
	std	r8, VCPU_WORT(r9)
8:

	/* Save and reset AMR and UAMOR before turning on the MMU */
BEGIN_FTR_SECTION
	mfspr	r5,SPRN_AMR
	mfspr	r6,SPRN_UAMOR
	std	r5,VCPU_AMR(r9)
	std	r6,VCPU_UAMOR(r9)
	li	r6,0
	mtspr	SPRN_AMR,r6
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

	/* Switch DSCR back to host value */
BEGIN_FTR_SECTION
	mfspr	r8, SPRN_DSCR
	ld	r7, HSTATE_DSCR(r13)
	std	r8, VCPU_DSCR(r9)
	mtspr	SPRN_DSCR, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)

	/* Save non-volatile GPRs */
	std	r14, VCPU_GPR(R14)(r9)
	std	r15, VCPU_GPR(R15)(r9)
	std	r16, VCPU_GPR(R16)(r9)
	std	r17, VCPU_GPR(R17)(r9)
	std	r18, VCPU_GPR(R18)(r9)
	std	r19, VCPU_GPR(R19)(r9)
	std	r20, VCPU_GPR(R20)(r9)
	std	r21, VCPU_GPR(R21)(r9)
	std	r22, VCPU_GPR(R22)(r9)
	std	r23, VCPU_GPR(R23)(r9)
	std	r24, VCPU_GPR(R24)(r9)
	std	r25, VCPU_GPR(R25)(r9)
	std	r26, VCPU_GPR(R26)(r9)
	std	r27, VCPU_GPR(R27)(r9)
	std	r28, VCPU_GPR(R28)(r9)
	std	r29, VCPU_GPR(R29)(r9)
	std	r30, VCPU_GPR(R30)(r9)
	std	r31, VCPU_GPR(R31)(r9)

	/* Save SPRGs */
	mfspr	r3, SPRN_SPRG0
	mfspr	r4, SPRN_SPRG1
	mfspr	r5, SPRN_SPRG2
	mfspr	r6, SPRN_SPRG3
	std	r3, VCPU_SPRG0(r9)
	std	r4, VCPU_SPRG1(r9)
	std	r5, VCPU_SPRG2(r9)
	std	r6, VCPU_SPRG3(r9)

	/* save FP state */
	mr	r3, r9
	bl	kvmppc_save_fp

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
	b	2f
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
	/* Turn on TM. */
	mfmsr	r8
	li	r0, 1
	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
	mtmsrd	r8

	ld	r5, VCPU_MSR(r9)
	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
	beq	1f	/* TM not active in guest. */

	li	r3, TM_CAUSE_KVM_RESCHED

	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
	li	r5, 0
	mtmsrd	r5, 1

	/* All GPRs are volatile at this point. */
	TRECLAIM(R3)

	/* Temporarily store r13 and r9 so we have some regs to play with */
	SET_SCRATCH0(r13)
	GET_PACA(r13)
	std	r9, PACATMSCRATCH(r13)
	ld	r9, HSTATE_KVM_VCPU(r13)

	/* Get a few more GPRs free. */
	std	r29, VCPU_GPRS_TM(29)(r9)
	std	r30, VCPU_GPRS_TM(30)(r9)
	std	r31, VCPU_GPRS_TM(31)(r9)

	/* Save away PPR and DSCR soon so don't run with user values. */
	mfspr	r31, SPRN_PPR
	HMT_MEDIUM
	mfspr	r30, SPRN_DSCR
	ld	r29, HSTATE_DSCR(r13)
	mtspr	SPRN_DSCR, r29

	/* Save all but r9, r13 & r29-r31 */
	reg = 0
	.rept	29
	.if (reg != 9) && (reg != 13)
	std	reg, VCPU_GPRS_TM(reg)(r9)
	.endif
	reg = reg + 1
	.endr
	/* ... now save r13 */
	GET_SCRATCH0(r4)
	std	r4, VCPU_GPRS_TM(13)(r9)
	/* ... and save r9 */
	ld	r4, PACATMSCRATCH(r13)
	std	r4, VCPU_GPRS_TM(9)(r9)

	/* Reload stack pointer and TOC. */
	ld	r1, HSTATE_HOST_R1(r13)
	ld	r2, PACATOC(r13)

	/* Set MSR RI now we have r1 and r13 back. */
	li	r5, MSR_RI
	mtmsrd	r5, 1

	/* Save away checkpointed SPRs. */
	std	r31, VCPU_PPR_TM(r9)
	std	r30, VCPU_DSCR_TM(r9)
	mflr	r5
	mfcr	r6
	mfctr	r7
	mfspr	r8, SPRN_AMR
	mfspr	r10, SPRN_TAR
	std	r5, VCPU_LR_TM(r9)
	stw	r6, VCPU_CR_TM(r9)
	std	r7, VCPU_CTR_TM(r9)
	std	r8, VCPU_AMR_TM(r9)
	std	r10, VCPU_TAR_TM(r9)

	/* Restore r12 as trap number. */
	lwz	r12, VCPU_TRAP(r9)

	/* Save FP/VSX. */
	addi	r3, r9, VCPU_FPRS_TM
	bl	.store_fp_state
	addi	r3, r9, VCPU_VRS_TM
	bl	.store_vr_state
	mfspr	r6, SPRN_VRSAVE
	stw	r6, VCPU_VRSAVE_TM(r9)
1:
	/*
	 * We need to save these SPRs after the treclaim so that the software
	 * error code is recorded correctly in the TEXASR.  Also the user may
	 * change these outside of a transaction, so they must always be
	 * context switched.
	 */
	mfspr	r5, SPRN_TFHAR
	mfspr	r6, SPRN_TFIAR
	mfspr	r7, SPRN_TEXASR
	std	r5, VCPU_TFHAR(r9)
	std	r6, VCPU_TFIAR(r9)
	std	r7, VCPU_TEXASR(r9)
2:
#endif

	/* Increment yield count if they have a VPA */
	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
	cmpdi	r8, 0
	beq	25f
	lwz	r3, LPPACA_YIELDCOUNT(r8)
	addi	r3, r3, 1
	stw	r3, LPPACA_YIELDCOUNT(r8)
	li	r3, 1
	stb	r3, VCPU_VPA_DIRTY(r9)
25:
	/* Save PMU registers if requested */
	/* r8 and cr0.eq are live here */
BEGIN_FTR_SECTION
	/*
	 * POWER8 seems to have a hardware bug where setting
	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
	 * when some counters are already negative doesn't seem
	 * to cause a performance monitor alert (and hence interrupt).
	 * The effect of this is that when saving the PMU state,
	 * if there is no PMU alert pending when we read MMCR0
	 * before freezing the counters, but one becomes pending
	 * before we read the counters, we lose it.
	 * To work around this, we need a way to freeze the counters
	 * before reading MMCR0.  Normally, freezing the counters
	 * is done by writing MMCR0 (to set MMCR0[FC]) which
	 * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
	 * we can also freeze the counters using MMCR2, by writing
	 * 1s to all the counter freeze condition bits (there are
	 * 9 bits each for 6 counters).
	 */
	li	r3, -1			/* set all freeze bits */
	clrrdi	r3, r3, 10
	mfspr	r10, SPRN_MMCR2
	mtspr	SPRN_MMCR2, r3
	isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	li	r3, 1
	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
	mfspr	r6, SPRN_MMCRA
BEGIN_FTR_SECTION
	/* On P7, clear MMCRA in order to disable SDAR updates */
	li	r7, 0
	mtspr	SPRN_MMCRA, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
	isync
	beq	21f			/* if no VPA, save PMU stuff anyway */
	lbz	r7, LPPACA_PMCINUSE(r8)
	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */
	bne	21f
	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
	b	22f
21:	mfspr	r5, SPRN_MMCR1
	mfspr	r7, SPRN_SIAR
	mfspr	r8, SPRN_SDAR
	std	r4, VCPU_MMCR(r9)
	std	r5, VCPU_MMCR + 8(r9)
	std	r6, VCPU_MMCR + 16(r9)
BEGIN_FTR_SECTION
	std	r10, VCPU_MMCR + 24(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	std	r7, VCPU_SIAR(r9)
	std	r8, VCPU_SDAR(r9)
	mfspr	r3, SPRN_PMC1
	mfspr	r4, SPRN_PMC2
	mfspr	r5, SPRN_PMC3
	mfspr	r6, SPRN_PMC4
	mfspr	r7, SPRN_PMC5
	mfspr	r8, SPRN_PMC6
BEGIN_FTR_SECTION
	mfspr	r10, SPRN_PMC7
	mfspr	r11, SPRN_PMC8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	stw	r3, VCPU_PMC(r9)
	stw	r4, VCPU_PMC + 4(r9)
	stw	r5, VCPU_PMC + 8(r9)
	stw	r6, VCPU_PMC + 12(r9)
	stw	r7, VCPU_PMC + 16(r9)
	stw	r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION
	stw	r10, VCPU_PMC + 24(r9)
	stw	r11, VCPU_PMC + 28(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
BEGIN_FTR_SECTION
	mfspr	r5, SPRN_SIER
	mfspr	r6, SPRN_SPMC1
	mfspr	r7, SPRN_SPMC2
	mfspr	r8, SPRN_MMCRS
	std	r5, VCPU_SIER(r9)
	stw	r6, VCPU_PMC + 24(r9)
	stw	r7, VCPU_PMC + 28(r9)
	std	r8, VCPU_MMCR + 32(r9)
	lis	r4, 0x8000
	mtspr	SPRN_MMCRS, r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
	/* Clear out SLB */
	li	r5,0
	slbmte	r5,r5
	slbia
	ptesync

hdec_soon:			/* r12 = trap, r13 = paca */
BEGIN_FTR_SECTION
	b	32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
	/*
	 * POWER7 guest -> host partition switch code.
	 * We don't have to lock against tlbies but we do
	 * have to coordinate the hardware threads.
	 */
	/* Increment the threads-exiting-guest count in the 0xff00
	   bits of vcore->entry_exit_count */
	ld	r5,HSTATE_KVM_VCORE(r13)
	addi	r6,r5,VCORE_ENTRY_EXIT
41:	lwarx	r3,0,r6
	addi	r0,r3,0x100
	stwcx.	r0,0,r6
	bne	41b
	isync		/* order stwcx. vs. reading napping_threads */

	/*
	 * At this point we have an interrupt that we have to pass
	 * up to the kernel or qemu; we can't handle it in real mode.
	 * Thus we have to do a partition switch, so we have to
	 * collect the other threads, if we are the first thread
	 * to take an interrupt.  To do this, we set the HDEC to 0,
	 * which causes an HDEC interrupt in all threads within 2ns
	 * because the HDEC register is shared between all 4 threads.
	 * However, we don't need to bother if this is an HDEC
	 * interrupt, since the other threads will already be on their
	 * way here in that case.
	 */
	cmpwi	r3,0x100	/* Are we the first here? */
	bge	43f
	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
	beq	40f
	li	r0,0
	mtspr	SPRN_HDEC,r0
40:
	/*
	 * Send an IPI to any napping threads, since an HDEC interrupt
	 * doesn't wake CPUs up from nap.
	 */
	lwz	r3,VCORE_NAPPING_THREADS(r5)
	lbz	r4,HSTATE_PTID(r13)
	li	r0,1
	sld	r0,r0,r4
	andc.	r3,r3,r0		/* no sense IPI'ing ourselves */
	beq	43f
	/* Order entry/exit update vs. IPIs */
	sync
	mulli	r4,r4,PACA_SIZE		/* get paca for thread 0 */
	subf	r6,r4,r13
42:	andi.	r0,r3,1
	beq	44f
	ld	r8,HSTATE_XICS_PHYS(r6)	/* get thread's XICS reg addr */
	li	r0,IPI_PRIORITY
	li	r7,XICS_MFRR
	stbcix	r0,r7,r8		/* trigger the IPI */
44:	srdi.	r3,r3,1
	addi	r6,r6,PACA_SIZE
	bne	42b

secondary_too_late:
	/* Secondary threads wait for primary to do partition switch */
43:	ld	r5,HSTATE_KVM_VCORE(r13)
	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
	lbz	r3,HSTATE_PTID(r13)
	cmpwi	r3,0
	beq	15f
	HMT_LOW
13:	lbz	r3,VCORE_IN_GUEST(r5)
	cmpwi	r3,0
	bne	13b
	HMT_MEDIUM
	b	16f

	/* Primary thread waits for all the secondaries to exit guest */
15:	lwz	r3,VCORE_ENTRY_EXIT(r5)
	srwi	r0,r3,8
	clrldi	r3,r3,56
	cmpw	r3,r0
	bne	15b
	isync
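
	/*
	 * The wait loop above compares the two counts packed into
	 * entry_exit_count; in C (low byte = threads that entered,
	 * next byte = threads exiting):
	 *
	 *	do {
	 *		count = vcore->entry_exit_count;
	 *	} while ((count & 0xff) != ((count >> 8) & 0xff));
	 *	isync();	// no guest accesses past this point
	 */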

	/* Primary thread switches back to host partition */
	ld	r6,KVM_HOST_SDR1(r4)
	lwz	r7,KVM_HOST_LPID(r4)
	li	r8,LPID_RSVD		/* switch to reserved LPID */
	mtspr	SPRN_LPID,r8
	ptesync
	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
	mtspr	SPRN_LPID,r7
	isync

BEGIN_FTR_SECTION
	/* DPDES is shared between threads */
	mfspr	r7, SPRN_DPDES
	std	r7, VCORE_DPDES(r5)
	/* clear DPDES so we don't get guest doorbells in the host */
	li	r8, 0
	mtspr	SPRN_DPDES, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)

	/* Subtract timebase offset from timebase */
	ld	r8,VCORE_TB_OFFSET(r5)
	cmpdi	r8,0
	beq	17f
	mftb	r6			/* current guest timebase */
	subf	r8,r8,r6
	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
	mftb	r7			/* check if lower 24 bits overflowed */
	clrldi	r6,r6,40
	clrldi	r7,r7,40
	cmpld	r7,r6
	bge	17f
	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
	mtspr	SPRN_TBU40,r8

	/* Reset PCR */
17:	ld	r0, VCORE_PCR(r5)
	cmpdi	r0, 0
	beq	18f
	li	r0, 0
	mtspr	SPRN_PCR, r0
18:
	/* Signal secondary CPUs to continue */
	stb	r0,VCORE_IN_GUEST(r5)
	lis	r8,0x7fff		/* MAX_INT@h */
	mtspr	SPRN_HDEC,r8

16:	ld	r8,KVM_HOST_LPCR(r4)
	mtspr	SPRN_LPCR,r8
	isync
	b	33f

	/*
	 * PPC970 guest -> host partition switch code.
	 * We have to lock against concurrent tlbies, and
	 * we have to flush the whole TLB.
	 */
32:	ld	r5,HSTATE_KVM_VCORE(r13)
	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */

	/* Take the guest's tlbie_lock */
#ifdef __BIG_ENDIAN__
	lwz	r8,PACA_LOCK_TOKEN(r13)
#else
	lwz	r8,PACAPACAINDEX(r13)
#endif
	addi	r3,r4,KVM_TLBIE_LOCK
24:	lwarx	r0,0,r3
	cmpwi	r0,0
	bne	24b
	stwcx.	r8,0,r3
	bne	24b
	isync

	ld	r7,KVM_HOST_LPCR(r4)	/* use kvm->arch.host_lpcr for HID4 */
	li	r0,0x18f
	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
	or	r0,r7,r0
	ptesync
	sync
	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
	isync
	li	r0,0
	stw	r0,0(r3)		/* drop guest tlbie_lock */

	/* invalidate the whole TLB */
	li	r0,256
	mtctr	r0
	li	r6,0
25:	tlbiel	r6
	addi	r6,r6,0x1000
	bdnz	25b
	ptesync

	/* take native_tlbie_lock */
	ld	r3,toc_tlbie_lock@toc(2)
24:	lwarx	r0,0,r3
	cmpwi	r0,0
	bne	24b
	stwcx.	r8,0,r3
	bne	24b
	isync

	ld	r6,KVM_HOST_SDR1(r4)
	mtspr	SPRN_SDR1,r6		/* switch to host page table */

	/* Set up host HID4 value */
	sync
	mtspr	SPRN_HID4,r7
	isync
	li	r0,0
	stw	r0,0(r3)		/* drop native_tlbie_lock */

	lis	r8,0x7fff		/* MAX_INT@h */
	mtspr	SPRN_HDEC,r8

	/* Disable HDEC interrupts */
	mfspr	r0,SPRN_HID0
	li	r3,0
	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
	sync
	mtspr	SPRN_HID0,r0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0
	mfspr	r0,SPRN_HID0

	/* load host SLB entries */
33:	ld	r8,PACA_SLBSHADOWPTR(r13)

	.rept	SLB_NUM_BOLTED
	ld	r5,SLBSHADOW_SAVEAREA(r8)
	ld	r6,SLBSHADOW_SAVEAREA+8(r8)
	andis.	r7,r5,SLB_ESID_V@h
	beq	1f
	slbmte	r6,r5
1:	addi	r8,r8,16
	.endr

	/* Unset guest mode */
	li	r0, KVM_GUEST_MODE_NONE
	stb	r0, HSTATE_IN_GUEST(r13)

	ld	r0, 112+PPC_LR_STKOFF(r1)
	addi	r1, r1, 112
	mtlr	r0
	blr

/*
 * Check whether an HDSI is an HPTE not found fault or something else.
 * If it is an HPTE not found fault that is due to the guest accessing
 * a page that they have mapped but which we have paged out, then
 * we continue on with the guest exit path.  In all other cases,
 * reflect the HDSI to the guest as a DSI.
 */
kvmppc_hdsi:
	mfspr	r4, SPRN_HDAR
	mfspr	r6, SPRN_HDSISR
	/* HPTE not found fault or protection fault? */
	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
	beq	1f			/* if not, send it to the guest */
	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
	beq	3f
	clrrdi	r0, r4, 28
	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
	bne	1f			/* if no SLB entry found */
4:	std	r4, VCPU_FAULT_DAR(r9)
	stw	r6, VCPU_FAULT_DSISR(r9)

	/* Search the hash table. */
	mr	r3, r9			/* vcpu pointer */
	li	r7, 1			/* data fault */
	bl	kvmppc_hpte_hv_fault
	ld	r9, HSTATE_KVM_VCPU(r13)
	ld	r10, VCPU_PC(r9)
	ld	r11, VCPU_MSR(r9)
	li	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
	cmpdi	r3, 0			/* retry the instruction */
	beq	6f
	cmpdi	r3, -1			/* handle in kernel mode */
	beq	guest_exit_cont
	cmpdi	r3, -2			/* MMIO emulation; need instr word */
	beq	2f

	/* Synthesize a DSI for the guest */
	ld	r4, VCPU_FAULT_DAR(r9)
	mr	r6, r3
1:	mtspr	SPRN_DAR, r4
	mtspr	SPRN_DSISR, r6
	mtspr	SPRN_SRR0, r10
	mtspr	SPRN_SRR1, r11
	li	r10, BOOK3S_INTERRUPT_DATA_STORAGE
	bl	kvmppc_msr_interrupt
fast_interrupt_c_return:
6:	ld	r7, VCPU_CTR(r9)
	lwz	r8, VCPU_XER(r9)
	mtctr	r7
	mtxer	r8
	mr	r4, r9
	b	fast_guest_return

3:	ld	r5, VCPU_KVM(r9)	/* not relocated, use VRMA */
	ld	r5, KVM_VRMA_SLB_V(r5)
	b	4b

	/* If this is for emulated MMIO, load the instruction word */
2:	li	r8, KVM_INST_FETCH_FAILED	/* In case lwz faults */

	/* Set guest mode to 'jump over instruction' so if lwz faults
	 * we'll just continue at the next IP. */
	li	r0, KVM_GUEST_MODE_SKIP
	stb	r0, HSTATE_IN_GUEST(r13)

	/* Do the access with MSR:DR enabled */
	mfmsr	r3
	ori	r4, r3, MSR_DR		/* Enable paging for data */
	mtmsrd	r4
	lwz	r8, 0(r10)
	mtmsrd	r3

	/* Store the result */
	stw	r8, VCPU_LAST_INST(r9)

	/* Unset guest mode. */
	li	r0, KVM_GUEST_MODE_HOST_HV
	stb	r0, HSTATE_IN_GUEST(r13)
	b	guest_exit_cont

/*
 * Similarly for an HISI, reflect it to the guest as an ISI unless
 * it is an HPTE not found fault for a page that we have paged out.
 */
kvmppc_hisi:
	andis.	r0, r11, SRR1_ISI_NOPT@h
	beq	1f
	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
	beq	3f
	clrrdi	r0, r10, 28
	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
	bne	1f			/* if no SLB entry found */
4:
	/* Search the hash table. */
	mr	r3, r9			/* vcpu pointer */
	mr	r4, r10
	mr	r6, r11
	li	r7, 0			/* instruction fault */
	bl	kvmppc_hpte_hv_fault
	ld	r9, HSTATE_KVM_VCPU(r13)
	ld	r10, VCPU_PC(r9)
	ld	r11, VCPU_MSR(r9)
	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
	cmpdi	r3, 0			/* retry the instruction */
	beq	fast_interrupt_c_return
	cmpdi	r3, -1			/* handle in kernel mode */
	beq	guest_exit_cont

	/* Synthesize an ISI for the guest */
	mr	r11, r3
1:	mtspr	SPRN_SRR0, r10
	mtspr	SPRN_SRR1, r11
	li	r10, BOOK3S_INTERRUPT_INST_STORAGE
	bl	kvmppc_msr_interrupt
	b	fast_interrupt_c_return

3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
	ld	r5, KVM_VRMA_SLB_V(r6)
	b	4b

/*
 * Try to handle an hcall in real mode.
 * Returns to the guest if we handle it, or continues on up to
 * the kernel if we can't (i.e. if we don't have a handler for
 * it, or if the handler returns H_TOO_HARD).
 */
	.globl	hcall_try_real_mode
hcall_try_real_mode:
	ld	r3,VCPU_GPR(R3)(r9)
	andi.	r0,r11,MSR_PR
	/* sc 1 from userspace - reflect to guest syscall */
	bne	sc_1_fast_return
	clrrdi	r3,r3,2
	cmpldi	r3,hcall_real_table_end - hcall_real_table
	bge	guest_exit_cont
	LOAD_REG_ADDR(r4, hcall_real_table)
	lwax	r3,r3,r4
	cmpwi	r3,0
	beq	guest_exit_cont
	add	r3,r3,r4
	mtctr	r3
	mr	r3,r9		/* get vcpu pointer */
	ld	r4,VCPU_GPR(R4)(r9)
	bctrl
	cmpdi	r3,H_TOO_HARD
	beq	hcall_real_fallback
	ld	r4,HSTATE_KVM_VCPU(r13)
	std	r3,VCPU_GPR(R3)(r4)
	ld	r10,VCPU_PC(r4)
	ld	r11,VCPU_MSR(r4)
	b	fast_guest_return
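
	/*
	 * The dispatch above, sketched in C.  hcall_real_table holds
	 * 32-bit offsets relative to the table itself (keeping the
	 * entries position-independent), and hcall numbers are
	 * multiples of 4, so the number doubles as the byte offset:
	 *
	 *	s32 off;
	 *	if (nr >= table_size_bytes ||
	 *	    (off = hcall_real_table[nr / 4]) == 0)
	 *		return_to_host();	// no real-mode handler
	 *	handler = (void *)((long)hcall_real_table + off);
	 *	ret = handler(vcpu, arg);
	 *	if (ret == H_TOO_HARD)
	 *		return_to_host();	// retry in virtual mode
	 *	vcpu->arch.gpr[3] = ret;	// else back to the guest
	 */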

sc_1_fast_return:
	mtspr	SPRN_SRR0,r10
	mtspr	SPRN_SRR1,r11
	li	r10, BOOK3S_INTERRUPT_SYSCALL
	bl	kvmppc_msr_interrupt
	mr	r4,r9
	b	fast_guest_return

	/* We've attempted a real mode hcall, but it was punted back
	 * to userspace.  We need to restore some clobbered volatiles
	 * before resuming the pass-it-to-qemu path */
hcall_real_fallback:
	li	r12,BOOK3S_INTERRUPT_SYSCALL
	ld	r9, HSTATE_KVM_VCPU(r13)

	b	guest_exit_cont

	.globl	hcall_real_table
hcall_real_table:
	.long	0		/* 0 - unused */
	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
	.long	0		/* 0x10 - H_CLEAR_MOD */
	.long	0		/* 0x14 - H_CLEAR_REF */
	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
	.long	DOTSYM(kvmppc_h_put_tce) - hcall_real_table
	.long	0		/* 0x24 - H_SET_SPRG0 */
	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
	.long	0		/* 0x2c */
	.long	0		/* 0x30 */
	.long	0		/* 0x34 */
	.long	0		/* 0x38 */
	.long	0		/* 0x3c */
	.long	0		/* 0x40 */
	.long	0		/* 0x44 */
	.long	0		/* 0x48 */
	.long	0		/* 0x4c */
	.long	0		/* 0x50 */
	.long	0		/* 0x54 */
	.long	0		/* 0x58 */
	.long	0		/* 0x5c */
	.long	0		/* 0x60 */
#ifdef CONFIG_KVM_XICS
	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
	.long	0		/* 0x70 - H_IPOLL */
	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
#else
	.long	0		/* 0x64 - H_EOI */
	.long	0		/* 0x68 - H_CPPR */
	.long	0		/* 0x6c - H_IPI */
	.long	0		/* 0x70 - H_IPOLL */
	.long	0		/* 0x74 - H_XIRR */
#endif
	.long	0		/* 0x78 */
	.long	0		/* 0x7c */
	.long	0		/* 0x80 */
	.long	0		/* 0x84 */
	.long	0		/* 0x88 */
	.long	0		/* 0x8c */
	.long	0		/* 0x90 */
	.long	0		/* 0x94 */
	.long	0		/* 0x98 */
	.long	0		/* 0x9c */
	.long	0		/* 0xa0 */
	.long	0		/* 0xa4 */
	.long	0		/* 0xa8 */
	.long	0		/* 0xac */
	.long	0		/* 0xb0 */
	.long	0		/* 0xb4 */
	.long	0		/* 0xb8 */
	.long	0		/* 0xbc */
	.long	0		/* 0xc0 */
	.long	0		/* 0xc4 */
	.long	0		/* 0xc8 */
	.long	0		/* 0xcc */
	.long	0		/* 0xd0 */
	.long	0		/* 0xd4 */
	.long	0		/* 0xd8 */
	.long	0		/* 0xdc */
	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
	.long	0		/* 0xe4 */
	.long	0		/* 0xe8 */
	.long	0		/* 0xec */
	.long	0		/* 0xf0 */
	.long	0		/* 0xf4 */
	.long	0		/* 0xf8 */
	.long	0		/* 0xfc */
	.long	0		/* 0x100 */
	.long	0		/* 0x104 */
	.long	0		/* 0x108 */
	.long	0		/* 0x10c */
	.long	0		/* 0x110 */
	.long	0		/* 0x114 */
	.long	0		/* 0x118 */
	.long	0		/* 0x11c */
	.long	0		/* 0x120 */
	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
	.long	0		/* 0x128 */
	.long	0		/* 0x12c */
	.long	0		/* 0x130 */
	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
hcall_real_table_end:

ignore_hdec:
	mr	r4,r9
	b	fast_guest_return

_GLOBAL(kvmppc_h_set_xdabr)
	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
	beq	6f
	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
	andc.	r0, r5, r0
	beq	3f
6:	li	r3, H_PARAMETER
	blr

_GLOBAL(kvmppc_h_set_dabr)
	li	r5, DABRX_USER | DABRX_KERNEL
3:
BEGIN_FTR_SECTION
	b	2f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	std	r4,VCPU_DABR(r3)
	stw	r5, VCPU_DABRX(r3)
	mtspr	SPRN_DABRX, r5
	/* Work around P7 bug where DABR can get corrupted on mtspr */
1:	mtspr	SPRN_DABR,r4
	mfspr	r5, SPRN_DABR
	cmpd	r4, r5
	bne	1b
	isync
	li	r3,0
	blr

	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
	rlwimi	r5, r4, 1, DAWRX_WT
	clrrdi	r4, r4, 3
	std	r4, VCPU_DAWR(r3)
	std	r5, VCPU_DAWRX(r3)
	mtspr	SPRN_DAWR, r4
	mtspr	SPRN_DAWRX, r5
	li	r3, 0
	blr
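
	/*
	 * The DABR-to-DAWR translation above, sketched in C (rlwimi
	 * inserts the rotated source under the given mask; the exact
	 * bit positions follow the DAWRX_* definitions):
	 *
	 *	dawrx = (dabrx & ~(DAWRX_DR | DAWRX_DW)) |
	 *		((dabr << 5) & (DAWRX_DR | DAWRX_DW)); // R/W enables
	 *	dawrx = (dawrx & ~DAWRX_WT) |
	 *		((dabr << 1) & DAWRX_WT);	// translate bit
	 *	dawr  = dabr & ~7ul;			// drop low flag bits
	 */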
2077
2078_GLOBAL(kvmppc_h_cede)
2079	ori	r11,r11,MSR_EE
2080	std	r11,VCPU_MSR(r3)
2081	li	r0,1
2082	stb	r0,VCPU_CEDED(r3)
2083	sync			/* order setting ceded vs. testing prodded */
2084	lbz	r5,VCPU_PRODDED(r3)
2085	cmpwi	r5,0
2086	bne	kvm_cede_prodded
2087	li	r0,0		/* set trap to 0 to say hcall is handled */
2088	stw	r0,VCPU_TRAP(r3)
2089	li	r0,H_SUCCESS
2090	std	r0,VCPU_GPR(R3)(r3)
2091BEGIN_FTR_SECTION
2092	b	kvm_cede_exit	/* just send it up to host on 970 */
2093END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
2094
2095	/*
2096	 * Set our bit in the bitmask of napping threads unless all the
2097	 * other threads are already napping, in which case we send this
2098	 * up to the host.
2099	 */
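	/*
	 * Roughly equivalent C for the lwarx/stwcx. sequence below (a
	 * sketch only; "cmpxchg" stands in for the reservation pair and
	 * the field names follow the asm-offsets used in this file):
	 *
	 *	mask = 1 << ptid;
	 *	do {
	 *		old = vc->napping_threads;
	 *		new = old | mask;
	 *		if (popcount(new) >= (vc->entry_exit_count & 0xff))
	 *			goto kvm_cede_exit;	// we'd be the last one awake
	 *	} while (cmpxchg(&vc->napping_threads, old, new) != old);
	 */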
	ld	r5,HSTATE_KVM_VCORE(r13)
	lbz	r6,HSTATE_PTID(r13)
	lwz	r8,VCORE_ENTRY_EXIT(r5)
	clrldi	r8,r8,56
	li	r0,1
	sld	r0,r0,r6
	addi	r6,r5,VCORE_NAPPING_THREADS
31:	lwarx	r4,0,r6
	or	r4,r4,r0
	PPC_POPCNTW(R7,R4)
	cmpw	r7,r8
	bge	kvm_cede_exit
	stwcx.	r4,0,r6
	bne	31b
	/* order napping_threads update vs testing entry_exit_count */
	isync
	li	r0,NAPPING_CEDE
	stb	r0,HSTATE_NAPPING(r13)
	lwz	r7,VCORE_ENTRY_EXIT(r5)
	cmpwi	r7,0x100
	bge	33f		/* another thread already exiting */

/*
 * Although not specifically required by the architecture, POWER7
 * preserves the following registers in nap mode, even if an SMT mode
 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
 */
	/* Save non-volatile GPRs */
	std	r14, VCPU_GPR(R14)(r3)
	std	r15, VCPU_GPR(R15)(r3)
	std	r16, VCPU_GPR(R16)(r3)
	std	r17, VCPU_GPR(R17)(r3)
	std	r18, VCPU_GPR(R18)(r3)
	std	r19, VCPU_GPR(R19)(r3)
	std	r20, VCPU_GPR(R20)(r3)
	std	r21, VCPU_GPR(R21)(r3)
	std	r22, VCPU_GPR(R22)(r3)
	std	r23, VCPU_GPR(R23)(r3)
	std	r24, VCPU_GPR(R24)(r3)
	std	r25, VCPU_GPR(R25)(r3)
	std	r26, VCPU_GPR(R26)(r3)
	std	r27, VCPU_GPR(R27)(r3)
	std	r28, VCPU_GPR(R28)(r3)
	std	r29, VCPU_GPR(R29)(r3)
	std	r30, VCPU_GPR(R30)(r3)
	std	r31, VCPU_GPR(R31)(r3)

	/* save FP state */
	bl	kvmppc_save_fp

	/*
	 * Take a nap until a decrementer, external or doorbell interrupt
	 * occurs, with PECE1, PECE0 and PECEDP set in LPCR.  Also clear
	 * the runlatch bit before napping.
	 */
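	/*
	 * In C-like pseudocode, the sequence below is roughly (a sketch;
	 * cpu_has_feature() stands in for the feature section):
	 *
	 *	mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);	// clear runlatch
	 *	lpcr = mfspr(SPRN_LPCR) | LPCR_PECE0 | LPCR_PECE1;
	 *	if (cpu_has_feature(CPU_FTR_ARCH_207S))
	 *		lpcr |= LPCR_PECEDP;	// doorbells also wake us on P8
	 *	mtspr(SPRN_LPCR, lpcr);
	 *	nap();
	 */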
	mfspr	r2, SPRN_CTRLF
	clrrdi	r2, r2, 1
	mtspr	SPRN_CTRLT, r2

	li	r0,1
	stb	r0,HSTATE_HWTHREAD_REQ(r13)
	mfspr	r5,SPRN_LPCR
	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
	oris	r5,r5,LPCR_PECEDP@h
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	mtspr	SPRN_LPCR,r5
	isync
	li	r0, 0
	std	r0, HSTATE_SCRATCH0(r13)
	ptesync
	ld	r0, HSTATE_SCRATCH0(r13)
1:	cmpd	r0, r0
	bne	1b
	nap
	b	.

33:	mr	r4, r3
	li	r3, 0
	li	r12, 0
	b	34f

kvm_end_cede:
	/* get vcpu pointer */
	ld	r4, HSTATE_KVM_VCPU(r13)

	/* Woken by external or decrementer interrupt */
	ld	r1, HSTATE_HOST_R1(r13)

	/* load up FP state */
	bl	kvmppc_load_fp

	/* Load NV GPRS */
	ld	r14, VCPU_GPR(R14)(r4)
	ld	r15, VCPU_GPR(R15)(r4)
	ld	r16, VCPU_GPR(R16)(r4)
	ld	r17, VCPU_GPR(R17)(r4)
	ld	r18, VCPU_GPR(R18)(r4)
	ld	r19, VCPU_GPR(R19)(r4)
	ld	r20, VCPU_GPR(R20)(r4)
	ld	r21, VCPU_GPR(R21)(r4)
	ld	r22, VCPU_GPR(R22)(r4)
	ld	r23, VCPU_GPR(R23)(r4)
	ld	r24, VCPU_GPR(R24)(r4)
	ld	r25, VCPU_GPR(R25)(r4)
	ld	r26, VCPU_GPR(R26)(r4)
	ld	r27, VCPU_GPR(R27)(r4)
	ld	r28, VCPU_GPR(R28)(r4)
	ld	r29, VCPU_GPR(R29)(r4)
	ld	r30, VCPU_GPR(R30)(r4)
	ld	r31, VCPU_GPR(R31)(r4)

	/* Check the wake reason in SRR1 to see why we got here */
	bl	kvmppc_check_wake_reason

	/* clear our bit in vcore->napping_threads */
34:	ld	r5,HSTATE_KVM_VCORE(r13)
	lbz	r7,HSTATE_PTID(r13)
	li	r0,1
	sld	r0,r0,r7
	addi	r6,r5,VCORE_NAPPING_THREADS
32:	lwarx	r7,0,r6
	andc	r7,r7,r0
	stwcx.	r7,0,r6
	bne	32b
	li	r0,0
	stb	r0,HSTATE_NAPPING(r13)

	/* See if the wake reason means we need to exit */
	stw	r12, VCPU_TRAP(r4)
	mr	r9, r4
	cmpdi	r3, 0
	bgt	guest_exit_cont

	/* see if any other thread is already exiting */
	lwz	r0,VCORE_ENTRY_EXIT(r5)
	cmpwi	r0,0x100
	bge	guest_exit_cont

	b	kvmppc_cede_reentry	/* if not go back to guest */

	/* cede when we have already been prodded */
kvm_cede_prodded:
	li	r0,0
	stb	r0,VCPU_PRODDED(r3)
	sync			/* order testing prodded vs. clearing ceded */
	stb	r0,VCPU_CEDED(r3)
	li	r3,H_SUCCESS
	blr

	/* we've ceded but we want to give control to the host */
kvm_cede_exit:
	b	hcall_real_fallback

	/* Try to handle a machine check in real mode */
machine_check_realmode:
	mr	r3, r9		/* get vcpu pointer */
	bl	kvmppc_realmode_machine_check
	nop
	cmpdi	r3, 0		/* Did we handle MCE ? */
	ld	r9, HSTATE_KVM_VCPU(r13)
	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
	/*
	 * Deliver unhandled/fatal (e.g. UE) MCE errors to the guest
	 * through a machine check interrupt (set HSRR0 to 0x200).  For
	 * handled (non-fatal) errors, just go back to guest execution
	 * at the current HSRR0 instead of exiting the guest.  This
	 * approach injects a machine check into the guest for a fatal
	 * error, causing the guest to crash.
	 *
	 * The old code used to return to the host for unhandled errors,
	 * which caused the guest to hang with soft lockups and made it
	 * difficult to recover the guest instance.
	 */
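	/*
	 * In outline, the branch below implements (a sketch, not a
	 * literal translation; "handled" is the return value of
	 * kvmppc_realmode_machine_check):
	 *
	 *	if (handled)
	 *		resume the guest at the current HSRR0;
	 *	else
	 *		inject a 0x200 machine check into the guest;
	 */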
	ld	r10, VCPU_PC(r9)
	ld	r11, VCPU_MSR(r9)
	bne	2f	/* Continue guest execution. */
	/* If not, deliver a machine check.  SRR0/1 are already set */
	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
	ld	r11, VCPU_MSR(r9)
	bl	kvmppc_msr_interrupt
2:	b	fast_interrupt_c_return

/*
 * Check the reason we woke from nap, and take appropriate action.
 * Returns:
 *	0 if nothing needs to be done
 *	1 if something happened that needs to be handled by the host
 *	-1 if there was a guest wakeup (IPI)
 *
 * Also sets r12 to the interrupt vector for any interrupt that needs
 * to be handled now by the host (0x500 for external interrupt), or zero.
 */
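/*
 * A C-flavoured sketch of the contract (illustrative names only; the
 * WAKE_* cases correspond to the SRR1 wake reason values tested below):
 *
 *	switch (wake_reason(mfspr(SPRN_SRR1))) {
 *	case WAKE_EXTERNAL:	r12 = 0x500; return kvmppc_read_intr();
 *	case WAKE_DECREMENTER:	return 0;
 *	case WAKE_DOORBELL:	return 0;		// P8 only
 *	case WAKE_HV_DOORBELL:	r12 = hv doorbell vector; return 1;	// P8 only
 *	default:		return 1;
 *	}
 */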
kvmppc_check_wake_reason:
	mfspr	r6, SPRN_SRR1
BEGIN_FTR_SECTION
	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
FTR_SECTION_ELSE
	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
	cmpwi	r6, 8			/* was it an external interrupt? */
	li	r12, BOOK3S_INTERRUPT_EXTERNAL
	beq	kvmppc_read_intr	/* if so, see what it was */
	li	r3, 0
	li	r12, 0
	cmpwi	r6, 6			/* was it the decrementer? */
	beq	0f
BEGIN_FTR_SECTION
	cmpwi	r6, 5			/* privileged doorbell? */
	beq	0f
	cmpwi	r6, 3			/* hypervisor doorbell? */
	beq	3f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	li	r3, 1			/* anything else, return 1 */
0:	blr

	/* hypervisor doorbell */
3:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
	li	r3, 1
	blr

/*
 * Determine what sort of external interrupt is pending (if any).
 * Returns:
 *	0 if no interrupt is pending
 *	1 if an interrupt is pending that needs to be handled by the host
 *	-1 if there was a guest wakeup IPI (which has now been cleared)
 */
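/*
 * Roughly (a sketch; xics_read()/xics_write() stand in for the
 * cache-inhibited lwzcix/stbcix/stwcix accesses below):
 *
 *	if (host_ipi_pending())			return 1;
 *	xirr = xics_read(XICS_XIRR);
 *	if ((xirr & 0xffffff) == 0)		return 0;	// nothing pending
 *	if ((xirr & 0xffffff) != XICS_IPI)	{ save xirr; return 1; }
 *	clear the MFRR and EOI the IPI;
 *	if (host_ipi_pending())			{ resend IPI; return 1; }
 *	return -1;				// guest wakeup IPI
 */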
kvmppc_read_intr:
	/* see if a host IPI is pending */
	li	r3, 1
	lbz	r0, HSTATE_HOST_IPI(r13)
	cmpwi	r0, 0
	bne	1f

	/* Now read the interrupt from the ICP */
	ld	r6, HSTATE_XICS_PHYS(r13)
	li	r7, XICS_XIRR
	cmpdi	r6, 0
	beq-	1f
	lwzcix	r0, r6, r7
	rlwinm.	r3, r0, 0, 0xffffff
	sync
	beq	1f			/* if nothing pending in the ICP */

	/* We found something in the ICP...
	 *
	 * If it's not an IPI, stash it in the PACA and return to
	 * the host; we don't (yet) handle directing real external
	 * interrupts to the guest.
	 */
	cmpwi	r3, XICS_IPI		/* if there is, is it an IPI? */
	bne	42f

	/* It's an IPI, clear the MFRR and EOI it */
	li	r3, 0xff
	li	r8, XICS_MFRR
	stbcix	r3, r6, r8		/* clear the IPI */
	stwcix	r0, r6, r7		/* EOI it */
	sync

	/* We need to re-check host IPI now in case it got set in the
	 * meantime. If it's clear, we bounce the interrupt to the
	 * guest
	 */
	lbz	r0, HSTATE_HOST_IPI(r13)
	cmpwi	r0, 0
	bne-	43f

	/* OK, it's an IPI for us */
	li	r3, -1
1:	blr

42:	/* It's not an IPI and it's for the host; stash it in the PACA
	 * before exit, where it will be picked up by the host ICP driver.
	 */
	stw	r0, HSTATE_SAVED_XIRR(r13)
	li	r3, 1
	b	1b

43:	/* We raced with the host; we need to resend that IPI, bummer */
	li	r0, IPI_PRIORITY
	stbcix	r0, r6, r8		/* set the IPI */
	sync
	li	r3, 1
	b	1b

/*
 * Save away FP, VMX and VSX registers.
 * r3 = vcpu pointer
 * N.B. r30 and r31 are volatile across this function,
 * thus it is not callable from C.
 */
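/*
 * In outline (a sketch of the sequence below; store_fp_state and
 * store_vr_state are the helpers actually called):
 *
 *	msr |= MSR_FP [| MSR_VEC] [| MSR_VSX];	// enable the units first
 *	store_fp_state(&vcpu->arch.fp);
 *	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 *		store_vr_state(&vcpu->arch.vr);
 *	save SPRN_VRSAVE as well;
 */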
kvmppc_save_fp:
	mflr	r30
	mr	r31,r3
	mfmsr	r5
	ori	r8,r5,MSR_FP
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	oris	r8,r8,MSR_VEC@h
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	oris	r8,r8,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
	mtmsrd	r8
	isync
	addi	r3,r3,VCPU_FPRS
	bl	.store_fp_state
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	addi	r3,r31,VCPU_VRS
	bl	.store_vr_state
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
	mfspr	r6,SPRN_VRSAVE
	stw	r6,VCPU_VRSAVE(r31)
	mtlr	r30
	blr

/*
 * Load up FP, VMX and VSX registers
 * r4 = vcpu pointer
 * N.B. r30 and r31 are volatile across this function,
 * thus it is not callable from C.
 */
kvmppc_load_fp:
	mflr	r30
	mr	r31,r4
	mfmsr	r9
	ori	r8,r9,MSR_FP
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	oris	r8,r8,MSR_VEC@h
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	oris	r8,r8,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
	mtmsrd	r8
	isync
	addi	r3,r4,VCPU_FPRS
	bl	.load_fp_state
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
	addi	r3,r31,VCPU_VRS
	bl	.load_vr_state
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
	lwz	r7,VCPU_VRSAVE(r31)
	mtspr	SPRN_VRSAVE,r7
	mtlr	r30
	mr	r4,r31
	blr

/*
 * We come here if we get any exception or interrupt while we are
 * executing host real-mode code in the guest MMU context.
 * For now just spin, but we should do something better.
 */
kvmppc_bad_host_intr:
	b	.

/*
 * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
 * from VCPU_INTR_MSR and is modified based on the required TM state changes.
 *   r11 has the guest MSR value (in/out)
 *   r9 has a vcpu pointer (in)
 *   r0 is used as a scratch register
 */
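/*
 * Equivalent logic in C (a sketch; the TS field encoding is the one
 * implied by the MSR_TS_* definitions used below):
 *
 *	ts = (guest_msr >> MSR_TS_S_LG) & 3;	// 2 == transactional
 *	if (ts == 2)
 *		ts = 1;				// demote to suspended
 *	new_msr = vcpu->arch.intr_msr;
 *	insert ts into the TS field of new_msr;
 */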
kvmppc_msr_interrupt:
	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62
	cmpwi	r0, 2 /* Check if we are in transactional state */
	ld	r11, VCPU_INTR_MSR(r9)
	bne	1f
	/* ... if transactional, change to suspended */
	li	r0, 1
1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
	blr

/*
 * This works around a hardware bug on POWER8E processors, where
 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
 * performance monitor interrupt.  Instead, when we need to have
 * an interrupt pending, we have to arrange for a counter to overflow.
 */
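/*
 * Sketch of the workaround below: rather than setting MMCR0[PMAO]
 * directly, program PMC6 one count below overflow with PMXE set, so
 * that a genuine counter overflow raises the interrupt:
 *
 *	mtspr(SPRN_MMCR2, 0);
 *	mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_FCECE |
 *			  MMCR0_PMCjCE | MMCR0_C56RUN);
 *	mtspr(SPRN_PMC6, 0x7fffffff);	// next count sets the overflow bit
 */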
kvmppc_fix_pmao:
	li	r3, 0
	mtspr	SPRN_MMCR2, r3
	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
	mtspr	SPRN_MMCR0, r3
	lis	r3, 0x7fff
	ori	r3, r3, 0xffff
	mtspr	SPRN_PMC6, r3
	isync
	blr