xref: /openbmc/linux/arch/powerpc/kvm/book3s_hv_rmhandlers.S (revision 6396bb221514d2876fd6dc0aa2a1f240d99b37bb)
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License, version 2, as
4 * published by the Free Software Foundation.
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9 * GNU General Public License for more details.
10 *
11 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
12 *
13 * Derived from book3s_rmhandlers.S and other files, which are:
14 *
15 * Copyright SUSE Linux Products GmbH 2009
16 *
17 * Authors: Alexander Graf <agraf@suse.de>
18 */
19
20#include <asm/ppc_asm.h>
21#include <asm/kvm_asm.h>
22#include <asm/reg.h>
23#include <asm/mmu.h>
24#include <asm/page.h>
25#include <asm/ptrace.h>
26#include <asm/hvcall.h>
27#include <asm/asm-offsets.h>
28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h>
30#include <asm/book3s/64/mmu-hash.h>
31#include <asm/tm.h>
32#include <asm/opal.h>
33#include <asm/xive-regs.h>
34#include <asm/thread_info.h>
35
36/* Sign-extend HDEC if not on POWER9 */
37#define EXTEND_HDEC(reg)			\
38BEGIN_FTR_SECTION;				\
39	extsw	reg, reg;			\
40END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
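/*
 * Before POWER9 (CPU_FTR_ARCH_300) the HDEC is a 32-bit register, so its
 * value must be sign-extended before the 64-bit signed compares (cmpdi)
 * done on it below; POWER9 has a larger hypervisor decrementer and needs
 * no extension.
 */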
41
42#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
43
44/* Values in HSTATE_NAPPING(r13) */
45#define NAPPING_CEDE	1
46#define NAPPING_NOVCPU	2
47
48/* Stack frame offsets for kvmppc_hv_entry */
49#define SFS			160
50#define STACK_SLOT_TRAP		(SFS-4)
51#define STACK_SLOT_TID		(SFS-16)
52#define STACK_SLOT_PSSCR	(SFS-24)
53#define STACK_SLOT_PID		(SFS-32)
54#define STACK_SLOT_IAMR		(SFS-40)
55#define STACK_SLOT_CIABR	(SFS-48)
56#define STACK_SLOT_DAWR		(SFS-56)
57#define STACK_SLOT_DAWRX	(SFS-64)
58#define STACK_SLOT_HFSCR	(SFS-72)
59
60/*
61 * Call kvmppc_hv_entry in real mode.
62 * Must be called with interrupts hard-disabled.
63 *
64 * Input Registers:
65 *
66 * LR = return address to continue at after eventually re-enabling MMU
67 */
68_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
69	mflr	r0
70	std	r0, PPC_LR_STKOFF(r1)
71	stdu	r1, -112(r1)
72	mfmsr	r10
73	std	r10, HSTATE_HOST_MSR(r13)
74	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
75	li	r0,MSR_RI
76	andc	r0,r10,r0
77	li	r6,MSR_IR | MSR_DR
78	andc	r6,r10,r6
79	mtmsrd	r0,1		/* clear RI in MSR */
80	mtsrr0	r5
81	mtsrr1	r6
82	RFI_TO_KERNEL
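	/*
	 * In rough C terms the trampoline above does (a sketch):
	 *	srr0 = &kvmppc_call_hv_entry;
	 *	srr1 = mfmsr() & ~(MSR_IR | MSR_DR);
	 *	mtmsrd(mfmsr() & ~MSR_RI);	// not recoverable across the switch
	 *	rfid();				// continue at srr0 in real mode
	 */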
83
84kvmppc_call_hv_entry:
85BEGIN_FTR_SECTION
86	/* On P9, do LPCR setting, if necessary */
87	ld	r3, HSTATE_SPLIT_MODE(r13)
88	cmpdi	r3, 0
89	beq	46f
90	lwz	r4, KVM_SPLIT_DO_SET(r3)
91	cmpwi	r4, 0
92	beq	46f
93	bl	kvmhv_p9_set_lpcr
94	nop
9546:
96END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
97
98	ld	r4, HSTATE_KVM_VCPU(r13)
99	bl	kvmppc_hv_entry
100
101	/* Back from guest - restore host state and return to caller */
102
103BEGIN_FTR_SECTION
104	/* Restore host DABR and DABRX */
105	ld	r5,HSTATE_DABR(r13)
106	li	r6,7
107	mtspr	SPRN_DABR,r5
108	mtspr	SPRN_DABRX,r6
109END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
110
111	/* Restore SPRG3 */
112	ld	r3,PACA_SPRG_VDSO(r13)
113	mtspr	SPRN_SPRG_VDSO_WRITE,r3
114
115	/* Reload the host's PMU registers */
116	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
117	cmpwi	r4, 0
118	beq	23f			/* skip if not */
119BEGIN_FTR_SECTION
120	ld	r3, HSTATE_MMCR0(r13)
121	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
122	cmpwi	r4, MMCR0_PMAO
123	beql	kvmppc_fix_pmao
124END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
125	lwz	r3, HSTATE_PMC1(r13)
126	lwz	r4, HSTATE_PMC2(r13)
127	lwz	r5, HSTATE_PMC3(r13)
128	lwz	r6, HSTATE_PMC4(r13)
129	lwz	r8, HSTATE_PMC5(r13)
130	lwz	r9, HSTATE_PMC6(r13)
131	mtspr	SPRN_PMC1, r3
132	mtspr	SPRN_PMC2, r4
133	mtspr	SPRN_PMC3, r5
134	mtspr	SPRN_PMC4, r6
135	mtspr	SPRN_PMC5, r8
136	mtspr	SPRN_PMC6, r9
137	ld	r3, HSTATE_MMCR0(r13)
138	ld	r4, HSTATE_MMCR1(r13)
139	ld	r5, HSTATE_MMCRA(r13)
140	ld	r6, HSTATE_SIAR(r13)
141	ld	r7, HSTATE_SDAR(r13)
142	mtspr	SPRN_MMCR1, r4
143	mtspr	SPRN_MMCRA, r5
144	mtspr	SPRN_SIAR, r6
145	mtspr	SPRN_SDAR, r7
146BEGIN_FTR_SECTION
147	ld	r8, HSTATE_MMCR2(r13)
148	ld	r9, HSTATE_SIER(r13)
149	mtspr	SPRN_MMCR2, r8
150	mtspr	SPRN_SIER, r9
151END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
152	mtspr	SPRN_MMCR0, r3
153	isync
15423:
155
156	/*
157	 * Reload DEC.  HDEC interrupts were disabled when
158	 * we reloaded the host's LPCR value.
159	 */
160	ld	r3, HSTATE_DECEXP(r13)
161	mftb	r4
162	subf	r4, r4, r3
163	mtspr	SPRN_DEC, r4
164
165	/* hwthread_req may have been set by cede or the no-vcpu path, so clear it */
166	li	r0, 0
167	stb	r0, HSTATE_HWTHREAD_REQ(r13)
168
169	/*
170	 * For external interrupts we need to call the Linux
171	 * handler to process the interrupt. We do that by jumping
172	 * to absolute address 0x500 for external interrupts.
173	 * The [h]rfid at the end of the handler will return to
174	 * the book3s_hv_interrupts.S code. For other interrupts
175	 * we do the rfid to get back to the book3s_hv_interrupts.S
176	 * code here.
177	 */
178	ld	r8, 112+PPC_LR_STKOFF(r1)
179	addi	r1, r1, 112
180	ld	r7, HSTATE_HOST_MSR(r13)
181
182	/* Return the trap number on this thread as the return value */
183	mr	r3, r12
184
185	/*
186	 * If we came back from the guest via a relocation-on interrupt,
187	 * we will be in virtual mode at this point, which makes it a
188	 * little easier to get back to the caller.
189	 */
190	mfmsr	r0
191	andi.	r0, r0, MSR_IR		/* in real mode? */
192	bne	.Lvirt_return
193
194	/* RFI into the highmem handler */
195	mfmsr	r6
196	li	r0, MSR_RI
197	andc	r6, r6, r0
198	mtmsrd	r6, 1			/* Clear RI in MSR */
199	mtsrr0	r8
200	mtsrr1	r7
201	RFI_TO_KERNEL
202
203	/* Virtual-mode return */
204.Lvirt_return:
205	mtlr	r8
206	blr
207
208kvmppc_primary_no_guest:
209	/* We handle this much like a ceded vcpu */
210	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
211	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
212	/* HDEC value came from DEC in the first place, it will fit */
213	mfspr	r3, SPRN_HDEC
214	mtspr	SPRN_DEC, r3
215	/*
216	 * Make sure the primary has finished the MMU switch.
217	 * We should never get here on a secondary thread, but
218	 * check it for robustness' sake.
219	 */
220	ld	r5, HSTATE_KVM_VCORE(r13)
22165:	lbz	r0, VCORE_IN_GUEST(r5)
222	cmpwi	r0, 0
223	beq	65b
224	/* Set LPCR. */
225	ld	r8,VCORE_LPCR(r5)
226	mtspr	SPRN_LPCR,r8
227	isync
228	/* set our bit in napping_threads */
229	ld	r5, HSTATE_KVM_VCORE(r13)
230	lbz	r7, HSTATE_PTID(r13)
231	li	r0, 1
232	sld	r0, r0, r7
233	addi	r6, r5, VCORE_NAPPING_THREADS
2341:	lwarx	r3, 0, r6
235	or	r3, r3, r0
236	stwcx.	r3, 0, r6
237	bne	1b
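	/*
	 * The lwarx/stwcx. loop above is an atomic set-bit; roughly,
	 * in C terms (a sketch):
	 *	do {
	 *		old = vc->napping_threads;
	 *	} while (!store_conditional(&vc->napping_threads, old | (1 << ptid)));
	 */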
238	/* order napping_threads update vs testing entry_exit_map */
239	isync
240	li	r12, 0
241	lwz	r7, VCORE_ENTRY_EXIT(r5)
242	cmpwi	r7, 0x100
243	bge	kvm_novcpu_exit	/* another thread already exiting */
244	li	r3, NAPPING_NOVCPU
245	stb	r3, HSTATE_NAPPING(r13)
246
247	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
248	b	kvm_do_nap
249
250/*
251 * kvm_novcpu_wakeup
252 *	Entered from kvm_start_guest if kvm_hstate.napping is set
253 *	to NAPPING_NOVCPU
254 *		r2 = kernel TOC
255 *		r13 = paca
256 */
257kvm_novcpu_wakeup:
258	ld	r1, HSTATE_HOST_R1(r13)
259	ld	r5, HSTATE_KVM_VCORE(r13)
260	li	r0, 0
261	stb	r0, HSTATE_NAPPING(r13)
262
263	/* check the wake reason */
264	bl	kvmppc_check_wake_reason
265
266	/*
267	 * Restore volatile registers since we could have called
268	 * a C routine in kvmppc_check_wake_reason.
269	 *	r5 = VCORE
270	 */
271	ld	r5, HSTATE_KVM_VCORE(r13)
272
273	/* see if any other thread is already exiting */
274	lwz	r0, VCORE_ENTRY_EXIT(r5)
275	cmpwi	r0, 0x100
276	bge	kvm_novcpu_exit
277
278	/* clear our bit in napping_threads */
279	lbz	r7, HSTATE_PTID(r13)
280	li	r0, 1
281	sld	r0, r0, r7
282	addi	r6, r5, VCORE_NAPPING_THREADS
2834:	lwarx	r7, 0, r6
284	andc	r7, r7, r0
285	stwcx.	r7, 0, r6
286	bne	4b
287
288	/* See if the wake reason means we need to exit */
289	cmpdi	r3, 0
290	bge	kvm_novcpu_exit
291
292	/* See if our timeslice has expired (HDEC is negative) */
293	mfspr	r0, SPRN_HDEC
294	EXTEND_HDEC(r0)
295	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
296	cmpdi	r0, 0
297	blt	kvm_novcpu_exit
298
299	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
300	ld	r4, HSTATE_KVM_VCPU(r13)
301	cmpdi	r4, 0
302	beq	kvmppc_primary_no_guest
303
304#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
305	addi	r3, r4, VCPU_TB_RMENTRY
306	bl	kvmhv_start_timing
307#endif
308	b	kvmppc_got_guest
309
310kvm_novcpu_exit:
311#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
312	ld	r4, HSTATE_KVM_VCPU(r13)
313	cmpdi	r4, 0
314	beq	13f
315	addi	r3, r4, VCPU_TB_RMEXIT
316	bl	kvmhv_accumulate_time
317#endif
31813:	mr	r3, r12
319	stw	r12, STACK_SLOT_TRAP(r1)
320	bl	kvmhv_commence_exit
321	nop
322	b	kvmhv_switch_to_host
323
324/*
325 * We come in here when woken from nap mode.
326 * Relocation is off and most register values are lost.
327 * r13 points to the PACA.
328 * r3 contains the SRR1 wakeup value, SRR1 is trashed.
329 */
330	.globl	kvm_start_guest
331kvm_start_guest:
332	/* Set runlatch bit the minute you wake up from nap */
333	mfspr	r0, SPRN_CTRLF
334	ori 	r0, r0, 1
335	mtspr	SPRN_CTRLT, r0
336
337	/*
338	 * Could avoid this and pass it through in r3. For now,
339	 * code expects it to be in SRR1.
340	 */
341	mtspr	SPRN_SRR1,r3
342
343	ld	r2,PACATOC(r13)
344
345	li	r0,0
346	stb	r0,PACA_FTRACE_ENABLED(r13)
347
348	li	r0,KVM_HWTHREAD_IN_KVM
349	stb	r0,HSTATE_HWTHREAD_STATE(r13)
350
351	/* NV GPR values from power7_idle() will no longer be valid */
352	li	r0,1
353	stb	r0,PACA_NAPSTATELOST(r13)
354
355	/* were we napping due to cede? */
356	lbz	r0,HSTATE_NAPPING(r13)
357	cmpwi	r0,NAPPING_CEDE
358	beq	kvm_end_cede
359	cmpwi	r0,NAPPING_NOVCPU
360	beq	kvm_novcpu_wakeup
361
362	ld	r1,PACAEMERGSP(r13)
363	subi	r1,r1,STACK_FRAME_OVERHEAD
364
365	/*
366	 * We weren't napping due to cede, so this must be a secondary
367	 * thread being woken up to run a guest, or being woken up due
368	 * to a stray IPI.  (Or due to some machine check or hypervisor
369	 * maintenance interrupt while the core is in KVM.)
370	 */
371
372	/* Check the wake reason in SRR1 to see why we got here */
373	bl	kvmppc_check_wake_reason
374	/*
375	 * kvmppc_check_wake_reason could invoke a C routine, but we
376	 * have no volatile registers to restore when we return.
377	 */
378
379	cmpdi	r3, 0
380	bge	kvm_no_guest
381
382	/* get vcore pointer, NULL if we have nothing to run */
383	ld	r5,HSTATE_KVM_VCORE(r13)
384	cmpdi	r5,0
385	/* if we have no vcore to run, go back to sleep */
386	beq	kvm_no_guest
387
388kvm_secondary_got_guest:
389
390	/* Set HSTATE_DSCR(r13) to something sensible */
391	ld	r6, PACA_DSCR_DEFAULT(r13)
392	std	r6, HSTATE_DSCR(r13)
393
394	/* On thread 0 of a subcore, set HDEC to max */
395	lbz	r4, HSTATE_PTID(r13)
396	cmpwi	r4, 0
397	bne	63f
398	LOAD_REG_ADDR(r6, decrementer_max)
399	ld	r6, 0(r6)
400	mtspr	SPRN_HDEC, r6
401	/* and set per-LPAR registers, if doing dynamic micro-threading */
402	ld	r6, HSTATE_SPLIT_MODE(r13)
403	cmpdi	r6, 0
404	beq	63f
405BEGIN_FTR_SECTION
406	ld	r0, KVM_SPLIT_RPR(r6)
407	mtspr	SPRN_RPR, r0
408	ld	r0, KVM_SPLIT_PMMAR(r6)
409	mtspr	SPRN_PMMAR, r0
410	ld	r0, KVM_SPLIT_LDBAR(r6)
411	mtspr	SPRN_LDBAR, r0
412	isync
413FTR_SECTION_ELSE
414	/* On P9 we use the split_info for coordinating LPCR changes */
415	lwz	r4, KVM_SPLIT_DO_SET(r6)
416	cmpwi	r4, 0
417	beq	1f
418	mr	r3, r6
419	bl	kvmhv_p9_set_lpcr
420	nop
4211:
422ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
42363:
424	/* Order load of vcpu after load of vcore */
425	lwsync
426	ld	r4, HSTATE_KVM_VCPU(r13)
427	bl	kvmppc_hv_entry
428
429	/* Back from the guest, go back to nap */
430	/* Clear our vcpu and vcore pointers so we don't come back in early */
431	li	r0, 0
432	std	r0, HSTATE_KVM_VCPU(r13)
433	/*
434	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
435	 * kvmppc_run_core() is going to assume that all our vcpu
436	 * state is visible in memory.  This lwsync makes sure
437	 * that that is true.
438	 */
439	lwsync
440	std	r0, HSTATE_KVM_VCORE(r13)
441
442	/*
443	 * All secondaries exiting the guest fall through this path.
444	 * Before proceeding, check whether the exit was due to an HMI
445	 * and, if so, invoke the OPAL HMI handler. By now the primary
446	 * thread on this core/subcore has already done the partition
447	 * switch and TB resync, so it is safe to call the OPAL HMI handler.
448	 */
449	cmpwi	r12, BOOK3S_INTERRUPT_HMI
450	bne	kvm_no_guest
451
452	li	r3,0			/* NULL argument */
453	bl	hmi_exception_realmode
454/*
455 * At this point we have finished executing in the guest.
456 * We need to wait for hwthread_req to become zero, since
457 * we may not turn on the MMU while hwthread_req is non-zero.
458 * While waiting we also need to check if we get given a vcpu to run.
459 */
460kvm_no_guest:
461	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
462	cmpwi	r3, 0
463	bne	53f
464	HMT_MEDIUM
465	li	r0, KVM_HWTHREAD_IN_KERNEL
466	stb	r0, HSTATE_HWTHREAD_STATE(r13)
467	/* need to recheck hwthread_req after a barrier, to avoid race */
468	sync
469	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
470	cmpwi	r3, 0
471	bne	54f
472/*
473 * We jump to pnv_wakeup_loss, which will return to the caller
474 * of power7_nap in the powernv cpu offline loop.  The value we
475 * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
476 * requires SRR1 in r12.
477 */
478	li	r3, LPCR_PECE0
479	mfspr	r4, SPRN_LPCR
480	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
481	mtspr	SPRN_LPCR, r4
482	li	r3, 0
483	mfspr	r12,SPRN_SRR1
484	b	pnv_wakeup_loss
485
48653:	HMT_LOW
487	ld	r5, HSTATE_KVM_VCORE(r13)
488	cmpdi	r5, 0
489	bne	60f
490	ld	r3, HSTATE_SPLIT_MODE(r13)
491	cmpdi	r3, 0
492	beq	kvm_no_guest
493	lwz	r0, KVM_SPLIT_DO_SET(r3)
494	cmpwi	r0, 0
495	bne	kvmhv_do_set
496	lwz	r0, KVM_SPLIT_DO_RESTORE(r3)
497	cmpwi	r0, 0
498	bne	kvmhv_do_restore
499	lbz	r0, KVM_SPLIT_DO_NAP(r3)
500	cmpwi	r0, 0
501	beq	kvm_no_guest
502	HMT_MEDIUM
503	b	kvm_unsplit_nap
50460:	HMT_MEDIUM
505	b	kvm_secondary_got_guest
506
50754:	li	r0, KVM_HWTHREAD_IN_KVM
508	stb	r0, HSTATE_HWTHREAD_STATE(r13)
509	b	kvm_no_guest
510
511kvmhv_do_set:
512	/* Set LPCR, LPIDR etc. on P9 */
513	HMT_MEDIUM
514	bl	kvmhv_p9_set_lpcr
515	nop
516	b	kvm_no_guest
517
518kvmhv_do_restore:
519	HMT_MEDIUM
520	bl	kvmhv_p9_restore_lpcr
521	nop
522	b	kvm_no_guest
523
524/*
525 * Here the primary thread is trying to return the core to
526 * whole-core mode, so we need to nap.
527 */
528kvm_unsplit_nap:
529	/*
530	 * When secondaries are napping in kvm_unsplit_nap() with
531	 * hwthread_req = 1, an HMI is ignored even though the subcores
532	 * have already exited the guest. The HMI then keeps waking the
533	 * secondaries from nap in a loop, and they immediately go back
534	 * to nap since no vcore is assigned to them. This makes it
535	 * impossible for the primary thread to get hold of the secondary
536	 * threads, resulting in a soft lockup in the KVM path.
537	 *
538	 * So check whether an HMI is pending and handle it before napping.
539	 */
540	cmpwi	r12, BOOK3S_INTERRUPT_HMI
541	bne	55f
542	li	r3, 0			/* NULL argument */
543	bl	hmi_exception_realmode
54455:
545	/*
546	 * Ensure that secondary doesn't nap when it has
547	 * its vcore pointer set.
548	 */
549	sync		/* matches smp_mb() before setting split_info.do_nap */
550	ld	r0, HSTATE_KVM_VCORE(r13)
551	cmpdi	r0, 0
552	bne	kvm_no_guest
553	/* clear any pending message */
554BEGIN_FTR_SECTION
555	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
556	PPC_MSGCLR(6)
557END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
558	/* Set kvm_split_mode.napped[tid] = 1 */
559	ld	r3, HSTATE_SPLIT_MODE(r13)
560	li	r0, 1
561	lbz	r4, HSTATE_TID(r13)
562	addi	r4, r4, KVM_SPLIT_NAPPED
563	stbx	r0, r3, r4
564	/* Check the do_nap flag again after setting napped[] */
565	sync
566	lbz	r0, KVM_SPLIT_DO_NAP(r3)
567	cmpwi	r0, 0
568	beq	57f
569	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
570	mfspr	r5, SPRN_LPCR
571	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
572	b	kvm_nap_sequence
573
57457:	li	r0, 0
575	stbx	r0, r3, r4
576	b	kvm_no_guest
577
578/******************************************************************************
579 *                                                                            *
580 *                               Entry code                                   *
581 *                                                                            *
582 *****************************************************************************/
583
584.global kvmppc_hv_entry
585kvmppc_hv_entry:
586
587	/* Required state:
588	 *
589	 * R4 = vcpu pointer (or NULL)
590	 * MSR = ~IR|DR
591	 * R13 = PACA
592	 * R1 = host R1
593	 * R2 = TOC
594	 * all other volatile GPRS = free
595	 * Does not preserve non-volatile GPRs or CR fields
596	 */
597	mflr	r0
598	std	r0, PPC_LR_STKOFF(r1)
599	stdu	r1, -SFS(r1)
600
601	/* Save R1 in the PACA */
602	std	r1, HSTATE_HOST_R1(r13)
603
604	li	r6, KVM_GUEST_MODE_HOST_HV
605	stb	r6, HSTATE_IN_GUEST(r13)
606
607#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
608	/* Store initial timestamp */
609	cmpdi	r4, 0
610	beq	1f
611	addi	r3, r4, VCPU_TB_RMENTRY
612	bl	kvmhv_start_timing
6131:
614#endif
615
616	/* Use cr7 as an indication of radix mode */
617	ld	r5, HSTATE_KVM_VCORE(r13)
618	ld	r9, VCORE_KVM(r5)	/* pointer to struct kvm */
619	lbz	r0, KVM_RADIX(r9)
620	cmpwi	cr7, r0, 0
621
622	/*
623	 * POWER7/POWER8 host -> guest partition switch code.
624	 * We don't have to lock against concurrent tlbies,
625	 * but we do have to coordinate across hardware threads.
626	 */
627	/* Set bit in entry map iff exit map is zero. */
628	li	r7, 1
629	lbz	r6, HSTATE_PTID(r13)
630	sld	r7, r7, r6
631	addi	r8, r5, VCORE_ENTRY_EXIT
63221:	lwarx	r3, 0, r8
633	cmpwi	r3, 0x100		/* any threads starting to exit? */
634	bge	secondary_too_late	/* if so we're too late to the party */
635	or	r3, r3, r7
636	stwcx.	r3, 0, r8
637	bne	21b
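	/*
	 * Roughly, in C terms (a sketch), the loop above atomically does
	 *	old = vc->entry_exit_map;
	 *	if (old >= 0x100)		// some thread has set an exit bit
	 *		goto secondary_too_late;
	 *	vc->entry_exit_map = old | (1 << ptid);
	 * retrying if the store-conditional fails.
	 */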
638
639	/* Primary thread switches to guest partition. */
640	cmpwi	r6,0
641	bne	10f
642	lwz	r7,KVM_LPID(r9)
643BEGIN_FTR_SECTION
644	ld	r6,KVM_SDR1(r9)
645	li	r0,LPID_RSVD		/* switch to reserved LPID */
646	mtspr	SPRN_LPID,r0
647	ptesync
648	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
649END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
650	mtspr	SPRN_LPID,r7
651	isync
652
653	/* See if we need to flush the TLB */
654	lhz	r6,PACAPACAINDEX(r13)	/* test_bit(cpu, need_tlb_flush) */
655BEGIN_FTR_SECTION
656	/*
657	 * On POWER9, individual threads can come in here, but the
658	 * TLB is shared between the 4 threads in a core, hence
659	 * invalidating on one thread invalidates for all.
660	 * Thus we make all 4 threads use the same bit here.
661	 */
662	clrrdi	r6,r6,2
663END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
664	clrldi	r7,r6,64-6		/* extract bit number (6 bits) */
665	srdi	r6,r6,6			/* doubleword number */
666	sldi	r6,r6,3			/* address offset */
667	add	r6,r6,r9
668	addi	r6,r6,KVM_NEED_FLUSH	/* dword in kvm->arch.need_tlb_flush */
669	li	r8,1
670	sld	r8,r8,r7
671	ld	r7,0(r6)
672	and.	r7,r7,r8
673	beq	22f
674	/* Flush the TLB of any entries for this LPID */
675	lwz	r0,KVM_TLB_SETS(r9)
676	mtctr	r0
677	li	r7,0x800		/* IS field = 0b10 */
678	ptesync
679	li	r0,0			/* RS for P9 version of tlbiel */
680	bne	cr7, 29f
68128:	tlbiel	r7			/* On P9, rs=0, RIC=0, PRS=0, R=0 */
682	addi	r7,r7,0x1000
683	bdnz	28b
684	b	30f
68529:	PPC_TLBIEL(7,0,2,1,1)		/* for radix, RIC=2, PRS=1, R=1 */
686	addi	r7,r7,0x1000
687	bdnz	29b
68830:	ptesync
68923:	ldarx	r7,0,r6			/* clear the bit after TLB flushed */
690	andc	r7,r7,r8
691	stdcx.	r7,0,r6
692	bne	23b
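	/*
	 * The block above is, roughly (a sketch in C terms):
	 *	bit = this cpu's paca index;	// P9: rounded down so all 4
	 *					// threads of a core share a bit
	 *	if (test_bit(bit, kvm->arch.need_tlb_flush)) {
	 *		for (set = 0; set < kvm->arch.tlb_sets; set++)
	 *			tlbiel(set);	// flush this LPID's entries
	 *		clear_bit(bit, kvm->arch.need_tlb_flush);
	 *	}
	 */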
693
694	/* Add timebase offset onto timebase */
69522:	ld	r8,VCORE_TB_OFFSET(r5)
696	cmpdi	r8,0
697	beq	37f
698	std	r8, VCORE_TB_OFFSET_APPL(r5)
699	mftb	r6		/* current host timebase */
700	add	r8,r8,r6
701	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
702	mftb	r7		/* check if lower 24 bits overflowed */
703	clrldi	r6,r6,40
704	clrldi	r7,r7,40
705	cmpld	r7,r6
706	bge	37f
707	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
708	mtspr	SPRN_TBU40,r8
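	/*
	 * TBU40 only updates the upper 40 bits of the timebase, so if the
	 * lower 24 bits wrapped between the mftb and the first mtspr above,
	 * the carry into bit 24 was lost; the compare detects that and adds
	 * 2^24 (addis of 0x100) to the upper bits to compensate.
	 */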
709
710	/* Load guest PCR value to select appropriate compat mode */
71137:	ld	r7, VCORE_PCR(r5)
712	cmpdi	r7, 0
713	beq	38f
714	mtspr	SPRN_PCR, r7
71538:
716
717BEGIN_FTR_SECTION
718	/* DPDES and VTB are shared between threads */
719	ld	r8, VCORE_DPDES(r5)
720	ld	r7, VCORE_VTB(r5)
721	mtspr	SPRN_DPDES, r8
722	mtspr	SPRN_VTB, r7
723END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
724
725	/* Mark the subcore state as inside guest */
726	bl	kvmppc_subcore_enter_guest
727	nop
728	ld	r5, HSTATE_KVM_VCORE(r13)
729	ld	r4, HSTATE_KVM_VCPU(r13)
730	li	r0,1
731	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
732
733	/* Do we have a guest vcpu to run? */
73410:	cmpdi	r4, 0
735	beq	kvmppc_primary_no_guest
736kvmppc_got_guest:
737	/* Increment yield count if they have a VPA */
738	ld	r3, VCPU_VPA(r4)
739	cmpdi	r3, 0
740	beq	25f
741	li	r6, LPPACA_YIELDCOUNT
742	LWZX_BE	r5, r3, r6
743	addi	r5, r5, 1
744	STWX_BE	r5, r3, r6
745	li	r6, 1
746	stb	r6, VCPU_VPA_DIRTY(r4)
74725:
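	/*
	 * Roughly (a C sketch): if the vcpu has a VPA, do
	 *	vpa->yield_count = be32(be32(vpa->yield_count) + 1);
	 * and mark the VPA dirty.  LWZX_BE/STWX_BE keep the field
	 * big-endian regardless of host endianness.
	 */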
748
749	/* Save purr/spurr */
750	mfspr	r5,SPRN_PURR
751	mfspr	r6,SPRN_SPURR
752	std	r5,HSTATE_PURR(r13)
753	std	r6,HSTATE_SPURR(r13)
754	ld	r7,VCPU_PURR(r4)
755	ld	r8,VCPU_SPURR(r4)
756	mtspr	SPRN_PURR,r7
757	mtspr	SPRN_SPURR,r8
758
759	/* Save host values of some registers */
760BEGIN_FTR_SECTION
761	mfspr	r5, SPRN_TIDR
762	mfspr	r6, SPRN_PSSCR
763	mfspr	r7, SPRN_PID
764	mfspr	r8, SPRN_IAMR
765	std	r5, STACK_SLOT_TID(r1)
766	std	r6, STACK_SLOT_PSSCR(r1)
767	std	r7, STACK_SLOT_PID(r1)
768	std	r8, STACK_SLOT_IAMR(r1)
769	mfspr	r5, SPRN_HFSCR
770	std	r5, STACK_SLOT_HFSCR(r1)
771END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
772BEGIN_FTR_SECTION
773	mfspr	r5, SPRN_CIABR
774	mfspr	r6, SPRN_DAWR
775	mfspr	r7, SPRN_DAWRX
776	std	r5, STACK_SLOT_CIABR(r1)
777	std	r6, STACK_SLOT_DAWR(r1)
778	std	r7, STACK_SLOT_DAWRX(r1)
779END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
780
781BEGIN_FTR_SECTION
782	/* Set partition DABR */
783	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
784	lwz	r5,VCPU_DABRX(r4)
785	ld	r6,VCPU_DABR(r4)
786	mtspr	SPRN_DABRX,r5
787	mtspr	SPRN_DABR,r6
788	isync
789END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
790
791#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
792/*
793 * Branch around the call if both CPU_FTR_TM and
794 * CPU_FTR_P9_TM_HV_ASSIST are off.
795 */
796BEGIN_FTR_SECTION
797	b	91f
798END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
799	/*
800	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
801	 */
802	bl	kvmppc_restore_tm
80391:
804#endif
805
806	/* Load guest PMU registers */
807	/* R4 is live here (vcpu pointer) */
808	li	r3, 1
809	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
810	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
811	isync
812BEGIN_FTR_SECTION
813	ld	r3, VCPU_MMCR(r4)
814	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
815	cmpwi	r5, MMCR0_PMAO
816	beql	kvmppc_fix_pmao
817END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
818	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
819	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
820	lwz	r6, VCPU_PMC + 8(r4)
821	lwz	r7, VCPU_PMC + 12(r4)
822	lwz	r8, VCPU_PMC + 16(r4)
823	lwz	r9, VCPU_PMC + 20(r4)
824	mtspr	SPRN_PMC1, r3
825	mtspr	SPRN_PMC2, r5
826	mtspr	SPRN_PMC3, r6
827	mtspr	SPRN_PMC4, r7
828	mtspr	SPRN_PMC5, r8
829	mtspr	SPRN_PMC6, r9
830	ld	r3, VCPU_MMCR(r4)
831	ld	r5, VCPU_MMCR + 8(r4)
832	ld	r6, VCPU_MMCR + 16(r4)
833	ld	r7, VCPU_SIAR(r4)
834	ld	r8, VCPU_SDAR(r4)
835	mtspr	SPRN_MMCR1, r5
836	mtspr	SPRN_MMCRA, r6
837	mtspr	SPRN_SIAR, r7
838	mtspr	SPRN_SDAR, r8
839BEGIN_FTR_SECTION
840	ld	r5, VCPU_MMCR + 24(r4)
841	ld	r6, VCPU_SIER(r4)
842	mtspr	SPRN_MMCR2, r5
843	mtspr	SPRN_SIER, r6
844BEGIN_FTR_SECTION_NESTED(96)
845	lwz	r7, VCPU_PMC + 24(r4)
846	lwz	r8, VCPU_PMC + 28(r4)
847	ld	r9, VCPU_MMCR + 32(r4)
848	mtspr	SPRN_SPMC1, r7
849	mtspr	SPRN_SPMC2, r8
850	mtspr	SPRN_MMCRS, r9
851END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
852END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
853	mtspr	SPRN_MMCR0, r3
854	isync
855
856	/* Load up FP, VMX and VSX registers */
857	bl	kvmppc_load_fp
858
859	ld	r14, VCPU_GPR(R14)(r4)
860	ld	r15, VCPU_GPR(R15)(r4)
861	ld	r16, VCPU_GPR(R16)(r4)
862	ld	r17, VCPU_GPR(R17)(r4)
863	ld	r18, VCPU_GPR(R18)(r4)
864	ld	r19, VCPU_GPR(R19)(r4)
865	ld	r20, VCPU_GPR(R20)(r4)
866	ld	r21, VCPU_GPR(R21)(r4)
867	ld	r22, VCPU_GPR(R22)(r4)
868	ld	r23, VCPU_GPR(R23)(r4)
869	ld	r24, VCPU_GPR(R24)(r4)
870	ld	r25, VCPU_GPR(R25)(r4)
871	ld	r26, VCPU_GPR(R26)(r4)
872	ld	r27, VCPU_GPR(R27)(r4)
873	ld	r28, VCPU_GPR(R28)(r4)
874	ld	r29, VCPU_GPR(R29)(r4)
875	ld	r30, VCPU_GPR(R30)(r4)
876	ld	r31, VCPU_GPR(R31)(r4)
877
878	/* Switch DSCR to guest value */
879	ld	r5, VCPU_DSCR(r4)
880	mtspr	SPRN_DSCR, r5
881
882BEGIN_FTR_SECTION
883	/* Skip next section on POWER7 */
884	b	8f
885END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
886	/* Load up POWER8-specific registers */
887	ld	r5, VCPU_IAMR(r4)
888	lwz	r6, VCPU_PSPB(r4)
889	ld	r7, VCPU_FSCR(r4)
890	mtspr	SPRN_IAMR, r5
891	mtspr	SPRN_PSPB, r6
892	mtspr	SPRN_FSCR, r7
893	ld	r5, VCPU_DAWR(r4)
894	ld	r6, VCPU_DAWRX(r4)
895	ld	r7, VCPU_CIABR(r4)
896	ld	r8, VCPU_TAR(r4)
897	/*
898	 * Handle broken DAWR case by not writing it. This means we
899	 * can still store the DAWR register for migration.
900	 */
901BEGIN_FTR_SECTION
902	mtspr	SPRN_DAWR, r5
903	mtspr	SPRN_DAWRX, r6
904END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
905	mtspr	SPRN_CIABR, r7
906	mtspr	SPRN_TAR, r8
907	ld	r5, VCPU_IC(r4)
908	ld	r8, VCPU_EBBHR(r4)
909	mtspr	SPRN_IC, r5
910	mtspr	SPRN_EBBHR, r8
911	ld	r5, VCPU_EBBRR(r4)
912	ld	r6, VCPU_BESCR(r4)
913	lwz	r7, VCPU_GUEST_PID(r4)
914	ld	r8, VCPU_WORT(r4)
915	mtspr	SPRN_EBBRR, r5
916	mtspr	SPRN_BESCR, r6
917	mtspr	SPRN_PID, r7
918	mtspr	SPRN_WORT, r8
919BEGIN_FTR_SECTION
920	PPC_INVALIDATE_ERAT
921END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
922BEGIN_FTR_SECTION
923	/* POWER8-only registers */
924	ld	r5, VCPU_TCSCR(r4)
925	ld	r6, VCPU_ACOP(r4)
926	ld	r7, VCPU_CSIGR(r4)
927	ld	r8, VCPU_TACR(r4)
928	mtspr	SPRN_TCSCR, r5
929	mtspr	SPRN_ACOP, r6
930	mtspr	SPRN_CSIGR, r7
931	mtspr	SPRN_TACR, r8
932	nop
933FTR_SECTION_ELSE
934	/* POWER9-only registers */
935	ld	r5, VCPU_TID(r4)
936	ld	r6, VCPU_PSSCR(r4)
937	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
938	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
939	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
940	ld	r7, VCPU_HFSCR(r4)
941	mtspr	SPRN_TIDR, r5
942	mtspr	SPRN_PSSCR, r6
943	mtspr	SPRN_HFSCR, r7
944ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
9458:
946
947	ld	r5, VCPU_SPRG0(r4)
948	ld	r6, VCPU_SPRG1(r4)
949	ld	r7, VCPU_SPRG2(r4)
950	ld	r8, VCPU_SPRG3(r4)
951	mtspr	SPRN_SPRG0, r5
952	mtspr	SPRN_SPRG1, r6
953	mtspr	SPRN_SPRG2, r7
954	mtspr	SPRN_SPRG3, r8
955
956	/* Load up DAR and DSISR */
957	ld	r5, VCPU_DAR(r4)
958	lwz	r6, VCPU_DSISR(r4)
959	mtspr	SPRN_DAR, r5
960	mtspr	SPRN_DSISR, r6
961
962	/* Restore AMR and UAMOR, set AMOR to all 1s */
963	ld	r5,VCPU_AMR(r4)
964	ld	r6,VCPU_UAMOR(r4)
965	li	r7,-1
966	mtspr	SPRN_AMR,r5
967	mtspr	SPRN_UAMOR,r6
968	mtspr	SPRN_AMOR,r7
969
970	/* Restore state of CTRL run bit; assume 1 on entry */
971	lwz	r5,VCPU_CTRL(r4)
972	andi.	r5,r5,1
973	bne	4f
974	mfspr	r6,SPRN_CTRLF
975	clrrdi	r6,r6,1
976	mtspr	SPRN_CTRLT,r6
9774:
978	/* Secondary threads wait for primary to have done partition switch */
979	ld	r5, HSTATE_KVM_VCORE(r13)
980	lbz	r6, HSTATE_PTID(r13)
981	cmpwi	r6, 0
982	beq	21f
983	lbz	r0, VCORE_IN_GUEST(r5)
984	cmpwi	r0, 0
985	bne	21f
986	HMT_LOW
98720:	lwz	r3, VCORE_ENTRY_EXIT(r5)
988	cmpwi	r3, 0x100
989	bge	no_switch_exit
990	lbz	r0, VCORE_IN_GUEST(r5)
991	cmpwi	r0, 0
992	beq	20b
993	HMT_MEDIUM
99421:
995	/* Set LPCR. */
996	ld	r8,VCORE_LPCR(r5)
997	mtspr	SPRN_LPCR,r8
998	isync
999
1000	/*
1001	 * Set the decrementer to the guest decrementer.
1002	 */
1003	ld	r8,VCPU_DEC_EXPIRES(r4)
1004	/* r8 is a host timebase value here, convert to guest TB */
1005	ld	r5,HSTATE_KVM_VCORE(r13)
1006	ld	r6,VCORE_TB_OFFSET_APPL(r5)
1007	add	r8,r8,r6
1008	mftb	r7
1009	subf	r3,r7,r8
1010	mtspr	SPRN_DEC,r3
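	/*
	 * In effect (a sketch): DEC = (dec_expires + tb_offset_applied) - mftb(),
	 * i.e. the number of guest-timebase ticks until the saved expiry.
	 */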
1011
1012	/* Check if HDEC expires soon */
1013	mfspr	r3, SPRN_HDEC
1014	EXTEND_HDEC(r3)
1015	cmpdi	r3, 512		/* 1 microsecond */
1016	blt	hdec_soon
1017
1018	/* For hash guest, clear out and reload the SLB */
1019	ld	r6, VCPU_KVM(r4)
1020	lbz	r0, KVM_RADIX(r6)
1021	cmpwi	r0, 0
1022	bne	9f
1023	li	r6, 0
1024	slbmte	r6, r6
1025	slbia
1026	ptesync
1027
1028	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
1029	lwz	r5,VCPU_SLB_MAX(r4)
1030	cmpwi	r5,0
1031	beq	9f
1032	mtctr	r5
1033	addi	r6,r4,VCPU_SLB
10341:	ld	r8,VCPU_SLB_E(r6)
1035	ld	r9,VCPU_SLB_V(r6)
1036	slbmte	r9,r8
1037	addi	r6,r6,VCPU_SLB_SIZE
1038	bdnz	1b
10399:
1040
1041#ifdef CONFIG_KVM_XICS
1042	/* We are entering the guest on that thread, push VCPU to XIVE */
1043	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
1044	cmpldi	cr0, r10, 0
1045	beq	no_xive
1046	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
1047	li	r9, TM_QW1_OS
1048	eieio
1049	stdcix	r11,r9,r10
1050	lwz	r11, VCPU_XIVE_CAM_WORD(r4)
1051	li	r9, TM_QW1_OS + TM_WORD2
1052	stwcix	r11,r9,r10
1053	li	r9, 1
1054	stb	r9, VCPU_XIVE_PUSHED(r4)
1055	eieio
1056
1057	/*
1058	 * We clear the irq_pending flag. There is a small chance of a
1059	 * race vs. the escalation interrupt happening on another
1060	 * processor setting it again, but the only consequence is to
1061	 * cause a spurious wakeup on the next H_CEDE, which is not an
1062	 * issue.
1063	 */
1064	li	r0,0
1065	stb	r0, VCPU_IRQ_PENDING(r4)
1066
1067	/*
1068	 * In single escalation mode, if the escalation interrupt is
1069	 * on, we mask it.
1070	 */
1071	lbz	r0, VCPU_XIVE_ESC_ON(r4)
1072	cmpwi	r0,0
1073	beq	1f
1074	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
1075	li	r9, XIVE_ESB_SET_PQ_01
1076	ldcix	r0, r10, r9
1077	sync
1078
1079	/* We have a possible subtle race here: The escalation interrupt might
1080	 * have fired and be on its way to the host queue while we mask it,
1081	 * and if we unmask it early enough (re-cede right away), there is
1082	 * a theoretical possibility that it fires again, thus landing in the
1083	 * target queue more than once which is a big no-no.
1084	 *
1085	 * Fortunately, solving this is rather easy. If the above load setting
1086	 * PQ to 01 returns a previous value where P is set, then we know the
1087	 * escalation interrupt is somewhere on its way to the host. In that
1088	 * case we simply don't clear the xive_esc_on flag below. It will be
1089	 * eventually cleared by the handler for the escalation interrupt.
1090	 *
1091	 * Then, when doing a cede, we check that flag again before re-enabling
1092	 * the escalation interrupt, and if set, we abort the cede.
1093	 */
1094	andi.	r0, r0, XIVE_ESB_VAL_P
1095	bne-	1f
1096
1097	/* Now P is 0, we can clear the flag */
1098	li	r0, 0
1099	stb	r0, VCPU_XIVE_ESC_ON(r4)
11001:
1101no_xive:
1102#endif /* CONFIG_KVM_XICS */
1103
1104deliver_guest_interrupt:
1105	ld	r6, VCPU_CTR(r4)
1106	ld	r7, VCPU_XER(r4)
1107
1108	mtctr	r6
1109	mtxer	r7
1110
1111kvmppc_cede_reentry:		/* r4 = vcpu, r13 = paca */
1112	ld	r10, VCPU_PC(r4)
1113	ld	r11, VCPU_MSR(r4)
1114	ld	r6, VCPU_SRR0(r4)
1115	ld	r7, VCPU_SRR1(r4)
1116	mtspr	SPRN_SRR0, r6
1117	mtspr	SPRN_SRR1, r7
1118
1119	/* r11 = vcpu->arch.msr & ~MSR_HV */
1120	rldicl	r11, r11, 63 - MSR_HV_LG, 1
1121	rotldi	r11, r11, 1 + MSR_HV_LG
1122	ori	r11, r11, MSR_ME
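	/*
	 * The rldicl/rotldi pair rotates MSR_HV up to the top bit, clears it
	 * via the rldicl mask, then rotates everything back, so the net
	 * effect is r11 = (guest MSR & ~MSR_HV) | MSR_ME.
	 */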
1123
1124	/* Check if we can deliver an external or decrementer interrupt now */
1125	ld	r0, VCPU_PENDING_EXC(r4)
1126	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
1127	cmpdi	cr1, r0, 0
1128	andi.	r8, r11, MSR_EE
1129	mfspr	r8, SPRN_LPCR
1130	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
1131	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
1132	mtspr	SPRN_LPCR, r8
1133	isync
1134	beq	5f
1135	li	r0, BOOK3S_INTERRUPT_EXTERNAL
1136	bne	cr1, 12f
1137	mfspr	r0, SPRN_DEC
1138BEGIN_FTR_SECTION
1139	/* On POWER9 check whether the guest has large decrementer enabled */
1140	andis.	r8, r8, LPCR_LD@h
1141	bne	15f
1142END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1143	extsw	r0, r0
114415:	cmpdi	r0, 0
1145	li	r0, BOOK3S_INTERRUPT_DECREMENTER
1146	bge	5f
1147
114812:	mtspr	SPRN_SRR0, r10
1149	mr	r10,r0
1150	mtspr	SPRN_SRR1, r11
1151	mr	r9, r4
1152	bl	kvmppc_msr_interrupt
11535:
1154BEGIN_FTR_SECTION
1155	b	fast_guest_return
1156END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1157	/* On POWER9, check for pending doorbell requests */
1158	lbz	r0, VCPU_DBELL_REQ(r4)
1159	cmpwi	r0, 0
1160	beq	fast_guest_return
1161	ld	r5, HSTATE_KVM_VCORE(r13)
1162	/* Set DPDES register so the CPU will take a doorbell interrupt */
1163	li	r0, 1
1164	mtspr	SPRN_DPDES, r0
1165	std	r0, VCORE_DPDES(r5)
1166	/* Make sure other cpus see vcore->dpdes set before dbell req clear */
1167	lwsync
1168	/* Clear the pending doorbell request */
1169	li	r0, 0
1170	stb	r0, VCPU_DBELL_REQ(r4)
1171
1172/*
1173 * Required state:
1174 * R4 = vcpu
1175 * R10: value for HSRR0
1176 * R11: value for HSRR1
1177 * R13 = PACA
1178 */
1179fast_guest_return:
1180	li	r0,0
1181	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
1182	mtspr	SPRN_HSRR0,r10
1183	mtspr	SPRN_HSRR1,r11
1184
1185	/* Activate guest mode, so faults get handled by KVM */
1186	li	r9, KVM_GUEST_MODE_GUEST_HV
1187	stb	r9, HSTATE_IN_GUEST(r13)
1188
1189#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1190	/* Accumulate timing */
1191	addi	r3, r4, VCPU_TB_GUEST
1192	bl	kvmhv_accumulate_time
1193#endif
1194
1195	/* Enter guest */
1196
1197BEGIN_FTR_SECTION
1198	ld	r5, VCPU_CFAR(r4)
1199	mtspr	SPRN_CFAR, r5
1200END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1201BEGIN_FTR_SECTION
1202	ld	r0, VCPU_PPR(r4)
1203END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1204
1205	ld	r5, VCPU_LR(r4)
1206	lwz	r6, VCPU_CR(r4)
1207	mtlr	r5
1208	mtcr	r6
1209
1210	ld	r1, VCPU_GPR(R1)(r4)
1211	ld	r2, VCPU_GPR(R2)(r4)
1212	ld	r3, VCPU_GPR(R3)(r4)
1213	ld	r5, VCPU_GPR(R5)(r4)
1214	ld	r6, VCPU_GPR(R6)(r4)
1215	ld	r7, VCPU_GPR(R7)(r4)
1216	ld	r8, VCPU_GPR(R8)(r4)
1217	ld	r9, VCPU_GPR(R9)(r4)
1218	ld	r10, VCPU_GPR(R10)(r4)
1219	ld	r11, VCPU_GPR(R11)(r4)
1220	ld	r12, VCPU_GPR(R12)(r4)
1221	ld	r13, VCPU_GPR(R13)(r4)
1222
1223BEGIN_FTR_SECTION
1224	mtspr	SPRN_PPR, r0
1225END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1226
1227/* Move canary into DSISR to check for later */
1228BEGIN_FTR_SECTION
1229	li	r0, 0x7fff
1230	mtspr	SPRN_HDSISR, r0
1231END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1232
1233	ld	r0, VCPU_GPR(R0)(r4)
1234	ld	r4, VCPU_GPR(R4)(r4)
1235	HRFI_TO_GUEST
1236	b	.
1237
1238secondary_too_late:
1239	li	r12, 0
1240	stw	r12, STACK_SLOT_TRAP(r1)
1241	cmpdi	r4, 0
1242	beq	11f
1243	stw	r12, VCPU_TRAP(r4)
1244#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1245	addi	r3, r4, VCPU_TB_RMEXIT
1246	bl	kvmhv_accumulate_time
1247#endif
124811:	b	kvmhv_switch_to_host
1249
1250no_switch_exit:
1251	HMT_MEDIUM
1252	li	r12, 0
1253	b	12f
1254hdec_soon:
1255	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
125612:	stw	r12, VCPU_TRAP(r4)
1257	mr	r9, r4
1258#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1259	addi	r3, r4, VCPU_TB_RMEXIT
1260	bl	kvmhv_accumulate_time
1261#endif
1262	b	guest_bypass
1263
1264/******************************************************************************
1265 *                                                                            *
1266 *                               Exit code                                    *
1267 *                                                                            *
1268 *****************************************************************************/
1269
1270/*
1271 * We come here from the first-level interrupt handlers.
1272 */
1273	.globl	kvmppc_interrupt_hv
1274kvmppc_interrupt_hv:
1275	/*
1276	 * Register contents:
1277	 * R12		= (guest CR << 32) | interrupt vector
1278	 * R13		= PACA
1279	 * guest R12 saved in shadow VCPU SCRATCH0
1280	 * guest CTR saved in shadow VCPU SCRATCH1 if RELOCATABLE
1281	 * guest R13 saved in SPRN_SCRATCH0
1282	 */
1283	std	r9, HSTATE_SCRATCH2(r13)
1284	lbz	r9, HSTATE_IN_GUEST(r13)
1285	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
1286	beq	kvmppc_bad_host_intr
1287#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
1288	cmpwi	r9, KVM_GUEST_MODE_GUEST
1289	ld	r9, HSTATE_SCRATCH2(r13)
1290	beq	kvmppc_interrupt_pr
1291#endif
1292	/* We're now back in the host but in guest MMU context */
1293	li	r9, KVM_GUEST_MODE_HOST_HV
1294	stb	r9, HSTATE_IN_GUEST(r13)
1295
1296	ld	r9, HSTATE_KVM_VCPU(r13)
1297
1298	/* Save registers */
1299
1300	std	r0, VCPU_GPR(R0)(r9)
1301	std	r1, VCPU_GPR(R1)(r9)
1302	std	r2, VCPU_GPR(R2)(r9)
1303	std	r3, VCPU_GPR(R3)(r9)
1304	std	r4, VCPU_GPR(R4)(r9)
1305	std	r5, VCPU_GPR(R5)(r9)
1306	std	r6, VCPU_GPR(R6)(r9)
1307	std	r7, VCPU_GPR(R7)(r9)
1308	std	r8, VCPU_GPR(R8)(r9)
1309	ld	r0, HSTATE_SCRATCH2(r13)
1310	std	r0, VCPU_GPR(R9)(r9)
1311	std	r10, VCPU_GPR(R10)(r9)
1312	std	r11, VCPU_GPR(R11)(r9)
1313	ld	r3, HSTATE_SCRATCH0(r13)
1314	std	r3, VCPU_GPR(R12)(r9)
1315	/* CR is in the high half of r12 */
1316	srdi	r4, r12, 32
1317	stw	r4, VCPU_CR(r9)
1318BEGIN_FTR_SECTION
1319	ld	r3, HSTATE_CFAR(r13)
1320	std	r3, VCPU_CFAR(r9)
1321END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1322BEGIN_FTR_SECTION
1323	ld	r4, HSTATE_PPR(r13)
1324	std	r4, VCPU_PPR(r9)
1325END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1326
1327	/* Restore R1/R2 so we can handle faults */
1328	ld	r1, HSTATE_HOST_R1(r13)
1329	ld	r2, PACATOC(r13)
1330
1331	mfspr	r10, SPRN_SRR0
1332	mfspr	r11, SPRN_SRR1
1333	std	r10, VCPU_SRR0(r9)
1334	std	r11, VCPU_SRR1(r9)
1335	/* trap is in the low half of r12, clear CR from the high half */
1336	clrldi	r12, r12, 32
1337	andi.	r0, r12, 2		/* need to read HSRR0/1? */
1338	beq	1f
1339	mfspr	r10, SPRN_HSRR0
1340	mfspr	r11, SPRN_HSRR1
1341	clrrdi	r12, r12, 2
13421:	std	r10, VCPU_PC(r9)
1343	std	r11, VCPU_MSR(r9)
1344
1345	GET_SCRATCH0(r3)
1346	mflr	r4
1347	std	r3, VCPU_GPR(R13)(r9)
1348	std	r4, VCPU_LR(r9)
1349
1350	stw	r12,VCPU_TRAP(r9)
1351
1352	/*
1353	 * Now that we have saved away SRR0/1 and HSRR0/1,
1354	 * interrupts are recoverable in principle, so set MSR_RI.
1355	 * This becomes important for relocation-on interrupts from
1356	 * the guest, which we can get in radix mode on POWER9.
1357	 */
1358	li	r0, MSR_RI
1359	mtmsrd	r0, 1
1360
1361#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1362	addi	r3, r9, VCPU_TB_RMINTR
1363	mr	r4, r9
1364	bl	kvmhv_accumulate_time
1365	ld	r5, VCPU_GPR(R5)(r9)
1366	ld	r6, VCPU_GPR(R6)(r9)
1367	ld	r7, VCPU_GPR(R7)(r9)
1368	ld	r8, VCPU_GPR(R8)(r9)
1369#endif
1370
1371	/* Save HEIR (HV emulation assist reg) in emul_inst
1372	   if this is an HEI (HV emulation interrupt, e40) */
1373	li	r3,KVM_INST_FETCH_FAILED
1374	stw	r3,VCPU_LAST_INST(r9)
1375	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
1376	bne	11f
1377	mfspr	r3,SPRN_HEIR
137811:	stw	r3,VCPU_HEIR(r9)
1379
1380	/* these are volatile across C function calls */
1381#ifdef CONFIG_RELOCATABLE
1382	ld	r3, HSTATE_SCRATCH1(r13)
1383	mtctr	r3
1384#else
1385	mfctr	r3
1386#endif
1387	mfxer	r4
1388	std	r3, VCPU_CTR(r9)
1389	std	r4, VCPU_XER(r9)
1390
1391#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1392	/* For softpatch interrupt, go off and do TM instruction emulation */
1393	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
1394	beq	kvmppc_tm_emul
1395#endif
1396
1397	/* If this is a page table miss then see if it's theirs or ours */
1398	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1399	beq	kvmppc_hdsi
1400	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1401	beq	kvmppc_hisi
1402
1403	/* See if this is a leftover HDEC interrupt */
1404	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
1405	bne	2f
1406	mfspr	r3,SPRN_HDEC
1407	EXTEND_HDEC(r3)
1408	cmpdi	r3,0
1409	mr	r4,r9
1410	bge	fast_guest_return
14112:
1412	/* See if this is an hcall we can handle in real mode */
1413	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
1414	beq	hcall_try_real_mode
1415
1416	/* Hypervisor doorbell - exit only if host IPI flag set */
1417	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
1418	bne	3f
1419BEGIN_FTR_SECTION
1420	PPC_MSGSYNC
1421	lwsync
1422END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1423	lbz	r0, HSTATE_HOST_IPI(r13)
1424	cmpwi	r0, 0
1425	beq	4f
1426	b	guest_exit_cont
14273:
1428	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
1429	cmpwi	r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
1430	bne	14f
1431	mfspr	r3, SPRN_HFSCR
1432	std	r3, VCPU_HFSCR(r9)
1433	b	guest_exit_cont
143414:
1435	/* External interrupt ? */
1436	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
1437	bne+	guest_exit_cont
1438
1439	/* External interrupt, first check for host_ipi. If this is
1440	 * set, we know the host wants us out so let's do it now
1441	 */
1442	bl	kvmppc_read_intr
1443
1444	/*
1445	 * Restore the active volatile registers after returning from
1446	 * a C function.
1447	 */
1448	ld	r9, HSTATE_KVM_VCPU(r13)
1449	li	r12, BOOK3S_INTERRUPT_EXTERNAL
1450
1451	/*
1452	 * kvmppc_read_intr return codes:
1453	 *
1454	 * Exit to host (r3 > 0)
1455	 *   1 An interrupt is pending that needs to be handled by the host
1456	 *     Exit guest and return to host by branching to guest_exit_cont
1457	 *
1458	 *   2 Passthrough that needs completion in the host
1459	 *     Exit guest and return to host by branching to guest_exit_cont
1460	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
1461	 *     to indicate to the host to complete handling the interrupt
1462	 *
1463	 * Before returning to guest, we check if any CPU is heading out
1464	 * to the host and, if so, we head out also. If no CPUs are heading
1465	 * out, we handle the return values <= 0 below.
1466	 *
1467	 * Return to guest (r3 <= 0)
1468	 *  0 No external interrupt is pending
1469	 * -1 A guest wakeup IPI (which has now been cleared)
1470	 *    In either case, we return to guest to deliver any pending
1471	 *    guest interrupts.
1472	 *
1473	 * -2 A PCI passthrough external interrupt was handled
1474	 *    (interrupt was delivered directly to guest)
1475	 *    Return to guest to deliver any pending guest interrupts.
1476	 */
1477
1478	cmpdi	r3, 1
1479	ble	1f
1480
1481	/* Return code = 2 */
1482	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
1483	stw	r12, VCPU_TRAP(r9)
1484	b	guest_exit_cont
1485
14861:	/* Return code <= 1 */
1487	cmpdi	r3, 0
1488	bgt	guest_exit_cont
1489
1490	/* Return code <= 0 */
14914:	ld	r5, HSTATE_KVM_VCORE(r13)
1492	lwz	r0, VCORE_ENTRY_EXIT(r5)
1493	cmpwi	r0, 0x100
1494	mr	r4, r9
1495	blt	deliver_guest_interrupt
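	/*
	 * In C terms the dispatch above is roughly (a sketch):
	 *	if (rc == 2) { trap = BOOK3S_INTERRUPT_HV_RM_HARD; goto guest_exit_cont; }
	 *	if (rc == 1) goto guest_exit_cont;
	 *	if (vc->entry_exit_map < 0x100)		// nobody exiting yet
	 *		goto deliver_guest_interrupt;
	 *	// otherwise fall through to guest_exit_cont
	 */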
1496
1497guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
1498	/* Save more register state  */
1499	mfdar	r6
1500	mfdsisr	r7
1501	std	r6, VCPU_DAR(r9)
1502	stw	r7, VCPU_DSISR(r9)
1503	/* don't overwrite fault_dar/fault_dsisr if HDSI */
1504	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
1505	beq	mc_cont
1506	std	r6, VCPU_FAULT_DAR(r9)
1507	stw	r7, VCPU_FAULT_DSISR(r9)
1508
1509	/* See if it is a machine check */
1510	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1511	beq	machine_check_realmode
1512mc_cont:
1513#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1514	addi	r3, r9, VCPU_TB_RMEXIT
1515	mr	r4, r9
1516	bl	kvmhv_accumulate_time
1517#endif
1518#ifdef CONFIG_KVM_XICS
1519	/* We are exiting, pull the VP from the XIVE */
1520	lbz	r0, VCPU_XIVE_PUSHED(r9)
1521	cmpwi	cr0, r0, 0
1522	beq	1f
1523	li	r7, TM_SPC_PULL_OS_CTX
1524	li	r6, TM_QW1_OS
1525	mfmsr	r0
1526	andi.	r0, r0, MSR_DR		/* in real mode? */
1527	beq	2f
1528	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
1529	cmpldi	cr0, r10, 0
1530	beq	1f
1531	/* First load to pull the context, we ignore the value */
1532	eieio
1533	lwzx	r11, r7, r10
1534	/* Second load to recover the context state (Words 0 and 1) */
1535	ldx	r11, r6, r10
1536	b	3f
15372:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
1538	cmpldi	cr0, r10, 0
1539	beq	1f
1540	/* First load to pull the context, we ignore the value */
1541	eieio
1542	lwzcix	r11, r7, r10
1543	/* Second load to recover the context state (Words 0 and 1) */
1544	ldcix	r11, r6, r10
15453:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
1546	/* Fixup some of the state for the next load */
1547	li	r10, 0
1548	li	r0, 0xff
1549	stb	r10, VCPU_XIVE_PUSHED(r9)
1550	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1551	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
1552	eieio
15531:
1554#endif /* CONFIG_KVM_XICS */
1555
1556	/* For hash guest, read the guest SLB and save it away */
1557	ld	r5, VCPU_KVM(r9)
1558	lbz	r0, KVM_RADIX(r5)
1559	li	r5, 0
1560	cmpwi	r0, 0
1561	bne	3f			/* for radix, save 0 entries */
1562	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
1563	mtctr	r0
1564	li	r6,0
1565	addi	r7,r9,VCPU_SLB
15661:	slbmfee	r8,r6
1567	andis.	r0,r8,SLB_ESID_V@h
1568	beq	2f
1569	add	r8,r8,r6		/* put index in */
1570	slbmfev	r3,r6
1571	std	r8,VCPU_SLB_E(r7)
1572	std	r3,VCPU_SLB_V(r7)
1573	addi	r7,r7,VCPU_SLB_SIZE
1574	addi	r5,r5,1
15752:	addi	r6,r6,1
1576	bdnz	1b
1577	/* Finally clear out the SLB */
1578	li	r0,0
1579	slbmte	r0,r0
1580	slbia
1581	ptesync
15823:	stw	r5,VCPU_SLB_MAX(r9)
1583
1584	/* load host SLB entries */
1585BEGIN_MMU_FTR_SECTION
1586	b	0f
1587END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
1588	ld	r8,PACA_SLBSHADOWPTR(r13)
1589
1590	.rept	SLB_NUM_BOLTED
1591	li	r3, SLBSHADOW_SAVEAREA
1592	LDX_BE	r5, r8, r3
1593	addi	r3, r3, 8
1594	LDX_BE	r6, r8, r3
1595	andis.	r7,r5,SLB_ESID_V@h
1596	beq	1f
1597	slbmte	r6,r5
15981:	addi	r8,r8,16
1599	.endr
16000:
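	/*
	 * The .rept block above reloads the bolted host SLB entries from the
	 * SLB shadow buffer; roughly (a C sketch):
	 *	for (i = 0; i < SLB_NUM_BOLTED; i++) {
	 *		esid = be64(slb_shadow->save_area[i].esid);
	 *		vsid = be64(slb_shadow->save_area[i].vsid);
	 *		if (esid & SLB_ESID_V)
	 *			slbmte(vsid, esid);
	 *	}
	 */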
1601
1602guest_bypass:
1603	stw	r12, STACK_SLOT_TRAP(r1)
1604
1605	/* Save DEC */
1606	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
1607	ld	r3, HSTATE_KVM_VCORE(r13)
1608	mfspr	r5,SPRN_DEC
1609	mftb	r6
1610	/* On P9, if the guest has large decr enabled, don't sign extend */
1611BEGIN_FTR_SECTION
1612	ld	r4, VCORE_LPCR(r3)
1613	andis.	r4, r4, LPCR_LD@h
1614	bne	16f
1615END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1616	extsw	r5,r5
161716:	add	r5,r5,r6
1618	/* r5 is a guest timebase value here, convert to host TB */
1619	ld	r4,VCORE_TB_OFFSET_APPL(r3)
1620	subf	r5,r4,r5
1621	std	r5,VCPU_DEC_EXPIRES(r9)
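	/*
	 * Mirror of the entry-side calculation: dec_expires is stored as a
	 * host-timebase value, i.e. (a sketch)
	 *	dec_expires = DEC + mftb() - tb_offset_applied
	 * with DEC sign-extended unless the guest has the large decrementer
	 * (LPCR_LD) enabled on P9.
	 */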
1622
1623	/* Increment exit count, poke other threads to exit */
1624	mr 	r3, r12
1625	bl	kvmhv_commence_exit
1626	nop
1627	ld	r9, HSTATE_KVM_VCPU(r13)
1628
1629	/* Stop others sending VCPU interrupts to this physical CPU */
1630	li	r0, -1
1631	stw	r0, VCPU_CPU(r9)
1632	stw	r0, VCPU_THREAD_CPU(r9)
1633
1634	/* Save guest CTRL register, set runlatch to 1 */
1635	mfspr	r6,SPRN_CTRLF
1636	stw	r6,VCPU_CTRL(r9)
1637	andi.	r0,r6,1
1638	bne	4f
1639	ori	r6,r6,1
1640	mtspr	SPRN_CTRLT,r6
16414:
1642	/*
1643	 * Save the guest PURR/SPURR
1644	 */
1645	mfspr	r5,SPRN_PURR
1646	mfspr	r6,SPRN_SPURR
1647	ld	r7,VCPU_PURR(r9)
1648	ld	r8,VCPU_SPURR(r9)
1649	std	r5,VCPU_PURR(r9)
1650	std	r6,VCPU_SPURR(r9)
1651	subf	r5,r7,r5
1652	subf	r6,r8,r6
1653
1654	/*
1655	 * Restore host PURR/SPURR and add guest times
1656	 * so that the time in the guest gets accounted.
1657	 */
1658	ld	r3,HSTATE_PURR(r13)
1659	ld	r4,HSTATE_SPURR(r13)
1660	add	r3,r3,r5
1661	add	r4,r4,r6
1662	mtspr	SPRN_PURR,r3
1663	mtspr	SPRN_SPURR,r4
1664
1665BEGIN_FTR_SECTION
1666	b	8f
1667END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1668	/* Save POWER8-specific registers */
1669	mfspr	r5, SPRN_IAMR
1670	mfspr	r6, SPRN_PSPB
1671	mfspr	r7, SPRN_FSCR
1672	std	r5, VCPU_IAMR(r9)
1673	stw	r6, VCPU_PSPB(r9)
1674	std	r7, VCPU_FSCR(r9)
1675	mfspr	r5, SPRN_IC
1676	mfspr	r7, SPRN_TAR
1677	std	r5, VCPU_IC(r9)
1678	std	r7, VCPU_TAR(r9)
1679	mfspr	r8, SPRN_EBBHR
1680	std	r8, VCPU_EBBHR(r9)
1681	mfspr	r5, SPRN_EBBRR
1682	mfspr	r6, SPRN_BESCR
1683	mfspr	r7, SPRN_PID
1684	mfspr	r8, SPRN_WORT
1685	std	r5, VCPU_EBBRR(r9)
1686	std	r6, VCPU_BESCR(r9)
1687	stw	r7, VCPU_GUEST_PID(r9)
1688	std	r8, VCPU_WORT(r9)
1689BEGIN_FTR_SECTION
1690	mfspr	r5, SPRN_TCSCR
1691	mfspr	r6, SPRN_ACOP
1692	mfspr	r7, SPRN_CSIGR
1693	mfspr	r8, SPRN_TACR
1694	std	r5, VCPU_TCSCR(r9)
1695	std	r6, VCPU_ACOP(r9)
1696	std	r7, VCPU_CSIGR(r9)
1697	std	r8, VCPU_TACR(r9)
1698FTR_SECTION_ELSE
1699	mfspr	r5, SPRN_TIDR
1700	mfspr	r6, SPRN_PSSCR
1701	std	r5, VCPU_TID(r9)
1702	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
1703	rotldi	r6, r6, 60
1704	std	r6, VCPU_PSSCR(r9)
1705	/* Restore host HFSCR value */
1706	ld	r7, STACK_SLOT_HFSCR(r1)
1707	mtspr	SPRN_HFSCR, r7
1708ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1709	/*
1710	 * Restore various registers to 0, where non-zero values
1711	 * set by the guest could disrupt the host.
1712	 */
1713	li	r0, 0
1714	mtspr	SPRN_PSPB, r0
1715	mtspr	SPRN_WORT, r0
1716BEGIN_FTR_SECTION
1717	mtspr	SPRN_IAMR, r0
1718	mtspr	SPRN_TCSCR, r0
1719	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
1720	li	r0, 1
1721	sldi	r0, r0, 31
1722	mtspr	SPRN_MMCRS, r0
1723END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
17248:
1725
1726	/* Save and reset AMR and UAMOR before turning on the MMU */
1727	mfspr	r5,SPRN_AMR
1728	mfspr	r6,SPRN_UAMOR
1729	std	r5,VCPU_AMR(r9)
1730	std	r6,VCPU_UAMOR(r9)
1731	li	r6,0
1732	mtspr	SPRN_AMR,r6
1733	mtspr	SPRN_UAMOR, r6
1734
1735	/* Switch DSCR back to host value */
1736	mfspr	r8, SPRN_DSCR
1737	ld	r7, HSTATE_DSCR(r13)
1738	std	r8, VCPU_DSCR(r9)
1739	mtspr	SPRN_DSCR, r7
1740
1741	/* Save non-volatile GPRs */
1742	std	r14, VCPU_GPR(R14)(r9)
1743	std	r15, VCPU_GPR(R15)(r9)
1744	std	r16, VCPU_GPR(R16)(r9)
1745	std	r17, VCPU_GPR(R17)(r9)
1746	std	r18, VCPU_GPR(R18)(r9)
1747	std	r19, VCPU_GPR(R19)(r9)
1748	std	r20, VCPU_GPR(R20)(r9)
1749	std	r21, VCPU_GPR(R21)(r9)
1750	std	r22, VCPU_GPR(R22)(r9)
1751	std	r23, VCPU_GPR(R23)(r9)
1752	std	r24, VCPU_GPR(R24)(r9)
1753	std	r25, VCPU_GPR(R25)(r9)
1754	std	r26, VCPU_GPR(R26)(r9)
1755	std	r27, VCPU_GPR(R27)(r9)
1756	std	r28, VCPU_GPR(R28)(r9)
1757	std	r29, VCPU_GPR(R29)(r9)
1758	std	r30, VCPU_GPR(R30)(r9)
1759	std	r31, VCPU_GPR(R31)(r9)
1760
1761	/* Save SPRGs */
1762	mfspr	r3, SPRN_SPRG0
1763	mfspr	r4, SPRN_SPRG1
1764	mfspr	r5, SPRN_SPRG2
1765	mfspr	r6, SPRN_SPRG3
1766	std	r3, VCPU_SPRG0(r9)
1767	std	r4, VCPU_SPRG1(r9)
1768	std	r5, VCPU_SPRG2(r9)
1769	std	r6, VCPU_SPRG3(r9)
1770
1771	/* save FP state */
1772	mr	r3, r9
1773	bl	kvmppc_save_fp
1774
1775#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1776/*
1777 * Branch around the call if both CPU_FTR_TM and
1778 * CPU_FTR_P9_TM_HV_ASSIST are off.
1779 */
1780BEGIN_FTR_SECTION
1781	b	91f
1782END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
1783	/*
1784	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
1785	 */
1786	bl	kvmppc_save_tm
178791:
1788#endif
1789
1790	/* Increment yield count if they have a VPA */
1791	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
1792	cmpdi	r8, 0
1793	beq	25f
1794	li	r4, LPPACA_YIELDCOUNT
1795	LWZX_BE	r3, r8, r4
1796	addi	r3, r3, 1
1797	STWX_BE	r3, r8, r4
1798	li	r3, 1
1799	stb	r3, VCPU_VPA_DIRTY(r9)
180025:
1801	/* Save PMU registers if requested */
1802	/* r8 and cr0.eq are live here */
1803BEGIN_FTR_SECTION
1804	/*
1805	 * POWER8 seems to have a hardware bug where setting
1806	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
1807	 * when some counters are already negative doesn't seem
1808	 * to cause a performance monitor alert (and hence interrupt).
1809	 * The effect of this is that when saving the PMU state,
1810	 * if there is no PMU alert pending when we read MMCR0
1811	 * before freezing the counters, but one becomes pending
1812	 * before we read the counters, we lose it.
1813	 * To work around this, we need a way to freeze the counters
1814	 * before reading MMCR0.  Normally, freezing the counters
1815	 * is done by writing MMCR0 (to set MMCR0[FC]) which
1816	 * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
1817	 * we can also freeze the counters using MMCR2, by writing
1818	 * 1s to all the counter freeze condition bits (there are
1819	 * 9 bits each for 6 counters).
1820	 */
1821	li	r3, -1			/* set all freeze bits */
1822	clrrdi	r3, r3, 10
1823	mfspr	r10, SPRN_MMCR2
1824	mtspr	SPRN_MMCR2, r3
1825	isync
1826END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1827	li	r3, 1
1828	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
1829	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
1830	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
1831	mfspr	r6, SPRN_MMCRA
1832	/* Clear MMCRA in order to disable SDAR updates */
1833	li	r7, 0
1834	mtspr	SPRN_MMCRA, r7
1835	isync
1836	beq	21f			/* if no VPA, save PMU stuff anyway */
1837	lbz	r7, LPPACA_PMCINUSE(r8)
1838	cmpwi	r7, 0			/* did they ask for PMU stuff to be saved? */
1839	bne	21f
1840	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
1841	b	22f
184221:	mfspr	r5, SPRN_MMCR1
1843	mfspr	r7, SPRN_SIAR
1844	mfspr	r8, SPRN_SDAR
1845	std	r4, VCPU_MMCR(r9)
1846	std	r5, VCPU_MMCR + 8(r9)
1847	std	r6, VCPU_MMCR + 16(r9)
1848BEGIN_FTR_SECTION
1849	std	r10, VCPU_MMCR + 24(r9)
1850END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1851	std	r7, VCPU_SIAR(r9)
1852	std	r8, VCPU_SDAR(r9)
1853	mfspr	r3, SPRN_PMC1
1854	mfspr	r4, SPRN_PMC2
1855	mfspr	r5, SPRN_PMC3
1856	mfspr	r6, SPRN_PMC4
1857	mfspr	r7, SPRN_PMC5
1858	mfspr	r8, SPRN_PMC6
1859	stw	r3, VCPU_PMC(r9)
1860	stw	r4, VCPU_PMC + 4(r9)
1861	stw	r5, VCPU_PMC + 8(r9)
1862	stw	r6, VCPU_PMC + 12(r9)
1863	stw	r7, VCPU_PMC + 16(r9)
1864	stw	r8, VCPU_PMC + 20(r9)
1865BEGIN_FTR_SECTION
1866	mfspr	r5, SPRN_SIER
1867	std	r5, VCPU_SIER(r9)
1868BEGIN_FTR_SECTION_NESTED(96)
1869	mfspr	r6, SPRN_SPMC1
1870	mfspr	r7, SPRN_SPMC2
1871	mfspr	r8, SPRN_MMCRS
1872	stw	r6, VCPU_PMC + 24(r9)
1873	stw	r7, VCPU_PMC + 28(r9)
1874	std	r8, VCPU_MMCR + 32(r9)
1875	lis	r4, 0x8000
1876	mtspr	SPRN_MMCRS, r4
1877END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
1878END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
187922:
1880
1881	/* Restore host values of some registers */
1882BEGIN_FTR_SECTION
1883	ld	r5, STACK_SLOT_CIABR(r1)
1884	ld	r6, STACK_SLOT_DAWR(r1)
1885	ld	r7, STACK_SLOT_DAWRX(r1)
1886	mtspr	SPRN_CIABR, r5
1887	/*
1888	 * If the DAWR doesn't work, it's OK to write these here as
1889	 * these values should always be zero.
1890	 */
1891	mtspr	SPRN_DAWR, r6
1892	mtspr	SPRN_DAWRX, r7
1893END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1894BEGIN_FTR_SECTION
1895	ld	r5, STACK_SLOT_TID(r1)
1896	ld	r6, STACK_SLOT_PSSCR(r1)
1897	ld	r7, STACK_SLOT_PID(r1)
1898	ld	r8, STACK_SLOT_IAMR(r1)
1899	mtspr	SPRN_TIDR, r5
1900	mtspr	SPRN_PSSCR, r6
1901	mtspr	SPRN_PID, r7
1902	mtspr	SPRN_IAMR, r8
1903END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1904
1905#ifdef CONFIG_PPC_RADIX_MMU
1906	/*
1907	 * Are we running hash or radix ?
1908	 */
1909	ld	r5, VCPU_KVM(r9)
1910	lbz	r0, KVM_RADIX(r5)
1911	cmpwi	cr2, r0, 0
1912	beq	cr2, 4f
1913
1914	/*
1915	 * Radix: do eieio; tlbsync; ptesync sequence in case we
1916	 * interrupted the guest between a tlbie and a ptesync.
1917	 */
1918	eieio
1919	tlbsync
1920	ptesync
1921
1922	/* Radix: Handle the case where the guest used an illegal PID */
1923	LOAD_REG_ADDR(r4, mmu_base_pid)
1924	lwz	r3, VCPU_GUEST_PID(r9)
1925	lwz	r5, 0(r4)
1926	cmpw	cr0,r3,r5
1927	blt	2f
1928
1929	/*
1930	 * The guest used an illegal PID: the HW might have prefetched and
1931	 * cached in the TLB some translations for the LPID 0 / guest PID
1932	 * combination which Linux doesn't know about, so we need to flush
1933	 * that PID out of the TLB. First set LPIDR to 0 so tlbiel applies
1934	 * to the right context.
1935	 */
1936	li	r0,0
1937	mtspr	SPRN_LPID,r0
1938	isync
1939
1940	/* Then do a congruence class local flush */
1941	ld	r6,VCPU_KVM(r9)
1942	lwz	r0,KVM_TLB_SETS(r6)
1943	mtctr	r0
1944	li	r7,0x400		/* IS field = 0b01 */
1945	ptesync
1946	sldi	r0,r3,32		/* RS has PID */
19471:	PPC_TLBIEL(7,0,2,1,1)		/* RIC=2, PRS=1, R=1 */
1948	addi	r7,r7,0x1000
1949	bdnz	1b
1950	ptesync
1951
19522:	/* Flush the ERAT on radix P9 DD1 guest exit */
1953BEGIN_FTR_SECTION
1954	PPC_INVALIDATE_ERAT
1955END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
19564:
1957#endif /* CONFIG_PPC_RADIX_MMU */
1958
1959	/*
1960	 * POWER7/POWER8 guest -> host partition switch code.
1961	 * We don't have to lock against tlbies but we do
1962	 * have to coordinate the hardware threads.
1963	 * Here STACK_SLOT_TRAP(r1) contains the trap number.
1964	 */
1965kvmhv_switch_to_host:
1966	/* Secondary threads wait for primary to do partition switch */
1967	ld	r5,HSTATE_KVM_VCORE(r13)
1968	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
1969	lbz	r3,HSTATE_PTID(r13)
1970	cmpwi	r3,0
1971	beq	15f
1972	HMT_LOW
197313:	lbz	r3,VCORE_IN_GUEST(r5)
1974	cmpwi	r3,0
1975	bne	13b
1976	HMT_MEDIUM
1977	b	16f
1978
1979	/* Primary thread waits for all the secondaries to exit guest */
198015:	lwz	r3,VCORE_ENTRY_EXIT(r5)
1981	rlwinm	r0,r3,32-8,0xff
1982	clrldi	r3,r3,56
1983	cmpw	r3,r0
1984	bne	15b
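	/*
	 * entry_exit_map keeps the entry bits in the low byte and the exit
	 * bits in the next byte; the loop above spins until the exit byte
	 * equals the entry byte, i.e. every thread that entered the guest
	 * has also marked itself as exiting.
	 */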
1985	isync
1986
1987	/* Did we actually switch to the guest at all? */
1988	lbz	r6, VCORE_IN_GUEST(r5)
1989	cmpwi	r6, 0
1990	beq	19f
1991
1992	/* Primary thread switches back to host partition */
1993	lwz	r7,KVM_HOST_LPID(r4)
1994BEGIN_FTR_SECTION
1995	ld	r6,KVM_HOST_SDR1(r4)
1996	li	r8,LPID_RSVD		/* switch to reserved LPID */
1997	mtspr	SPRN_LPID,r8
1998	ptesync
1999	mtspr	SPRN_SDR1,r6		/* switch to host page table */
2000END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
2001	mtspr	SPRN_LPID,r7
2002	isync
2003
2004BEGIN_FTR_SECTION
2005	/* DPDES and VTB are shared between threads */
2006	mfspr	r7, SPRN_DPDES
2007	mfspr	r8, SPRN_VTB
2008	std	r7, VCORE_DPDES(r5)
2009	std	r8, VCORE_VTB(r5)
2010	/* clear DPDES so we don't get guest doorbells in the host */
2011	li	r8, 0
2012	mtspr	SPRN_DPDES, r8
2013END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2014
2015	/* If HMI, call kvmppc_realmode_hmi_handler() */
2016	lwz	r12, STACK_SLOT_TRAP(r1)
2017	cmpwi	r12, BOOK3S_INTERRUPT_HMI
2018	bne	27f
2019	bl	kvmppc_realmode_hmi_handler
2020	nop
2021	cmpdi	r3, 0
2022	/*
2023	 * At this point kvmppc_realmode_hmi_handler may have resynchronized
2024	 * the TB, and if it has, we must not subtract the guest timebase
2025	 * offset from the timebase. So, skip it.
2026	 *
2027	 * Also, do not call kvmppc_subcore_exit_guest() because it has
2028	 * been invoked as part of kvmppc_realmode_hmi_handler().
2029	 */
2030	beq	30f
2031
203227:
2033	/* Subtract timebase offset from timebase */
2034	ld	r8, VCORE_TB_OFFSET_APPL(r5)
2035	cmpdi	r8,0
2036	beq	17f
2037	li	r0, 0
2038	std	r0, VCORE_TB_OFFSET_APPL(r5)
2039	mftb	r6			/* current guest timebase */
2040	subf	r8,r8,r6
2041	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
2042	mftb	r7			/* check if lower 24 bits overflowed */
2043	clrldi	r6,r6,40
2044	clrldi	r7,r7,40
2045	cmpld	r7,r6
2046	bge	17f
2047	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
2048	mtspr	SPRN_TBU40,r8
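	/*
	 * Writing TBU40 replaces only the upper 40 bits of the timebase; if
	 * the lower 24 bits wrapped between the mftb and the mtspr above,
	 * the carry into the upper bits was lost, so it is added back by
	 * hand (the addis adds 2^24, one unit of the upper 40-bit field).
	 */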
2049
205017:	bl	kvmppc_subcore_exit_guest
2051	nop
205230:	ld	r5,HSTATE_KVM_VCORE(r13)
2053	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
2054
2055	/* Reset PCR */
2056	ld	r0, VCORE_PCR(r5)
2057	cmpdi	r0, 0
2058	beq	18f
2059	li	r0, 0
2060	mtspr	SPRN_PCR, r0
206118:
2062	/* Signal secondary CPUs to continue */
2063	stb	r0,VCORE_IN_GUEST(r5)
206419:	lis	r8,0x7fff		/* MAX_INT@h */
2065	mtspr	SPRN_HDEC,r8
2066
206716:
2068BEGIN_FTR_SECTION
2069	/* On POWER9 with HPT-on-radix we need to wait for all other threads */
2070	ld	r3, HSTATE_SPLIT_MODE(r13)
2071	cmpdi	r3, 0
2072	beq	47f
2073	lwz	r8, KVM_SPLIT_DO_RESTORE(r3)
2074	cmpwi	r8, 0
2075	beq	47f
2076	bl	kvmhv_p9_restore_lpcr
2077	nop
2078	b	48f
207947:
2080END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2081	ld	r8,KVM_HOST_LPCR(r4)
2082	mtspr	SPRN_LPCR,r8
2083	isync
208448:
2085#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2086	/* Finish timing, if we have a vcpu */
2087	ld	r4, HSTATE_KVM_VCPU(r13)
2088	cmpdi	r4, 0
2089	li	r3, 0
2090	beq	2f
2091	bl	kvmhv_accumulate_time
20922:
2093#endif
2094	/* Unset guest mode */
2095	li	r0, KVM_GUEST_MODE_NONE
2096	stb	r0, HSTATE_IN_GUEST(r13)
2097
2098	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
2099	ld	r0, SFS+PPC_LR_STKOFF(r1)
2100	addi	r1, r1, SFS
2101	mtlr	r0
2102	blr
2103
2104#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2105/*
2106 * Softpatch interrupt for transactional memory emulation cases
2107 * on POWER9 DD2.2.  This is early in the guest exit path - we
2108 * haven't saved registers or done a treclaim yet.
2109 */
2110kvmppc_tm_emul:
2111	/* Save instruction image in HEIR */
2112	mfspr	r3, SPRN_HEIR
2113	stw	r3, VCPU_HEIR(r9)
2114
2115	/*
2116	 * The cases we want to handle here are those where the guest
2117	 * is in real suspend mode and is trying to transition to
2118	 * transactional mode.
2119	 */
2120	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
2121	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
2122	bne	guest_exit_cont
2123	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62
2124	cmpwi	r3, 1		/* or if not in suspend state */
2125	bne	guest_exit_cont
2126
2127	/* Call C code to do the emulation */
2128	mr	r3, r9
2129	bl	kvmhv_p9_tm_emulation_early
2130	nop
2131	ld	r9, HSTATE_KVM_VCPU(r13)
2132	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
2133	cmpwi	r3, 0
2134	beq	guest_exit_cont		/* continue exiting if not handled */
2135	ld	r10, VCPU_PC(r9)
2136	ld	r11, VCPU_MSR(r9)
2137	b	fast_interrupt_c_return	/* go back to guest if handled */
2138#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
2139
2140/*
2141 * Check whether an HDSI is an HPTE not found fault or something else.
2142 * If it is an HPTE not found fault that is due to the guest accessing
2143	 * a page that it has mapped but which we have paged out, then
2144 * we continue on with the guest exit path.  In all other cases,
2145 * reflect the HDSI to the guest as a DSI.
2146 */
2147kvmppc_hdsi:
2148	ld	r3, VCPU_KVM(r9)
2149	lbz	r0, KVM_RADIX(r3)
2150	mfspr	r4, SPRN_HDAR
2151	mfspr	r6, SPRN_HDSISR
2152BEGIN_FTR_SECTION
2153	/* Look for DSISR canary. If we find it, retry instruction */
2154	cmpdi	r6, 0x7fff
2155	beq	6f
2156END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2157	cmpwi	r0, 0
2158	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
2159	/* HPTE not found fault or protection fault? */
2160	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
2161	beq	1f			/* if not, send it to the guest */
2162	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
2163	beq	3f
2164BEGIN_FTR_SECTION
2165	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2166	b	4f
2167END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2168	clrrdi	r0, r4, 28
2169	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
2170	li	r0, BOOK3S_INTERRUPT_DATA_SEGMENT
2171	bne	7f			/* if no SLB entry found */
21724:	std	r4, VCPU_FAULT_DAR(r9)
2173	stw	r6, VCPU_FAULT_DSISR(r9)
2174
2175	/* Search the hash table. */
2176	mr	r3, r9			/* vcpu pointer */
2177	li	r7, 1			/* data fault */
2178	bl	kvmppc_hpte_hv_fault
2179	ld	r9, HSTATE_KVM_VCPU(r13)
2180	ld	r10, VCPU_PC(r9)
2181	ld	r11, VCPU_MSR(r9)
2182	li	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
2183	cmpdi	r3, 0			/* retry the instruction */
2184	beq	6f
2185	cmpdi	r3, -1			/* handle in kernel mode */
2186	beq	guest_exit_cont
2187	cmpdi	r3, -2			/* MMIO emulation; need instr word */
2188	beq	2f
2189
2190	/* Synthesize a DSI (or DSegI) for the guest */
2191	ld	r4, VCPU_FAULT_DAR(r9)
2192	mr	r6, r3
21931:	li	r0, BOOK3S_INTERRUPT_DATA_STORAGE
2194	mtspr	SPRN_DSISR, r6
21957:	mtspr	SPRN_DAR, r4
2196	mtspr	SPRN_SRR0, r10
2197	mtspr	SPRN_SRR1, r11
2198	mr	r10, r0
2199	bl	kvmppc_msr_interrupt
2200fast_interrupt_c_return:
22016:	ld	r7, VCPU_CTR(r9)
2202	ld	r8, VCPU_XER(r9)
2203	mtctr	r7
2204	mtxer	r8
2205	mr	r4, r9
2206	b	fast_guest_return
2207
22083:	ld	r5, VCPU_KVM(r9)	/* not relocated, use VRMA */
2209	ld	r5, KVM_VRMA_SLB_V(r5)
2210	b	4b
2211
2212	/* If this is for emulated MMIO, load the instruction word */
22132:	li	r8, KVM_INST_FETCH_FAILED	/* In case lwz faults */
2214
2215	/* Set guest mode to 'jump over instruction' so if lwz faults
2216	 * we'll just continue at the next IP. */
2217	li	r0, KVM_GUEST_MODE_SKIP
2218	stb	r0, HSTATE_IN_GUEST(r13)
2219
2220	/* Do the access with MSR:DR enabled */
2221	mfmsr	r3
2222	ori	r4, r3, MSR_DR		/* Enable paging for data */
2223	mtmsrd	r4
2224	lwz	r8, 0(r10)
2225	mtmsrd	r3
2226
2227	/* Store the result */
2228	stw	r8, VCPU_LAST_INST(r9)
2229
2230	/* Unset guest mode. */
2231	li	r0, KVM_GUEST_MODE_HOST_HV
2232	stb	r0, HSTATE_IN_GUEST(r13)
2233	b	guest_exit_cont
2234
2235.Lradix_hdsi:
2236	std	r4, VCPU_FAULT_DAR(r9)
2237	stw	r6, VCPU_FAULT_DSISR(r9)
2238.Lradix_hisi:
2239	mfspr	r5, SPRN_ASDR
2240	std	r5, VCPU_FAULT_GPA(r9)
2241	b	guest_exit_cont
2242
2243/*
2244 * Similarly for an HISI, reflect it to the guest as an ISI unless
2245 * it is an HPTE not found fault for a page that we have paged out.
2246 */
2247kvmppc_hisi:
2248	ld	r3, VCPU_KVM(r9)
2249	lbz	r0, KVM_RADIX(r3)
2250	cmpwi	r0, 0
2251	bne	.Lradix_hisi		/* for radix, just save ASDR */
2252	andis.	r0, r11, SRR1_ISI_NOPT@h
2253	beq	1f
2254	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
2255	beq	3f
2256BEGIN_FTR_SECTION
2257	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2258	b	4f
2259END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2260	clrrdi	r0, r10, 28
2261	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
2262	li	r0, BOOK3S_INTERRUPT_INST_SEGMENT
2263	bne	7f			/* if no SLB entry found */
22644:
2265	/* Search the hash table. */
2266	mr	r3, r9			/* vcpu pointer */
2267	mr	r4, r10
2268	mr	r6, r11
2269	li	r7, 0			/* instruction fault */
2270	bl	kvmppc_hpte_hv_fault
2271	ld	r9, HSTATE_KVM_VCPU(r13)
2272	ld	r10, VCPU_PC(r9)
2273	ld	r11, VCPU_MSR(r9)
2274	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
2275	cmpdi	r3, 0			/* retry the instruction */
2276	beq	fast_interrupt_c_return
2277	cmpdi	r3, -1			/* handle in kernel mode */
2278	beq	guest_exit_cont
2279
2280	/* Synthesize an ISI (or ISegI) for the guest */
2281	mr	r11, r3
22821:	li	r0, BOOK3S_INTERRUPT_INST_STORAGE
22837:	mtspr	SPRN_SRR0, r10
2284	mtspr	SPRN_SRR1, r11
2285	mr	r10, r0
2286	bl	kvmppc_msr_interrupt
2287	b	fast_interrupt_c_return
2288
22893:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
2290	ld	r5, KVM_VRMA_SLB_V(r6)
2291	b	4b
2292
2293/*
2294 * Try to handle an hcall in real mode.
2295 * Returns to the guest if we handle it, or continues on up to
2296 * the kernel if we can't (i.e. if we don't have a handler for
2297 * it, or if the handler returns H_TOO_HARD).
2298 *
2299 * r5 - r8 contain hcall args,
2300 * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
2301 */
2302hcall_try_real_mode:
2303	ld	r3,VCPU_GPR(R3)(r9)
2304	andi.	r0,r11,MSR_PR
2305	/* sc 1 from userspace - reflect to guest syscall */
2306	bne	sc_1_fast_return
2307	clrrdi	r3,r3,2
2308	cmpldi	r3,hcall_real_table_end - hcall_real_table
2309	bge	guest_exit_cont
2310	/* See if this hcall is enabled for in-kernel handling */
2311	ld	r4, VCPU_KVM(r9)
2312	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
2313	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
2314	add	r4, r4, r0
2315	ld	r0, KVM_ENABLED_HCALLS(r4)
2316	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
2317	srd	r0, r0, r4
2318	andi.	r0, r0, 1
2319	beq	guest_exit_cont
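	/*
	 * The check above treats kvm->arch.enabled_hcalls[] as a bitmap
	 * indexed by hcall number / 4: (r3 / 4) >> 6 selects the 64-bit
	 * word and (r3 / 4) & 0x3f the bit within it.  r3 still holds the
	 * hcall number for the handler table lookup below.
	 */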
2320	/* Get pointer to handler, if any, and call it */
2321	LOAD_REG_ADDR(r4, hcall_real_table)
2322	lwax	r3,r3,r4
2323	cmpwi	r3,0
2324	beq	guest_exit_cont
2325	add	r12,r3,r4
2326	mtctr	r12
2327	mr	r3,r9		/* get vcpu pointer */
2328	ld	r4,VCPU_GPR(R4)(r9)
2329	bctrl
2330	cmpdi	r3,H_TOO_HARD
2331	beq	hcall_real_fallback
2332	ld	r4,HSTATE_KVM_VCPU(r13)
2333	std	r3,VCPU_GPR(R3)(r4)
2334	ld	r10,VCPU_PC(r4)
2335	ld	r11,VCPU_MSR(r4)
2336	b	fast_guest_return
2337
2338sc_1_fast_return:
2339	mtspr	SPRN_SRR0,r10
2340	mtspr	SPRN_SRR1,r11
2341	li	r10, BOOK3S_INTERRUPT_SYSCALL
2342	bl	kvmppc_msr_interrupt
2343	mr	r4,r9
2344	b	fast_guest_return
2345
2346	/* We've attempted a real mode hcall, but it has been punted back
2347	 * to userspace.  We need to restore some clobbered volatiles
2348	 * before resuming the pass-it-to-qemu path */
2349hcall_real_fallback:
2350	li	r12,BOOK3S_INTERRUPT_SYSCALL
2351	ld	r9, HSTATE_KVM_VCPU(r13)
2352
2353	b	guest_exit_cont
2354
2355	.globl	hcall_real_table
2356hcall_real_table:
2357	.long	0		/* 0 - unused */
2358	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table
2359	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table
2360	.long	DOTSYM(kvmppc_h_read) - hcall_real_table
2361	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
2362	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
2363	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table
2364	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table
2365	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table
2366	.long	0		/* 0x24 - H_SET_SPRG0 */
2367	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
2368	.long	0		/* 0x2c */
2369	.long	0		/* 0x30 */
2370	.long	0		/* 0x34 */
2371	.long	0		/* 0x38 */
2372	.long	0		/* 0x3c */
2373	.long	0		/* 0x40 */
2374	.long	0		/* 0x44 */
2375	.long	0		/* 0x48 */
2376	.long	0		/* 0x4c */
2377	.long	0		/* 0x50 */
2378	.long	0		/* 0x54 */
2379	.long	0		/* 0x58 */
2380	.long	0		/* 0x5c */
2381	.long	0		/* 0x60 */
2382#ifdef CONFIG_KVM_XICS
2383	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
2384	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
2385	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
2386	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table
2387	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
2388#else
2389	.long	0		/* 0x64 - H_EOI */
2390	.long	0		/* 0x68 - H_CPPR */
2391	.long	0		/* 0x6c - H_IPI */
2392	.long	0		/* 0x70 - H_IPOLL */
2393	.long	0		/* 0x74 - H_XIRR */
2394#endif
2395	.long	0		/* 0x78 */
2396	.long	0		/* 0x7c */
2397	.long	0		/* 0x80 */
2398	.long	0		/* 0x84 */
2399	.long	0		/* 0x88 */
2400	.long	0		/* 0x8c */
2401	.long	0		/* 0x90 */
2402	.long	0		/* 0x94 */
2403	.long	0		/* 0x98 */
2404	.long	0		/* 0x9c */
2405	.long	0		/* 0xa0 */
2406	.long	0		/* 0xa4 */
2407	.long	0		/* 0xa8 */
2408	.long	0		/* 0xac */
2409	.long	0		/* 0xb0 */
2410	.long	0		/* 0xb4 */
2411	.long	0		/* 0xb8 */
2412	.long	0		/* 0xbc */
2413	.long	0		/* 0xc0 */
2414	.long	0		/* 0xc4 */
2415	.long	0		/* 0xc8 */
2416	.long	0		/* 0xcc */
2417	.long	0		/* 0xd0 */
2418	.long	0		/* 0xd4 */
2419	.long	0		/* 0xd8 */
2420	.long	0		/* 0xdc */
2421	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
2422	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
2423	.long	0		/* 0xe8 */
2424	.long	0		/* 0xec */
2425	.long	0		/* 0xf0 */
2426	.long	0		/* 0xf4 */
2427	.long	0		/* 0xf8 */
2428	.long	0		/* 0xfc */
2429	.long	0		/* 0x100 */
2430	.long	0		/* 0x104 */
2431	.long	0		/* 0x108 */
2432	.long	0		/* 0x10c */
2433	.long	0		/* 0x110 */
2434	.long	0		/* 0x114 */
2435	.long	0		/* 0x118 */
2436	.long	0		/* 0x11c */
2437	.long	0		/* 0x120 */
2438	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
2439	.long	0		/* 0x128 */
2440	.long	0		/* 0x12c */
2441	.long	0		/* 0x130 */
2442	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
2443	.long	DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table
2444	.long	DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table
2445	.long	0		/* 0x140 */
2446	.long	0		/* 0x144 */
2447	.long	0		/* 0x148 */
2448	.long	0		/* 0x14c */
2449	.long	0		/* 0x150 */
2450	.long	0		/* 0x154 */
2451	.long	0		/* 0x158 */
2452	.long	0		/* 0x15c */
2453	.long	0		/* 0x160 */
2454	.long	0		/* 0x164 */
2455	.long	0		/* 0x168 */
2456	.long	0		/* 0x16c */
2457	.long	0		/* 0x170 */
2458	.long	0		/* 0x174 */
2459	.long	0		/* 0x178 */
2460	.long	0		/* 0x17c */
2461	.long	0		/* 0x180 */
2462	.long	0		/* 0x184 */
2463	.long	0		/* 0x188 */
2464	.long	0		/* 0x18c */
2465	.long	0		/* 0x190 */
2466	.long	0		/* 0x194 */
2467	.long	0		/* 0x198 */
2468	.long	0		/* 0x19c */
2469	.long	0		/* 0x1a0 */
2470	.long	0		/* 0x1a4 */
2471	.long	0		/* 0x1a8 */
2472	.long	0		/* 0x1ac */
2473	.long	0		/* 0x1b0 */
2474	.long	0		/* 0x1b4 */
2475	.long	0		/* 0x1b8 */
2476	.long	0		/* 0x1bc */
2477	.long	0		/* 0x1c0 */
2478	.long	0		/* 0x1c4 */
2479	.long	0		/* 0x1c8 */
2480	.long	0		/* 0x1cc */
2481	.long	0		/* 0x1d0 */
2482	.long	0		/* 0x1d4 */
2483	.long	0		/* 0x1d8 */
2484	.long	0		/* 0x1dc */
2485	.long	0		/* 0x1e0 */
2486	.long	0		/* 0x1e4 */
2487	.long	0		/* 0x1e8 */
2488	.long	0		/* 0x1ec */
2489	.long	0		/* 0x1f0 */
2490	.long	0		/* 0x1f4 */
2491	.long	0		/* 0x1f8 */
2492	.long	0		/* 0x1fc */
2493	.long	0		/* 0x200 */
2494	.long	0		/* 0x204 */
2495	.long	0		/* 0x208 */
2496	.long	0		/* 0x20c */
2497	.long	0		/* 0x210 */
2498	.long	0		/* 0x214 */
2499	.long	0		/* 0x218 */
2500	.long	0		/* 0x21c */
2501	.long	0		/* 0x220 */
2502	.long	0		/* 0x224 */
2503	.long	0		/* 0x228 */
2504	.long	0		/* 0x22c */
2505	.long	0		/* 0x230 */
2506	.long	0		/* 0x234 */
2507	.long	0		/* 0x238 */
2508	.long	0		/* 0x23c */
2509	.long	0		/* 0x240 */
2510	.long	0		/* 0x244 */
2511	.long	0		/* 0x248 */
2512	.long	0		/* 0x24c */
2513	.long	0		/* 0x250 */
2514	.long	0		/* 0x254 */
2515	.long	0		/* 0x258 */
2516	.long	0		/* 0x25c */
2517	.long	0		/* 0x260 */
2518	.long	0		/* 0x264 */
2519	.long	0		/* 0x268 */
2520	.long	0		/* 0x26c */
2521	.long	0		/* 0x270 */
2522	.long	0		/* 0x274 */
2523	.long	0		/* 0x278 */
2524	.long	0		/* 0x27c */
2525	.long	0		/* 0x280 */
2526	.long	0		/* 0x284 */
2527	.long	0		/* 0x288 */
2528	.long	0		/* 0x28c */
2529	.long	0		/* 0x290 */
2530	.long	0		/* 0x294 */
2531	.long	0		/* 0x298 */
2532	.long	0		/* 0x29c */
2533	.long	0		/* 0x2a0 */
2534	.long	0		/* 0x2a4 */
2535	.long	0		/* 0x2a8 */
2536	.long	0		/* 0x2ac */
2537	.long	0		/* 0x2b0 */
2538	.long	0		/* 0x2b4 */
2539	.long	0		/* 0x2b8 */
2540	.long	0		/* 0x2bc */
2541	.long	0		/* 0x2c0 */
2542	.long	0		/* 0x2c4 */
2543	.long	0		/* 0x2c8 */
2544	.long	0		/* 0x2cc */
2545	.long	0		/* 0x2d0 */
2546	.long	0		/* 0x2d4 */
2547	.long	0		/* 0x2d8 */
2548	.long	0		/* 0x2dc */
2549	.long	0		/* 0x2e0 */
2550	.long	0		/* 0x2e4 */
2551	.long	0		/* 0x2e8 */
2552	.long	0		/* 0x2ec */
2553	.long	0		/* 0x2f0 */
2554	.long	0		/* 0x2f4 */
2555	.long	0		/* 0x2f8 */
2556#ifdef CONFIG_KVM_XICS
2557	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table
2558#else
2559	.long	0		/* 0x2fc - H_XIRR_X*/
2560#endif
2561	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
2562	.globl	hcall_real_table_end
2563hcall_real_table_end:
2564
2565_GLOBAL(kvmppc_h_set_xdabr)
2566	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
2567	beq	6f
2568	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
2569	andc.	r0, r5, r0
2570	beq	3f
25716:	li	r3, H_PARAMETER
2572	blr
2573
2574_GLOBAL(kvmppc_h_set_dabr)
2575	li	r5, DABRX_USER | DABRX_KERNEL
25763:
2577BEGIN_FTR_SECTION
2578	b	2f
2579END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2580	std	r4,VCPU_DABR(r3)
2581	stw	r5, VCPU_DABRX(r3)
2582	mtspr	SPRN_DABRX, r5
2583	/* Work around P7 bug where DABR can get corrupted on mtspr */
25841:	mtspr	SPRN_DABR,r4
2585	mfspr	r5, SPRN_DABR
2586	cmpd	r4, r5
2587	bne	1b
2588	isync
2589	li	r3,0
2590	blr
2591
25922:
2593BEGIN_FTR_SECTION
2594	/* POWER9 with disabled DAWR */
2595	li	r3, H_HARDWARE
2596	blr
2597END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
2598	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2599	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
2600	rlwimi	r5, r4, 2, DAWRX_WT
2601	clrrdi	r4, r4, 3
2602	std	r4, VCPU_DAWR(r3)
2603	std	r5, VCPU_DAWRX(r3)
2604	mtspr	SPRN_DAWR, r4
2605	mtspr	SPRN_DAWRX, r5
2606	li	r3, 0
2607	blr
2608
2609_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
2610	ori	r11,r11,MSR_EE
2611	std	r11,VCPU_MSR(r3)
2612	li	r0,1
2613	stb	r0,VCPU_CEDED(r3)
2614	sync			/* order setting ceded vs. testing prodded */
2615	lbz	r5,VCPU_PRODDED(r3)
2616	cmpwi	r5,0
2617	bne	kvm_cede_prodded
2618	li	r12,0		/* set trap to 0 to say hcall is handled */
2619	stw	r12,VCPU_TRAP(r3)
2620	li	r0,H_SUCCESS
2621	std	r0,VCPU_GPR(R3)(r3)
2622
2623	/*
2624	 * Set our bit in the bitmask of napping threads unless all the
2625	 * other threads are already napping, in which case we send this
2626	 * up to the host.
2627	 */
2628	ld	r5,HSTATE_KVM_VCORE(r13)
2629	lbz	r6,HSTATE_PTID(r13)
2630	lwz	r8,VCORE_ENTRY_EXIT(r5)
2631	clrldi	r8,r8,56
2632	li	r0,1
2633	sld	r0,r0,r6
2634	addi	r6,r5,VCORE_NAPPING_THREADS
263531:	lwarx	r4,0,r6
2636	or	r4,r4,r0
2637	cmpw	r4,r8
2638	beq	kvm_cede_exit
2639	stwcx.	r4,0,r6
2640	bne	31b
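	/*
	 * The lwarx/stwcx. loop retries until the napping_threads update
	 * commits atomically; the beq above bails out to kvm_cede_exit
	 * before storing if ours would be the last active thread to nap.
	 */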
2641	/* order napping_threads update vs testing entry_exit_map */
2642	isync
2643	li	r0,NAPPING_CEDE
2644	stb	r0,HSTATE_NAPPING(r13)
2645	lwz	r7,VCORE_ENTRY_EXIT(r5)
2646	cmpwi	r7,0x100
2647	bge	33f		/* another thread already exiting */
2648
2649/*
2650 * Although not specifically required by the architecture, POWER7
2651 * preserves the following registers in nap mode, even if an SMT mode
2652 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
2653 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
2654 */
2655	/* Save non-volatile GPRs */
2656	std	r14, VCPU_GPR(R14)(r3)
2657	std	r15, VCPU_GPR(R15)(r3)
2658	std	r16, VCPU_GPR(R16)(r3)
2659	std	r17, VCPU_GPR(R17)(r3)
2660	std	r18, VCPU_GPR(R18)(r3)
2661	std	r19, VCPU_GPR(R19)(r3)
2662	std	r20, VCPU_GPR(R20)(r3)
2663	std	r21, VCPU_GPR(R21)(r3)
2664	std	r22, VCPU_GPR(R22)(r3)
2665	std	r23, VCPU_GPR(R23)(r3)
2666	std	r24, VCPU_GPR(R24)(r3)
2667	std	r25, VCPU_GPR(R25)(r3)
2668	std	r26, VCPU_GPR(R26)(r3)
2669	std	r27, VCPU_GPR(R27)(r3)
2670	std	r28, VCPU_GPR(R28)(r3)
2671	std	r29, VCPU_GPR(R29)(r3)
2672	std	r30, VCPU_GPR(R30)(r3)
2673	std	r31, VCPU_GPR(R31)(r3)
2674
2675	/* save FP state */
2676	bl	kvmppc_save_fp
2677
2678#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2679/*
2680 * Branch around the call if both CPU_FTR_TM and
2681 * CPU_FTR_P9_TM_HV_ASSIST are off.
2682 */
2683BEGIN_FTR_SECTION
2684	b	91f
2685END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2686	/*
2687	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
2688	 */
2689	ld	r9, HSTATE_KVM_VCPU(r13)
2690	bl	kvmppc_save_tm
269191:
2692#endif
2693
2694	/*
2695	 * Set DEC to the smaller of DEC and HDEC, so that we wake
2696	 * no later than the end of our timeslice (HDEC interrupts
2697	 * don't wake us from nap).
2698	 */
2699	mfspr	r3, SPRN_DEC
2700	mfspr	r4, SPRN_HDEC
2701	mftb	r5
2702BEGIN_FTR_SECTION
2703	/* On P9 check whether the guest has large decrementer mode enabled */
2704	ld	r6, HSTATE_KVM_VCORE(r13)
2705	ld	r6, VCORE_LPCR(r6)
2706	andis.	r6, r6, LPCR_LD@h
2707	bne	68f
2708END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2709	extsw	r3, r3
271068:	EXTEND_HDEC(r4)
2711	cmpd	r3, r4
2712	ble	67f
2713	mtspr	SPRN_DEC, r4
271467:
2715	/* save expiry time of guest decrementer */
2716	add	r3, r3, r5
2717	ld	r4, HSTATE_KVM_VCPU(r13)
2718	ld	r5, HSTATE_KVM_VCORE(r13)
2719	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2720	subf	r3, r6, r3	/* convert to host TB value */
2721	std	r3, VCPU_DEC_EXPIRES(r4)
2722
2723#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2724	ld	r4, HSTATE_KVM_VCPU(r13)
2725	addi	r3, r4, VCPU_TB_CEDE
2726	bl	kvmhv_accumulate_time
2727#endif
2728
2729	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
2730
2731	 * Take a nap until a decrementer, external or doorbell interrupt
2732	 * Take a nap until a decrementer or external or doobell interrupt
2733	 * occurs, with PECE1 and PECE0 set in LPCR.
2734	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
2735	 * Also clear the runlatch bit before napping.
2736	 */
2737kvm_do_nap:
2738	mfspr	r0, SPRN_CTRLF
2739	clrrdi	r0, r0, 1
2740	mtspr	SPRN_CTRLT, r0
2741
2742	li	r0,1
2743	stb	r0,HSTATE_HWTHREAD_REQ(r13)
2744	mfspr	r5,SPRN_LPCR
2745	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
2746BEGIN_FTR_SECTION
2747	ori	r5, r5, LPCR_PECEDH
2748	rlwimi	r5, r3, 0, LPCR_PECEDP
2749END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2750
2751kvm_nap_sequence:		/* desired LPCR value in r5 */
2752BEGIN_FTR_SECTION
2753	/*
2754	 * PSSCR bits:	exit criterion = 1 (wakeup based on LPCR at sreset)
2755	 *		enable state loss = 1 (allow SMT mode switch)
2756	 *		requested level = 0 (just stop dispatching)
2757	 */
2758	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
2759	mtspr	SPRN_PSSCR, r3
2760	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
2761	li	r4, LPCR_PECE_HVEE@higher
2762	sldi	r4, r4, 32
2763	or	r5, r5, r4
2764END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2765	mtspr	SPRN_LPCR,r5
2766	isync
2767	li	r0, 0
2768	std	r0, HSTATE_SCRATCH0(r13)
2769	ptesync
2770	ld	r0, HSTATE_SCRATCH0(r13)
27711:	cmpd	r0, r0
2772	bne	1b
2773BEGIN_FTR_SECTION
2774	nap
2775FTR_SECTION_ELSE
2776	PPC_STOP
2777ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
2778	b	.
2779
278033:	mr	r4, r3
2781	li	r3, 0
2782	li	r12, 0
2783	b	34f
2784
2785kvm_end_cede:
2786	/* get vcpu pointer */
2787	ld	r4, HSTATE_KVM_VCPU(r13)
2788
2789	/* Woken by external or decrementer interrupt */
2790	ld	r1, HSTATE_HOST_R1(r13)
2791
2792#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2793	addi	r3, r4, VCPU_TB_RMINTR
2794	bl	kvmhv_accumulate_time
2795#endif
2796
2797#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2798/*
2799 * Branch around the call if both CPU_FTR_TM and
2800 * CPU_FTR_P9_TM_HV_ASSIST are off.
2801 */
2802BEGIN_FTR_SECTION
2803	b	91f
2804END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2805	/*
2806	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
2807	 */
2808	bl	kvmppc_restore_tm
280991:
2810#endif
2811
2812	/* load up FP state */
2813	bl	kvmppc_load_fp
2814
2815	/* Restore guest decrementer */
2816	ld	r3, VCPU_DEC_EXPIRES(r4)
2817	ld	r5, HSTATE_KVM_VCORE(r13)
2818	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2819	add	r3, r3, r6	/* convert host TB to guest TB value */
2820	mftb	r7
2821	subf	r3, r7, r3
2822	mtspr	SPRN_DEC, r3
2823
2824	/* Load NV GPRS */
2825	ld	r14, VCPU_GPR(R14)(r4)
2826	ld	r15, VCPU_GPR(R15)(r4)
2827	ld	r16, VCPU_GPR(R16)(r4)
2828	ld	r17, VCPU_GPR(R17)(r4)
2829	ld	r18, VCPU_GPR(R18)(r4)
2830	ld	r19, VCPU_GPR(R19)(r4)
2831	ld	r20, VCPU_GPR(R20)(r4)
2832	ld	r21, VCPU_GPR(R21)(r4)
2833	ld	r22, VCPU_GPR(R22)(r4)
2834	ld	r23, VCPU_GPR(R23)(r4)
2835	ld	r24, VCPU_GPR(R24)(r4)
2836	ld	r25, VCPU_GPR(R25)(r4)
2837	ld	r26, VCPU_GPR(R26)(r4)
2838	ld	r27, VCPU_GPR(R27)(r4)
2839	ld	r28, VCPU_GPR(R28)(r4)
2840	ld	r29, VCPU_GPR(R29)(r4)
2841	ld	r30, VCPU_GPR(R30)(r4)
2842	ld	r31, VCPU_GPR(R31)(r4)
2843
2844	/* Check the wake reason in SRR1 to see why we got here */
2845	bl	kvmppc_check_wake_reason
2846
2847	/*
2848	 * Restore volatile registers since we could have called a
2849	 * C routine in kvmppc_check_wake_reason
2850	 *	r4 = VCPU
2851	 * r3 tells us whether we need to return to host or not
2852	 * WARNING: r3 is checked further down;
2853	 * do not modify it until that check is done.
2854	 */
2855	ld	r4, HSTATE_KVM_VCPU(r13)
2856
2857	/* clear our bit in vcore->napping_threads */
285834:	ld	r5,HSTATE_KVM_VCORE(r13)
2859	lbz	r7,HSTATE_PTID(r13)
2860	li	r0,1
2861	sld	r0,r0,r7
2862	addi	r6,r5,VCORE_NAPPING_THREADS
286332:	lwarx	r7,0,r6
2864	andc	r7,r7,r0
2865	stwcx.	r7,0,r6
2866	bne	32b
2867	li	r0,0
2868	stb	r0,HSTATE_NAPPING(r13)
2869
2870	/* See if the wake reason saved in r3 means we need to exit */
2871	stw	r12, VCPU_TRAP(r4)
2872	mr	r9, r4
2873	cmpdi	r3, 0
2874	bgt	guest_exit_cont
2875
2876	/* see if any other thread is already exiting */
2877	lwz	r0,VCORE_ENTRY_EXIT(r5)
2878	cmpwi	r0,0x100
2879	bge	guest_exit_cont
2880
2881	b	kvmppc_cede_reentry	/* if not go back to guest */
2882
2883	/* Handle a cede when we have already been prodded */
2884kvm_cede_prodded:
2885	li	r0,0
2886	stb	r0,VCPU_PRODDED(r3)
2887	sync			/* order testing prodded vs. clearing ceded */
2888	stb	r0,VCPU_CEDED(r3)
2889	li	r3,H_SUCCESS
2890	blr
2891
2892	/* we've ceded but we want to give control to the host */
2893kvm_cede_exit:
2894	ld	r9, HSTATE_KVM_VCPU(r13)
2895#ifdef CONFIG_KVM_XICS
2896	/* Abort if we still have a pending escalation */
2897	lbz	r5, VCPU_XIVE_ESC_ON(r9)
2898	cmpwi	r5, 0
2899	beq	1f
2900	li	r0, 0
2901	stb	r0, VCPU_CEDED(r9)
29021:	/* Enable XIVE escalation */
2903	li	r5, XIVE_ESB_SET_PQ_00
2904	mfmsr	r0
2905	andi.	r0, r0, MSR_DR		/* in real mode? */
2906	beq	1f
2907	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
2908	cmpdi	r10, 0
2909	beq	3f
2910	ldx	r0, r10, r5
2911	b	2f
29121:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
2913	cmpdi	r10, 0
2914	beq	3f
2915	ldcix	r0, r10, r5
29162:	sync
2917	li	r0, 1
2918	stb	r0, VCPU_XIVE_ESC_ON(r9)
2919#endif /* CONFIG_KVM_XICS */
29203:	b	guest_exit_cont
2921
2922	/* Try to handle a machine check in real mode */
2923machine_check_realmode:
2924	mr	r3, r9		/* get vcpu pointer */
2925	bl	kvmppc_realmode_machine_check
2926	nop
2927	ld	r9, HSTATE_KVM_VCPU(r13)
2928	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
2929	/*
2930	 * If the guest is FWNMI capable, deliver all MCE errors
2931	 * (handled or unhandled) by exiting the guest with the KVM_EXIT_NMI
2932	 * exit reason.  This approach injects machine check errors into the
2933	 * guest address space, with additional information in the form of an
2934	 * RTAS event, so that the guest kernel can handle such errors
2935	 * suitably.
2936	 *
2937	 * If the guest is not FWNMI capable (old QEMU), fall back to the
2938	 * old behaviour for backward compatibility:
2939	 * deliver unhandled/fatal (e.g. UE) MCE errors to the guest
2940	 * through a machine check interrupt (set HSRR0 to 0x200).
2941	 * For handled (non-fatal) errors, just go back to guest execution
2942	 * with the current HSRR0.
2943	 * If we receive a machine check with MSR[RI]=0, deliver it to the
2944	 * guest as a machine check, causing the guest to crash.
2945	 */
2946	ld	r11, VCPU_MSR(r9)
2947	rldicl.	r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
2948	bne	mc_cont			/* if so, exit to host */
2949	/* Check if guest is capable of handling NMI exit */
2950	ld	r10, VCPU_KVM(r9)
2951	lbz	r10, KVM_FWNMI(r10)
2952	cmpdi	r10, 1			/* FWNMI capable? */
2953	beq	mc_cont			/* if so, exit with KVM_EXIT_NMI. */
2954
2955	/* if not, fall through for backward compatibility. */
2956	andi.	r10, r11, MSR_RI	/* check for unrecoverable exception */
2957	beq	1f			/* Deliver a machine check to guest */
2958	ld	r10, VCPU_PC(r9)
2959	cmpdi	r3, 0		/* Did we handle the MCE? */
2960	bne	2f	/* Continue guest execution. */
2961	/* If not, deliver a machine check.  SRR0/1 are already set */
29621:	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
2963	bl	kvmppc_msr_interrupt
29642:	b	fast_interrupt_c_return
2965
2966/*
2967 * Check the reason we woke from nap, and take appropriate action.
2968 * Returns (in r3):
2969 *	0 if nothing needs to be done
2970 *	1 if something happened that needs to be handled by the host
2971 *	-1 if there was a guest wakeup (IPI or msgsnd)
2972 *	-2 if we handled a PCI passthrough interrupt (returned by
2973 *		kvmppc_read_intr only)
2974 *
2975 * Also sets r12 to the interrupt vector for any interrupt that needs
2976 * to be handled now by the host (0x500 for external interrupt), or zero.
2977 * Modifies all volatile registers (since it may call a C function).
2978 * This routine calls kvmppc_read_intr, a C function, if an external
2979 * interrupt is pending.
2980 */
2981kvmppc_check_wake_reason:
2982	mfspr	r6, SPRN_SRR1
2983BEGIN_FTR_SECTION
2984	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
2985FTR_SECTION_ELSE
2986	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
2987ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
2988	cmpwi	r6, 8			/* was it an external interrupt? */
2989	beq	7f			/* if so, see what it was */
2990	li	r3, 0
2991	li	r12, 0
2992	cmpwi	r6, 6			/* was it the decrementer? */
2993	beq	0f
2994BEGIN_FTR_SECTION
2995	cmpwi	r6, 5			/* privileged doorbell? */
2996	beq	0f
2997	cmpwi	r6, 3			/* hypervisor doorbell? */
2998	beq	3f
2999END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3000	cmpwi	r6, 0xa			/* Hypervisor maintenance? */
3001	beq	4f
3002	li	r3, 1			/* anything else, return 1 */
30030:	blr
3004
3005	/* hypervisor doorbell */
30063:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
3007
3008	/*
3009	 * Clear the doorbell as we will invoke the handler
3010	 * explicitly in the guest exit path.
3011	 */
3012	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
3013	PPC_MSGCLR(6)
3014	/* see if it's a host IPI */
3015	li	r3, 1
3016BEGIN_FTR_SECTION
3017	PPC_MSGSYNC
3018	lwsync
3019END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
3020	lbz	r0, HSTATE_HOST_IPI(r13)
3021	cmpwi	r0, 0
3022	bnelr
3023	/* if not, return -1 */
3024	li	r3, -1
3025	blr
3026
3027	/* Woken up due to Hypervisor maintenance interrupt */
30284:	li	r12, BOOK3S_INTERRUPT_HMI
3029	li	r3, 1
3030	blr
3031
3032	/* external interrupt - create a stack frame so we can call C */
30337:	mflr	r0
3034	std	r0, PPC_LR_STKOFF(r1)
3035	stdu	r1, -PPC_MIN_STKFRM(r1)
3036	bl	kvmppc_read_intr
3037	nop
3038	li	r12, BOOK3S_INTERRUPT_EXTERNAL
3039	cmpdi	r3, 1
3040	ble	1f
3041
3042	/*
3043	 * A return code of 2 means a PCI passthrough interrupt, but
3044	 * we need to return to the host to complete handling the
3045	 * interrupt.  The guest exit code expects the trap reason
3046	 * in r12.
3047	 */
3048	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
30491:
3050	ld	r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
3051	addi	r1, r1, PPC_MIN_STKFRM
3052	mtlr	r0
3053	blr
3054
3055/*
3056 * Save away FP, VMX and VSX registers.
3057 * r3 = vcpu pointer
3058 * N.B. r30 and r31 are volatile across this function,
3059 * thus it is not callable from C.
3060 */
3061kvmppc_save_fp:
3062	mflr	r30
3063	mr	r31,r3
3064	mfmsr	r5
3065	ori	r8,r5,MSR_FP
3066#ifdef CONFIG_ALTIVEC
3067BEGIN_FTR_SECTION
3068	oris	r8,r8,MSR_VEC@h
3069END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3070#endif
3071#ifdef CONFIG_VSX
3072BEGIN_FTR_SECTION
3073	oris	r8,r8,MSR_VSX@h
3074END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3075#endif
3076	mtmsrd	r8
3077	addi	r3,r3,VCPU_FPRS
3078	bl	store_fp_state
3079#ifdef CONFIG_ALTIVEC
3080BEGIN_FTR_SECTION
3081	addi	r3,r31,VCPU_VRS
3082	bl	store_vr_state
3083END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3084#endif
3085	mfspr	r6,SPRN_VRSAVE
3086	stw	r6,VCPU_VRSAVE(r31)
3087	mtlr	r30
3088	blr
3089
3090/*
3091 * Load up FP, VMX and VSX registers
3092 * r4 = vcpu pointer
3093 * N.B. r30 and r31 are volatile across this function,
3094 * thus it is not callable from C.
3095 */
3096kvmppc_load_fp:
3097	mflr	r30
3098	mr	r31,r4
3099	mfmsr	r9
3100	ori	r8,r9,MSR_FP
3101#ifdef CONFIG_ALTIVEC
3102BEGIN_FTR_SECTION
3103	oris	r8,r8,MSR_VEC@h
3104END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3105#endif
3106#ifdef CONFIG_VSX
3107BEGIN_FTR_SECTION
3108	oris	r8,r8,MSR_VSX@h
3109END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3110#endif
3111	mtmsrd	r8
3112	addi	r3,r4,VCPU_FPRS
3113	bl	load_fp_state
3114#ifdef CONFIG_ALTIVEC
3115BEGIN_FTR_SECTION
3116	addi	r3,r31,VCPU_VRS
3117	bl	load_vr_state
3118END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3119#endif
3120	lwz	r7,VCPU_VRSAVE(r31)
3121	mtspr	SPRN_VRSAVE,r7
3122	mtlr	r30
3123	mr	r4,r31
3124	blr
3125
3126#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
3127/*
3128 * Save transactional state and TM-related registers.
3129 * Called with r9 pointing to the vcpu struct.
3130 * This can modify all checkpointed registers, but
3131 * restores r1, r2 and r9 (vcpu pointer) before exit.
3132 */
3133kvmppc_save_tm:
3134	mflr	r0
3135	std	r0, PPC_LR_STKOFF(r1)
3136	stdu	r1, -PPC_MIN_STKFRM(r1)
3137
3138	/* Turn on TM. */
3139	mfmsr	r8
3140	li	r0, 1
3141	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
3142	mtmsrd	r8
3143
3144	ld	r5, VCPU_MSR(r9)
3145	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
3146	beq	1f	/* TM not active in guest. */
3147
3148	std	r1, HSTATE_HOST_R1(r13)
3149	li	r3, TM_CAUSE_KVM_RESCHED
3150
3151BEGIN_FTR_SECTION
3152	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
3153	cmpwi	r0, 0
3154	beq	3f
3155	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
3156	beq	4f
3157BEGIN_FTR_SECTION_NESTED(96)
3158	bl	pnv_power9_force_smt4_catch
3159END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
3160	nop
3161	b	6f
31623:
3163	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
3164	mfspr	r6, SPRN_TEXASR
3165	std	r6, VCPU_ORIG_TEXASR(r9)
31666:
3167END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
3168
3169	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
3170	li	r5, 0
3171	mtmsrd	r5, 1
3172
3173	/* All GPRs are volatile at this point. */
3174	TRECLAIM(R3)
3175
3176	/* Temporarily store r13 and r9 so we have some regs to play with */
3177	SET_SCRATCH0(r13)
3178	GET_PACA(r13)
3179	std	r9, PACATMSCRATCH(r13)
3180
3181	/* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
3182BEGIN_FTR_SECTION
3183	lbz	r9, HSTATE_FAKE_SUSPEND(r13)
3184	cmpwi	r9, 0
3185	beq	2f
3186	/*
3187	 * We were in fake suspend, so we are not going to save the
3188	 * register state as the guest checkpointed state (we already
3189	 * have it); that means we can now use any volatile GPR.
3190	 */
3191	/* Reload stack pointer and TOC. */
3192	ld	r1, HSTATE_HOST_R1(r13)
3193	ld	r2, PACATOC(r13)
3194	/* Set MSR RI now we have r1 and r13 back. */
3195	li	r5, MSR_RI
3196	mtmsrd	r5, 1
3197	HMT_MEDIUM
3198	ld	r6, HSTATE_DSCR(r13)
3199	mtspr	SPRN_DSCR, r6
3200BEGIN_FTR_SECTION_NESTED(96)
3201	bl	pnv_power9_force_smt4_release
3202END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
3203	nop
3204
32054:
3206	mfspr	r3, SPRN_PSSCR
3207	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
3208	li	r0, PSSCR_FAKE_SUSPEND
3209	andc	r3, r3, r0
3210	mtspr	SPRN_PSSCR, r3
3211	ld	r9, HSTATE_KVM_VCPU(r13)
3212	/* Don't save TEXASR, use value from last exit in real suspend state */
3213	b	11f
32142:
3215END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
3216
3217	ld	r9, HSTATE_KVM_VCPU(r13)
3218
3219	/* Get a few more GPRs free. */
3220	std	r29, VCPU_GPRS_TM(29)(r9)
3221	std	r30, VCPU_GPRS_TM(30)(r9)
3222	std	r31, VCPU_GPRS_TM(31)(r9)
3223
3224	/* Save away PPR and DSCR soon so we don't run with user values. */
3225	mfspr	r31, SPRN_PPR
3226	HMT_MEDIUM
3227	mfspr	r30, SPRN_DSCR
3228	ld	r29, HSTATE_DSCR(r13)
3229	mtspr	SPRN_DSCR, r29
3230
3231	/* Save all but r9, r13 & r29-r31 */
3232	reg = 0
3233	.rept	29
3234	.if (reg != 9) && (reg != 13)
3235	std	reg, VCPU_GPRS_TM(reg)(r9)
3236	.endif
3237	reg = reg + 1
3238	.endr
3239	/* ... now save r13 */
3240	GET_SCRATCH0(r4)
3241	std	r4, VCPU_GPRS_TM(13)(r9)
3242	/* ... and save r9 */
3243	ld	r4, PACATMSCRATCH(r13)
3244	std	r4, VCPU_GPRS_TM(9)(r9)
3245
3246	/* Reload stack pointer and TOC. */
3247	ld	r1, HSTATE_HOST_R1(r13)
3248	ld	r2, PACATOC(r13)
3249
3250	/* Set MSR RI now we have r1 and r13 back. */
3251	li	r5, MSR_RI
3252	mtmsrd	r5, 1
3253
3254	/* Save away checkpointed SPRs. */
3255	std	r31, VCPU_PPR_TM(r9)
3256	std	r30, VCPU_DSCR_TM(r9)
3257	mflr	r5
3258	mfcr	r6
3259	mfctr	r7
3260	mfspr	r8, SPRN_AMR
3261	mfspr	r10, SPRN_TAR
3262	mfxer	r11
3263	std	r5, VCPU_LR_TM(r9)
3264	stw	r6, VCPU_CR_TM(r9)
3265	std	r7, VCPU_CTR_TM(r9)
3266	std	r8, VCPU_AMR_TM(r9)
3267	std	r10, VCPU_TAR_TM(r9)
3268	std	r11, VCPU_XER_TM(r9)
3269
3270	/* Restore r12 as trap number. */
3271	lwz	r12, VCPU_TRAP(r9)
3272
3273	/* Save FP/VSX. */
3274	addi	r3, r9, VCPU_FPRS_TM
3275	bl	store_fp_state
3276	addi	r3, r9, VCPU_VRS_TM
3277	bl	store_vr_state
3278	mfspr	r6, SPRN_VRSAVE
3279	stw	r6, VCPU_VRSAVE_TM(r9)
32801:
3281	/*
3282	 * We need to save these SPRs after the treclaim so that the software
3283	 * error code is recorded correctly in the TEXASR.  Also the user may
3284	 * change these outside of a transaction, so they must always be
3285	 * context switched.
3286	 */
3287	mfspr	r7, SPRN_TEXASR
3288	std	r7, VCPU_TEXASR(r9)
328911:
3290	mfspr	r5, SPRN_TFHAR
3291	mfspr	r6, SPRN_TFIAR
3292	std	r5, VCPU_TFHAR(r9)
3293	std	r6, VCPU_TFIAR(r9)
3294
3295	addi	r1, r1, PPC_MIN_STKFRM
3296	ld	r0, PPC_LR_STKOFF(r1)
3297	mtlr	r0
3298	blr
3299
3300/*
3301 * Restore transactional state and TM-related registers.
3302 * Called with r4 pointing to the vcpu struct.
3303 * This potentially modifies all checkpointed registers.
3304 * It restores r1, r2, r4 from the PACA.
3305 */
3306kvmppc_restore_tm:
3307	mflr	r0
3308	std	r0, PPC_LR_STKOFF(r1)
3309
3310	/* Turn on TM/FP/VSX/VMX so we can restore them. */
3311	mfmsr	r5
3312	li	r6, MSR_TM >> 32
3313	sldi	r6, r6, 32
3314	or	r5, r5, r6
3315	ori	r5, r5, MSR_FP
3316	oris	r5, r5, (MSR_VEC | MSR_VSX)@h
3317	mtmsrd	r5
3318
3319	/*
3320	 * The user may change these outside of a transaction, so they must
3321	 * always be context switched.
3322	 */
3323	ld	r5, VCPU_TFHAR(r4)
3324	ld	r6, VCPU_TFIAR(r4)
3325	ld	r7, VCPU_TEXASR(r4)
3326	mtspr	SPRN_TFHAR, r5
3327	mtspr	SPRN_TFIAR, r6
3328	mtspr	SPRN_TEXASR, r7
3329
3330	li	r0, 0
3331	stb	r0, HSTATE_FAKE_SUSPEND(r13)
3332	ld	r5, VCPU_MSR(r4)
3333	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
3334	beqlr		/* TM not active in guest */
3335	std	r1, HSTATE_HOST_R1(r13)
3336
3337	/* Make sure the failure summary is set, otherwise we'll program check
3338	 * when we trechkpt.  It's possible that this might not have been set
3339	 * by a kvmppc_set_one_reg() call, but we shouldn't let that crash the
3340	 * host.
3341	 */
3342	oris	r7, r7, (TEXASR_FS)@h
3343	mtspr	SPRN_TEXASR, r7
3344
3345	/*
3346	 * If we are doing TM emulation for the guest on a POWER9 DD2,
3347	 * then we don't actually do a trechkpt -- we either set up
3348	 * fake-suspend mode, or emulate a TM rollback.
3349	 */
3350BEGIN_FTR_SECTION
3351	b	.Ldo_tm_fake_load
3352END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
3353
3354	/*
3355	 * We need to load up the checkpointed state for the guest.
3356	 * Do this early, as it will blow away any GPRs, VSRs and
3357	 * some SPRs.
3358	 */
3359
3360	mr	r31, r4
3361	addi	r3, r31, VCPU_FPRS_TM
3362	bl	load_fp_state
3363	addi	r3, r31, VCPU_VRS_TM
3364	bl	load_vr_state
3365	mr	r4, r31
3366	lwz	r7, VCPU_VRSAVE_TM(r4)
3367	mtspr	SPRN_VRSAVE, r7
3368
3369	ld	r5, VCPU_LR_TM(r4)
3370	lwz	r6, VCPU_CR_TM(r4)
3371	ld	r7, VCPU_CTR_TM(r4)
3372	ld	r8, VCPU_AMR_TM(r4)
3373	ld	r9, VCPU_TAR_TM(r4)
3374	ld	r10, VCPU_XER_TM(r4)
3375	mtlr	r5
3376	mtcr	r6
3377	mtctr	r7
3378	mtspr	SPRN_AMR, r8
3379	mtspr	SPRN_TAR, r9
3380	mtxer	r10
3381
3382	/*
3383	 * Load up PPR and DSCR values but don't put them in the actual SPRs
3384	 * till the last moment to avoid running with userspace PPR and DSCR for
3385	 * too long.
3386	 */
3387	ld	r29, VCPU_DSCR_TM(r4)
3388	ld	r30, VCPU_PPR_TM(r4)
3389
3390	std	r2, PACATMSCRATCH(r13) /* Save TOC */
3391
3392	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
3393	li	r5, 0
3394	mtmsrd	r5, 1
3395
3396	/* Load GPRs r0-r28 */
3397	reg = 0
3398	.rept	29
3399	ld	reg, VCPU_GPRS_TM(reg)(r31)
3400	reg = reg + 1
3401	.endr
3402
3403	mtspr	SPRN_DSCR, r29
3404	mtspr	SPRN_PPR, r30
3405
3406	/* Load final GPRs */
3407	ld	29, VCPU_GPRS_TM(29)(r31)
3408	ld	30, VCPU_GPRS_TM(30)(r31)
3409	ld	31, VCPU_GPRS_TM(31)(r31)
3410
3411	/* TM checkpointed state is now setup.  All GPRs are now volatile. */
3412	TRECHKPT
3413
3414	/* Now let's get back the state we need. */
3415	HMT_MEDIUM
3416	GET_PACA(r13)
3417	ld	r29, HSTATE_DSCR(r13)
3418	mtspr	SPRN_DSCR, r29
3419	ld	r4, HSTATE_KVM_VCPU(r13)
3420	ld	r1, HSTATE_HOST_R1(r13)
3421	ld	r2, PACATMSCRATCH(r13)
3422
3423	/* Set the MSR RI since we have our registers back. */
3424	li	r5, MSR_RI
3425	mtmsrd	r5, 1
34269:
3427	ld	r0, PPC_LR_STKOFF(r1)
3428	mtlr	r0
3429	blr
3430
3431.Ldo_tm_fake_load:
3432	cmpwi	r5, 1		/* check for suspended state */
3433	bgt	10f
3434	stb	r5, HSTATE_FAKE_SUSPEND(r13)
3435	b	9b		/* and return */
343610:	stdu	r1, -PPC_MIN_STKFRM(r1)
3437	/* guest is in transactional state, so simulate rollback */
3438	mr	r3, r4
3439	bl	kvmhv_emulate_tm_rollback
3440	nop
3441	ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
3442	addi	r1, r1, PPC_MIN_STKFRM
3443	b	9b
3444#endif
3445
3446/*
3447 * We come here if we get any exception or interrupt while we are
3448 * executing host real mode code while in guest MMU context.
3449 * r12 is (CR << 32) | vector
3450 * r13 points to our PACA
3451 * r12 is saved in HSTATE_SCRATCH0(r13)
3452 * ctr is saved in HSTATE_SCRATCH1(r13) if RELOCATABLE
3453 * r9 is saved in HSTATE_SCRATCH2(r13)
3454 * r13 is saved in HSPRG1
3455 * cfar is saved in HSTATE_CFAR(r13)
3456 * ppr is saved in HSTATE_PPR(r13)
3457 */
3458kvmppc_bad_host_intr:
3459	/*
3460	 * Switch to the emergency stack, but start half-way down in
3461	 * case we were already on it.
3462	 */
3463	mr	r9, r1
3464	std	r1, PACAR1(r13)
3465	ld	r1, PACAEMERGSP(r13)
3466	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
3467	std	r9, 0(r1)
3468	std	r0, GPR0(r1)
3469	std	r9, GPR1(r1)
3470	std	r2, GPR2(r1)
3471	SAVE_4GPRS(3, r1)
3472	SAVE_2GPRS(7, r1)
3473	srdi	r0, r12, 32
3474	clrldi	r12, r12, 32
3475	std	r0, _CCR(r1)
3476	std	r12, _TRAP(r1)
3477	andi.	r0, r12, 2
3478	beq	1f
3479	mfspr	r3, SPRN_HSRR0
3480	mfspr	r4, SPRN_HSRR1
3481	mfspr	r5, SPRN_HDAR
3482	mfspr	r6, SPRN_HDSISR
3483	b	2f
34841:	mfspr	r3, SPRN_SRR0
3485	mfspr	r4, SPRN_SRR1
3486	mfspr	r5, SPRN_DAR
3487	mfspr	r6, SPRN_DSISR
34882:	std	r3, _NIP(r1)
3489	std	r4, _MSR(r1)
3490	std	r5, _DAR(r1)
3491	std	r6, _DSISR(r1)
3492	ld	r9, HSTATE_SCRATCH2(r13)
3493	ld	r12, HSTATE_SCRATCH0(r13)
3494	GET_SCRATCH0(r0)
3495	SAVE_4GPRS(9, r1)
3496	std	r0, GPR13(r1)
3497	SAVE_NVGPRS(r1)
3498	ld	r5, HSTATE_CFAR(r13)
3499	std	r5, ORIG_GPR3(r1)
3500	mflr	r3
3501#ifdef CONFIG_RELOCATABLE
3502	ld	r4, HSTATE_SCRATCH1(r13)
3503#else
3504	mfctr	r4
3505#endif
3506	mfxer	r5
3507	lbz	r6, PACAIRQSOFTMASK(r13)
3508	std	r3, _LINK(r1)
3509	std	r4, _CTR(r1)
3510	std	r5, _XER(r1)
3511	std	r6, SOFTE(r1)
3512	ld	r2, PACATOC(r13)
3513	LOAD_REG_IMMEDIATE(3, 0x7265677368657265)
3514	std	r3, STACK_FRAME_OVERHEAD-16(r1)
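	/*
	 * 0x7265677368657265 is ASCII "regshere", the marker word the
	 * kernel stores next to saved register frames so they are easy to
	 * spot in a memory dump.
	 */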
3515
3516	/*
3517	 * On POWER9 do a minimal restore of the MMU and call C code,
3518	 * which will print a message and panic.
3519	 * XXX On POWER7 and POWER8, we just spin here since we don't
3520	 * know what the other threads are doing (and we don't want to
3521	 * coordinate with them) - but at least we now have register state
3522	 * in memory that we might be able to look at from another CPU.
3523	 */
3524BEGIN_FTR_SECTION
3525	b	.
3526END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
3527	ld	r9, HSTATE_KVM_VCPU(r13)
3528	ld	r10, VCPU_KVM(r9)
3529
3530	li	r0, 0
3531	mtspr	SPRN_AMR, r0
3532	mtspr	SPRN_IAMR, r0
3533	mtspr	SPRN_CIABR, r0
3534	mtspr	SPRN_DAWRX, r0
3535
3536	/* Flush the ERAT on radix P9 DD1 guest exit */
3537BEGIN_FTR_SECTION
3538	PPC_INVALIDATE_ERAT
3539END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
3540
3541BEGIN_MMU_FTR_SECTION
3542	b	4f
3543END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3544
3545	slbmte	r0, r0
3546	slbia
3547	ptesync
3548	ld	r8, PACA_SLBSHADOWPTR(r13)
3549	.rept	SLB_NUM_BOLTED
3550	li	r3, SLBSHADOW_SAVEAREA
3551	LDX_BE	r5, r8, r3
3552	addi	r3, r3, 8
3553	LDX_BE	r6, r8, r3
3554	andis.	r7, r5, SLB_ESID_V@h
3555	beq	3f
3556	slbmte	r6, r5
35573:	addi	r8, r8, 16
3558	.endr
3559
35604:	lwz	r7, KVM_HOST_LPID(r10)
3561	mtspr	SPRN_LPID, r7
3562	mtspr	SPRN_PID, r0
3563	ld	r8, KVM_HOST_LPCR(r10)
3564	mtspr	SPRN_LPCR, r8
3565	isync
3566	li	r0, KVM_GUEST_MODE_NONE
3567	stb	r0, HSTATE_IN_GUEST(r13)
3568
3569	/*
3570	 * Turn on the MMU and jump to C code
3571	 */
3572	bcl	20, 31, .+4
35735:	mflr	r3
3574	addi	r3, r3, 9f - 5b
3575	ld	r4, PACAKMSR(r13)
3576	mtspr	SPRN_SRR0, r3
3577	mtspr	SPRN_SRR1, r4
3578	RFI_TO_KERNEL
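	/*
	 * The bcl 20,31,.+4 above puts the current address in LR, so r3
	 * can be turned into the runtime address of label 9; SRR1 is loaded
	 * from PACAKMSR (kernel MSR with IR/DR set), so the RFI_TO_KERNEL
	 * lands at 9: with the MMU on.
	 */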
35799:	addi	r3, r1, STACK_FRAME_OVERHEAD
3580	bl	kvmppc_bad_interrupt
3581	b	9b
3582
3583/*
3584 * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
3585 * from VCPU_INTR_MSR and is modified based on the required TM state changes.
3586 *   r11 has the guest MSR value (in/out)
3587 *   r9 has a vcpu pointer (in)
3588 *   r0 is used as a scratch register
3589 */
3590kvmppc_msr_interrupt:
3591	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62
3592	cmpwi	r0, 2 /* Check if we are in transactional state... */
3593	ld	r11, VCPU_INTR_MSR(r9)
3594	bne	1f
3595	/* ... if transactional, change to suspended */
3596	li	r0, 1
35971:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
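	/*
	 * A guest that was transactional (TS=10) is left suspended (TS=01);
	 * any other TS value is carried over unchanged into the new MSR.
	 */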
3598	blr
3599
3600/*
3601 * This works around a hardware bug on POWER8E processors, where
3602 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
3603 * performance monitor interrupt.  Instead, when we need to have
3604 * an interrupt pending, we have to arrange for a counter to overflow.
3605 */
3606kvmppc_fix_pmao:
3607	li	r3, 0
3608	mtspr	SPRN_MMCR2, r3
3609	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
3610	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
3611	mtspr	SPRN_MMCR0, r3
3612	lis	r3, 0x7fff
3613	ori	r3, r3, 0xffff
3614	mtspr	SPRN_PMC6, r3
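	/*
	 * PMC6 is preloaded with 0x7fffffff so a single further count sets
	 * its high bit, i.e. the counter overflow condition; with
	 * MMCR0_PMXE set this raises the performance monitor interrupt
	 * that a direct write of MMCR0[PMAO] fails to generate.
	 */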
3615	isync
3616	blr
3617
3618#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
3619/*
3620 * Start timing an activity
3621 * r3 = pointer to time accumulation struct, r4 = vcpu
3622 */
3623kvmhv_start_timing:
3624	ld	r5, HSTATE_KVM_VCORE(r13)
3625	ld	r6, VCORE_TB_OFFSET_APPL(r5)
3626	mftb	r5
3627	subf	r5, r6, r5	/* subtract current timebase offset */
3628	std	r3, VCPU_CUR_ACTIVITY(r4)
3629	std	r5, VCPU_ACTIVITY_START(r4)
3630	blr
3631
3632/*
3633 * Accumulate time to one activity and start another.
3634 * r3 = pointer to new time accumulation struct, r4 = vcpu
3635 */
3636kvmhv_accumulate_time:
3637	ld	r5, HSTATE_KVM_VCORE(r13)
3638	ld	r8, VCORE_TB_OFFSET_APPL(r5)
3639	ld	r5, VCPU_CUR_ACTIVITY(r4)
3640	ld	r6, VCPU_ACTIVITY_START(r4)
3641	std	r3, VCPU_CUR_ACTIVITY(r4)
3642	mftb	r7
3643	subf	r7, r8, r7	/* subtract current timebase offset */
3644	std	r7, VCPU_ACTIVITY_START(r4)
3645	cmpdi	r5, 0
3646	beqlr
3647	subf	r3, r6, r7
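	/*
	 * TAS_SEQCOUNT is bumped to an odd value before the update and back
	 * to an even value afterwards (with lwsync barriers), so a reader
	 * can detect a torn sample and retry; the cmpdi against 0 below
	 * makes the very first sample initialise TAS_MIN directly.
	 */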
3648	ld	r8, TAS_SEQCOUNT(r5)
3649	cmpdi	r8, 0
3650	addi	r8, r8, 1
3651	std	r8, TAS_SEQCOUNT(r5)
3652	lwsync
3653	ld	r7, TAS_TOTAL(r5)
3654	add	r7, r7, r3
3655	std	r7, TAS_TOTAL(r5)
3656	ld	r6, TAS_MIN(r5)
3657	ld	r7, TAS_MAX(r5)
3658	beq	3f
3659	cmpd	r3, r6
3660	bge	1f
36613:	std	r3, TAS_MIN(r5)
36621:	cmpd	r3, r7
3663	ble	2f
3664	std	r3, TAS_MAX(r5)
36652:	lwsync
3666	addi	r8, r8, 1
3667	std	r8, TAS_SEQCOUNT(r5)
3668	blr
3669#endif
3670