xref: /openbmc/linux/arch/powerpc/kvm/book3s_hv_rmhandlers.S (revision a1dff44b354c0e2721aeae075a287d07daf1c76b)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *
4 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
5 *
6 * Derived from book3s_rmhandlers.S and other files, which are:
7 *
8 * Copyright SUSE Linux Products GmbH 2009
9 *
10 * Authors: Alexander Graf <agraf@suse.de>
11 */
12
13#include <asm/ppc_asm.h>
14#include <asm/code-patching-asm.h>
15#include <asm/kvm_asm.h>
16#include <asm/reg.h>
17#include <asm/mmu.h>
18#include <asm/page.h>
19#include <asm/ptrace.h>
20#include <asm/hvcall.h>
21#include <asm/asm-offsets.h>
22#include <asm/exception-64s.h>
23#include <asm/kvm_book3s_asm.h>
24#include <asm/book3s/64/mmu-hash.h>
25#include <asm/export.h>
26#include <asm/tm.h>
27#include <asm/opal.h>
28#include <asm/xive-regs.h>
29#include <asm/thread_info.h>
30#include <asm/asm-compat.h>
31#include <asm/feature-fixups.h>
32#include <asm/cpuidle.h>
33#include <asm/ultravisor-api.h>
34
35/* Sign-extend HDEC if not on POWER9 */
/*
 * EXTEND_HDEC(reg): wraps "extsw reg, reg" in a feature section keyed on
 * CPU_FTR_ARCH_300 being clear.  In this file it is applied right after
 * "mfspr reg, SPRN_HDEC" and before a signed 64-bit compare (cmpdi).
 */
36#define EXTEND_HDEC(reg)			\
37BEGIN_FTR_SECTION;				\
38	extsw	reg, reg;			\
39END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
40
41/* Values in HSTATE_NAPPING(r13) */
42#define NAPPING_CEDE	1	/* NOTE(review): not stored by any code in the part of the file reviewed here */
43#define NAPPING_NOVCPU	2	/* stored by kvmppc_primary_no_guest before branching to kvm_do_nap */
44#define NAPPING_UNSPLIT	3	/* stored by kvm_unsplit_nap before branching to kvm_nap_sequence */
45
46/* Stack frame offsets for kvmppc_hv_entry */
47#define SFS			208	/* frame size, allocated with "stdu r1, -SFS(r1)" */
48#define STACK_SLOT_TRAP		(SFS-4)	/* 32-bit trap number (written with stw) */
49#define STACK_SLOT_SHORT_PATH	(SFS-8)	/* 32-bit flag: 1 from __kvmhv_vcpu_entry_p9, 0 from kvmppc_hv_entry */
/* 64-bit slots below hold host SPR values saved in kvmppc_got_guest */
50#define STACK_SLOT_TID		(SFS-16)
51#define STACK_SLOT_PSSCR	(SFS-24)
52#define STACK_SLOT_PID		(SFS-32)
53#define STACK_SLOT_IAMR		(SFS-40)
54#define STACK_SLOT_CIABR	(SFS-48)
55#define STACK_SLOT_DAWR0	(SFS-56)
56#define STACK_SLOT_DAWRX0	(SFS-64)
57#define STACK_SLOT_HFSCR	(SFS-72)
58#define STACK_SLOT_AMR		(SFS-80)
59#define STACK_SLOT_UAMOR	(SFS-88)
60#define STACK_SLOT_DAWR1	(SFS-96)
61#define STACK_SLOT_DAWRX1	(SFS-104)
62/* the following is used by the P9 short path */
/* r14..r31 are stored here at 8 bytes each by __kvmhv_vcpu_entry_p9 */
63#define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
64
65/*
66 * Call kvmppc_hv_entry in real mode.
67 * Must be called with interrupts hard-disabled.
68 *
69 * Input Registers:
70 *
71 * LR = return address to continue at after eventually re-enabling MMU
72 */
73_GLOBAL_TOC(kvmppc_hv_entry_trampoline)
	/* Save the return address and open a 112-byte frame (popped in kvmppc_call_hv_entry) */
74	mflr	r0
75	std	r0, PPC_LR_STKOFF(r1)
76	stdu	r1, -112(r1)
	/* Remember the caller's MSR so it can be reinstated on the way back */
77	mfmsr	r10
78	std	r10, HSTATE_HOST_MSR(r13)
79	LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
	/* r0 = current MSR with RI cleared; r6 = current MSR with IR and DR cleared */
80	li	r0,MSR_RI
81	andc	r0,r10,r0
82	li	r6,MSR_IR | MSR_DR
83	andc	r6,r10,r6
84	mtmsrd	r0,1		/* clear RI in MSR */
	/* SRR0/SRR1 = target address / target MSR; the rfi continues at kvmppc_call_hv_entry */
85	mtsrr0	r5
86	mtsrr1	r6
87	RFI_TO_KERNEL
88
89kvmppc_call_hv_entry:
	/*
	 * Target of the rfi in kvmppc_hv_entry_trampoline.  Loads the vcpu
	 * pointer into r4 and calls kvmppc_hv_entry; on return, host state is
	 * restored and control goes back to the trampoline's caller with the
	 * trap number (r12) in r3.
	 */
90	ld	r4, HSTATE_KVM_VCPU(r13)
91	bl	kvmppc_hv_entry
92
93	/* Back from guest - restore host state and return to caller */
94
95BEGIN_FTR_SECTION
96	/* Restore host DABR and DABRX */
97	ld	r5,HSTATE_DABR(r13)
98	li	r6,7
99	mtspr	SPRN_DABR,r5
100	mtspr	SPRN_DABRX,r6
101END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
102
103	/* Restore SPRG3 */
104	ld	r3,PACA_SPRG_VDSO(r13)
105	mtspr	SPRN_SPRG_VDSO_WRITE,r3
106
107	/* Reload the host's PMU registers */
108	bl	kvmhv_load_host_pmu
109
110	/*
111	 * Reload DEC.  HDEC interrupts were disabled when
112	 * we reloaded the host's LPCR value.
113	 */
	/* DEC = HSTATE_DECEXP - current timebase */
114	ld	r3, HSTATE_DECEXP(r13)
115	mftb	r4
116	subf	r4, r4, r3
117	mtspr	SPRN_DEC, r4
118
119	/* hwthread_req may have got set by cede or no vcpu, so clear it */
120	li	r0, 0
121	stb	r0, HSTATE_HWTHREAD_REQ(r13)
122
123	/*
124	 * For external interrupts we need to call the Linux
125	 * handler to process the interrupt. We do that by jumping
126	 * to absolute address 0x500 for external interrupts.
127	 * The [h]rfid at the end of the handler will return to
128	 * the book3s_hv_interrupts.S code. For other interrupts
129	 * we do the rfid to get back to the book3s_hv_interrupts.S
130	 * code here.
131	 */
	/* r8 = LR saved by the trampoline; pop its 112-byte frame; r7 = saved host MSR */
132	ld	r8, 112+PPC_LR_STKOFF(r1)
133	addi	r1, r1, 112
134	ld	r7, HSTATE_HOST_MSR(r13)
135
136	/* Return the trap number on this thread as the return value */
137	mr	r3, r12
138
139	/*
140	 * If we came back from the guest via a relocation-on interrupt,
141	 * we will be in virtual mode at this point, which makes it a
142	 * little easier to get back to the caller.
143	 */
144	mfmsr	r0
145	andi.	r0, r0, MSR_IR		/* in real mode? */
146	bne	.Lvirt_return
147
148	/* RFI into the highmem handler */
	/* SRR0 = saved return address (r8), SRR1 = saved host MSR (r7) */
149	mfmsr	r6
150	li	r0, MSR_RI
151	andc	r6, r6, r0
152	mtmsrd	r6, 1			/* Clear RI in MSR */
153	mtsrr0	r8
154	mtsrr1	r7
155	RFI_TO_KERNEL
156
157	/* Virtual-mode return */
	/* MSR_IR is already set, so a plain blr to the saved LR is enough */
158.Lvirt_return:
159	mtlr	r8
160	blr
161
162kvmppc_primary_no_guest:
	/*
	 * Reached from kvmppc_hv_entry / kvm_novcpu_wakeup when the vcpu
	 * pointer in r4 is NULL.  Marks this thread as napping and goes to
	 * kvm_do_nap, unless the entry/exit map shows an exit has started.
	 */
163	/* We handle this much like a ceded vcpu */
164	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
165	/* HDEC may be larger than DEC for arch >= v3.00, but since the */
166	/* HDEC value came from DEC in the first place, it will fit */
167	mfspr	r3, SPRN_HDEC
168	mtspr	SPRN_DEC, r3
169	/*
170	 * Make sure the primary has finished the MMU switch.
171	 * We should never get here on a secondary thread, but
172	 * check it for robustness' sake.
173	 */
	/* Spin until the vcore's in_guest byte becomes non-zero */
174	ld	r5, HSTATE_KVM_VCORE(r13)
17565:	lbz	r0, VCORE_IN_GUEST(r5)
176	cmpwi	r0, 0
177	beq	65b
178	/* Set LPCR. */
179	ld	r8,VCORE_LPCR(r5)
180	mtspr	SPRN_LPCR,r8
181	isync
182	/* set our bit in napping_threads */
	/* r0 = 1 << ptid, OR-ed into the word with a lwarx/stwcx. retry loop */
183	ld	r5, HSTATE_KVM_VCORE(r13)
184	lbz	r7, HSTATE_PTID(r13)
185	li	r0, 1
186	sld	r0, r0, r7
187	addi	r6, r5, VCORE_NAPPING_THREADS
1881:	lwarx	r3, 0, r6
189	or	r3, r3, r0
190	stwcx.	r3, 0, r6
191	bne	1b
192	/* order napping_threads update vs testing entry_exit_map */
193	isync
	/* r12 = 0 is the trap value kvm_novcpu_exit will report if we bail out here */
194	li	r12, 0
195	lwz	r7, VCORE_ENTRY_EXIT(r5)
196	cmpwi	r7, 0x100
197	bge	kvm_novcpu_exit	/* another thread already exiting */
198	li	r3, NAPPING_NOVCPU
199	stb	r3, HSTATE_NAPPING(r13)
200
201	li	r3, 0		/* Don't wake on privileged (OS) doorbell */
202	b	kvm_do_nap
203
204/*
205 * kvm_novcpu_wakeup
206 *	Entered from kvm_start_guest if kvm_hstate.napping is set
207 *	to NAPPING_NOVCPU
208 *		r2 = kernel TOC
209 *		r13 = paca
210 */
211kvm_novcpu_wakeup:
	/* Reload the host stack pointer saved by kvmppc_hv_entry and clear the napping state */
212	ld	r1, HSTATE_HOST_R1(r13)
213	ld	r5, HSTATE_KVM_VCORE(r13)
214	li	r0, 0
215	stb	r0, HSTATE_NAPPING(r13)
216
217	/* check the wake reason */
218	bl	kvmppc_check_wake_reason
219
220	/*
221	 * Restore volatile registers since we could have called
222	 * a C routine in kvmppc_check_wake_reason.
223	 *	r5 = VCORE
224	 */
225	ld	r5, HSTATE_KVM_VCORE(r13)
226
227	/* see if any other thread is already exiting */
228	lwz	r0, VCORE_ENTRY_EXIT(r5)
229	cmpwi	r0, 0x100
230	bge	kvm_novcpu_exit
231
232	/* clear our bit in napping_threads */
	/* r0 = 1 << ptid, cleared from the word with a lwarx/stwcx. retry loop */
233	lbz	r7, HSTATE_PTID(r13)
234	li	r0, 1
235	sld	r0, r0, r7
236	addi	r6, r5, VCORE_NAPPING_THREADS
2374:	lwarx	r7, 0, r6
238	andc	r7, r7, r0
239	stwcx.	r7, 0, r6
240	bne	4b
241
242	/* See if the wake reason means we need to exit */
	/* r3 is still the value returned by kvmppc_check_wake_reason (untouched above) */
243	cmpdi	r3, 0
244	bge	kvm_novcpu_exit
245
246	/* See if our timeslice has expired (HDEC is negative) */
247	mfspr	r0, SPRN_HDEC
248	EXTEND_HDEC(r0)
	/* r12 is set before the test so kvm_novcpu_exit sees the HV decrementer trap */
249	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
250	cmpdi	r0, 0
251	blt	kvm_novcpu_exit
252
253	/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
	/* Still no vcpu assigned: go back to napping via kvmppc_primary_no_guest */
254	ld	r4, HSTATE_KVM_VCPU(r13)
255	cmpdi	r4, 0
256	beq	kvmppc_primary_no_guest
257
258#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
259	addi	r3, r4, VCPU_TB_RMENTRY
260	bl	kvmhv_start_timing
261#endif
262	b	kvmppc_got_guest
263
264kvm_novcpu_exit:
	/*
	 * Exit path for a thread that has no vcpu to run.  On entry r12 holds
	 * the trap number to report; it is passed in r3 to kvmhv_commence_exit
	 * and recorded in the STACK_SLOT_TRAP slot of the current frame.
	 */
265#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
	/* Only accumulate exit timing when a vcpu pointer is present */
266	ld	r4, HSTATE_KVM_VCPU(r13)
267	cmpdi	r4, 0
268	beq	13f
269	addi	r3, r4, VCPU_TB_RMEXIT
270	bl	kvmhv_accumulate_time
271#endif
27213:	mr	r3, r12
273	stw	r12, STACK_SLOT_TRAP(r1)
274	bl	kvmhv_commence_exit
275	nop
276	b	kvmhv_switch_to_host
277
278/*
279 * We come in here when wakened from Linux offline idle code.
280 * Relocation is off
281 * r3 contains the SRR1 wakeup value, SRR1 is trashed.
282 */
283_GLOBAL(idle_kvm_start_guest)
	/*
	 * Switch to the emergency stack: the caller's r1, CR and LR are stored
	 * at offsets 0, 8 and 16 from PACAEMERGSP, and the new r1 sits
	 * STACK_FRAME_OVERHEAD below that.  kvm_no_guest undoes this layout
	 * when returning to the caller.
	 */
284	ld	r4,PACAEMERGSP(r13)
285	mfcr	r5
286	mflr	r0
287	std	r1,0(r4)
288	std	r5,8(r4)
289	std	r0,16(r4)
290	subi	r1,r4,STACK_FRAME_OVERHEAD
291	SAVE_NVGPRS(r1)
292
293	/*
294	 * Could avoid this and pass it through in r3. For now,
295	 * code expects it to be in SRR1.
296	 */
297	mtspr	SPRN_SRR1,r3
298
	/* Clear the PACA ftrace-enabled byte */
299	li	r0,0
300	stb	r0,PACA_FTRACE_ENABLED(r13)
301
302	li	r0,KVM_HWTHREAD_IN_KVM
303	stb	r0,HSTATE_HWTHREAD_STATE(r13)
304
305	/* kvm cede / napping does not come through here */
	/* twnei traps if the napping byte is unexpectedly non-zero */
306	lbz	r0,HSTATE_NAPPING(r13)
307	twnei	r0,0
308
309	b	1f
310
	/*
	 * Alternate entry that first clears HSTATE_NAPPING, then joins the
	 * common path at 1: below.  Its caller is not part of the code
	 * reviewed here.
	 */
311kvm_unsplit_wakeup:
312	li	r0, 0
313	stb	r0, HSTATE_NAPPING(r13)
314
3151:
316
317	/*
318	 * We weren't napping due to cede, so this must be a secondary
319	 * thread being woken up to run a guest, or being woken up due
320	 * to a stray IPI.  (Or due to some machine check or hypervisor
321	 * maintenance interrupt while the core is in KVM.)
322	 */
323
324	/* Check the wake reason in SRR1 to see why we got here */
325	bl	kvmppc_check_wake_reason
326	/*
327	 * kvmppc_check_wake_reason could invoke a C routine, but we
328	 * have no volatile registers to restore when we return.
329	 */
330
	/* r3 >= 0 from kvmppc_check_wake_reason: no guest to enter, go to kvm_no_guest */
331	cmpdi	r3, 0
332	bge	kvm_no_guest
333
334	/* get vcore pointer, NULL if we have nothing to run */
335	ld	r5,HSTATE_KVM_VCORE(r13)
336	cmpdi	r5,0
337	/* if we have no vcore to run, go back to sleep */
338	beq	kvm_no_guest
	/* otherwise fall through into kvm_secondary_got_guest with r5 = vcore */
339
340kvm_secondary_got_guest:
	/*
	 * A secondary thread has a vcore to run (every path that reaches this
	 * label has just loaded a non-NULL HSTATE_KVM_VCORE into r5).  Sets up
	 * per-thread state, enters the guest through kvmppc_hv_entry, and on
	 * return clears the vcpu/vcore pointers before falling into
	 * kvm_no_guest.
	 */
341
342	/* Set HSTATE_DSCR(r13) to something sensible */
343	ld	r6, PACA_DSCR_DEFAULT(r13)
344	std	r6, HSTATE_DSCR(r13)
345
346	/* On thread 0 of a subcore, set HDEC to max */
347	lbz	r4, HSTATE_PTID(r13)
348	cmpwi	r4, 0
349	bne	63f
350	LOAD_REG_ADDR(r6, decrementer_max)
351	ld	r6, 0(r6)
352	mtspr	SPRN_HDEC, r6
353BEGIN_FTR_SECTION
354	/* and set per-LPAR registers, if doing dynamic micro-threading */
	/* A NULL split-mode pointer means there is nothing to load */
355	ld	r6, HSTATE_SPLIT_MODE(r13)
356	cmpdi	r6, 0
357	beq	63f
358	ld	r0, KVM_SPLIT_RPR(r6)
359	mtspr	SPRN_RPR, r0
360	ld	r0, KVM_SPLIT_PMMAR(r6)
361	mtspr	SPRN_PMMAR, r0
362	ld	r0, KVM_SPLIT_LDBAR(r6)
363	mtspr	SPRN_LDBAR, r0
364	isync
365END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
36663:
367	/* Order load of vcpu after load of vcore */
368	lwsync
369	ld	r4, HSTATE_KVM_VCPU(r13)
370	bl	kvmppc_hv_entry
371
372	/* Back from the guest, go back to nap */
373	/* Clear our vcpu and vcore pointers so we don't come back in early */
374	li	r0, 0
375	std	r0, HSTATE_KVM_VCPU(r13)
376	/*
377	 * Once we clear HSTATE_KVM_VCORE(r13), the code in
378	 * kvmppc_run_core() is going to assume that all our vcpu
379	 * state is visible in memory.  This lwsync makes sure
380	 * that that is true.
381	 */
382	lwsync
383	std	r0, HSTATE_KVM_VCORE(r13)
384
385	/*
386	 * All secondaries exiting guest will fall through this path.
387	 * Before proceeding, just check for HMI interrupt and
388	 * invoke opal hmi handler. By now we are sure that the
389	 * primary thread on this core/subcore has already made partition
390	 * switch/TB resync and we are good to call opal hmi handler.
391	 */
	/* r12 is the trap number left by the guest exit path */
392	cmpwi	r12, BOOK3S_INTERRUPT_HMI
393	bne	kvm_no_guest
394
395	li	r3,0			/* NULL argument */
396	bl	hmi_exception_realmode
	/* execution continues into kvm_no_guest, the next label in the file */
397/*
398 * At this point we have finished executing in the guest.
399 * We need to wait for hwthread_req to become zero, since
400 * we may not turn on the MMU while hwthread_req is non-zero.
401 * While waiting we also need to check if we get given a vcpu to run.
402 */
403kvm_no_guest:
	/* While hwthread_req is non-zero, keep polling at 53: for a vcore to run */
404	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
405	cmpwi	r3, 0
406	bne	53f
407	HMT_MEDIUM
408	li	r0, KVM_HWTHREAD_IN_KERNEL
409	stb	r0, HSTATE_HWTHREAD_STATE(r13)
410	/* need to recheck hwthread_req after a barrier, to avoid race */
411	sync
412	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
413	cmpwi	r3, 0
414	bne	54f
415
416	/*
417	 * Jump to idle_return_gpr_loss, which returns to the
418	 * idle_kvm_start_guest caller.
419	 */
	/* The rlwimi sets LPCR_PECE0 and clears LPCR_PECE1 in the LPCR value */
420	li	r3, LPCR_PECE0
421	mfspr	r4, SPRN_LPCR
422	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
423	mtspr	SPRN_LPCR, r4
424	/* set up r3 for return */
425	mfspr	r3,SPRN_SRR1
	/*
	 * Undo the frame built by idle_kvm_start_guest: restore the
	 * non-volatile GPRs, then LR (offset 16), CR (offset 8) and the
	 * caller's r1 (offset 0) from the save area.
	 */
426	REST_NVGPRS(r1)
427	addi	r1, r1, STACK_FRAME_OVERHEAD
428	ld	r0, 16(r1)
429	ld	r5, 8(r1)
430	ld	r1, 0(r1)
431	mtlr	r0
432	mtcr	r5
433	blr
434
	/*
	 * hwthread_req is set: poll at low thread priority.  A non-NULL vcore
	 * pointer means there is a guest to run.  In the pre-ARCH_300 variant a
	 * set do_nap flag in the split-mode structure sends us to
	 * kvm_unsplit_nap instead.
	 */
43553:
436BEGIN_FTR_SECTION
437	HMT_LOW
438	ld	r5, HSTATE_KVM_VCORE(r13)
439	cmpdi	r5, 0
440	bne	60f
441	ld	r3, HSTATE_SPLIT_MODE(r13)
442	cmpdi	r3, 0
443	beq	kvm_no_guest
444	lbz	r0, KVM_SPLIT_DO_NAP(r3)
445	cmpwi	r0, 0
446	beq	kvm_no_guest
447	HMT_MEDIUM
448	b	kvm_unsplit_nap
44960:	HMT_MEDIUM
450	b	kvm_secondary_got_guest
451FTR_SECTION_ELSE
452	HMT_LOW
453	ld	r5, HSTATE_KVM_VCORE(r13)
454	cmpdi	r5, 0
455	beq	kvm_no_guest
456	HMT_MEDIUM
457	b	kvm_secondary_got_guest
458ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
459
	/* hwthread_req became set after we marked ourselves IN_KERNEL: revert to IN_KVM and retry */
46054:	li	r0, KVM_HWTHREAD_IN_KVM
461	stb	r0, HSTATE_HWTHREAD_STATE(r13)
462	b	kvm_no_guest
463
464/*
465 * Here the primary thread is trying to return the core to
466 * whole-core mode, so we need to nap.
467 */
468kvm_unsplit_nap:
469	/*
470	 * When secondaries are napping in kvm_unsplit_nap() with
471	 * hwthread_req = 1, HMI goes ignored even though subcores are
472	 * already exited the guest. Hence HMI keeps waking up secondaries
473	 * from nap in a loop and secondaries always go back to nap since
474	 * no vcore is assigned to them. This makes impossible for primary
475	 * thread to get hold of secondary threads resulting into a soft
476	 * lockup in KVM path.
477	 *
478	 * Let us check if HMI is pending and handle it before we go to nap.
479	 */
480	cmpwi	r12, BOOK3S_INTERRUPT_HMI
481	bne	55f
482	li	r3, 0			/* NULL argument */
483	bl	hmi_exception_realmode
48455:
485	/*
486	 * Ensure that secondary doesn't nap when it has
487	 * its vcore pointer set.
488	 */
489	sync		/* matches smp_mb() before setting split_info.do_nap */
490	ld	r0, HSTATE_KVM_VCORE(r13)
491	cmpdi	r0, 0
492	bne	kvm_no_guest
493	/* clear any pending message */
494BEGIN_FTR_SECTION
495	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
496	PPC_MSGCLR(6)
497END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
498	/* Set kvm_split_mode.napped[tid] = 1 */
	/* r4 = KVM_SPLIT_NAPPED + (paca index & 7), used as a byte offset from r3 */
499	ld	r3, HSTATE_SPLIT_MODE(r13)
500	li	r0, 1
501	lhz	r4, PACAPACAINDEX(r13)
502	clrldi	r4, r4, 61	/* micro-threading => P8 => 8 threads/core */
503	addi	r4, r4, KVM_SPLIT_NAPPED
504	stbx	r0, r3, r4
505	/* Check the do_nap flag again after setting napped[] */
506	sync
507	lbz	r0, KVM_SPLIT_DO_NAP(r3)
508	cmpwi	r0, 0
509	beq	57f
510	li	r3, NAPPING_UNSPLIT
511	stb	r3, HSTATE_NAPPING(r13)
	/*
	 * Build the LPCR value in r5: the rlwimi sets PECEDH and PECE0 and
	 * clears PECEDP and PECE1.  r5 is left for kvm_nap_sequence (not shown
	 * in the code reviewed here).
	 */
512	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
513	mfspr	r5, SPRN_LPCR
514	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
515	b	kvm_nap_sequence
516
	/* do_nap was cleared in the meantime: undo napped[tid] (r3/r4 still address it) */
51757:	li	r0, 0
518	stbx	r0, r3, r4
519	b	kvm_no_guest
520
521/******************************************************************************
522 *                                                                            *
523 *                               Entry code                                   *
524 *                                                                            *
525 *****************************************************************************/
526
527.global kvmppc_hv_entry
528kvmppc_hv_entry:
529
530	/* Required state:
531	 *
532	 * R4 = vcpu pointer (or NULL)
533	 * MSR = ~IR|DR (instruction and data relocation both off)
534	 * R13 = PACA
535	 * R1 = host R1
536	 * R2 = TOC
537	 * all other volatile GPRS = free
538	 * Does not preserve non-volatile GPRs or CR fields
539	 */
	/* Save LR in the caller's frame and allocate an SFS-byte frame */
540	mflr	r0
541	std	r0, PPC_LR_STKOFF(r1)
542	stdu	r1, -SFS(r1)
543
544	/* Save R1 in the PACA */
545	std	r1, HSTATE_HOST_R1(r13)
546
547	li	r6, KVM_GUEST_MODE_HOST_HV
548	stb	r6, HSTATE_IN_GUEST(r13)
549
550#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
551	/* Store initial timestamp */
552	cmpdi	r4, 0
553	beq	1f
554	addi	r3, r4, VCPU_TB_RMENTRY
555	bl	kvmhv_start_timing
5561:
557#endif
558
559	ld	r5, HSTATE_KVM_VCORE(r13)
560	ld	r9, VCORE_KVM(r5)	/* pointer to struct kvm */
561
562	/*
563	 * POWER7/POWER8 host -> guest partition switch code.
564	 * We don't have to lock against concurrent tlbies,
565	 * but we do have to coordinate across hardware threads.
566	 */
567	/* Set bit in entry map iff exit map is zero. */
	/* r7 = 1 << ptid; a word value >= 0x100 means some thread has begun to exit */
568	li	r7, 1
569	lbz	r6, HSTATE_PTID(r13)
570	sld	r7, r7, r6
571	addi	r8, r5, VCORE_ENTRY_EXIT
57221:	lwarx	r3, 0, r8
573	cmpwi	r3, 0x100		/* any threads starting to exit? */
574	bge	secondary_too_late	/* if so we're too late to the party */
575	or	r3, r3, r7
576	stwcx.	r3, 0, r8
577	bne	21b
578
579	/* Primary thread switches to guest partition. */
	/* r6 still holds ptid; non-zero threads skip straight to 10: */
580	cmpwi	r6,0
581	bne	10f
582
583	lwz	r7,KVM_LPID(r9)
584BEGIN_FTR_SECTION
585	ld	r6,KVM_SDR1(r9)
586	li	r0,LPID_RSVD		/* switch to reserved LPID */
587	mtspr	SPRN_LPID,r0
588	ptesync
589	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
590END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
591	mtspr	SPRN_LPID,r7
592	isync
593
594	/* See if we need to flush the TLB. */
595	mr	r3, r9			/* kvm pointer */
596	lhz	r4, PACAPACAINDEX(r13)	/* physical cpu number */
597	li	r5, 0			/* nested vcpu pointer */
598	bl	kvmppc_check_need_tlb_flush
599	nop
	/* r5 was used as an argument above, so reload the vcore pointer */
600	ld	r5, HSTATE_KVM_VCORE(r13)
601
602	/* Add timebase offset onto timebase */
	/* A zero offset skips the update; otherwise record it as the applied offset */
60322:	ld	r8,VCORE_TB_OFFSET(r5)
604	cmpdi	r8,0
605	beq	37f
606	std	r8, VCORE_TB_OFFSET_APPL(r5)
607	mftb	r6		/* current host timebase */
608	add	r8,r8,r6
609	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
610	mftb	r7		/* check if lower 24 bits overflowed */
611	clrldi	r6,r6,40
612	clrldi	r7,r7,40
613	cmpld	r7,r6
614	bge	37f
615	addis	r8,r8,0x100	/* if so, increment upper 40 bits */
616	mtspr	SPRN_TBU40,r8
617
618	/* Load guest PCR value to select appropriate compat mode */
	/* PCR is written (as vcore PCR | PCR_MASK) only when the vcore value differs from PCR_MASK */
61937:	ld	r7, VCORE_PCR(r5)
620	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
621	cmpld	r7, r6
622	beq	38f
623	or	r7, r7, r6
624	mtspr	SPRN_PCR, r7
62538:
626
627BEGIN_FTR_SECTION
628	/* DPDES and VTB are shared between threads */
629	ld	r8, VCORE_DPDES(r5)
630	ld	r7, VCORE_VTB(r5)
631	mtspr	SPRN_DPDES, r8
632	mtspr	SPRN_VTB, r7
633END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
634
635	/* Mark the subcore state as inside guest */
636	bl	kvmppc_subcore_enter_guest
637	nop
	/* Reload r5 (vcore) and r4 (vcpu) after the call */
638	ld	r5, HSTATE_KVM_VCORE(r13)
639	ld	r4, HSTATE_KVM_VCPU(r13)
640	li	r0,1
641	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
642
643	/* Do we have a guest vcpu to run? */
64410:	cmpdi	r4, 0
645	beq	kvmppc_primary_no_guest
646kvmppc_got_guest:
	/*
	 * r4 = vcpu pointer (non-NULL here), r13 = PACA.  Saves host register
	 * values into the stack frame / PACA, loads the guest's SPR and
	 * non-volatile GPR state, and pushes the vcpu to XIVE before falling
	 * into deliver_guest_interrupt.
	 */
647	/* Increment yield count if they have a VPA */
648	ld	r3, VCPU_VPA(r4)
649	cmpdi	r3, 0
650	beq	25f
651	li	r6, LPPACA_YIELDCOUNT
652	LWZX_BE	r5, r3, r6
653	addi	r5, r5, 1
654	STWX_BE	r5, r3, r6
	/* flag the VPA as modified */
655	li	r6, 1
656	stb	r6, VCPU_VPA_DIRTY(r4)
65725:
658
659	/* Save purr/spurr */
	/* host values go to the PACA, then the vcpu's values are loaded */
660	mfspr	r5,SPRN_PURR
661	mfspr	r6,SPRN_SPURR
662	std	r5,HSTATE_PURR(r13)
663	std	r6,HSTATE_SPURR(r13)
664	ld	r7,VCPU_PURR(r4)
665	ld	r8,VCPU_SPURR(r4)
666	mtspr	SPRN_PURR,r7
667	mtspr	SPRN_SPURR,r8
668
669	/* Save host values of some registers */
670BEGIN_FTR_SECTION
671	mfspr	r5, SPRN_TIDR
672	mfspr	r6, SPRN_PSSCR
673	mfspr	r7, SPRN_PID
674	std	r5, STACK_SLOT_TID(r1)
675	std	r6, STACK_SLOT_PSSCR(r1)
676	std	r7, STACK_SLOT_PID(r1)
677	mfspr	r5, SPRN_HFSCR
678	std	r5, STACK_SLOT_HFSCR(r1)
679END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
680BEGIN_FTR_SECTION
681	mfspr	r5, SPRN_CIABR
682	mfspr	r6, SPRN_DAWR0
683	mfspr	r7, SPRN_DAWRX0
684	mfspr	r8, SPRN_IAMR
685	std	r5, STACK_SLOT_CIABR(r1)
686	std	r6, STACK_SLOT_DAWR0(r1)
687	std	r7, STACK_SLOT_DAWRX0(r1)
688	std	r8, STACK_SLOT_IAMR(r1)
689END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
690BEGIN_FTR_SECTION
691	mfspr	r6, SPRN_DAWR1
692	mfspr	r7, SPRN_DAWRX1
693	std	r6, STACK_SLOT_DAWR1(r1)
694	std	r7, STACK_SLOT_DAWRX1(r1)
695END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
696
697	mfspr	r5, SPRN_AMR
698	std	r5, STACK_SLOT_AMR(r1)
699	mfspr	r6, SPRN_UAMOR
700	std	r6, STACK_SLOT_UAMOR(r1)
701
702BEGIN_FTR_SECTION
703	/* Set partition DABR */
704	/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
705	lwz	r5,VCPU_DABRX(r4)
706	ld	r6,VCPU_DABR(r4)
707	mtspr	SPRN_DABRX,r5
708	mtspr	SPRN_DABR,r6
709	isync
710END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
711
712#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
713/*
714 * Branch around the call if both CPU_FTR_TM and
715 * CPU_FTR_P9_TM_HV_ASSIST are off.
716 */
717BEGIN_FTR_SECTION
718	b	91f
719END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
720	/*
721	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
722	 */
	/* arguments: r3 = vcpu, r4 = guest MSR, r5 = 0; r4 is reloaded afterwards */
723	mr      r3, r4
724	ld      r4, VCPU_MSR(r3)
725	li	r5, 0			/* don't preserve non-vol regs */
726	bl	kvmppc_restore_tm_hv
727	nop
728	ld	r4, HSTATE_KVM_VCPU(r13)
72991:
730#endif
731
732	/* Load guest PMU registers; r4 = vcpu pointer here */
733	mr	r3, r4
734	bl	kvmhv_load_guest_pmu
735
736	/* Load up FP, VMX and VSX registers */
737	ld	r4, HSTATE_KVM_VCPU(r13)
738	bl	kvmppc_load_fp
739
	/* Load the guest's non-volatile GPRs r14-r31 */
740	ld	r14, VCPU_GPR(R14)(r4)
741	ld	r15, VCPU_GPR(R15)(r4)
742	ld	r16, VCPU_GPR(R16)(r4)
743	ld	r17, VCPU_GPR(R17)(r4)
744	ld	r18, VCPU_GPR(R18)(r4)
745	ld	r19, VCPU_GPR(R19)(r4)
746	ld	r20, VCPU_GPR(R20)(r4)
747	ld	r21, VCPU_GPR(R21)(r4)
748	ld	r22, VCPU_GPR(R22)(r4)
749	ld	r23, VCPU_GPR(R23)(r4)
750	ld	r24, VCPU_GPR(R24)(r4)
751	ld	r25, VCPU_GPR(R25)(r4)
752	ld	r26, VCPU_GPR(R26)(r4)
753	ld	r27, VCPU_GPR(R27)(r4)
754	ld	r28, VCPU_GPR(R28)(r4)
755	ld	r29, VCPU_GPR(R29)(r4)
756	ld	r30, VCPU_GPR(R30)(r4)
757	ld	r31, VCPU_GPR(R31)(r4)
758
759	/* Switch DSCR to guest value */
760	ld	r5, VCPU_DSCR(r4)
761	mtspr	SPRN_DSCR, r5
762
763BEGIN_FTR_SECTION
764	/* Skip next section on POWER7 */
765	b	8f
766END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
767	/* Load up POWER8-specific registers */
768	ld	r5, VCPU_IAMR(r4)
769	lwz	r6, VCPU_PSPB(r4)
770	ld	r7, VCPU_FSCR(r4)
771	mtspr	SPRN_IAMR, r5
772	mtspr	SPRN_PSPB, r6
773	mtspr	SPRN_FSCR, r7
774	/*
775	 * Handle broken DAWR case by not writing it. This means we
776	 * can still store the DAWR register for migration.
777	 */
	/* The DAWR writes are skipped when the dawr_force_enable byte is zero */
778	LOAD_REG_ADDR(r5, dawr_force_enable)
779	lbz	r5, 0(r5)
780	cmpdi	r5, 0
781	beq	1f
782	ld	r5, VCPU_DAWR0(r4)
783	ld	r6, VCPU_DAWRX0(r4)
784	mtspr	SPRN_DAWR0, r5
785	mtspr	SPRN_DAWRX0, r6
786BEGIN_FTR_SECTION
787	ld	r5, VCPU_DAWR1(r4)
788	ld	r6, VCPU_DAWRX1(r4)
789	mtspr	SPRN_DAWR1, r5
790	mtspr	SPRN_DAWRX1, r6
791END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
7921:
793	ld	r7, VCPU_CIABR(r4)
794	ld	r8, VCPU_TAR(r4)
795	mtspr	SPRN_CIABR, r7
796	mtspr	SPRN_TAR, r8
797	ld	r5, VCPU_IC(r4)
798	ld	r8, VCPU_EBBHR(r4)
799	mtspr	SPRN_IC, r5
800	mtspr	SPRN_EBBHR, r8
801	ld	r5, VCPU_EBBRR(r4)
802	ld	r6, VCPU_BESCR(r4)
803	lwz	r7, VCPU_GUEST_PID(r4)
804	ld	r8, VCPU_WORT(r4)
805	mtspr	SPRN_EBBRR, r5
806	mtspr	SPRN_BESCR, r6
807	mtspr	SPRN_PID, r7
808	mtspr	SPRN_WORT, r8
809BEGIN_FTR_SECTION
810	/* POWER8-only registers */
811	ld	r5, VCPU_TCSCR(r4)
812	ld	r6, VCPU_ACOP(r4)
813	ld	r7, VCPU_CSIGR(r4)
814	ld	r8, VCPU_TACR(r4)
815	mtspr	SPRN_TCSCR, r5
816	mtspr	SPRN_ACOP, r6
817	mtspr	SPRN_CSIGR, r7
818	mtspr	SPRN_TACR, r8
819	nop
820FTR_SECTION_ELSE
821	/* POWER9-only registers */
822	ld	r5, VCPU_TID(r4)
823	ld	r6, VCPU_PSSCR(r4)
824	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
825	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
	/* insert the PACA fake-suspend byte into PSSCR at bit PSSCR_FAKE_SUSPEND_LG */
826	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
827	ld	r7, VCPU_HFSCR(r4)
828	mtspr	SPRN_TIDR, r5
829	mtspr	SPRN_PSSCR, r6
830	mtspr	SPRN_HFSCR, r7
831ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8328:
833
834	ld	r5, VCPU_SPRG0(r4)
835	ld	r6, VCPU_SPRG1(r4)
836	ld	r7, VCPU_SPRG2(r4)
837	ld	r8, VCPU_SPRG3(r4)
838	mtspr	SPRN_SPRG0, r5
839	mtspr	SPRN_SPRG1, r6
840	mtspr	SPRN_SPRG2, r7
841	mtspr	SPRN_SPRG3, r8
842
843	/* Load up DAR and DSISR */
844	ld	r5, VCPU_DAR(r4)
845	lwz	r6, VCPU_DSISR(r4)
846	mtspr	SPRN_DAR, r5
847	mtspr	SPRN_DSISR, r6
848
849	/* Restore AMR and UAMOR, set AMOR to all 1s */
850	ld	r5,VCPU_AMR(r4)
851	ld	r6,VCPU_UAMOR(r4)
852	li	r7,-1
853	mtspr	SPRN_AMR,r5
854	mtspr	SPRN_UAMOR,r6
855	mtspr	SPRN_AMOR,r7
856
857	/* Restore state of CTRL run bit; assume 1 on entry */
	/* If bit 0 of the saved CTRL is clear, clear bit 0 of the live register too */
858	lwz	r5,VCPU_CTRL(r4)
859	andi.	r5,r5,1
860	bne	4f
861	mfspr	r6,SPRN_CTRLF
862	clrrdi	r6,r6,1
863	mtspr	SPRN_CTRLT,r6
8644:
865	/* Secondary threads wait for primary to have done partition switch */
	/*
	 * ptid 0 goes straight to 21:.  Other threads spin at low priority
	 * until the vcore's in_guest byte is non-zero, and leave through
	 * no_switch_exit if the entry/exit word reaches 0x100 or more first.
	 */
866	ld	r5, HSTATE_KVM_VCORE(r13)
867	lbz	r6, HSTATE_PTID(r13)
868	cmpwi	r6, 0
869	beq	21f
870	lbz	r0, VCORE_IN_GUEST(r5)
871	cmpwi	r0, 0
872	bne	21f
873	HMT_LOW
87420:	lwz	r3, VCORE_ENTRY_EXIT(r5)
875	cmpwi	r3, 0x100
876	bge	no_switch_exit
877	lbz	r0, VCORE_IN_GUEST(r5)
878	cmpwi	r0, 0
879	beq	20b
880	HMT_MEDIUM
88121:
882	/* Set LPCR. */
883	ld	r8,VCORE_LPCR(r5)
884	mtspr	SPRN_LPCR,r8
885	isync
886
887	/*
888	 * Set the decrementer to the guest decrementer.
889	 */
	/* DEC = (dec_expires + applied TB offset) - current timebase */
890	ld	r8,VCPU_DEC_EXPIRES(r4)
891	/* r8 is a host timebase value here, convert to guest TB */
892	ld	r5,HSTATE_KVM_VCORE(r13)
893	ld	r6,VCORE_TB_OFFSET_APPL(r5)
894	add	r8,r8,r6
895	mftb	r7
896	subf	r3,r7,r8
897	mtspr	SPRN_DEC,r3
898
899	/* Check if HDEC expires soon */
900	mfspr	r3, SPRN_HDEC
901	EXTEND_HDEC(r3)
902	cmpdi	r3, 512		/* 1 microsecond */
903	blt	hdec_soon
904
	/* Guests with a non-zero KVM_RADIX byte skip the SLB handling below */
905	ld	r6, VCPU_KVM(r4)
906	lbz	r0, KVM_RADIX(r6)
907	cmpwi	r0, 0
908	bne	9f
909
910	/* For hash guest, clear out and reload the SLB */
911BEGIN_MMU_FTR_SECTION
912	/* Radix host won't have populated the SLB, so no need to clear */
913	li	r6, 0
914	slbmte	r6, r6
915	PPC_SLBIA(6)
916	ptesync
917END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
918
919	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
	/* CTR counts slb_max entries; r6 steps through the array in VCPU_SLB_SIZE strides */
920	lwz	r5,VCPU_SLB_MAX(r4)
921	cmpwi	r5,0
922	beq	9f
923	mtctr	r5
924	addi	r6,r4,VCPU_SLB
9251:	ld	r8,VCPU_SLB_E(r6)
926	ld	r9,VCPU_SLB_V(r6)
927	slbmte	r9,r8
928	addi	r6,r6,VCPU_SLB_SIZE
929	bdnz	1b
9309:
931
932#ifdef CONFIG_KVM_XICS
933	/* We are entering the guest on that thread, push VCPU to XIVE */
	/*
	 * r11 = saved XIVE state, r8 = CAM word (zero means nothing to push).
	 * cr0 from the MSR_DR test below is read again at the later
	 * "beq 4f", so the compares in between use cr1.
	 */
934	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
935	li	r9, TM_QW1_OS
936	lwz	r8, VCPU_XIVE_CAM_WORD(r4)
937	cmpwi	r8, 0
938	beq	no_xive
939	li	r7, TM_QW1_OS + TM_WORD2
940	mfmsr	r0
941	andi.	r0, r0, MSR_DR		/* in real mode? */
942	beq	2f
	/* MSR_DR set: ordinary stores through the virtual TIMA address */
943	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
944	cmpldi	cr1, r10, 0
945	beq     cr1, no_xive
946	eieio
947	stdx	r11,r9,r10
948	stwx	r8,r7,r10
949	b	3f
	/* MSR_DR clear: cache-inhibited stores through the physical TIMA address */
9502:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
951	cmpldi	cr1, r10, 0
952	beq	cr1, no_xive
953	eieio
954	stdcix	r11,r9,r10
955	stwcix	r8,r7,r10
9563:	li	r9, 1
957	stb	r9, VCPU_XIVE_PUSHED(r4)
958	eieio
959
960	/*
961	 * We clear the irq_pending flag. There is a small chance of a
962	 * race vs. the escalation interrupt happening on another
963	 * processor setting it again, but the only consequence is to
964	 * cause a spurious wakeup on the next H_CEDE which is not an
965	 * issue.
966	 */
967	li	r0,0
968	stb	r0, VCPU_IRQ_PENDING(r4)
969
970	/*
971	 * In single escalation mode, if the escalation interrupt is
972	 * on, we mask it.
973	 */
974	lbz	r0, VCPU_XIVE_ESC_ON(r4)
975	cmpwi	cr1, r0,0
976	beq	cr1, 1f
977	li	r9, XIVE_ESB_SET_PQ_01
978	beq	4f			/* in real mode? */
979	ld	r10, VCPU_XIVE_ESC_VADDR(r4)
980	ldx	r0, r10, r9
981	b	5f
9824:	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
983	ldcix	r0, r10, r9
9845:	sync
985
986	/* We have a possible subtle race here: The escalation interrupt might
987	 * have fired and be on its way to the host queue while we mask it,
988	 * and if we unmask it early enough (re-cede right away), there is
989	 * a theoretical possibility that it fires again, thus landing in the
990	 * target queue more than once which is a big no-no.
991	 *
992	 * Fortunately, solving this is rather easy. If the above load setting
993	 * PQ to 01 returns a previous value where P is set, then we know the
994	 * escalation interrupt is somewhere on its way to the host. In that
995	 * case we simply don't clear the xive_esc_on flag below. It will be
996	 * eventually cleared by the handler for the escalation interrupt.
997	 *
998	 * Then, when doing a cede, we check that flag again before re-enabling
999	 * the escalation interrupt, and if set, we abort the cede.
1000	 */
1001	andi.	r0, r0, XIVE_ESB_VAL_P
1002	bne-	1f
1003
1004	/* Now P is 0, we can clear the flag */
1005	li	r0, 0
1006	stb	r0, VCPU_XIVE_ESC_ON(r4)
10071:
1008no_xive:
1009#endif /* CONFIG_KVM_XICS */
1010
	/* This is the full entry path, so record short_path = 0 in the frame */
1011	li	r0, 0
1012	stw	r0, STACK_SLOT_SHORT_PATH(r1)
1013
1014deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
1015	/* Check if we can deliver an external or decrementer interrupt now */
	/* r0 != 0 (pending exceptions, plus doorbell request on ARCH_300) triggers the inject call */
1016	ld	r0, VCPU_PENDING_EXC(r4)
1017BEGIN_FTR_SECTION
1018	/* On POWER9, also check for emulated doorbell interrupt */
1019	lbz	r3, VCPU_DBELL_REQ(r4)
1020	or	r0, r0, r3
1021END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1022	cmpdi	r0, 0
1023	beq	71f
1024	mr	r3, r4
1025	bl	kvmppc_guest_entry_inject_int
	/* reload the vcpu pointer after the call */
1026	ld	r4, HSTATE_KVM_VCPU(r13)
102771:
	/* Load the guest's SRR0/SRR1 from the vcpu */
1028	ld	r6, VCPU_SRR0(r4)
1029	ld	r7, VCPU_SRR1(r4)
1030	mtspr	SPRN_SRR0, r6
1031	mtspr	SPRN_SRR1, r7
	/* falls through into fast_guest_entry_c */
1032
1033fast_guest_entry_c:
	/*
	 * r4 = vcpu.  Builds r10 (guest PC) and r11 (guest MSR with MSR_HV
	 * cleared and MSR_ME set) for fast_guest_return, and loads the guest
	 * CTR and XER.  Also entered directly from __kvmhv_vcpu_entry_p9.
	 */
1034	ld	r10, VCPU_PC(r4)
1035	ld	r11, VCPU_MSR(r4)
1036	/* r11 = vcpu->arch.msr & ~MSR_HV */
	/* rotate HV to the top bit, mask it off, then rotate back */
1037	rldicl	r11, r11, 63 - MSR_HV_LG, 1
1038	rotldi	r11, r11, 1 + MSR_HV_LG
1039	ori	r11, r11, MSR_ME
1040
1041	ld	r6, VCPU_CTR(r4)
1042	ld	r7, VCPU_XER(r4)
1043	mtctr	r6
1044	mtxer	r7
	/* falls through into fast_guest_return */
1045
1046/*
1047 * Required state:
1048 * R4 = vcpu
1049 * R10: value for HSRR0
1050 * R11: value for HSRR1
1051 * R13 = PACA
1052 */
1053fast_guest_return:
1054	li	r0,0
1055	stb	r0,VCPU_CEDED(r4)	/* cancel cede */
1056	mtspr	SPRN_HSRR0,r10
1057	mtspr	SPRN_HSRR1,r11
1058
1059	/* Activate guest mode, so faults get handled by KVM */
1060	li	r9, KVM_GUEST_MODE_GUEST_HV
1061	stb	r9, HSTATE_IN_GUEST(r13)
1062
1063#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1064	/* Accumulate timing */
1065	addi	r3, r4, VCPU_TB_GUEST
1066	bl	kvmhv_accumulate_time
1067#endif
1068
1069	/* Enter guest */
1070
1071BEGIN_FTR_SECTION
1072	ld	r5, VCPU_CFAR(r4)
1073	mtspr	SPRN_CFAR, r5
1074END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1075BEGIN_FTR_SECTION
	/* r0 carries the guest PPR value until the mtspr SPRN_PPR below */
1076	ld	r0, VCPU_PPR(r4)
1077END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1078
1079	ld	r5, VCPU_LR(r4)
1080	mtlr	r5
1081
	/* From here r1 and r13 hold guest values; only r4 (vcpu) is used as a base */
1082	ld	r1, VCPU_GPR(R1)(r4)
1083	ld	r5, VCPU_GPR(R5)(r4)
1084	ld	r8, VCPU_GPR(R8)(r4)
1085	ld	r9, VCPU_GPR(R9)(r4)
1086	ld	r10, VCPU_GPR(R10)(r4)
1087	ld	r11, VCPU_GPR(R11)(r4)
1088	ld	r12, VCPU_GPR(R12)(r4)
1089	ld	r13, VCPU_GPR(R13)(r4)
1090
1091BEGIN_FTR_SECTION
1092	mtspr	SPRN_PPR, r0
1093END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1094
1095/* Move canary (0x7fff) into HDSISR to check for later */
1096BEGIN_FTR_SECTION
1097	li	r0, 0x7fff
1098	mtspr	SPRN_HDSISR, r0
1099END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1100
	/*
	 * r6/r7 are scratch for the secure-guest test, then take their guest
	 * values.  The loads do not alter CR, so the bne still sees the
	 * result of the cmpdi.
	 */
1101	ld	r6, VCPU_KVM(r4)
1102	lbz	r7, KVM_SECURE_GUEST(r6)
1103	cmpdi	r7, 0
1104	ld	r6, VCPU_GPR(R6)(r4)
1105	ld	r7, VCPU_GPR(R7)(r4)
1106	bne	ret_to_ultra
1107
1108	ld	r0, VCPU_CR(r4)
1109	mtcr	r0
1110
	/* r4 is loaded last because it is the base register for all these loads */
1111	ld	r0, VCPU_GPR(R0)(r4)
1112	ld	r2, VCPU_GPR(R2)(r4)
1113	ld	r3, VCPU_GPR(R3)(r4)
1114	ld	r4, VCPU_GPR(R4)(r4)
1115	HRFI_TO_GUEST
1116	b	.
1117/*
1118 * Use UV_RETURN ultracall to return control back to the Ultravisor after
1119 * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
1120 * to the Hypervisor.
1121 *
1122 * All registers have already been loaded, except:
1123 *   R0 = hcall result
1124 *   R2 = SRR1, so UV can detect a synthesized interrupt (if any)
1125 *   R3 = UV_RETURN
1126 */
1127ret_to_ultra:
	/* Reached from fast_guest_return when the KVM_SECURE_GUEST byte is non-zero; r4 = vcpu */
1128	ld	r0, VCPU_CR(r4)
1129	mtcr	r0
1130
	/* r0 takes the value from the guest's R3 slot (the hcall result) */
1131	ld	r0, VCPU_GPR(R3)(r4)
1132	mfspr	r2, SPRN_SRR1
	/* r3 = UV_RETURN, built with li 0 + ori */
1133	li	r3, 0
1134	ori	r3, r3, UV_RETURN
	/* r4 is the base register, so it is loaded last */
1135	ld	r4, VCPU_GPR(R4)(r4)
1136	sc	2
1137
1138/*
1139 * Enter the guest on a P9 or later system where we have exactly
1140 * one vcpu per vcore and we don't need to go to real mode
1141 * (which implies that host and guest are both using radix MMU mode).
1142 * r3 = vcpu pointer
1143 * Most SPRs and all the VSRs have been loaded already.
1144 */
1145_GLOBAL(__kvmhv_vcpu_entry_p9)
1146EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
	/* Save LR in the caller's frame and allocate an SFS-byte frame */
1147	mflr	r0
1148	std	r0, PPC_LR_STKOFF(r1)
1149	stdu	r1, -SFS(r1)
1150
	/* Mark this frame as a short-path entry (read back on the exit side) */
1151	li	r0, 1
1152	stw	r0, STACK_SLOT_SHORT_PATH(r1)
1153
	/* Publish the vcpu pointer in the PACA and save CR at SFS+8(r1) */
1154	std	r3, HSTATE_KVM_VCPU(r13)
1155	mfcr	r4
1156	stw	r4, SFS+8(r1)
1157
1158	std	r1, HSTATE_HOST_R1(r13)
1159
	/* Save host r14-r31 in the STACK_SLOT_NVGPRS area */
1160	reg = 14
1161	.rept	18
1162	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1163	reg = reg + 1
1164	.endr
1165
	/* Load guest r14-r31 from the vcpu */
1166	reg = 14
1167	.rept	18
1168	ld	reg, __VCPU_GPR(reg)(r3)
1169	reg = reg + 1
1170	.endr
1171
	/* Remember the host MSR for the return in guest_exit_short_path */
1172	mfmsr	r10
1173	std	r10, HSTATE_HOST_MSR(r13)
1174
	/* fast_guest_entry_c expects the vcpu pointer in r4 */
1175	mr	r4, r3
1176	b	fast_guest_entry_c
1177guest_exit_short_path:
	/*
	 * Return path matching __kvmhv_vcpu_entry_p9.  As used below: r9 =
	 * vcpu pointer, r12 = trap number, r1 = the frame set up at entry.
	 * NOTE(review): the branch into this label is outside the code
	 * reviewed here -- confirm the register contract against that caller.
	 */
1178	/*
1179	 * Malicious or buggy radix guests may have inserted SLB entries
1180	 * (only 0..3 because radix always runs with UPRT=1), so these must
1181	 * be cleared here to avoid side-channels. slbmte is used rather
1182	 * than slbia, as it won't clear cached translations.
1183	 */
1184	li	r0,0
1185	slbmte	r0,r0
1186	li	r4,1
1187	slbmte	r0,r4
1188	li	r4,2
1189	slbmte	r0,r4
1190	li	r4,3
1191	slbmte	r0,r4
1192
1193	li	r0, KVM_GUEST_MODE_NONE
1194	stb	r0, HSTATE_IN_GUEST(r13)
1195
	/* Store guest r14-r31 into the vcpu */
1196	reg = 14
1197	.rept	18
1198	std	reg, __VCPU_GPR(reg)(r9)
1199	reg = reg + 1
1200	.endr
1201
	/* Reload host r14-r31 from the STACK_SLOT_NVGPRS area */
1202	reg = 14
1203	.rept	18
1204	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
1205	reg = reg + 1
1206	.endr
1207
	/* Restore the CR saved at entry */
1208	lwz	r4, SFS+8(r1)
1209	mtcr	r4
1210
1211	mr	r3, r12		/* trap number */
1212
	/* Pop the frame and recover the return address */
1213	addi	r1, r1, SFS
1214	ld	r0, PPC_LR_STKOFF(r1)
1215	mtlr	r0
1216
1217	/* If we are in real mode, do a rfid to get back to the caller */
	/* MSR_IR set: plain return; otherwise rfi with the saved host MSR plus the current TS bits */
1218	mfmsr	r4
1219	andi.	r5, r4, MSR_IR
1220	bnelr
1221	rldicl	r5, r4, 64 - MSR_TS_S_LG, 62	/* extract TS field */
1222	mtspr	SPRN_SRR0, r0
1223	ld	r10, HSTATE_HOST_MSR(r13)
1224	rldimi	r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
1225	mtspr	SPRN_SRR1, r10
1226	RFI_TO_KERNEL
1227	b	.
1228
/*
 * Record a trap number of 0 and go straight to kvmhv_switch_to_host.
 * r4 is treated as a vcpu pointer that may be zero, r1 is the
 * kvmppc_hv_entry stack frame.
 * NOTE(review): judging by the name this is for a thread that arrived
 * after the vcore started exiting, confirm against the caller.
 */
1229secondary_too_late:
1230	li	r12, 0
1231	stw	r12, STACK_SLOT_TRAP(r1)
	/* Without a vcpu there is nothing more to record */
1232	cmpdi	r4, 0
1233	beq	11f
1234	stw	r12, VCPU_TRAP(r4)
1235#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
	/* r3 = address of the VCPU_TB_RMEXIT accumulator, r4 = vcpu */
1236	addi	r3, r4, VCPU_TB_RMEXIT
1237	bl	kvmhv_accumulate_time
1238#endif
123911:	b	kvmhv_switch_to_host
1240
/*
 * Two entry points that abandon guest entry and join the exit path at
 * guest_bypass.  On entry r4 is the vcpu pointer.
 *   no_switch_exit: sets medium thread priority and uses trap number 0
 *   hdec_soon:      uses trap number BOOK3S_INTERRUPT_HV_DECREMENTER
 * Both store the trap in the vcpu and copy the vcpu pointer to r9,
 * which is the register guest_bypass uses for it.
 */
1241no_switch_exit:
1242	HMT_MEDIUM
1243	li	r12, 0
1244	b	12f
1245hdec_soon:
1246	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
124712:	stw	r12, VCPU_TRAP(r4)
1248	mr	r9, r4
1249#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
	/* r3 = address of the VCPU_TB_RMEXIT accumulator, r4 = vcpu */
1250	addi	r3, r4, VCPU_TB_RMEXIT
1251	bl	kvmhv_accumulate_time
1252#endif
1253	b	guest_bypass
1254
1255/******************************************************************************
1256 *                                                                            *
1257 *                               Exit code                                    *
1258 *                                                                            *
1259 *****************************************************************************/
1260
1261/*
1262 * We come here from the first-level interrupt handlers.
 *
 * This routine saves the volatile guest state (r0-r13, CR, LR, CTR, XER,
 * PC, MSR, SRR0/1, DAR, DSISR) into the vcpu, reloads host r1 and r2,
 * and then dispatches on the trap number held in r12.  Traps that are
 * not handled by one of the branches below fall through to
 * guest_exit_cont.
1263 */
1264	.globl	kvmppc_interrupt_hv
1265kvmppc_interrupt_hv:
1266	/*
1267	 * Register contents:
1268	 * R12		= (guest CR << 32) | interrupt vector
1269	 * R13		= PACA
1270	 * guest R12 saved in shadow VCPU SCRATCH0
1271	 * guest R13 saved in SPRN_SCRATCH0
1272	 */
	/* Park guest r9 in the PACA so that r9 can be used as scratch */
1273	std	r9, HSTATE_SCRATCH2(r13)
1274	lbz	r9, HSTATE_IN_GUEST(r13)
	/* Flag already set to HOST_HV: interrupt taken while handling an exit */
1275	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
1276	beq	kvmppc_bad_host_intr
1277#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
	/*
	 * The ld between cmpwi and beq does not alter CR0, so guest r9
	 * is restored before control is handed to the PR handler.
	 */
1278	cmpwi	r9, KVM_GUEST_MODE_GUEST
1279	ld	r9, HSTATE_SCRATCH2(r13)
1280	beq	kvmppc_interrupt_pr
1281#endif
1282	/* We're now back in the host but in guest MMU context */
1283	li	r9, KVM_GUEST_MODE_HOST_HV
1284	stb	r9, HSTATE_IN_GUEST(r13)
1285
	/* From here on r9 is the vcpu pointer */
1286	ld	r9, HSTATE_KVM_VCPU(r13)
1287
1288	/* Save registers */
1289
1290	std	r0, VCPU_GPR(R0)(r9)
1291	std	r1, VCPU_GPR(R1)(r9)
1292	std	r2, VCPU_GPR(R2)(r9)
1293	std	r3, VCPU_GPR(R3)(r9)
1294	std	r4, VCPU_GPR(R4)(r9)
1295	std	r5, VCPU_GPR(R5)(r9)
1296	std	r6, VCPU_GPR(R6)(r9)
1297	std	r7, VCPU_GPR(R7)(r9)
1298	std	r8, VCPU_GPR(R8)(r9)
	/* Guest r9 was parked in HSTATE_SCRATCH2 at entry */
1299	ld	r0, HSTATE_SCRATCH2(r13)
1300	std	r0, VCPU_GPR(R9)(r9)
1301	std	r10, VCPU_GPR(R10)(r9)
1302	std	r11, VCPU_GPR(R11)(r9)
	/* Guest r12 comes from HSTATE_SCRATCH0 (see register contents above) */
1303	ld	r3, HSTATE_SCRATCH0(r13)
1304	std	r3, VCPU_GPR(R12)(r9)
1305	/* CR is in the high half of r12 */
1306	srdi	r4, r12, 32
1307	std	r4, VCPU_CR(r9)
1308BEGIN_FTR_SECTION
1309	ld	r3, HSTATE_CFAR(r13)
1310	std	r3, VCPU_CFAR(r9)
1311END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1312BEGIN_FTR_SECTION
1313	ld	r4, HSTATE_PPR(r13)
1314	std	r4, VCPU_PPR(r9)
1315END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
1316
1317	/* Restore R1/R2 so we can handle faults */
1318	ld	r1, HSTATE_HOST_R1(r13)
1319	ld	r2, PACATOC(r13)
1320
	/* r10/r11 default to SRR0/SRR1 and are replaced by HSRR0/1 below */
1321	mfspr	r10, SPRN_SRR0
1322	mfspr	r11, SPRN_SRR1
1323	std	r10, VCPU_SRR0(r9)
1324	std	r11, VCPU_SRR1(r9)
1325	/* trap is in the low half of r12, clear CR from the high half */
1326	clrldi	r12, r12, 32
1327	andi.	r0, r12, 2		/* need to read HSRR0/1? */
1328	beq	1f
1329	mfspr	r10, SPRN_HSRR0
1330	mfspr	r11, SPRN_HSRR1
	/* Clear the two low bits of the trap number, including the HSRR marker */
1331	clrrdi	r12, r12, 2
	/* r10 = guest PC and r11 = guest MSR from here on */
13321:	std	r10, VCPU_PC(r9)
1333	std	r11, VCPU_MSR(r9)
1334
	/* Guest r13 is fetched with GET_SCRATCH0 (see register contents above) */
1335	GET_SCRATCH0(r3)
1336	mflr	r4
1337	std	r3, VCPU_GPR(R13)(r9)
1338	std	r4, VCPU_LR(r9)
1339
1340	stw	r12,VCPU_TRAP(r9)
1341
1342	/*
1343	 * Now that we have saved away SRR0/1 and HSRR0/1,
1344	 * interrupts are recoverable in principle, so set MSR_RI.
1345	 * This becomes important for relocation-on interrupts from
1346	 * the guest, which we can get in radix mode on POWER9.
	 * (mtmsrd with L=1 only alters the EE and RI bits of the MSR.)
1347	 */
1348	li	r0, MSR_RI
1349	mtmsrd	r0, 1
1350
1351#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1352	addi	r3, r9, VCPU_TB_RMINTR
1353	mr	r4, r9
1354	bl	kvmhv_accumulate_time
	/*
	 * Reload r5-r8 from the vcpu after the call.  hcall_try_real_mode
	 * expects the hcall arguments in these registers.
	 */
1355	ld	r5, VCPU_GPR(R5)(r9)
1356	ld	r6, VCPU_GPR(R6)(r9)
1357	ld	r7, VCPU_GPR(R7)(r9)
1358	ld	r8, VCPU_GPR(R8)(r9)
1359#endif
1360
1361	/* Save HEIR (HV emulation assist reg) in emul_inst
1362	   if this is an HEI (HV emulation interrupt, e40) */
	/*
	 * VCPU_LAST_INST always gets KVM_INST_FETCH_FAILED here.  VCPU_HEIR
	 * gets the HEIR value for an emulation assist interrupt and
	 * KVM_INST_FETCH_FAILED for every other trap.
	 */
1363	li	r3,KVM_INST_FETCH_FAILED
1364	stw	r3,VCPU_LAST_INST(r9)
1365	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
1366	bne	11f
1367	mfspr	r3,SPRN_HEIR
136811:	stw	r3,VCPU_HEIR(r9)
1369
1370	/* these are volatile across C function calls */
1371	mfctr	r3
1372	mfxer	r4
1373	std	r3, VCPU_CTR(r9)
1374	std	r4, VCPU_XER(r9)
1375
1376	/* Save more register state  */
1377	mfdar	r3
1378	mfdsisr	r4
1379	std	r3, VCPU_DAR(r9)
1380	stw	r4, VCPU_DSISR(r9)
1381
1382	/* If this is a page table miss then see if it's theirs or ours */
1383	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
1384	beq	kvmppc_hdsi
	/* For every other trap the fault DAR/DSISR default to DAR/DSISR */
1385	std	r3, VCPU_FAULT_DAR(r9)
1386	stw	r4, VCPU_FAULT_DSISR(r9)
1387	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
1388	beq	kvmppc_hisi
1389
1390#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1391	/* For softpatch interrupt, go off and do TM instruction emulation */
1392	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
1393	beq	kvmppc_tm_emul
1394#endif
1395
1396	/* See if this is a leftover HDEC interrupt */
1397	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
1398	bne	2f
1399	mfspr	r3,SPRN_HDEC
1400	EXTEND_HDEC(r3)
	/* HDEC still non-negative: go back to the guest with r4 = vcpu */
1401	cmpdi	r3,0
1402	mr	r4,r9
1403	bge	fast_guest_return
14042:
1405	/* See if this is an hcall we can handle in real mode */
1406	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
1407	beq	hcall_try_real_mode
1408
1409	/* Hypervisor doorbell - exit only if host IPI flag set */
1410	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
1411	bne	3f
1412BEGIN_FTR_SECTION
1413	PPC_MSGSYNC
1414	lwsync
1415	/* always exit if we're running a nested guest */
1416	ld	r0, VCPU_NESTED(r9)
1417	cmpdi	r0, 0
1418	bne	guest_exit_cont
1419END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1420	lbz	r0, HSTATE_HOST_IPI(r13)
1421	cmpwi	r0, 0
1422	beq	maybe_reenter_guest
1423	b	guest_exit_cont
14243:
1425	/* If it's a hypervisor facility unavailable interrupt, save HFSCR */
1426	cmpwi	r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
1427	bne	14f
1428	mfspr	r3, SPRN_HFSCR
1429	std	r3, VCPU_HFSCR(r9)
1430	b	guest_exit_cont
143114:
1432	/* External interrupt ? */
1433	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
1434	beq	kvmppc_guest_external
1435	/* See if it is a machine check */
1436	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
1437	beq	machine_check_realmode
1438	/* Or a hypervisor maintenance interrupt */
1439	cmpwi	r12, BOOK3S_INTERRUPT_HMI
1440	beq	hmi_realmode
1441
/*
 * Common guest exit path.  In order: optionally account exit time, pull
 * the XIVE OS context if one was pushed, run the patchable link stack
 * flush site, leave through guest_exit_short_path when the P9 short path
 * was used, otherwise save and clear the guest SLB (hash) or sanitise
 * SLB entries 0..3 (radix), reload the bolted host SLB entries when the
 * host is not radix, and continue at guest_bypass.
 */
1442guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
1443
1444#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1445	addi	r3, r9, VCPU_TB_RMEXIT
1446	mr	r4, r9
1447	bl	kvmhv_accumulate_time
1448#endif
1449#ifdef CONFIG_KVM_XICS
1450	/* We are exiting, pull the VP from the XIVE */
1451	lbz	r0, VCPU_XIVE_PUSHED(r9)
1452	cmpwi	cr0, r0, 0
1453	beq	1f
	/* r7 and r6 are offsets from the TIMA base address held in r10 */
1454	li	r7, TM_SPC_PULL_OS_CTX
1455	li	r6, TM_QW1_OS
	/*
	 * MSR_DR set: use the virtual TIMA address with ordinary loads.
	 * MSR_DR clear: use the physical TIMA address with the
	 * cache-inhibited lwzcix/ldcix forms at label 2.
	 * A zero TIMA address skips the pull in either case.
	 */
1456	mfmsr	r0
1457	andi.	r0, r0, MSR_DR		/* in real mode? */
1458	beq	2f
1459	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
1460	cmpldi	cr0, r10, 0
1461	beq	1f
1462	/* First load to pull the context, we ignore the value */
1463	eieio
1464	lwzx	r11, r7, r10
1465	/* Second load to recover the context state (Words 0 and 1) */
1466	ldx	r11, r6, r10
1467	b	3f
14682:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
1469	cmpldi	cr0, r10, 0
1470	beq	1f
1471	/* First load to pull the context, we ignore the value */
1472	eieio
1473	lwzcix	r11, r7, r10
1474	/* Second load to recover the context state (Words 0 and 1) */
1475	ldcix	r11, r6, r10
14763:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
1477	/* Fixup some of the state for the next load */
	/*
	 * Clears the pushed flag, zeroes byte 3 and writes 0xff to byte 4
	 * of the saved state.
	 * NOTE(review): the meaning of those two bytes is defined by the
	 * XIVE TIMA layout, which is not visible here.
	 */
1478	li	r10, 0
1479	li	r0, 0xff
1480	stb	r10, VCPU_XIVE_PUSHED(r9)
1481	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
1482	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
1483	eieio
14841:
1485#endif /* CONFIG_KVM_XICS */
1486
1487	/*
1488	 * Possibly flush the link stack here, before we do a blr in
1489	 * guest_exit_short_path.
	 * The nop is registered as a patch site.
	 * NOTE(review): presumably it is patched at runtime into a call
	 * to kvm_flush_link_stack, confirm against the patching code.
1490	 */
14911:	nop
1492	patch_site 1b patch__call_kvm_flush_link_stack
1493
1494	/* If we came in through the P9 short path, go back out to C now */
1495	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
1496	cmpwi	r0, 0
1497	bne	guest_exit_short_path
1498
1499	/* For hash guest, read the guest SLB and save it away */
	/*
	 * Loop registers: ctr = VCPU_SLB_NR iterations, r6 = SLB index,
	 * r7 = next free slot in the vcpu SLB array, r5 = number of valid
	 * entries saved so far (stored to VCPU_SLB_MAX afterwards).
	 */
1500	ld	r5, VCPU_KVM(r9)
1501	lbz	r0, KVM_RADIX(r5)
1502	li	r5, 0
1503	cmpwi	r0, 0
1504	bne	0f			/* for radix, save 0 entries */
1505	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
1506	mtctr	r0
1507	li	r6,0
1508	addi	r7,r9,VCPU_SLB
15091:	slbmfee	r8,r6
	/* Entries without the valid bit are skipped, not saved */
1510	andis.	r0,r8,SLB_ESID_V@h
1511	beq	2f
1512	add	r8,r8,r6		/* put index in */
1513	slbmfev	r3,r6
1514	std	r8,VCPU_SLB_E(r7)
1515	std	r3,VCPU_SLB_V(r7)
1516	addi	r7,r7,VCPU_SLB_SIZE
1517	addi	r5,r5,1
15182:	addi	r6,r6,1
1519	bdnz	1b
1520	/* Finally clear out the SLB */
1521	li	r0,0
1522	slbmte	r0,r0
1523	PPC_SLBIA(6)
1524	ptesync
1525	stw	r5,VCPU_SLB_MAX(r9)
1526
1527	/* load host SLB entries */
1528BEGIN_MMU_FTR_SECTION
1529	b	guest_bypass
1530END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
1531	ld	r8,PACA_SLBSHADOWPTR(r13)
1532
	/*
	 * For each bolted entry load two doublewords from the SLB shadow
	 * save area (r5 is tested for SLB_ESID_V, r6 is the slbmte RS
	 * operand) and install the entry only when it is valid.
	 */
1533	.rept	SLB_NUM_BOLTED
1534	li	r3, SLBSHADOW_SAVEAREA
1535	LDX_BE	r5, r8, r3
1536	addi	r3, r3, 8
1537	LDX_BE	r6, r8, r3
1538	andis.	r7,r5,SLB_ESID_V@h
1539	beq	1f
1540	slbmte	r6,r5
15411:	addi	r8,r8,16
1542	.endr
1543	b	guest_bypass
1544
15450:	/*
1546	 * Sanitise radix guest SLB, see guest_exit_short_path comment.
1547	 * We clear vcpu->arch.slb_max to match earlier behaviour.
1548	 */
1549	li	r0,0
1550	stw	r0,VCPU_SLB_MAX(r9)
1551	slbmte	r0,r0
1552	li	r4,1
1553	slbmte	r0,r4
1554	li	r4,2
1555	slbmte	r0,r4
1556	li	r4,3
1557	slbmte	r0,r4
1558
/*
 * Save the remaining guest state into the vcpu and put host values back.
 * On entry r9 = vcpu, r12 = trap number, r13 = PACA and r1 = the
 * kvmppc_hv_entry frame.  r9 is reloaded from HSTATE_KVM_VCPU after
 * most of the calls made below.  Falls through into
 * kvmhv_switch_to_host.
 */
1559guest_bypass:
1560	stw	r12, STACK_SLOT_TRAP(r1)
1561
1562	/* Save DEC */
1563	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
1564	ld	r3, HSTATE_KVM_VCORE(r13)
1565	mfspr	r5,SPRN_DEC
1566	mftb	r6
1567	/* On P9, if the guest has large decr enabled, don't sign extend */
1568BEGIN_FTR_SECTION
1569	ld	r4, VCORE_LPCR(r3)
1570	andis.	r4, r4, LPCR_LD@h
1571	bne	16f
1572END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1573	extsw	r5,r5
	/* Expiry time = DEC + current timebase */
157416:	add	r5,r5,r6
1575	/* r5 is a guest timebase value here, convert to host TB */
1576	ld	r4,VCORE_TB_OFFSET_APPL(r3)
1577	subf	r5,r4,r5
1578	std	r5,VCPU_DEC_EXPIRES(r9)
1579
1580	/* Increment exit count, poke other threads to exit */
1581	mr 	r3, r12
1582	bl	kvmhv_commence_exit
1583	nop
1584	ld	r9, HSTATE_KVM_VCPU(r13)
1585
1586	/* Stop others sending VCPU interrupts to this physical CPU */
1587	li	r0, -1
1588	stw	r0, VCPU_CPU(r9)
1589	stw	r0, VCPU_THREAD_CPU(r9)
1590
1591	/* Save guest CTRL register, set runlatch to 1 */
1592	mfspr	r6,SPRN_CTRLF
1593	stw	r6,VCPU_CTRL(r9)
	/* The register is only written when bit 0 is not already set */
1594	andi.	r0,r6,1
1595	bne	4f
1596	ori	r6,r6,1
1597	mtspr	SPRN_CTRLT,r6
15984:
1599	/*
1600	 * Save the guest PURR/SPURR
	 * r5/r6 end up holding the difference between the current
	 * register values and the values previously stored in the vcpu.
1601	 */
1602	mfspr	r5,SPRN_PURR
1603	mfspr	r6,SPRN_SPURR
1604	ld	r7,VCPU_PURR(r9)
1605	ld	r8,VCPU_SPURR(r9)
1606	std	r5,VCPU_PURR(r9)
1607	std	r6,VCPU_SPURR(r9)
1608	subf	r5,r7,r5
1609	subf	r6,r8,r6
1610
1611	/*
1612	 * Restore host PURR/SPURR and add guest times
1613	 * so that the time in the guest gets accounted.
1614	 */
1615	ld	r3,HSTATE_PURR(r13)
1616	ld	r4,HSTATE_SPURR(r13)
1617	add	r3,r3,r5
1618	add	r4,r4,r6
1619	mtspr	SPRN_PURR,r3
1620	mtspr	SPRN_SPURR,r4
1621
	/* CPUs without CPU_FTR_ARCH_207S skip ahead to label 8 */
1622BEGIN_FTR_SECTION
1623	b	8f
1624END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1625	/* Save POWER8-specific registers */
1626	mfspr	r5, SPRN_IAMR
1627	mfspr	r6, SPRN_PSPB
1628	mfspr	r7, SPRN_FSCR
1629	std	r5, VCPU_IAMR(r9)
1630	stw	r6, VCPU_PSPB(r9)
1631	std	r7, VCPU_FSCR(r9)
1632	mfspr	r5, SPRN_IC
1633	mfspr	r7, SPRN_TAR
1634	std	r5, VCPU_IC(r9)
1635	std	r7, VCPU_TAR(r9)
1636	mfspr	r8, SPRN_EBBHR
1637	std	r8, VCPU_EBBHR(r9)
1638	mfspr	r5, SPRN_EBBRR
1639	mfspr	r6, SPRN_BESCR
1640	mfspr	r7, SPRN_PID
1641	mfspr	r8, SPRN_WORT
1642	std	r5, VCPU_EBBRR(r9)
1643	std	r6, VCPU_BESCR(r9)
1644	stw	r7, VCPU_GUEST_PID(r9)
1645	std	r8, VCPU_WORT(r9)
	/*
	 * First alternative is used when CPU_FTR_ARCH_300 is clear, the
	 * alternative after FTR_SECTION_ELSE when it is set.
	 */
1646BEGIN_FTR_SECTION
1647	mfspr	r5, SPRN_TCSCR
1648	mfspr	r6, SPRN_ACOP
1649	mfspr	r7, SPRN_CSIGR
1650	mfspr	r8, SPRN_TACR
1651	std	r5, VCPU_TCSCR(r9)
1652	std	r6, VCPU_ACOP(r9)
1653	std	r7, VCPU_CSIGR(r9)
1654	std	r8, VCPU_TACR(r9)
1655FTR_SECTION_ELSE
1656	mfspr	r5, SPRN_TIDR
1657	mfspr	r6, SPRN_PSSCR
1658	std	r5, VCPU_TID(r9)
1659	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
1660	rotldi	r6, r6, 60
1661	std	r6, VCPU_PSSCR(r9)
1662	/* Restore host HFSCR value */
1663	ld	r7, STACK_SLOT_HFSCR(r1)
1664	mtspr	SPRN_HFSCR, r7
1665ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1666	/*
1667	 * Restore various registers to 0, where non-zero values
1668	 * set by the guest could disrupt the host.
1669	 */
1670	li	r0, 0
1671	mtspr	SPRN_PSPB, r0
1672	mtspr	SPRN_WORT, r0
1673BEGIN_FTR_SECTION
1674	mtspr	SPRN_TCSCR, r0
1675	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
1676	li	r0, 1
1677	sldi	r0, r0, 31
1678	mtspr	SPRN_MMCRS, r0
1679END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1680
1681	/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
1682	ld	r8, STACK_SLOT_IAMR(r1)
1683	mtspr	SPRN_IAMR, r8
1684
16858:	/* Power7 jumps back in here */
1686	mfspr	r5,SPRN_AMR
1687	mfspr	r6,SPRN_UAMOR
1688	std	r5,VCPU_AMR(r9)
1689	std	r6,VCPU_UAMOR(r9)
1690	ld	r5,STACK_SLOT_AMR(r1)
1691	ld	r6,STACK_SLOT_UAMOR(r1)
1692	mtspr	SPRN_AMR, r5
1693	mtspr	SPRN_UAMOR, r6
1694
1695	/* Switch DSCR back to host value */
1696	mfspr	r8, SPRN_DSCR
1697	ld	r7, HSTATE_DSCR(r13)
1698	std	r8, VCPU_DSCR(r9)
1699	mtspr	SPRN_DSCR, r7
1700
1701	/* Save non-volatile GPRs */
1702	std	r14, VCPU_GPR(R14)(r9)
1703	std	r15, VCPU_GPR(R15)(r9)
1704	std	r16, VCPU_GPR(R16)(r9)
1705	std	r17, VCPU_GPR(R17)(r9)
1706	std	r18, VCPU_GPR(R18)(r9)
1707	std	r19, VCPU_GPR(R19)(r9)
1708	std	r20, VCPU_GPR(R20)(r9)
1709	std	r21, VCPU_GPR(R21)(r9)
1710	std	r22, VCPU_GPR(R22)(r9)
1711	std	r23, VCPU_GPR(R23)(r9)
1712	std	r24, VCPU_GPR(R24)(r9)
1713	std	r25, VCPU_GPR(R25)(r9)
1714	std	r26, VCPU_GPR(R26)(r9)
1715	std	r27, VCPU_GPR(R27)(r9)
1716	std	r28, VCPU_GPR(R28)(r9)
1717	std	r29, VCPU_GPR(R29)(r9)
1718	std	r30, VCPU_GPR(R30)(r9)
1719	std	r31, VCPU_GPR(R31)(r9)
1720
1721	/* Save SPRGs */
1722	mfspr	r3, SPRN_SPRG0
1723	mfspr	r4, SPRN_SPRG1
1724	mfspr	r5, SPRN_SPRG2
1725	mfspr	r6, SPRN_SPRG3
1726	std	r3, VCPU_SPRG0(r9)
1727	std	r4, VCPU_SPRG1(r9)
1728	std	r5, VCPU_SPRG2(r9)
1729	std	r6, VCPU_SPRG3(r9)
1730
1731	/* save FP state */
1732	mr	r3, r9
1733	bl	kvmppc_save_fp
1734
1735#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1736/*
1737 * Branch around the call if both CPU_FTR_TM and
1738 * CPU_FTR_P9_TM_HV_ASSIST are off.
1739 */
1740BEGIN_FTR_SECTION
1741	b	91f
1742END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
1743	/*
1744	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
	 * Arguments: r3 = vcpu, r4 = guest MSR from the vcpu, r5 = 0.
1745	 */
1746	mr      r3, r9
1747	ld      r4, VCPU_MSR(r3)
1748	li	r5, 0			/* don't preserve non-vol regs */
1749	bl	kvmppc_save_tm_hv
1750	nop
1751	ld	r9, HSTATE_KVM_VCPU(r13)
175291:
1753#endif
1754
1755	/* Increment yield count if they have a VPA */
1756	ld	r8, VCPU_VPA(r9)	/* do they have a VPA? */
1757	cmpdi	r8, 0
1758	beq	25f
	/* The count is accessed with the big-endian load/store macros */
1759	li	r4, LPPACA_YIELDCOUNT
1760	LWZX_BE	r3, r8, r4
1761	addi	r3, r3, 1
1762	STWX_BE	r3, r8, r4
1763	li	r3, 1
1764	stb	r3, VCPU_VPA_DIRTY(r9)
176525:
1766	/* Save PMU registers if requested */
1767	/* r8 and cr0.eq are live here */
	/*
	 * r4 is the second argument to kvmhv_save_guest_pmu: 1 when there
	 * is no VPA, otherwise the LPPACA_PMCINUSE byte from the VPA.
	 */
1768	mr	r3, r9
1769	li	r4, 1
1770	beq	21f			/* if no VPA, save PMU stuff anyway */
1771	lbz	r4, LPPACA_PMCINUSE(r8)
177221:	bl	kvmhv_save_guest_pmu
1773	ld	r9, HSTATE_KVM_VCPU(r13)
1774
1775	/* Restore host values of some registers */
1776BEGIN_FTR_SECTION
1777	ld	r5, STACK_SLOT_CIABR(r1)
1778	ld	r6, STACK_SLOT_DAWR0(r1)
1779	ld	r7, STACK_SLOT_DAWRX0(r1)
1780	mtspr	SPRN_CIABR, r5
1781	/*
1782	 * If the DAWR doesn't work, it's ok to write these here as
1783	 * this value should always be zero
1784	*/
1785	mtspr	SPRN_DAWR0, r6
1786	mtspr	SPRN_DAWRX0, r7
1787END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1788BEGIN_FTR_SECTION
1789	ld	r6, STACK_SLOT_DAWR1(r1)
1790	ld	r7, STACK_SLOT_DAWRX1(r1)
1791	mtspr	SPRN_DAWR1, r6
1792	mtspr	SPRN_DAWRX1, r7
1793END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
1794BEGIN_FTR_SECTION
1795	ld	r5, STACK_SLOT_TID(r1)
1796	ld	r6, STACK_SLOT_PSSCR(r1)
1797	ld	r7, STACK_SLOT_PID(r1)
1798	mtspr	SPRN_TIDR, r5
1799	mtspr	SPRN_PSSCR, r6
1800	mtspr	SPRN_PID, r7
1801END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1802
1803#ifdef CONFIG_PPC_RADIX_MMU
1804	/*
1805	 * Are we running hash or radix ?
	 * The result is kept in cr2, hash guests skip ahead to label 2.
1806	 */
1807	ld	r5, VCPU_KVM(r9)
1808	lbz	r0, KVM_RADIX(r5)
1809	cmpwi	cr2, r0, 0
1810	beq	cr2, 2f
1811
1812	/*
1813	 * Radix: do eieio; tlbsync; ptesync sequence in case we
1814	 * interrupted the guest between a tlbie and a ptesync.
1815	 */
1816	eieio
1817	tlbsync
1818	ptesync
1819
1820BEGIN_FTR_SECTION
1821	/* Radix: Handle the case where the guest used an illegal PID */
	/* A guest PID below mmu_base_pid needs no flush */
1822	LOAD_REG_ADDR(r4, mmu_base_pid)
1823	lwz	r3, VCPU_GUEST_PID(r9)
1824	lwz	r5, 0(r4)
1825	cmpw	cr0,r3,r5
1826	blt	2f
1827
1828	/*
1829	 * Illegal PID, the HW might have prefetched and cached in the TLB
1830	 * some translations for the LPID 0 / guest PID combination which
1831	 * Linux doesn't know about, so we need to flush that PID out of
1832	 * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
1833	 * the right context.
1834	*/
1835	li	r0,0
1836	mtspr	SPRN_LPID,r0
1837	isync
1838
1839	/* Then do a congruence class local flush */
	/* One tlbiel per TLB set, r7 advances by 0x1000 each iteration */
1840	ld	r6,VCPU_KVM(r9)
1841	lwz	r0,KVM_TLB_SETS(r6)
1842	mtctr	r0
1843	li	r7,0x400		/* IS field = 0b01 */
1844	ptesync
1845	sldi	r0,r3,32		/* RS has PID */
18461:	PPC_TLBIEL(7,0,2,1,1)		/* RIC=2, PRS=1, R=1 */
1847	addi	r7,r7,0x1000
1848	bdnz	1b
1849	ptesync
1850END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
1851
18522:
1853#endif /* CONFIG_PPC_RADIX_MMU */
1854
1855	/*
1856	 * cp_abort is required if the processor supports local copy-paste
1857	 * to clear the copy buffer that was under control of the guest.
1858	 */
1859BEGIN_FTR_SECTION
1860	PPC_CP_ABORT
1861END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
1861END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
1862	/*
1863	 * POWER7/POWER8 guest -> host partition switch code.
1864	 * We don't have to lock against tlbies but we do
1865	 * have to coordinate the hardware threads.
1866	 * Here STACK_SLOT_TRAP(r1) contains the trap number.
	 *
	 * Returns to the caller of kvmppc_hv_entry with the trap number
	 * in r12 after popping the SFS-byte stack frame.
1867	 */
1868kvmhv_switch_to_host:
1869	/* Secondary threads wait for primary to do partition switch */
1870	ld	r5,HSTATE_KVM_VCORE(r13)
1871	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
	/* Thread id 0 is the primary and takes the path at label 15 */
1872	lbz	r3,HSTATE_PTID(r13)
1873	cmpwi	r3,0
1874	beq	15f
	/* Spin at low thread priority until VCORE_IN_GUEST reads zero */
1875	HMT_LOW
187613:	lbz	r3,VCORE_IN_GUEST(r5)
1877	cmpwi	r3,0
1878	bne	13b
1879	HMT_MEDIUM
1880	b	16f
1881
1882	/* Primary thread waits for all the secondaries to exit guest */
	/*
	 * Spin until the byte in bits 8-15 of VCORE_ENTRY_EXIT equals the
	 * byte in bits 0-7.
	 */
188315:	lwz	r3,VCORE_ENTRY_EXIT(r5)
1884	rlwinm	r0,r3,32-8,0xff
1885	clrldi	r3,r3,56
1886	cmpw	r3,r0
1887	bne	15b
1888	isync
1889
1890	/* Did we actually switch to the guest at all? */
1891	lbz	r6, VCORE_IN_GUEST(r5)
1892	cmpwi	r6, 0
1893	beq	19f
1894
1895	/* Primary thread switches back to host partition */
1896	lwz	r7,KVM_HOST_LPID(r4)
	/* Only without CPU_FTR_ARCH_300: go through LPID_RSVD to change SDR1 */
1897BEGIN_FTR_SECTION
1898	ld	r6,KVM_HOST_SDR1(r4)
1899	li	r8,LPID_RSVD		/* switch to reserved LPID */
1900	mtspr	SPRN_LPID,r8
1901	ptesync
1902	mtspr	SPRN_SDR1,r6		/* switch to host page table */
1903END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
1904	mtspr	SPRN_LPID,r7
1905	isync
1906
1907BEGIN_FTR_SECTION
1908	/* DPDES and VTB are shared between threads */
1909	mfspr	r7, SPRN_DPDES
1910	mfspr	r8, SPRN_VTB
1911	std	r7, VCORE_DPDES(r5)
1912	std	r8, VCORE_VTB(r5)
1913	/* clear DPDES so we don't get guest doorbells in the host */
1914	li	r8, 0
1915	mtspr	SPRN_DPDES, r8
1916END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1917
1918	/* Subtract timebase offset from timebase */
	/*
	 * A zero applied offset means there is nothing to undo.  Otherwise
	 * the applied offset is cleared in the vcore and TBU40 is written
	 * with (timebase - offset).  If the low 24 bits of the timebase
	 * wrapped between the two mftb reads, 0x1000000 is added
	 * (addis 0x100) and TBU40 is written again.
	 */
1919	ld	r8, VCORE_TB_OFFSET_APPL(r5)
1920	cmpdi	r8,0
1921	beq	17f
1922	li	r0, 0
1923	std	r0, VCORE_TB_OFFSET_APPL(r5)
1924	mftb	r6			/* current guest timebase */
1925	subf	r8,r8,r6
1926	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
1927	mftb	r7			/* check if lower 24 bits overflowed */
1928	clrldi	r6,r6,40
1929	clrldi	r7,r7,40
1930	cmpld	r7,r6
1931	bge	17f
1932	addis	r8,r8,0x100		/* if so, increment upper 40 bits */
1933	mtspr	SPRN_TBU40,r8
1934
193517:
1936	/*
1937	 * If this is an HMI, we called kvmppc_realmode_hmi_handler
1938	 * above, which may or may not have already called
1939	 * kvmppc_subcore_exit_guest.  Fortunately, all that
1940	 * kvmppc_subcore_exit_guest does is clear a flag, so calling
1941	 * it again here is benign even if kvmppc_realmode_hmi_handler
1942	 * has already called it.
1943	 */
1944	bl	kvmppc_subcore_exit_guest
1945	nop
	/* Reload the vcore and kvm pointers after the call */
194630:	ld	r5,HSTATE_KVM_VCORE(r13)
1947	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
1948
1949	/* Reset PCR */
	/* PCR is written with PCR_MASK only if the vcore PCR value differs */
1950	ld	r0, VCORE_PCR(r5)
1951	LOAD_REG_IMMEDIATE(r6, PCR_MASK)
1952	cmpld	r0, r6
1953	beq	18f
1954	mtspr	SPRN_PCR, r6
195518:
1956	/* Signal secondary CPUs to continue */
1957	li	r0, 0
1958	stb	r0,VCORE_IN_GUEST(r5)
	/* Program HDEC with 0x7fff0000 */
195919:	lis	r8,0x7fff		/* MAX_INT@h */
1960	mtspr	SPRN_HDEC,r8
1961
	/* All threads: restore the host LPCR (r4 = struct kvm pointer) */
196216:	ld	r8,KVM_HOST_LPCR(r4)
1963	mtspr	SPRN_LPCR,r8
1964	isync
1965
1966#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1967	/* Finish timing, if we have a vcpu */
1968	ld	r4, HSTATE_KVM_VCPU(r13)
1969	cmpdi	r4, 0
1970	li	r3, 0
1971	beq	2f
1972	bl	kvmhv_accumulate_time
19732:
1974#endif
1975	/* Unset guest mode */
1976	li	r0, KVM_GUEST_MODE_NONE
1977	stb	r0, HSTATE_IN_GUEST(r13)
1978
	/* Pop the frame and return through the LR saved in the caller frame */
1979	lwz	r12, STACK_SLOT_TRAP(r1)	/* return trap # in r12 */
1980	ld	r0, SFS+PPC_LR_STKOFF(r1)
1981	addi	r1, r1, SFS
1982	mtlr	r0
1983	blr
1985
/*
 * Flush the link stack by executing a run of bl instructions that each
 * branch to the immediately following instruction.
 * Every bl overwrites LR, so LR is parked in r0 for the duration.
 * Clobbers r0.  Uses no stack.
 */
1986.balign 32
1987.global kvm_flush_link_stack
1988kvm_flush_link_stack:
1989	/* Save LR into r0 */
1990	mflr	r0
1991
1992	/* Flush the link stack. On Power8 it's up to 32 entries in size. */
1993	.rept 32
1994	bl	.+4
1995	.endr
1996
1997	/* And on Power9 it's up to 64. */
	/* These 32 extra branches are only kept when CPU_FTR_ARCH_300 is set */
1998BEGIN_FTR_SECTION
1999	.rept 32
2000	bl	.+4
2001	.endr
2002END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2003
2004	/* Restore LR */
2005	mtlr	r0
2006	blr
2007
/*
 * Handle an external interrupt taken while in the guest.
 * Calls kvmppc_read_intr and dispatches on its return value in r3.
 * Falls through into maybe_reenter_guest for return values <= 0.
 */
2008kvmppc_guest_external:
2009	/* External interrupt, first check for host_ipi. If this is
2010	 * set, we know the host wants us out so let's do it now
2011	 */
2012	bl	kvmppc_read_intr
2013
2014	/*
2015	 * Restore the active volatile registers after returning from
2016	 * a C function.
2017	 */
2018	ld	r9, HSTATE_KVM_VCPU(r13)
2019	li	r12, BOOK3S_INTERRUPT_EXTERNAL
2020
2021	/*
2022	 * kvmppc_read_intr return codes:
2023	 *
2024	 * Exit to host (r3 > 0)
2025	 *   1 An interrupt is pending that needs to be handled by the host
2026	 *     Exit guest and return to host by branching to guest_exit_cont
2027	 *
2028	 *   2 Passthrough that needs completion in the host
2029	 *     Exit guest and return to host by branching to guest_exit_cont
2030	 *     However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
2031	 *     to indicate to the host to complete handling the interrupt
2032	 *
2033	 * Before returning to guest, we check if any CPU is heading out
2034	 * to the host and if so, we head out also. If no CPU is heading
2035	 * out, we go back to the guest for return values <= 0.
2036	 *
2037	 * Return to guest (r3 <= 0)
2038	 *  0 No external interrupt is pending
2039	 * -1 A guest wakeup IPI (which has now been cleared)
2040	 *    In either case, we return to guest to deliver any pending
2041	 *    guest interrupts.
2042	 *
2043	 * -2 A PCI passthrough external interrupt was handled
2044	 *    (interrupt was delivered directly to guest)
2045	 *    Return to guest to deliver any pending guest interrupts.
2046	 */
2047
2048	cmpdi	r3, 1
2049	ble	1f
2050
2051	/* Return code = 2 */
2052	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
2053	stw	r12, VCPU_TRAP(r9)
2054	b	guest_exit_cont
2055
20561:	/* Return code <= 1 */
2057	cmpdi	r3, 0
2058	bgt	guest_exit_cont
2059
2060	/* Return code <= 0 */
	/*
	 * Re-enter the guest through deliver_guest_interrupt (r4 = vcpu)
	 * only while VCORE_ENTRY_EXIT is below 0x100, otherwise exit.
	 * NOTE(review): per the comment above, a value of 0x100 or more
	 * presumably means some thread has started to exit, confirm.
	 */
2061maybe_reenter_guest:
2062	ld	r5, HSTATE_KVM_VCORE(r13)
2063	lwz	r0, VCORE_ENTRY_EXIT(r5)
2064	cmpwi	r0, 0x100
2065	mr	r4, r9
2066	blt	deliver_guest_interrupt
2067	b	guest_exit_cont
2068
2069#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2070/*
2071 * Softpatch interrupt for transactional memory emulation cases
2072 * on POWER9 DD2.2.  This is early in the guest exit path - we
2073 * haven't saved registers or done a treclaim yet.
 *
 * On entry r9 = vcpu, r11 = guest MSR, r13 = PACA.
2074 */
2075kvmppc_tm_emul:
2076	/* Save instruction image in HEIR */
2077	mfspr	r3, SPRN_HEIR
2078	stw	r3, VCPU_HEIR(r9)
2079
2080	/*
2081	 * The cases we want to handle here are those where the guest
2082	 * is in real suspend mode and is trying to transition to
2083	 * transactional mode.
2084	 */
2085	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
2086	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
2087	bne	guest_exit_cont
	/* Extract the two-bit TS field of the guest MSR into r3 */
2088	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62
2089	cmpwi	r3, 1		/* or if not in suspend state */
2090	bne	guest_exit_cont
2091
2092	/* Call C code to do the emulation */
2093	mr	r3, r9
2094	bl	kvmhv_p9_tm_emulation_early
2095	nop
	/* Reload r9 and r12 after the call, a zero result means not handled */
2096	ld	r9, HSTATE_KVM_VCPU(r13)
2097	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
2098	cmpwi	r3, 0
2099	beq	guest_exit_cont		/* continue exiting if not handled */
	/* Handled: reload guest PC and MSR from the vcpu before returning */
2100	ld	r10, VCPU_PC(r9)
2101	ld	r11, VCPU_MSR(r9)
2102	b	fast_interrupt_c_return	/* go back to guest if handled */
2103#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
2104
2105/*
2106 * Check whether an HDSI is an HPTE not found fault or something else.
2107 * If it is an HPTE not found fault that is due to the guest accessing
2108 * a page that they have mapped but which we have paged out, then
2109 * we continue on with the guest exit path.  In all other cases,
2110 * reflect the HDSI to the guest as a DSI.
 *
 * On entry r9 = vcpu, r10 = guest PC, r11 = guest MSR, r13 = PACA.
 * Inside this routine r4 = HDAR and r6 = HDSISR.
2111 */
2112kvmppc_hdsi:
2113	ld	r3, VCPU_KVM(r9)
2114	lbz	r0, KVM_RADIX(r3)
2115	mfspr	r4, SPRN_HDAR
2116	mfspr	r6, SPRN_HDSISR
2117BEGIN_FTR_SECTION
2118	/* Look for DSISR canary. If we find it, retry instruction */
2119	cmpdi	r6, 0x7fff
2120	beq	6f
2121END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
2122	cmpwi	r0, 0
2123	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
2124	/* HPTE not found fault or protection fault? */
2125	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
2126	beq	1f			/* if not, send it to the guest */
2127	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
2128	beq	3f
2129BEGIN_FTR_SECTION
2130	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2131	b	4f
2132END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	/* Clear the low 28 bits of the address before the SLB lookup */
2133	clrrdi	r0, r4, 28
2134	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
	/* r0 = vector to deliver at label 7 when the lookup fails */
2135	li	r0, BOOK3S_INTERRUPT_DATA_SEGMENT
2136	bne	7f			/* if no SLB entry found */
21374:	std	r4, VCPU_FAULT_DAR(r9)
2138	stw	r6, VCPU_FAULT_DSISR(r9)
2139
2140	/* Search the hash table. */
	/* Also passed: r4 = HDAR, r5 = SLB VSID word, r6 = HDSISR */
2141	mr	r3, r9			/* vcpu pointer */
2142	li	r7, 1			/* data fault */
2143	bl	kvmppc_hpte_hv_fault
	/* Reload the registers this path keeps live across the call */
2144	ld	r9, HSTATE_KVM_VCPU(r13)
2145	ld	r10, VCPU_PC(r9)
2146	ld	r11, VCPU_MSR(r9)
2147	li	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
2148	cmpdi	r3, 0			/* retry the instruction */
2149	beq	6f
2150	cmpdi	r3, -1			/* handle in kernel mode */
2151	beq	guest_exit_cont
2152	cmpdi	r3, -2			/* MMIO emulation; need instr word */
2153	beq	2f
2154
2155	/* Synthesize a DSI (or DSegI) for the guest */
	/* Any other return value is used as the DSISR value to deliver */
2156	ld	r4, VCPU_FAULT_DAR(r9)
2157	mr	r6, r3
21581:	li	r0, BOOK3S_INTERRUPT_DATA_STORAGE
2159	mtspr	SPRN_DSISR, r6
	/* Label 7 is also entered with r0 = BOOK3S_INTERRUPT_DATA_SEGMENT */
21607:	mtspr	SPRN_DAR, r4
2161	mtspr	SPRN_SRR0, r10
2162	mtspr	SPRN_SRR1, r11
	/* The guest resumes at the interrupt vector now placed in r10 */
2163	mr	r10, r0
	/* NOTE(review): kvmppc_msr_interrupt presumably sets the new MSR in r11 */
2164	bl	kvmppc_msr_interrupt
	/* Common return to the guest: restore CTR and XER, r4 = vcpu */
2165fast_interrupt_c_return:
21666:	ld	r7, VCPU_CTR(r9)
2167	ld	r8, VCPU_XER(r9)
2168	mtctr	r7
2169	mtxer	r8
2170	mr	r4, r9
2171	b	fast_guest_return
2172
21733:	ld	r5, VCPU_KVM(r9)	/* not relocated, use VRMA */
2174	ld	r5, KVM_VRMA_SLB_V(r5)
2175	b	4b
2176
2177	/* If this is for emulated MMIO, load the instruction word */
21782:	li	r8, KVM_INST_FETCH_FAILED	/* In case lwz faults */
2179
2180	/* Set guest mode to 'jump over instruction' so if lwz faults
2181	 * we'll just continue at the next IP. */
2182	li	r0, KVM_GUEST_MODE_SKIP
2183	stb	r0, HSTATE_IN_GUEST(r13)
2184
2185	/* Do the access with MSR:DR enabled */
	/* r3 keeps the original MSR so that it can be restored after the load */
2186	mfmsr	r3
2187	ori	r4, r3, MSR_DR		/* Enable paging for data */
2188	mtmsrd	r4
2189	lwz	r8, 0(r10)
2190	mtmsrd	r3
2191
2192	/* Store the result */
2193	stw	r8, VCPU_LAST_INST(r9)
2194
2195	/* Unset guest mode. */
2196	li	r0, KVM_GUEST_MODE_HOST_HV
2197	stb	r0, HSTATE_IN_GUEST(r13)
2198	b	guest_exit_cont
2199
	/* Radix HDSI: record HDAR and HDSISR, then share the ASDR save below */
2200.Lradix_hdsi:
2201	std	r4, VCPU_FAULT_DAR(r9)
2202	stw	r6, VCPU_FAULT_DSISR(r9)
	/* Radix HDSI and HISI both store ASDR in VCPU_FAULT_GPA and exit */
2203.Lradix_hisi:
2204	mfspr	r5, SPRN_ASDR
2205	std	r5, VCPU_FAULT_GPA(r9)
2206	b	guest_exit_cont
2207
2208/*
2209 * Similarly for an HISI, reflect it to the guest as an ISI unless
2210 * it is an HPTE not found fault for a page that we have paged out.
 *
 * On entry r9 = vcpu, r10 = guest PC, r11 = guest MSR, r13 = PACA.
2211 */
2212kvmppc_hisi:
2213	ld	r3, VCPU_KVM(r9)
2214	lbz	r0, KVM_RADIX(r3)
2215	cmpwi	r0, 0
2216	bne	.Lradix_hisi		/* for radix, just save ASDR */
	/* Without the SRR1_ISI_NOPT bit the fault goes straight to the guest */
2217	andis.	r0, r11, SRR1_ISI_NOPT@h
2218	beq	1f
2219	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
2220	beq	3f
2221BEGIN_FTR_SECTION
2222	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
2223	b	4f
2224END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	/* Clear the low 28 bits of the PC before the SLB lookup */
2225	clrrdi	r0, r10, 28
2226	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
	/* r0 = vector to deliver at label 7 when the lookup fails */
2227	li	r0, BOOK3S_INTERRUPT_INST_SEGMENT
2228	bne	7f			/* if no SLB entry found */
22294:
2230	/* Search the hash table. */
	/* The faulting address is the guest PC, the status is the guest MSR */
2231	mr	r3, r9			/* vcpu pointer */
2232	mr	r4, r10
2233	mr	r6, r11
2234	li	r7, 0			/* instruction fault */
2235	bl	kvmppc_hpte_hv_fault
	/* Reload the registers this path keeps live across the call */
2236	ld	r9, HSTATE_KVM_VCPU(r13)
2237	ld	r10, VCPU_PC(r9)
2238	ld	r11, VCPU_MSR(r9)
2239	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
2240	cmpdi	r3, 0			/* retry the instruction */
2241	beq	fast_interrupt_c_return
2242	cmpdi	r3, -1			/* handle in kernel mode */
2243	beq	guest_exit_cont
2244
2245	/* Synthesize an ISI (or ISegI) for the guest */
	/* Any other return value replaces r11 and is written to SRR1 below */
2246	mr	r11, r3
22471:	li	r0, BOOK3S_INTERRUPT_INST_STORAGE
	/* Label 7 is also entered with r0 = BOOK3S_INTERRUPT_INST_SEGMENT */
22487:	mtspr	SPRN_SRR0, r10
2249	mtspr	SPRN_SRR1, r11
	/* The guest resumes at the interrupt vector now placed in r10 */
2250	mr	r10, r0
2251	bl	kvmppc_msr_interrupt
2252	b	fast_interrupt_c_return
2253
22543:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
2255	ld	r5, KVM_VRMA_SLB_V(r6)
2256	b	4b
2257
2258/*
2259 * Try to handle an hcall in real mode.
2260 * Returns to the guest if we handle it, or continues on up to
2261 * the kernel if we can't (i.e. if we don't have a handler for
2262 * it, or if the handler returns H_TOO_HARD).
2263 *
2264 * r5 - r8 contain hcall args,
2265 * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
2266 */
2267hcall_try_real_mode:
	/* r3 = hcall number, taken from the saved guest r3 */
2268	ld	r3,VCPU_GPR(R3)(r9)
2269	andi.	r0,r11,MSR_PR
2270	/* sc 1 from userspace - reflect to guest syscall */
2271	bne	sc_1_fast_return
2272	/* sc 1 from nested guest - give it to L1 to handle */
2273	ld	r0, VCPU_NESTED(r9)
2274	cmpdi	r0, 0
2275	bne	guest_exit_cont
	/*
	 * With the two low bits cleared, r3 doubles as the byte offset of
	 * the 32-bit entry in hcall_real_table.  Numbers at or beyond the
	 * end of the table are left to the host.
	 */
2276	clrrdi	r3,r3,2
2277	cmpldi	r3,hcall_real_table_end - hcall_real_table
2278	bge	guest_exit_cont
2279	/* See if this hcall is enabled for in-kernel handling */
	/* Test bit ((r3 / 4) & 0x3f) of doubleword ((r3 / 4) >> 6) */
2280	ld	r4, VCPU_KVM(r9)
2281	srdi	r0, r3, 8	/* r0 = (r3 / 4) >> 6 */
2282	sldi	r0, r0, 3	/* index into kvm->arch.enabled_hcalls[] */
2283	add	r4, r4, r0
2284	ld	r0, KVM_ENABLED_HCALLS(r4)
2285	rlwinm	r4, r3, 32-2, 0x3f	/* r4 = (r3 / 4) & 0x3f */
2286	srd	r0, r0, r4
2287	andi.	r0, r0, 1
2288	beq	guest_exit_cont
2289	/* Get pointer to handler, if any, and call it */
	/*
	 * Table entries are signed 32-bit offsets from hcall_real_table.
	 * A zero entry means there is no real-mode handler.
	 */
2290	LOAD_REG_ADDR(r4, hcall_real_table)
2291	lwax	r3,r3,r4
2292	cmpwi	r3,0
2293	beq	guest_exit_cont
2294	add	r12,r3,r4
2295	mtctr	r12
2296	mr	r3,r9		/* get vcpu pointer */
2297	ld	r4,VCPU_GPR(R4)(r9)
2298	bctrl
2299	cmpdi	r3,H_TOO_HARD
2300	beq	hcall_real_fallback
	/* Handled: store the result as guest r3 and return with r4 = vcpu */
2301	ld	r4,HSTATE_KVM_VCPU(r13)
2302	std	r3,VCPU_GPR(R3)(r4)
2303	ld	r10,VCPU_PC(r4)
2304	ld	r11,VCPU_MSR(r4)
2305	b	fast_guest_return
2306
/*
 * Reflect an "sc 1" executed with MSR_PR set back into the guest as an
 * ordinary system call interrupt.
 * In: r9 = vcpu, r10 = guest pc, r11 = guest msr.
 * SRR0/SRR1 receive the old pc/msr, r10 becomes the syscall vector and
 * r11 the new MSR computed by kvmppc_msr_interrupt; r4 = vcpu for
 * fast_guest_return.
 */
2307sc_1_fast_return:
2308	mtspr	SPRN_SRR0,r10
2309	mtspr	SPRN_SRR1,r11
2310	li	r10, BOOK3S_INTERRUPT_SYSCALL
2311	bl	kvmppc_msr_interrupt
2312	mr	r4,r9
2313	b	fast_guest_return
2314
2315	/* We've attempted a real mode hcall, but the handler has punted it
2316	 * back (H_TOO_HARD).  We need to restore some clobbered volatiles
2317	 * (r12 = trap, r9 = vcpu) before resuming the pass-it-to-qemu path */
2318hcall_real_fallback:
2319	li	r12,BOOK3S_INTERRUPT_SYSCALL
2320	ld	r9, HSTATE_KVM_VCPU(r13)
2321
2322	b	guest_exit_cont
2323
/*
 * Table of real-mode hcall handlers, used by hcall_try_real_mode.
 * Each entry is a 32-bit offset of the handler relative to
 * hcall_real_table itself, or 0 if there is no real-mode handler.
 * The byte offset of an entry (shown in the comments) is the value the
 * guest passes in r3 with its low two bits cleared.
 */
2324	.globl	hcall_real_table
2325hcall_real_table:
2326	.long	0		/* 0 - unused */
2327	.long	DOTSYM(kvmppc_h_remove) - hcall_real_table	/* 0x04 */
2328	.long	DOTSYM(kvmppc_h_enter) - hcall_real_table	/* 0x08 */
2329	.long	DOTSYM(kvmppc_h_read) - hcall_real_table	/* 0x0c */
2330	.long	DOTSYM(kvmppc_h_clear_mod) - hcall_real_table	/* 0x10 */
2331	.long	DOTSYM(kvmppc_h_clear_ref) - hcall_real_table	/* 0x14 */
2332	.long	DOTSYM(kvmppc_h_protect) - hcall_real_table	/* 0x18 */
2333#ifdef CONFIG_SPAPR_TCE_IOMMU
2334	.long	DOTSYM(kvmppc_h_get_tce) - hcall_real_table	/* 0x1c */
2335	.long	DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table	/* 0x20 */
2336#else
2337	.long	0		/* 0x1c */
2338	.long	0		/* 0x20 */
2339#endif
2340	.long	0		/* 0x24 - H_SET_SPRG0 */
2341	.long	DOTSYM(kvmppc_h_set_dabr) - hcall_real_table	/* 0x28 */
2342	.long	DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table	/* 0x2c */
2343	.long	0		/* 0x30 */
2344	.long	0		/* 0x34 */
2345	.long	0		/* 0x38 */
2346	.long	0		/* 0x3c */
2347	.long	0		/* 0x40 */
2348	.long	0		/* 0x44 */
2349	.long	0		/* 0x48 */
2350	.long	0		/* 0x4c */
2351	.long	0		/* 0x50 */
2352	.long	0		/* 0x54 */
2353	.long	0		/* 0x58 */
2354	.long	0		/* 0x5c */
2355	.long	0		/* 0x60 */
2356#ifdef CONFIG_KVM_XICS
2357	.long	DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table	/* 0x64 */
2358	.long	DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table	/* 0x68 */
2359	.long	DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table	/* 0x6c */
2360	.long	DOTSYM(kvmppc_rm_h_ipoll) - hcall_real_table	/* 0x70 */
2361	.long	DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table	/* 0x74 */
2362#else
2363	.long	0		/* 0x64 - H_EOI */
2364	.long	0		/* 0x68 - H_CPPR */
2365	.long	0		/* 0x6c - H_IPI */
2366	.long	0		/* 0x70 - H_IPOLL */
2367	.long	0		/* 0x74 - H_XIRR */
2368#endif
2369	.long	0		/* 0x78 */
2370	.long	0		/* 0x7c */
2371	.long	0		/* 0x80 */
2372	.long	0		/* 0x84 */
2373	.long	0		/* 0x88 */
2374	.long	0		/* 0x8c */
2375	.long	0		/* 0x90 */
2376	.long	0		/* 0x94 */
2377	.long	0		/* 0x98 */
2378	.long	0		/* 0x9c */
2379	.long	0		/* 0xa0 */
2380	.long	0		/* 0xa4 */
2381	.long	0		/* 0xa8 */
2382	.long	0		/* 0xac */
2383	.long	0		/* 0xb0 */
2384	.long	0		/* 0xb4 */
2385	.long	0		/* 0xb8 */
2386	.long	0		/* 0xbc */
2387	.long	0		/* 0xc0 */
2388	.long	0		/* 0xc4 */
2389	.long	0		/* 0xc8 */
2390	.long	0		/* 0xcc */
2391	.long	0		/* 0xd0 */
2392	.long	0		/* 0xd4 */
2393	.long	0		/* 0xd8 */
2394	.long	0		/* 0xdc */
2395	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table	/* 0xe0 */
2396	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table	/* 0xe4 */
2397	.long	0		/* 0xe8 */
2398	.long	0		/* 0xec */
2399	.long	0		/* 0xf0 */
2400	.long	0		/* 0xf4 */
2401	.long	0		/* 0xf8 */
2402	.long	0		/* 0xfc */
2403	.long	0		/* 0x100 */
2404	.long	0		/* 0x104 */
2405	.long	0		/* 0x108 */
2406	.long	0		/* 0x10c */
2407	.long	0		/* 0x110 */
2408	.long	0		/* 0x114 */
2409	.long	0		/* 0x118 */
2410	.long	0		/* 0x11c */
2411	.long	0		/* 0x120 */
2412	.long	DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table	/* 0x124 */
2413	.long	0		/* 0x128 */
2414	.long	0		/* 0x12c */
2415	.long	0		/* 0x130 */
2416	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table	/* 0x134 */
2417#ifdef CONFIG_SPAPR_TCE_IOMMU
2418	.long	DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table	/* 0x138 */
2419	.long	DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table	/* 0x13c */
2420#else
2421	.long	0		/* 0x138 */
2422	.long	0		/* 0x13c */
2423#endif
2424	.long	0		/* 0x140 */
2425	.long	0		/* 0x144 */
2426	.long	0		/* 0x148 */
2427	.long	0		/* 0x14c */
2428	.long	0		/* 0x150 */
2429	.long	0		/* 0x154 */
2430	.long	0		/* 0x158 */
2431	.long	0		/* 0x15c */
2432	.long	0		/* 0x160 */
2433	.long	0		/* 0x164 */
2434	.long	0		/* 0x168 */
2435	.long	0		/* 0x16c */
2436	.long	0		/* 0x170 */
2437	.long	0		/* 0x174 */
2438	.long	0		/* 0x178 */
2439	.long	0		/* 0x17c */
2440	.long	0		/* 0x180 */
2441	.long	0		/* 0x184 */
2442	.long	0		/* 0x188 */
2443	.long	0		/* 0x18c */
2444	.long	0		/* 0x190 */
2445	.long	0		/* 0x194 */
2446	.long	0		/* 0x198 */
2447	.long	0		/* 0x19c */
2448	.long	0		/* 0x1a0 */
2449	.long	0		/* 0x1a4 */
2450	.long	0		/* 0x1a8 */
2451	.long	0		/* 0x1ac */
2452	.long	0		/* 0x1b0 */
2453	.long	0		/* 0x1b4 */
2454	.long	0		/* 0x1b8 */
2455	.long	0		/* 0x1bc */
2456	.long	0		/* 0x1c0 */
2457	.long	0		/* 0x1c4 */
2458	.long	0		/* 0x1c8 */
2459	.long	0		/* 0x1cc */
2460	.long	0		/* 0x1d0 */
2461	.long	0		/* 0x1d4 */
2462	.long	0		/* 0x1d8 */
2463	.long	0		/* 0x1dc */
2464	.long	0		/* 0x1e0 */
2465	.long	0		/* 0x1e4 */
2466	.long	0		/* 0x1e8 */
2467	.long	0		/* 0x1ec */
2468	.long	0		/* 0x1f0 */
2469	.long	0		/* 0x1f4 */
2470	.long	0		/* 0x1f8 */
2471	.long	0		/* 0x1fc */
2472	.long	0		/* 0x200 */
2473	.long	0		/* 0x204 */
2474	.long	0		/* 0x208 */
2475	.long	0		/* 0x20c */
2476	.long	0		/* 0x210 */
2477	.long	0		/* 0x214 */
2478	.long	0		/* 0x218 */
2479	.long	0		/* 0x21c */
2480	.long	0		/* 0x220 */
2481	.long	0		/* 0x224 */
2482	.long	0		/* 0x228 */
2483	.long	0		/* 0x22c */
2484	.long	0		/* 0x230 */
2485	.long	0		/* 0x234 */
2486	.long	0		/* 0x238 */
2487	.long	0		/* 0x23c */
2488	.long	0		/* 0x240 */
2489	.long	0		/* 0x244 */
2490	.long	0		/* 0x248 */
2491	.long	0		/* 0x24c */
2492	.long	0		/* 0x250 */
2493	.long	0		/* 0x254 */
2494	.long	0		/* 0x258 */
2495	.long	0		/* 0x25c */
2496	.long	0		/* 0x260 */
2497	.long	0		/* 0x264 */
2498	.long	0		/* 0x268 */
2499	.long	0		/* 0x26c */
2500	.long	0		/* 0x270 */
2501	.long	0		/* 0x274 */
2502	.long	0		/* 0x278 */
2503	.long	0		/* 0x27c */
2504	.long	0		/* 0x280 */
2505	.long	0		/* 0x284 */
2506	.long	0		/* 0x288 */
2507	.long	0		/* 0x28c */
2508	.long	0		/* 0x290 */
2509	.long	0		/* 0x294 */
2510	.long	0		/* 0x298 */
2511	.long	0		/* 0x29c */
2512	.long	0		/* 0x2a0 */
2513	.long	0		/* 0x2a4 */
2514	.long	0		/* 0x2a8 */
2515	.long	0		/* 0x2ac */
2516	.long	0		/* 0x2b0 */
2517	.long	0		/* 0x2b4 */
2518	.long	0		/* 0x2b8 */
2519	.long	0		/* 0x2bc */
2520	.long	0		/* 0x2c0 */
2521	.long	0		/* 0x2c4 */
2522	.long	0		/* 0x2c8 */
2523	.long	0		/* 0x2cc */
2524	.long	0		/* 0x2d0 */
2525	.long	0		/* 0x2d4 */
2526	.long	0		/* 0x2d8 */
2527	.long	0		/* 0x2dc */
2528	.long	0		/* 0x2e0 */
2529	.long	0		/* 0x2e4 */
2530	.long	0		/* 0x2e8 */
2531	.long	0		/* 0x2ec */
2532	.long	0		/* 0x2f0 */
2533	.long	0		/* 0x2f4 */
2534	.long	0		/* 0x2f8 */
2535#ifdef CONFIG_KVM_XICS
2536	.long	DOTSYM(kvmppc_rm_h_xirr_x) - hcall_real_table	/* 0x2fc */
2537#else
2538	.long	0		/* 0x2fc - H_XIRR_X*/
2539#endif
2540	.long	DOTSYM(kvmppc_h_random) - hcall_real_table	/* 0x300 */
2541	.globl	hcall_real_table_end
2542hcall_real_table_end:
2543
/*
 * kvmppc_h_set_xdabr / kvmppc_h_set_dabr
 * In:  r3 = vcpu pointer, r4 = DABR value,
 *      r5 = DABRX value (kvmppc_h_set_xdabr only; kvmppc_h_set_dabr
 *      forces DABRX_USER | DABRX_KERNEL).
 * Out: r3 = 0 on success, H_PARAMETER for a bad DABRX value, or
 *      H_HARDWARE when CPU_FTR_ARCH_207S is set and dawr_force_enable
 *      is zero.
 * kvmppc_h_set_xdabr validates r5 and then joins kvmppc_h_set_dabr at
 * the first local label 3.
 */
2544_GLOBAL(kvmppc_h_set_xdabr)
2545EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
	/* At least one of USER/KERNEL must be set ... */
2546	andi.	r0, r5, DABRX_USER | DABRX_KERNEL
2547	beq	6f
	/* ... and no bits other than USER/KERNEL/BTI may be set */
2548	li	r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
2549	andc.	r0, r5, r0
2550	beq	3f
25516:	li	r3, H_PARAMETER
2552	blr
2553
2554_GLOBAL(kvmppc_h_set_dabr)
2555EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
2556	li	r5, DABRX_USER | DABRX_KERNEL
25573:
	/* With CPU_FTR_ARCH_207S, take the DAWR emulation path at 2: */
2558BEGIN_FTR_SECTION
2559	b	2f
2560END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2561	std	r4,VCPU_DABR(r3)
2562	stw	r5, VCPU_DABRX(r3)
2563	mtspr	SPRN_DABRX, r5
2564	/* Work around P7 bug where DABR can get corrupted on mtspr */
	/* (rewrite DABR until it reads back as the value written) */
25651:	mtspr	SPRN_DABR,r4
2566	mfspr	r5, SPRN_DABR
2567	cmpd	r4, r5
2568	bne	1b
2569	isync
2570	li	r3,0
2571	blr
2572
25732:
	/* Refuse with H_HARDWARE unless dawr_force_enable is non-zero */
2574	LOAD_REG_ADDR(r11, dawr_force_enable)
2575	lbz	r11, 0(r11)
2576	cmpdi	r11, 0
2577	bne	3f
2578	li	r3, H_HARDWARE
2579	blr
25803:
2581	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
	/* Fold bits of the DABR value (r4) into the DAWRX image in r5 */
2582	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
2583	rlwimi	r5, r4, 2, DAWRX_WT
2584	clrrdi	r4, r4, 3		/* clear low 3 bits of the address */
2585	std	r4, VCPU_DAWR0(r3)
2586	std	r5, VCPU_DAWRX0(r3)
2587	/*
2588	 * If came in through the real mode hcall handler then it is necessary
2589	 * to write the registers since the return path won't. Otherwise it is
2590	 * sufficient to store them in the vcpu struct as they will be loaded
2591	 * next time the vcpu is run.
2592	 */
2593	mfmsr	r6
2594	andi.	r6, r6, MSR_DR		/* in real mode? */
2595	bne	4f
2596	mtspr	SPRN_DAWR0, r4
2597	mtspr	SPRN_DAWRX0, r5
25984:	li	r3, 0
2599	blr
2600
/*
 * Real-mode handler for the cede hcall.
 * Marks the vcpu as ceded; if it was already prodded, returns H_SUCCESS
 * immediately via kvm_cede_prodded.  Otherwise it records this thread
 * in vcore->napping_threads, saves non-volatile GPR, FP and (if
 * configured) TM state into the vcpu, arranges for DEC to fire no later
 * than HDEC, and falls through into kvm_do_nap with
 * r3 = LPCR_PECEDP@h.
 * Exits to kvm_cede_exit when setting our bit would make
 * napping_threads equal to the low byte of entry_exit_map, and to local
 * label 33 when another thread is already exiting.
 */
2601_GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
2602	ori	r11,r11,MSR_EE
2603	std	r11,VCPU_MSR(r3)
2604	li	r0,1
2605	stb	r0,VCPU_CEDED(r3)
2606	sync			/* order setting ceded vs. testing prodded */
2607	lbz	r5,VCPU_PRODDED(r3)
2608	cmpwi	r5,0
2609	bne	kvm_cede_prodded
2610	li	r12,0		/* set trap to 0 to say hcall is handled */
2611	stw	r12,VCPU_TRAP(r3)
2612	li	r0,H_SUCCESS
2613	std	r0,VCPU_GPR(R3)(r3)
2614
2615	/*
2616	 * Set our bit in the bitmask of napping threads unless all the
2617	 * other threads are already napping, in which case we send this
2618	 * up to the host.
2619	 */
2620	ld	r5,HSTATE_KVM_VCORE(r13)
2621	lbz	r6,HSTATE_PTID(r13)
2622	lwz	r8,VCORE_ENTRY_EXIT(r5)
2623	clrldi	r8,r8,56	/* keep only the low 8 bits */
2624	li	r0,1
2625	sld	r0,r0,r6	/* r0 = 1 << ptid */
2626	addi	r6,r5,VCORE_NAPPING_THREADS
	/* atomic update; abandoned (no store) if we would be the last */
262731:	lwarx	r4,0,r6
2628	or	r4,r4,r0
2629	cmpw	r4,r8
2630	beq	kvm_cede_exit
2631	stwcx.	r4,0,r6
2632	bne	31b
2633	/* order napping_threads update vs testing entry_exit_map */
2634	isync
2635	li	r0,NAPPING_CEDE
2636	stb	r0,HSTATE_NAPPING(r13)
2637	lwz	r7,VCORE_ENTRY_EXIT(r5)
2638	cmpwi	r7,0x100
2639	bge	33f		/* another thread already exiting */
2640
2641/*
2642 * Although not specifically required by the architecture, POWER7
2643 * preserves the following registers in nap mode, even if an SMT mode
2644 * switch occurs: SLB entries, PURR, SPURR, AMOR, UAMOR, AMR, SPRG0-3,
2645 * DAR, DSISR, DABR, DABRX, DSCR, PMCx, MMCRx, SIAR, SDAR.
2646 */
2647	/* Save non-volatile GPRs */
2648	std	r14, VCPU_GPR(R14)(r3)
2649	std	r15, VCPU_GPR(R15)(r3)
2650	std	r16, VCPU_GPR(R16)(r3)
2651	std	r17, VCPU_GPR(R17)(r3)
2652	std	r18, VCPU_GPR(R18)(r3)
2653	std	r19, VCPU_GPR(R19)(r3)
2654	std	r20, VCPU_GPR(R20)(r3)
2655	std	r21, VCPU_GPR(R21)(r3)
2656	std	r22, VCPU_GPR(R22)(r3)
2657	std	r23, VCPU_GPR(R23)(r3)
2658	std	r24, VCPU_GPR(R24)(r3)
2659	std	r25, VCPU_GPR(R25)(r3)
2660	std	r26, VCPU_GPR(R26)(r3)
2661	std	r27, VCPU_GPR(R27)(r3)
2662	std	r28, VCPU_GPR(R28)(r3)
2663	std	r29, VCPU_GPR(R29)(r3)
2664	std	r30, VCPU_GPR(R30)(r3)
2665	std	r31, VCPU_GPR(R31)(r3)
2666
2667	/* save FP state */
2668	bl	kvmppc_save_fp
2669
2670#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2671/*
2672 * Branch around the call if both CPU_FTR_TM and
2673 * CPU_FTR_P9_TM_HV_ASSIST are off.
2674 */
2675BEGIN_FTR_SECTION
2676	b	91f
2677END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2678	/*
2679	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2680	 */
2681	ld	r3, HSTATE_KVM_VCPU(r13)
2682	ld      r4, VCPU_MSR(r3)
2683	li	r5, 0			/* don't preserve non-vol regs */
2684	bl	kvmppc_save_tm_hv
2685	nop
268691:
2687#endif
2688
2689	/*
2690	 * Set DEC to the smaller of DEC and HDEC, so that we wake
2691	 * no later than the end of our timeslice (HDEC interrupts
2692	 * don't wake us from nap).
2693	 */
2694	mfspr	r3, SPRN_DEC
2695	mfspr	r4, SPRN_HDEC
2696	mftb	r5
2697BEGIN_FTR_SECTION
2698	/* On P9 check whether the guest has large decrementer mode enabled */
2699	ld	r6, HSTATE_KVM_VCORE(r13)
2700	ld	r6, VCORE_LPCR(r6)
2701	andis.	r6, r6, LPCR_LD@h
2702	bne	68f
2703END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
	/* not large-decrementer mode: DEC is a 32-bit signed value */
2704	extsw	r3, r3
270568:	EXTEND_HDEC(r4)
2706	cmpd	r3, r4
2707	ble	67f
2708	mtspr	SPRN_DEC, r4
270967:
2710	/* save expiry time of guest decrementer */
2711	add	r3, r3, r5		/* r3 = DEC + timebase read above */
2712	ld	r4, HSTATE_KVM_VCPU(r13)
2713	ld	r5, HSTATE_KVM_VCORE(r13)
2714	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2715	subf	r3, r6, r3	/* convert to host TB value */
2716	std	r3, VCPU_DEC_EXPIRES(r4)
2717
2718#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2719	ld	r4, HSTATE_KVM_VCPU(r13)
2720	addi	r3, r4, VCPU_TB_CEDE
2721	bl	kvmhv_accumulate_time
2722#endif
2723
	/* r3 is consumed by the rlwimi in kvm_do_nap below */
2724	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
2725
2726	/* Go back to host stack */
2727	ld	r1, HSTATE_HOST_R1(r13)
2728
2729	/*
2730	 * Take a nap until a decrementer or external or doorbell interrupt
2731	 * occurs, with PECE1 and PECE0 set in LPCR.
2732	 * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
2733	 * Also clear the runlatch bit before napping.
	 *
	 * In: r3 supplies the LPCR_PECEDP bit that is copied into the LPCR
	 * image on CPU_FTR_ARCH_207S.  Falls through into kvm_nap_sequence
	 * with the desired LPCR value in r5.
2734	 */
2735kvm_do_nap:
2736	mfspr	r0, SPRN_CTRLF
2737	clrrdi	r0, r0, 1		/* clear the low-order bit */
2738	mtspr	SPRN_CTRLT, r0
2739
2740	li	r0,1
2741	stb	r0,HSTATE_HWTHREAD_REQ(r13)
2742	mfspr	r5,SPRN_LPCR
2743	ori	r5,r5,LPCR_PECE0 | LPCR_PECE1
2744BEGIN_FTR_SECTION
2745	ori	r5, r5, LPCR_PECEDH
2746	rlwimi	r5, r3, 0, LPCR_PECEDP	/* PECEDP bit taken from r3 */
2747END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
2748
/*
 * Write LPCR and enter the idle state, then dispatch on wakeup.
 * In: r5 = desired LPCR value.
 * r3 is loaded with the argument for the idle routine (a PSSCR value on
 * CPU_FTR_ARCH_300, PNV_THREAD_NAP otherwise).  After the idle routine
 * returns, its r3 is copied to SRR1 and control is dispatched on
 * HSTATE_NAPPING; a value matching none of the checks hits the trap.
 */
2749kvm_nap_sequence:		/* desired LPCR value in r5 */
2750BEGIN_FTR_SECTION
2751	/*
2752	 * PSSCR bits:	exit criterion = 1 (wakeup based on LPCR at sreset)
2753	 *		enable state loss = 1 (allow SMT mode switch)
2754	 *		requested level = 0 (just stop dispatching)
2755	 */
2756	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
2757	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
2758	li	r4, LPCR_PECE_HVEE@higher
2759	sldi	r4, r4, 32
2760	or	r5, r5, r4
2761FTR_SECTION_ELSE
2762	li	r3, PNV_THREAD_NAP
2763ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
2764	mtspr	SPRN_LPCR,r5
2765	isync
2766
2767BEGIN_FTR_SECTION
2768	bl	isa300_idle_stop_mayloss
2769FTR_SECTION_ELSE
2770	bl	isa206_idle_insn_mayloss
2771ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
2772
	/* Awake again: set the low-order CTRL bit cleared in kvm_do_nap */
2773	mfspr	r0, SPRN_CTRLF
2774	ori	r0, r0, 1
2775	mtspr	SPRN_CTRLT, r0
2776
	/* kvmppc_check_wake_reason reads the wake reason from SRR1 */
2777	mtspr	SPRN_SRR1, r3
2778
2779	li	r0, 0
2780	stb	r0, PACA_FTRACE_ENABLED(r13)
2781
2782	li	r0, KVM_HWTHREAD_IN_KVM
2783	stb	r0, HSTATE_HWTHREAD_STATE(r13)
2784
2785	lbz	r0, HSTATE_NAPPING(r13)
2786	cmpwi	r0, NAPPING_CEDE
2787	beq	kvm_end_cede
2788	cmpwi	r0, NAPPING_NOVCPU
2789	beq	kvm_novcpu_wakeup
2790BEGIN_FTR_SECTION
2791	cmpwi	r0, NAPPING_UNSPLIT
2792	beq	kvm_unsplit_wakeup
2793END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
2794	twi	31,0,0 /* Nap state must not be zero */
2795
	/*
	 * Reached from kvmppc_h_cede (r3 = vcpu) when another thread is
	 * already exiting: skip the nap and join kvm_end_cede at 34 with
	 * r4 = vcpu, r3 = 0 and r12 = 0.
	 */
279633:	mr	r4, r3
2797	li	r3, 0
2798	li	r12, 0
2799	b	34f
2800
/*
 * Wakeup path after a cede nap (HSTATE_NAPPING == NAPPING_CEDE).
 * Restores TM (if configured), FP and non-volatile GPR state from the
 * vcpu, reprograms the guest decrementer, checks the wake reason,
 * clears this thread's bit in vcore->napping_threads and then either
 * exits to the host (r3 > 0) or tries to re-enter the guest.
 * Local label 34 is also entered from the "another thread already
 * exiting" path with r3 = 0, r4 = vcpu, r12 = 0.
 */
2801kvm_end_cede:
2802	/* Woken by external or decrementer interrupt */
2803
2804	/* get vcpu pointer */
2805	ld	r4, HSTATE_KVM_VCPU(r13)
2806
2807#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
2808	addi	r3, r4, VCPU_TB_RMINTR
2809	bl	kvmhv_accumulate_time
2810#endif
2811
2812#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2813/*
2814 * Branch around the call if both CPU_FTR_TM and
2815 * CPU_FTR_P9_TM_HV_ASSIST are off.
2816 */
2817BEGIN_FTR_SECTION
2818	b	91f
2819END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
2820	/*
2821	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
2822	 */
2823	mr      r3, r4
2824	ld      r4, VCPU_MSR(r3)
2825	li	r5, 0			/* don't preserve non-vol regs */
2826	bl	kvmppc_restore_tm_hv
2827	nop
2828	ld	r4, HSTATE_KVM_VCPU(r13)
282991:
2830#endif
2831
2832	/* load up FP state */
2833	bl	kvmppc_load_fp
2834
2835	/* Restore guest decrementer */
2836	ld	r3, VCPU_DEC_EXPIRES(r4)
2837	ld	r5, HSTATE_KVM_VCORE(r13)
2838	ld	r6, VCORE_TB_OFFSET_APPL(r5)
2839	add	r3, r3, r6	/* convert host TB to guest TB value */
2840	mftb	r7
2841	subf	r3, r7, r3	/* ticks remaining = expiry - now */
2842	mtspr	SPRN_DEC, r3
2843
2844	/* Load NV GPRS */
2845	ld	r14, VCPU_GPR(R14)(r4)
2846	ld	r15, VCPU_GPR(R15)(r4)
2847	ld	r16, VCPU_GPR(R16)(r4)
2848	ld	r17, VCPU_GPR(R17)(r4)
2849	ld	r18, VCPU_GPR(R18)(r4)
2850	ld	r19, VCPU_GPR(R19)(r4)
2851	ld	r20, VCPU_GPR(R20)(r4)
2852	ld	r21, VCPU_GPR(R21)(r4)
2853	ld	r22, VCPU_GPR(R22)(r4)
2854	ld	r23, VCPU_GPR(R23)(r4)
2855	ld	r24, VCPU_GPR(R24)(r4)
2856	ld	r25, VCPU_GPR(R25)(r4)
2857	ld	r26, VCPU_GPR(R26)(r4)
2858	ld	r27, VCPU_GPR(R27)(r4)
2859	ld	r28, VCPU_GPR(R28)(r4)
2860	ld	r29, VCPU_GPR(R29)(r4)
2861	ld	r30, VCPU_GPR(R30)(r4)
2862	ld	r31, VCPU_GPR(R31)(r4)
2863
2864	/* Check the wake reason in SRR1 to see why we got here */
2865	bl	kvmppc_check_wake_reason
2866
2867	/*
2868	 * Restore volatile registers since we could have called a
2869	 * C routine in kvmppc_check_wake_reason
2870	 *	r4 = VCPU
2871	 * r3 tells us whether we need to return to host or not
2872	 * WARNING: it gets checked further down:
2873	 * should not modify r3 until this check is done.
2874	 */
2875	ld	r4, HSTATE_KVM_VCPU(r13)
2876
2877	/* clear our bit in vcore->napping_threads */
287834:	ld	r5,HSTATE_KVM_VCORE(r13)
2879	lbz	r7,HSTATE_PTID(r13)
2880	li	r0,1
2881	sld	r0,r0,r7	/* r0 = 1 << ptid */
2882	addi	r6,r5,VCORE_NAPPING_THREADS
288332:	lwarx	r7,0,r6
2884	andc	r7,r7,r0
2885	stwcx.	r7,0,r6
2886	bne	32b
2887	li	r0,0
2888	stb	r0,HSTATE_NAPPING(r13)
2889
2890	/* See if the wake reason saved in r3 means we need to exit */
2891	stw	r12, VCPU_TRAP(r4)
2892	mr	r9, r4
2893	cmpdi	r3, 0
2894	bgt	guest_exit_cont
2895	b	maybe_reenter_guest
2896
2897	/* cede when already previously prodded case */
	/*
	 * In: r3 = vcpu pointer.  Clears prodded, then ceded, and returns
	 * H_SUCCESS in r3 to the caller of kvmppc_h_cede.
	 */
2898kvm_cede_prodded:
2899	li	r0,0
2900	stb	r0,VCPU_PRODDED(r3)
2901	sync			/* order testing prodded vs. clearing ceded */
2902	stb	r0,VCPU_CEDED(r3)
2903	li	r3,H_SUCCESS
2904	blr
2905
2906	/* we've ceded but we want to give control to the host */
	/*
	 * With CONFIG_KVM_XICS and a non-zero VCPU_XIVE_ESC_VADDR, first
	 * perform a load from the escalation ESB page at offset r6
	 * (XIVE_ESB_SET_PQ_00, or XIVE_ESB_SET_PQ_10 when xive_esc_on was
	 * already set).  The virtual address is used when MSR_DR is set,
	 * otherwise the real address with a cache-inhibited load.
	 * Always ends at guest_exit_cont with r9 = vcpu.
	 */
2907kvm_cede_exit:
2908	ld	r9, HSTATE_KVM_VCPU(r13)
2909#ifdef CONFIG_KVM_XICS
2910	/* are we using XIVE with single escalation? */
2911	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
2912	cmpdi	r10, 0
2913	beq	3f
2914	li	r6, XIVE_ESB_SET_PQ_00
2915	/*
2916	 * If we still have a pending escalation, abort the cede,
2917	 * and we must set PQ to 10 rather than 00 so that we don't
2918	 * potentially end up with two entries for the escalation
2919	 * interrupt in the XIVE interrupt queue.  In that case
2920	 * we also don't want to set xive_esc_on to 1 here in
2921	 * case we race with xive_esc_irq().
2922	 */
2923	lbz	r5, VCPU_XIVE_ESC_ON(r9)
2924	cmpwi	r5, 0
2925	beq	4f
2926	li	r0, 0
2927	stb	r0, VCPU_CEDED(r9)
2928	/*
2929	 * The escalation interrupts are special as we don't EOI them.
2930	 * There is no need to use the load-after-store ordering offset
2931	 * to set PQ to 10 as we won't use StoreEOI.
2932	 */
2933	li	r6, XIVE_ESB_SET_PQ_10
2934	b	5f
29354:	li	r0, 1
2936	stb	r0, VCPU_XIVE_ESC_ON(r9)
2937	/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
2938	sync
29395:	/* Enable XIVE escalation */
2940	mfmsr	r0
2941	andi.	r0, r0, MSR_DR		/* in real mode? */
2942	beq	1f
2943	ldx	r0, r10, r6
2944	b	2f
29451:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
2946	ldcix	r0, r10, r6
29472:	sync
2948#endif /* CONFIG_KVM_XICS */
29493:	b	guest_exit_cont
2950
2951	/* Try to do machine check recovery in real mode */
	/*
	 * In: r9 = vcpu.  Calls kvmppc_realmode_machine_check(vcpu), then
	 * reloads r9 and sets r12 to the machine check vector before
	 * continuing the guest exit.
	 */
2952machine_check_realmode:
2953	mr	r3, r9		/* get vcpu pointer */
2954	bl	kvmppc_realmode_machine_check
2955	nop
2956	/* all machine checks go to virtual mode for further handling */
2957	ld	r9, HSTATE_KVM_VCPU(r13)
2958	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
2959	b	guest_exit_cont
2960
2961/*
2962 * Call C code to handle a HMI in real mode.
2963 * Only the primary thread does the call, secondary threads are handled
2964 * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
2965 * r9 points to the vcpu on entry
 * After the call r9 is reloaded from the PACA and r12 is set to
 * BOOK3S_INTERRUPT_HMI before continuing the guest exit.
2966 */
2967hmi_realmode:
2968	lbz	r0, HSTATE_PTID(r13)
2969	cmpwi	r0, 0
2970	bne	guest_exit_cont		/* ptid != 0: not the primary thread */
2971	bl	kvmppc_realmode_hmi_handler
2972	ld	r9, HSTATE_KVM_VCPU(r13)
2973	li	r12, BOOK3S_INTERRUPT_HMI
2974	b	guest_exit_cont
2975
2976/*
2977 * Check the reason we woke from nap, and take appropriate action.
2978 * Returns (in r3):
2979 *	0 if nothing needs to be done
2980 *	1 if something happened that needs to be handled by the host
2981 *	-1 if there was a guest wakeup (IPI or msgsnd)
2982 *	-2 if we handled a PCI passthrough interrupt (returned by
2983 *		kvmppc_read_intr only)
 *
 * For an external interrupt, r3 is whatever kvmppc_read_intr returned;
 * a value greater than 1 additionally changes r12 to
 * BOOK3S_INTERRUPT_HV_RM_HARD (see the comment at that code).
2984 *
2985 * Also sets r12 to the interrupt vector for any interrupt that needs
2986 * to be handled now by the host (0x500 for external interrupt), or zero.
2987 * Modifies all volatile registers (since it may call a C function).
2988 * This routine calls kvmppc_read_intr, a C function, if an external
2989 * interrupt is pending.
2990 */
2991kvmppc_check_wake_reason:
2992	mfspr	r6, SPRN_SRR1
2993BEGIN_FTR_SECTION
2994	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
2995FTR_SECTION_ELSE
2996	rlwinm	r6, r6, 45-31, 0xe	/* P7 wake reason field is 3 bits */
2997ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
2998	cmpwi	r6, 8			/* was it an external interrupt? */
2999	beq	7f			/* if so, see what it was */
3000	li	r3, 0
3001	li	r12, 0
3002	cmpwi	r6, 6			/* was it the decrementer? */
3003	beq	0f
3004BEGIN_FTR_SECTION
3005	cmpwi	r6, 5			/* privileged doorbell? */
3006	beq	0f
3007	cmpwi	r6, 3			/* hypervisor doorbell? */
3008	beq	3f
3009END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3010	cmpwi	r6, 0xa			/* Hypervisor maintenance ? */
3011	beq	4f
3012	li	r3, 1			/* anything else, return 1 */
30130:	blr
3014
3015	/* hypervisor doorbell */
30163:	li	r12, BOOK3S_INTERRUPT_H_DOORBELL
3017
3018	/*
3019	 * Clear the doorbell as we will invoke the handler
3020	 * explicitly in the guest exit path.
3021	 */
3022	lis	r6, (PPC_DBELL_SERVER << (63-36))@h
3023	PPC_MSGCLR(6)
3024	/* see if it's a host IPI */
3025	li	r3, 1
3026BEGIN_FTR_SECTION
3027	PPC_MSGSYNC
3028	lwsync
3029END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
3030	lbz	r0, HSTATE_HOST_IPI(r13)
3031	cmpwi	r0, 0
3032	bnelr				/* host IPI pending: return 1 */
3033	/* if not, return -1 */
3034	li	r3, -1
3035	blr
3036
3037	/* Woken up due to Hypervisor maintenance interrupt */
30384:	li	r12, BOOK3S_INTERRUPT_HMI
3039	li	r3, 1
3040	blr
3041
3042	/* external interrupt - create a stack frame so we can call C */
30437:	mflr	r0
3044	std	r0, PPC_LR_STKOFF(r1)
3045	stdu	r1, -PPC_MIN_STKFRM(r1)
3046	bl	kvmppc_read_intr
3047	nop
3048	li	r12, BOOK3S_INTERRUPT_EXTERNAL
3049	cmpdi	r3, 1
3050	ble	1f
3051
3052	/*
3053	 * Return code of 2 means PCI passthrough interrupt, but
3054	 * we need to return back to host to complete handling the
3055	 * interrupt. Trap reason is expected in r12 by guest
3056	 * exit code.
3057	 */
3058	li	r12, BOOK3S_INTERRUPT_HV_RM_HARD
30591:
	/* pop the frame and return with r3 from kvmppc_read_intr */
3060	ld	r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
3061	addi	r1, r1, PPC_MIN_STKFRM
3062	mtlr	r0
3063	blr
3064
3065/*
3066 * Save away FP, VMX and VSX registers.
3067 * r3 = vcpu pointer
3068 * N.B. r30 and r31 are volatile across this function,
3069 * thus it is not callable from C.
 *
 * r30 holds the return address and r31 the vcpu pointer across the
 * calls to store_fp_state/store_vr_state.  The MSR is left with the
 * FP (and, where the CPU features are present, VEC/VSX) bits set.
3070 */
3071kvmppc_save_fp:
3072	mflr	r30
3073	mr	r31,r3
	/* Enable FP (and VEC/VSX if available) in the MSR */
3074	mfmsr	r5
3075	ori	r8,r5,MSR_FP
3076#ifdef CONFIG_ALTIVEC
3077BEGIN_FTR_SECTION
3078	oris	r8,r8,MSR_VEC@h
3079END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3080#endif
3081#ifdef CONFIG_VSX
3082BEGIN_FTR_SECTION
3083	oris	r8,r8,MSR_VSX@h
3084END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3085#endif
3086	mtmsrd	r8
3087	addi	r3,r3,VCPU_FPRS
3088	bl	store_fp_state
3089#ifdef CONFIG_ALTIVEC
3090BEGIN_FTR_SECTION
3091	addi	r3,r31,VCPU_VRS
3092	bl	store_vr_state
3093END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3094#endif
3095	mfspr	r6,SPRN_VRSAVE
3096	stw	r6,VCPU_VRSAVE(r31)
3097	mtlr	r30
3098	blr
3099
3100/*
3101 * Load up FP, VMX and VSX registers
3102 * r4 = vcpu pointer
3103 * N.B. r30 and r31 are volatile across this function,
3104 * thus it is not callable from C.
 *
 * r30 holds the return address and r31 the vcpu pointer across the
 * calls to load_fp_state/load_vr_state; r4 is restored to the vcpu
 * pointer before returning.
3105 */
3106kvmppc_load_fp:
3107	mflr	r30
3108	mr	r31,r4
	/* Enable FP (and VEC/VSX if available) in the MSR */
3109	mfmsr	r9
3110	ori	r8,r9,MSR_FP
3111#ifdef CONFIG_ALTIVEC
3112BEGIN_FTR_SECTION
3113	oris	r8,r8,MSR_VEC@h
3114END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3115#endif
3116#ifdef CONFIG_VSX
3117BEGIN_FTR_SECTION
3118	oris	r8,r8,MSR_VSX@h
3119END_FTR_SECTION_IFSET(CPU_FTR_VSX)
3120#endif
3121	mtmsrd	r8
3122	addi	r3,r4,VCPU_FPRS
3123	bl	load_fp_state
3124#ifdef CONFIG_ALTIVEC
3125BEGIN_FTR_SECTION
3126	addi	r3,r31,VCPU_VRS
3127	bl	load_vr_state
3128END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
3129#endif
3130	lwz	r7,VCPU_VRSAVE(r31)
3131	mtspr	SPRN_VRSAVE,r7
3132	mtlr	r30
3133	mr	r4,r31		/* hand the vcpu pointer back in r4 */
3134	blr
3135
3136#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
3137/*
3138 * Save transactional state and TM-related registers.
3139 * Called with r3 pointing to the vcpu struct and r4 containing
3140 * the guest MSR value.
3141 * r5 is non-zero iff non-volatile register state needs to be maintained.
3142 * If r5 == 0, this can modify all checkpointed registers, but
3143 * restores r1 and r2 before exit.
 *
 * Without CPU_FTR_P9_TM_HV_ASSIST, or when HSTATE_FAKE_SUSPEND is zero,
 * this simply tail-branches to __kvmppc_save_tm.  The code below the
 * second branch only handles the fake-suspend case.
3144 */
3145_GLOBAL_TOC(kvmppc_save_tm_hv)
3146EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
3147	/* See if we need to handle fake suspend mode */
3148BEGIN_FTR_SECTION
3149	b	__kvmppc_save_tm
3150END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
3151
3152	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
3153	cmpwi	r0, 0
3154	beq	__kvmppc_save_tm
3155
3156	/* The following code handles the fake_suspend = 1 case */
3157	mflr	r0
3158	std	r0, PPC_LR_STKOFF(r1)
3159	stdu	r1, -PPC_MIN_STKFRM(r1)
3160
3161	/* Turn on TM. */
3162	mfmsr	r8
3163	li	r0, 1
3164	rldimi	r8, r0, MSR_TM_LG, 63-MSR_TM_LG
3165	mtmsrd	r8
3166
	/* MSR[TS] of the current MSR == 0: skip the treclaim */
3167	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
3168	beq	4f
3169BEGIN_FTR_SECTION
3170	bl	pnv_power9_force_smt4_catch
3171END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3172	nop
3173
3174	/* We have to treclaim here because that's the only way to do S->N */
3175	li	r3, TM_CAUSE_KVM_RESCHED
3176	TRECLAIM(R3)
3177
3178	/*
3179	 * We were in fake suspend, so we are not going to save the
3180	 * register state as the guest checkpointed state (since
3181	 * we already have it), therefore we can now use any volatile GPR.
3182	 * In fact treclaim in fake suspend state doesn't modify
3183	 * any registers.
3184	 */
3185
3186BEGIN_FTR_SECTION
3187	bl	pnv_power9_force_smt4_release
3188END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
3189	nop
3190
31914:
3192	mfspr	r3, SPRN_PSSCR
3193	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
3194	li	r0, PSSCR_FAKE_SUSPEND
3195	andc	r3, r3, r0
3196	mtspr	SPRN_PSSCR, r3
3197
3198	/* Don't save TEXASR, use value from last exit in real suspend state */
3199	ld	r9, HSTATE_KVM_VCPU(r13)
3200	mfspr	r5, SPRN_TFHAR
3201	mfspr	r6, SPRN_TFIAR
3202	std	r5, VCPU_TFHAR(r9)
3203	std	r6, VCPU_TFIAR(r9)
3204
	/* pop the frame and return via the LR saved on entry */
3205	addi	r1, r1, PPC_MIN_STKFRM
3206	ld	r0, PPC_LR_STKOFF(r1)
3207	mtlr	r0
3208	blr
3209
3210/*
3211 * Restore transactional state and TM-related registers.
3212 * Called with r3 pointing to the vcpu struct
3213 * and r4 containing the guest MSR value.
3214 * r5 is non-zero iff non-volatile register state needs to be maintained.
3215 * This potentially modifies all checkpointed registers.
3216 * It restores r1 and r2 from the PACA.
 *
 * Without CPU_FTR_P9_TM_HV_ASSIST this tail-branches to
 * __kvmppc_restore_tm.  Otherwise the guest MSR[TS] field selects:
 *	0  - restore TFHAR/TFIAR/TEXASR only and return
 *	1  - (suspended) additionally set HSTATE_FAKE_SUSPEND
 *	>1 - (transactional) call kvmhv_emulate_tm_rollback
3217 */
3218_GLOBAL_TOC(kvmppc_restore_tm_hv)
3219EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
3220	/*
3221	 * If we are doing TM emulation for the guest on a POWER9 DD2,
3222	 * then we don't actually do a trechkpt -- we either set up
3223	 * fake-suspend mode, or emulate a TM rollback.
3224	 */
3225BEGIN_FTR_SECTION
3226	b	__kvmppc_restore_tm
3227END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
3228	mflr	r0
3229	std	r0, PPC_LR_STKOFF(r1)
3230
3231	li	r0, 0
3232	stb	r0, HSTATE_FAKE_SUSPEND(r13)
3233
3234	/* Turn on TM so we can restore TM SPRs */
3235	mfmsr	r5
3236	li	r0, 1
3237	rldimi	r5, r0, MSR_TM_LG, 63-MSR_TM_LG
3238	mtmsrd	r5
3239
3240	/*
3241	 * The user may change these outside of a transaction, so they must
3242	 * always be context switched.
3243	 */
3244	ld	r5, VCPU_TFHAR(r3)
3245	ld	r6, VCPU_TFIAR(r3)
3246	ld	r7, VCPU_TEXASR(r3)
3247	mtspr	SPRN_TFHAR, r5
3248	mtspr	SPRN_TFIAR, r6
3249	mtspr	SPRN_TEXASR, r7
3250
	/* r5 = guest MSR[TS]; LR still holds our caller's return address */
3251	rldicl. r5, r4, 64 - MSR_TS_S_LG, 62
3252	beqlr		/* TM not active in guest */
3253
3254	/* Make sure the failure summary is set */
3255	oris	r7, r7, (TEXASR_FS)@h
3256	mtspr	SPRN_TEXASR, r7
3257
3258	cmpwi	r5, 1		/* check for suspended state */
3259	bgt	10f
	/* r5 == 1 here, so this sets the fake-suspend flag */
3260	stb	r5, HSTATE_FAKE_SUSPEND(r13)
3261	b	9f		/* and return */
326210:	stdu	r1, -PPC_MIN_STKFRM(r1)
3263	/* guest is in transactional state, so simulate rollback */
3264	bl	kvmhv_emulate_tm_rollback
3265	nop
3266	addi	r1, r1, PPC_MIN_STKFRM
32679:	ld	r0, PPC_LR_STKOFF(r1)
3268	mtlr	r0
3269	blr
3270#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
3271
3272/*
3273 * We come here if we get any exception or interrupt while we are
3274 * executing host real mode code while in guest MMU context.
3275 * r12 is (CR << 32) | vector
3276 * r13 points to our PACA
3277 * r12 is saved in HSTATE_SCRATCH0(r13)
3278 * r9 is saved in HSTATE_SCRATCH2(r13)
3279 * r13 is saved in HSPRG1
3280 * cfar is saved in HSTATE_CFAR(r13)
3281 * ppr is saved in HSTATE_PPR(r13)
 *
 * Builds a register frame on the emergency stack, then (unless
 * CPU_FTR_ARCH_300 is clear, in which case it spins forever) restores a
 * minimal host MMU context and calls kvmppc_bad_interrupt() with a
 * pointer to that frame.
3282 */
3283kvmppc_bad_host_intr:
3284	/*
3285	 * Switch to the emergency stack, but start half-way down in
3286	 * case we were already on it.
3287	 */
3288	mr	r9, r1
3289	std	r1, PACAR1(r13)
3290	ld	r1, PACAEMERGSP(r13)
3291	subi	r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
3292	std	r9, 0(r1)		/* back chain = interrupted r1 */
3293	std	r0, GPR0(r1)
3294	std	r9, GPR1(r1)
3295	std	r2, GPR2(r1)
3296	SAVE_4GPRS(3, r1)
3297	SAVE_2GPRS(7, r1)
	/* split r12 into CR (high word) and vector (low word) */
3298	srdi	r0, r12, 32
3299	clrldi	r12, r12, 32
3300	std	r0, _CCR(r1)
3301	std	r12, _TRAP(r1)
	/* vector bit 0x2 set: interrupt state is in HSRR0/1, HDAR, HDSISR */
3302	andi.	r0, r12, 2
3303	beq	1f
3304	mfspr	r3, SPRN_HSRR0
3305	mfspr	r4, SPRN_HSRR1
3306	mfspr	r5, SPRN_HDAR
3307	mfspr	r6, SPRN_HDSISR
3308	b	2f
33091:	mfspr	r3, SPRN_SRR0
3310	mfspr	r4, SPRN_SRR1
3311	mfspr	r5, SPRN_DAR
3312	mfspr	r6, SPRN_DSISR
33132:	std	r3, _NIP(r1)
3314	std	r4, _MSR(r1)
3315	std	r5, _DAR(r1)
3316	std	r6, _DSISR(r1)
	/* recover the r9, r12 and r13 values saved before we got here */
3317	ld	r9, HSTATE_SCRATCH2(r13)
3318	ld	r12, HSTATE_SCRATCH0(r13)
3319	GET_SCRATCH0(r0)
3320	SAVE_4GPRS(9, r1)
3321	std	r0, GPR13(r1)
3322	SAVE_NVGPRS(r1)
3323	ld	r5, HSTATE_CFAR(r13)
3324	std	r5, ORIG_GPR3(r1)	/* saved CFAR goes in the orig_gpr3 slot */
3325	mflr	r3
3326	mfctr	r4
3327	mfxer	r5
3328	lbz	r6, PACAIRQSOFTMASK(r13)
3329	std	r3, _LINK(r1)
3330	std	r4, _CTR(r1)
3331	std	r5, _XER(r1)
3332	std	r6, SOFTE(r1)
3333	ld	r2, PACATOC(r13)
	/* 0x7265677368657265 is ASCII "regshere": frame marker */
3334	LOAD_REG_IMMEDIATE(3, 0x7265677368657265)
3335	std	r3, STACK_FRAME_OVERHEAD-16(r1)
3336
3337	/*
3338	 * On POWER9 do a minimal restore of the MMU and call C code,
3339	 * which will print a message and panic.
3340	 * XXX On POWER7 and POWER8, we just spin here since we don't
3341	 * know what the other threads are doing (and we don't want to
3342	 * coordinate with them) - but at least we now have register state
3343	 * in memory that we might be able to look at from another CPU.
3344	 */
3345BEGIN_FTR_SECTION
3346	b	.
3347END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
3348	ld	r9, HSTATE_KVM_VCPU(r13)
3349	ld	r10, VCPU_KVM(r9)
3350
	/* zero the protection-key and debug/watchpoint control SPRs */
3351	li	r0, 0
3352	mtspr	SPRN_AMR, r0
3353	mtspr	SPRN_IAMR, r0
3354	mtspr	SPRN_CIABR, r0
3355	mtspr	SPRN_DAWRX0, r0
3356BEGIN_FTR_SECTION
3357	mtspr	SPRN_DAWRX1, r0
3358END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
3359
3360	/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
3361	slbmte	r0, r0
3362	PPC_SLBIA(6)
3363
	/* radix: nothing to reload, skip the bolted SLB restore */
3364BEGIN_MMU_FTR_SECTION
3365	b	4f
3366END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3367
	/* reload valid entries from the SLB shadow buffer, 16 bytes each */
3368	ptesync
3369	ld	r8, PACA_SLBSHADOWPTR(r13)
3370	.rept	SLB_NUM_BOLTED
3371	li	r3, SLBSHADOW_SAVEAREA
3372	LDX_BE	r5, r8, r3
3373	addi	r3, r3, 8
3374	LDX_BE	r6, r8, r3
3375	andis.	r7, r5, SLB_ESID_V@h
3376	beq	3f
3377	slbmte	r6, r5
33783:	addi	r8, r8, 16
3379	.endr
3380
	/* switch LPID/PID/LPCR back to host values (r10 = kvm) */
33814:	lwz	r7, KVM_HOST_LPID(r10)
3382	mtspr	SPRN_LPID, r7
3383	mtspr	SPRN_PID, r0
3384	ld	r8, KVM_HOST_LPCR(r10)
3385	mtspr	SPRN_LPCR, r8
3386	isync
3387	li	r0, KVM_GUEST_MODE_NONE
3388	stb	r0, HSTATE_IN_GUEST(r13)
3389
3390	/*
3391	 * Turn on the MMU and jump to C code
3392	 */
	/* bcl/mflr obtains the address of 5:, from which 9: is computed */
3393	bcl	20, 31, .+4
33945:	mflr	r3
3395	addi	r3, r3, 9f - 5b
3396	li	r4, -1
3397	rldimi	r3, r4, 62, 0	/* ensure 0xc000000000000000 bits are set */
3398	ld	r4, PACAKMSR(r13)
3399	mtspr	SPRN_SRR0, r3
3400	mtspr	SPRN_SRR1, r4
3401	RFI_TO_KERNEL
34029:	addi	r3, r1, STACK_FRAME_OVERHEAD
3403	bl	kvmppc_bad_interrupt
3404	b	9b
3405
3406/*
3407 * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
3408 * from VCPU_INTR_MSR and is modified based on the required TM state changes.
3409 *   r11 has the guest MSR value (in/out)
3410 *   r9 has a vcpu pointer (in)
3411 *   r0 is used as a scratch register
 *
 * The 2-bit TS field of the old MSR is copied into the new MSR, except
 * that the value 2 (transactional) is replaced by 1 (suspended).
3412 */
3413kvmppc_msr_interrupt:
3414	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62	/* r0 = old MSR[TS] */
3415	cmpwi	r0, 2 /* Check if we are in transactional state..  */
3416	ld	r11, VCPU_INTR_MSR(r9)
3417	bne	1f
3418	/* ... if transactional, change to suspended */
3419	li	r0, 1
34201:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG	/* insert TS */
3421	blr
3422
3423/*
3424 * Load up guest PMU state.  R3 points to the vcpu struct.
 *
 * The counters are frozen first by writing MMCR0 with only the freeze
 * bit set; the guest's MMCR0 value is written last, after all other PMU
 * registers have been loaded.  The vcpu pointer is kept in r4 and the
 * return address in r0 (the conditional call to kvmppc_fix_pmao
 * overwrites LR).
3425 */
3426_GLOBAL(kvmhv_load_guest_pmu)
3427EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
3428	mr	r4, r3
3429	mflr	r0
3430	li	r3, 1
3431	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
3432	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
3433	isync
3434BEGIN_FTR_SECTION
	/* call kvmppc_fix_pmao if PMAO is set but PMAO_SYNC is not */
3435	ld	r3, VCPU_MMCR(r4)
3436	andi.	r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
3437	cmpwi	r5, MMCR0_PMAO
3438	beql	kvmppc_fix_pmao
3439END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
3440	lwz	r3, VCPU_PMC(r4)	/* always load up guest PMU registers */
3441	lwz	r5, VCPU_PMC + 4(r4)	/* to prevent information leak */
3442	lwz	r6, VCPU_PMC + 8(r4)
3443	lwz	r7, VCPU_PMC + 12(r4)
3444	lwz	r8, VCPU_PMC + 16(r4)
3445	lwz	r9, VCPU_PMC + 20(r4)
3446	mtspr	SPRN_PMC1, r3
3447	mtspr	SPRN_PMC2, r5
3448	mtspr	SPRN_PMC3, r6
3449	mtspr	SPRN_PMC4, r7
3450	mtspr	SPRN_PMC5, r8
3451	mtspr	SPRN_PMC6, r9
	/* r3 = guest MMCR0 image, held until the final mtspr below */
3452	ld	r3, VCPU_MMCR(r4)
3453	ld	r5, VCPU_MMCR + 8(r4)
3454	ld	r6, VCPU_MMCRA(r4)
3455	ld	r7, VCPU_SIAR(r4)
3456	ld	r8, VCPU_SDAR(r4)
3457	mtspr	SPRN_MMCR1, r5
3458	mtspr	SPRN_MMCRA, r6
3459	mtspr	SPRN_SIAR, r7
3460	mtspr	SPRN_SDAR, r8
3461BEGIN_FTR_SECTION
3462	ld      r5, VCPU_MMCR + 24(r4)
3463	ld      r6, VCPU_SIER + 8(r4)
3464	ld      r7, VCPU_SIER + 16(r4)
3465	mtspr   SPRN_MMCR3, r5
3466	mtspr   SPRN_SIER2, r6
3467	mtspr   SPRN_SIER3, r7
3468END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
3469BEGIN_FTR_SECTION
3470	ld	r5, VCPU_MMCR + 16(r4)
3471	ld	r6, VCPU_SIER(r4)
3472	mtspr	SPRN_MMCR2, r5
3473	mtspr	SPRN_SIER, r6
	/* SPMC1/2 and MMCRS: only when CPU_FTR_ARCH_300 is clear */
3474BEGIN_FTR_SECTION_NESTED(96)
3475	lwz	r7, VCPU_PMC + 24(r4)
3476	lwz	r8, VCPU_PMC + 28(r4)
3477	ld	r9, VCPU_MMCRS(r4)
3478	mtspr	SPRN_SPMC1, r7
3479	mtspr	SPRN_SPMC2, r8
3480	mtspr	SPRN_MMCRS, r9
3481END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
3482END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
3483	mtspr	SPRN_MMCR0, r3
3484	isync
3485	mtlr	r0
3486	blr
3487
/*
 * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
 *
 * Nothing is reloaded when PACA_PMCINUSE(r13) is zero.  Otherwise every
 * PMU register is loaded from the HSTATE_* save area and MMCR0 is written
 * last.
 *
 * Takes no arguments other than the PACA in r13.  Clobbers r0, r3-r9 and
 * CR0.  LR is parked in r0 because the CPU_FTR_PMAO_BUG section may call
 * kvmppc_fix_pmao with a branch-and-link.
 */
_GLOBAL(kvmhv_load_host_pmu)
EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
	mflr	r0			/* preserve LR across the beql below */
	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
	cmpwi	r4, 0
	beq	23f			/* skip if not */
BEGIN_FTR_SECTION
	/* Call the workaround if PMAO is set and PMAO_SYNC is clear */
	ld	r3, HSTATE_MMCR0(r13)
	andi.	r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
	cmpwi	r4, MMCR0_PMAO
	beql	kvmppc_fix_pmao
END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
	lwz	r3, HSTATE_PMC1(r13)
	lwz	r4, HSTATE_PMC2(r13)
	lwz	r5, HSTATE_PMC3(r13)
	lwz	r6, HSTATE_PMC4(r13)
	lwz	r8, HSTATE_PMC5(r13)
	lwz	r9, HSTATE_PMC6(r13)
	mtspr	SPRN_PMC1, r3
	mtspr	SPRN_PMC2, r4
	mtspr	SPRN_PMC3, r5
	mtspr	SPRN_PMC4, r6
	mtspr	SPRN_PMC5, r8
	mtspr	SPRN_PMC6, r9
	ld	r3, HSTATE_MMCR0(r13)	/* r3 = host MMCR0, written last */
	ld	r4, HSTATE_MMCR1(r13)
	ld	r5, HSTATE_MMCRA(r13)
	ld	r6, HSTATE_SIAR(r13)
	ld	r7, HSTATE_SDAR(r13)
	mtspr	SPRN_MMCR1, r4
	mtspr	SPRN_MMCRA, r5
	mtspr	SPRN_SIAR, r6
	mtspr	SPRN_SDAR, r7
BEGIN_FTR_SECTION
	ld	r8, HSTATE_MMCR2(r13)
	ld	r9, HSTATE_SIER(r13)
	mtspr	SPRN_MMCR2, r8
	mtspr	SPRN_SIER, r9
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
	ld	r5, HSTATE_MMCR3(r13)
	ld	r6, HSTATE_SIER2(r13)
	ld	r7, HSTATE_SIER3(r13)
	mtspr	SPRN_MMCR3, r5
	mtspr	SPRN_SIER2, r6
	mtspr	SPRN_SIER3, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
	mtspr	SPRN_MMCR0, r3		/* finally install the host MMCR0 */
	isync
	mtlr	r0
	/* LR is still intact on the early-exit path, so it lands on the blr */
23:	blr
3542
/*
 * Save guest PMU state into the vcpu struct.
 * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
 *
 * The counters are always frozen (MMCR0 = FC) and MMCRA is cleared.  If
 * the full save flag is zero, only a frozen MMCR0 image is stored in the
 * vcpu; otherwise the control registers, SIAR/SDAR and the counters are
 * stored.
 *
 * Clobbers r3-r9 and CR0, plus r10 when CPU_FTR_ARCH_207S is set.
 * LR is not touched.
 */
_GLOBAL(kvmhv_save_guest_pmu)
EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
	mr	r9, r3			/* r9 = vcpu */
	mr	r8, r4			/* r8 = full save flag */
BEGIN_FTR_SECTION
	/*
	 * POWER8 seems to have a hardware bug where setting
	 * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
	 * when some counters are already negative doesn't seem
	 * to cause a performance monitor alert (and hence interrupt).
	 * The effect of this is that when saving the PMU state,
	 * if there is no PMU alert pending when we read MMCR0
	 * before freezing the counters, but one becomes pending
	 * before we read the counters, we lose it.
	 * To work around this, we need a way to freeze the counters
	 * before reading MMCR0.  Normally, freezing the counters
	 * is done by writing MMCR0 (to set MMCR0[FC]) which
	 * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
	 * we can also freeze the counters using MMCR2, by writing
	 * 1s to all the counter freeze condition bits (there are
	 * 9 bits each for 6 counters).
	 */
	li	r3, -1			/* set all freeze bits */
	clrrdi	r3, r3, 10
	mfspr	r10, SPRN_MMCR2		/* r10 = guest MMCR2, stored below */
	mtspr	SPRN_MMCR2, r3
	isync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
	li	r3, 1
	sldi	r3, r3, 31		/* MMCR0_FC (freeze counters) bit */
	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
	mfspr	r6, SPRN_MMCRA
	/* Clear MMCRA in order to disable SDAR updates */
	li	r7, 0
	mtspr	SPRN_MMCRA, r7
	isync
	cmpwi	r8, 0			/* did they ask for PMU stuff to be saved? */
	bne	21f
	std	r3, VCPU_MMCR(r9)	/* if not, set saved MMCR0 to FC */
	b	22f
21:	mfspr	r5, SPRN_MMCR1
	mfspr	r7, SPRN_SIAR
	mfspr	r8, SPRN_SDAR
	std	r4, VCPU_MMCR(r9)
	std	r5, VCPU_MMCR + 8(r9)
	std	r6, VCPU_MMCRA(r9)
	/*
	 * Store SIAR/SDAR before the ISA v3.1 section: that section reuses
	 * r5-r7 as scratch, so storing r7 afterwards would save SIER3 in
	 * place of SIAR.
	 */
	std	r7, VCPU_SIAR(r9)
	std	r8, VCPU_SDAR(r9)
BEGIN_FTR_SECTION
	std	r10, VCPU_MMCR + 16(r9)	/* MMCR2 as read before freezing */
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
	mfspr	r5, SPRN_MMCR3
	mfspr	r6, SPRN_SIER2
	mfspr	r7, SPRN_SIER3
	std	r5, VCPU_MMCR + 24(r9)
	std	r6, VCPU_SIER + 8(r9)
	std	r7, VCPU_SIER + 16(r9)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
	mfspr	r3, SPRN_PMC1
	mfspr	r4, SPRN_PMC2
	mfspr	r5, SPRN_PMC3
	mfspr	r6, SPRN_PMC4
	mfspr	r7, SPRN_PMC5
	mfspr	r8, SPRN_PMC6
	stw	r3, VCPU_PMC(r9)
	stw	r4, VCPU_PMC + 4(r9)
	stw	r5, VCPU_PMC + 8(r9)
	stw	r6, VCPU_PMC + 12(r9)
	stw	r7, VCPU_PMC + 16(r9)
	stw	r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION
	mfspr	r5, SPRN_SIER
	std	r5, VCPU_SIER(r9)
	/* nested section: selected when CPU_FTR_ARCH_300 is clear */
BEGIN_FTR_SECTION_NESTED(96)
	mfspr	r6, SPRN_SPMC1
	mfspr	r7, SPRN_SPMC2
	mfspr	r8, SPRN_MMCRS
	stw	r6, VCPU_PMC + 24(r9)
	stw	r7, VCPU_PMC + 28(r9)
	std	r8, VCPU_MMCRS(r9)
	lis	r4, 0x8000
	mtspr	SPRN_MMCRS, r4
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:	blr
3634
/*
 * This works around a hardware bug on POWER8E processors, where
 * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
 * performance monitor interrupt.  Instead, when we need to have
 * an interrupt pending, we have to arrange for a counter to overflow.
 *
 * Takes no arguments.  Clobbers r3 and overwrites MMCR2, MMCR0 and PMC6.
 * The callers in this file reach it with a branch-and-link, after which
 * they reload the PMU registers and write the real MMCR0.
 */
kvmppc_fix_pmao:
	li	r3, 0
	mtspr	SPRN_MMCR2, r3
	lis	r3, (MMCR0_PMXE | MMCR0_FCECE)@h
	ori	r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
	mtspr	SPRN_MMCR0, r3
	/* PMC6 = 0x7fffffff: one count short of setting bit 31 */
	lis	r3, 0x7fff
	ori	r3, r3, 0xffff
	mtspr	SPRN_PMC6, r3
	isync
	blr
3652
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
/*
 * Start timing an activity
 * r3 = pointer to time accumulation struct, r4 = vcpu
 *
 * Records r3 as the vcpu's current activity and the offset-adjusted
 * timebase as its start time.  Clobbers r5 and r6.
 */
kvmhv_start_timing:
	ld	r5, HSTATE_KVM_VCORE(r13)
	ld	r6, VCORE_TB_OFFSET_APPL(r5)
	mftb	r5
	subf	r5, r6, r5	/* subtract current timebase offset */
	std	r3, VCPU_CUR_ACTIVITY(r4)
	std	r5, VCPU_ACTIVITY_START(r4)
	blr

/*
 * Accumulate time to one activity and start another.
 * r3 = pointer to new time accumulation struct, r4 = vcpu
 *
 * The time elapsed since VCPU_ACTIVITY_START is added to the struct that
 * was the current activity on entry, if there was one.  Its min and max
 * are updated, with TAS_SEQCOUNT incremented once before and once after
 * the update.  Clobbers r3, r5-r8 and CR0.
 */
kvmhv_accumulate_time:
	ld	r5, HSTATE_KVM_VCORE(r13)
	ld	r8, VCORE_TB_OFFSET_APPL(r5)
	ld	r5, VCPU_CUR_ACTIVITY(r4)	/* r5 = activity being ended */
	ld	r6, VCPU_ACTIVITY_START(r4)	/* r6 = when it started */
	std	r3, VCPU_CUR_ACTIVITY(r4)
	mftb	r7
	subf	r7, r8, r7	/* subtract current timebase offset */
	std	r7, VCPU_ACTIVITY_START(r4)
	cmpdi	r5, 0
	beqlr			/* no previous activity: nothing to add */
	subf	r3, r6, r7	/* r3 = elapsed time */
	ld	r8, TAS_SEQCOUNT(r5)
	cmpdi	r8, 0		/* CR0.eq = first sample; tested at beq 3f */
	addi	r8, r8, 1
	std	r8, TAS_SEQCOUNT(r5)
	lwsync
	ld	r7, TAS_TOTAL(r5)
	add	r7, r7, r3
	std	r7, TAS_TOTAL(r5)
	ld	r6, TAS_MIN(r5)
	ld	r7, TAS_MAX(r5)
	beq	3f		/* first sample: set min unconditionally */
	cmpd	r3, r6
	bge	1f
3:	std	r3, TAS_MIN(r5)
1:	cmpd	r3, r7
	ble	2f
	std	r3, TAS_MAX(r5)
2:	lwsync
	addi	r8, r8, 1
	std	r8, TAS_SEQCOUNT(r5)
	blr
#endif
3705