/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
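
/*
 * C-level view of this entry point, as a hedged sketch (the prototype
 * matches the kernel's switch_to.h declaration; the enable step is the
 * caller's responsibility and is shown only as an illustration):
 *
 *	void load_vr_state(struct thread_vr_state *vr);
 *
 *	msr_check_and_set(MSR_VEC);			// VMX must be on
 *	load_vr_state(&current->thread.vr_state);	// 32 VRs + VSCR
 */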

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
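
/*
 * store_vr_state() mirrors load_vr_state(); a sketch of a save/restore
 * round trip under the same MSR[VEC]-enabled assumption:
 *
 *	void store_vr_state(struct thread_vr_state *vr);
 *
 *	store_vr_state(&tsk->thread.vr_state);	// save 32 VRs + VSCR
 *	...
 *	load_vr_state(&tsk->thread.vr_state);	// put them back
 */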

/*
 * load_up_altivec:
 * Load the current task's vector state from its thread_struct and
 * enable VMX for use on return. The VMX is known to be free, since
 * we give it up on every context switch (ie, no lazy save of the
 * vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	ori	r5,r5,MSR_RI
#endif
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something
	 * non-zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
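	/*
	 * A sketch of the userspace optimisation this enables (glibc-style
	 * pseudo-C; VRSAVE is user-readable, but the helper name here is an
	 * illustrative assumption):
	 *
	 *	if (mfspr(SPRN_VRSAVE) != 0)	// task has touched AltiVec
	 *		save_vector_registers();
	 *	// else: skip saving all 32 VRs entirely
	 */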
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	addi	r5,r2,THREAD
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#ifdef CONFIG_PPC_BOOK3S_64
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
#endif
#endif
	li	r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)
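
/*
 * Rough pseudo-C of the sequence above, for orientation only (field
 * names follow the asm-offsets symbols; this is a sketch, not the
 * exception-return machinery itself):
 *
 *	mtmsr(mfmsr() | MSR_VEC);		// use VMX in the kernel now
 *	if (mfspr(SPRN_VRSAVE) == 0)
 *		mtspr(SPRN_VRSAVE, -1);
 *	regs->msr |= MSR_VEC;			// VMX stays on after return
 *	current->thread.load_vec = 1;
 *	current->thread.used_vr = 1;
 *	load_vr_state(&current->thread.vr_state);
 */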

/*
 * save_altivec(tsk)
 * Save tsk's vector registers to its thread_struct.
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
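
/*
 * The destination selection above, as a hedged C sketch (the pointer
 * fallback mirrors the THREAD_VRSAVEAREA test; field names follow the
 * asm-offsets symbols):
 *
 *	struct thread_vr_state *dst = tsk->thread.vr_save_area;
 *	if (!dst)
 *		dst = &tsk->thread.vr_state;	// default save target
 *	store_vr_state(dst);			// 32 VRs + VSCR
 */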

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Load up the state the task needs for VSX, reusing the FP and VMX
 * load paths, but first check whether that state has already been
 * loaded.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VMX registers if they haven't been loaded yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* call unless FP already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* call unless VMX already loaded */

#ifdef CONFIG_PPC_BOOK3S_64
	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */
	li	r5,MSR_RI
	mtmsrd	r5,1
#endif

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	li	r4,0
	stb	r4,PACASRR_VALID(r13)
	b	fast_interrupt_return_srr
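
/*
 * The dispatch logic above in pseudo-C (r12 holds the interrupted
 * context's MSR; the helpers named are this file's own entry points):
 *
 *	if (!(msr & MSR_FP))
 *		load_up_fpu();		// FP half of the VSX registers
 *	if (!(msr & MSR_VEC))
 *		load_up_altivec();	// VMX half of the VSX registers
 *	current->thread.used_vsr = 1;
 *	regs->msr |= MSR_VSX;		// VSX stays on after return
 */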

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
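
/*
 * They emulate four-element single-precision AltiVec operations using
 * scalar FP. A hedged C model of what, e.g., vaddfp computes (the entry
 * points take register-block pointers; this scalar loop illustrates the
 * arithmetic, not the calling convention):
 *
 *	void vaddfp(float dst[4], const float a[4], const float b[4])
 *	{
 *		for (int i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];	// one fadds per element
 *	}
 *
 * vsubfp, vmaddfp, vnmsubfp, vrefp and vrsqrtefp follow the same
 * element-wise pattern.
 */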
	.data
#ifdef CONFIG_PPC32
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

fpzero:
	.quad	0
fpone:
	.quad	0x3ff0000000000000	/* 1.0 */
fphalf:
	.quad	0x3fe0000000000000	/* 0.5 */

#ifdef CONFIG_PPC_KERNEL_PCREL
#define LDCONST(fr, name)		\
	pla	r11,name@pcrel;		\
	lfd	fr,0(r11)
#else
#define LDCONST(fr, name)		\
	addis	r11,r2,name@toc@ha;	\
	lfd	fr,name@toc@l(r11)
#endif
#endif
	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
SYM_FUNC_END(fpenable)

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
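
/*
 * Calling protocol for the fpenable/fpdisable bracket, sketched as
 * commented pseudo-code (this mirrors the register usage above; it is
 * not a C ABI):
 *
 *	// caller:	mflr r12; bl fpenable	// r12 = saved LR
 *	// fpenable:	r10 = old MSR, fr31 = old FPSCR,
 *	//		fr0/fr1 saved at 24(r1)/16(r1), FPSCR cleared
 *	// ...body uses fr0/fr1 (and fr2-fr5 if it saves them itself)...
 *	// fpdisable:	restores FPSCR, fr0/fr1/fr31 and the MSR, pops
 *	//		the frame, returns through the saved LR in r12
 */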

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
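/*
 * The Newton-Raphson step below solves f(r) = 1/r^2 - s = 0, giving
 *
 *	r' = r + 0.5 * r * (1 - s * r * r)
 *
 * Each iteration roughly doubles the number of correct bits in the
 * frsqrte estimate, so two iterations suffice for single precision.
 * The same update as an illustrative C sketch (frsqrte() stands in
 * for the hardware estimate instruction):
 *
 *	float r = frsqrte(s);			// initial estimate
 *	r = r + 0.5f * r * (1.0f - s * r * r);	// iteration 1
 *	r = r + 0.5f * r * (1.0f - s * r * r);	// iteration 2
 */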
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable