xref: /openbmc/linux/arch/powerpc/kernel/vector.S (revision 002dff36)
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr
EXPORT_SYMBOL(load_vr_state)
_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr
EXPORT_SYMBOL(store_vr_state)
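/*
 * For reference, the layout these two helpers walk is (approximately) the
 * kernel's struct thread_vr_state; treat this sketch as an illustration of
 * the offsets used above, not as the authoritative definition:
 *
 *	struct thread_vr_state {
 *		vector128 vr[32];	// v0..v31, via SAVE_32VRS/REST_32VRS
 *		vector128 vscr;		// at offset VRSTATE_VSCR, staged through v0
 *	};
 */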

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something
	 * non-zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#ifdef CONFIG_VMAP_STACK
	tovirt(r5, r5)
#endif
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr
_ASM_NOKPROBE_SYMBOL(load_up_altivec)
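/*
 * A rough C rendering of the fast path above (illustrative only; the real
 * work stays in asm so it can run straight out of the exception entry, and
 * this sketch glosses over the 32-bit/64-bit differences):
 *
 *	void load_up_altivec_sketch(struct pt_regs *regs, struct thread_struct *t)
 *	{
 *		if (mfspr(SPRN_VRSAVE) == 0)
 *			mtspr(SPRN_VRSAVE, -1);	// hint for glibc's VRSAVE check
 *		t->load_vec++;			// counter may wrap; that is fine
 *		t->used_vr = 1;
 *		load_vr_state(&t->vr_state);	// VSCR, then v0..v31
 *		regs->msr |= MSR_VEC;		// let the task keep using VMX
 *	}
 */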

/*
 * save_altivec(tsk)
 * Save tsk's vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr
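/*
 * Roughly equivalent C (an illustrative sketch, not a drop-in replacement):
 * use the alternate save area if one is set, otherwise the task's normal
 * vr_state, and dump VSCR plus v0..v31 there.
 *
 *	void save_altivec_sketch(struct task_struct *tsk)
 *	{
 *		struct thread_vr_state *dst = tsk->thread.vr_save_area;
 *
 *		if (!dst)
 *			dst = &tsk->thread.vr_state;
 *		store_vr_state(dst);	// v0..v31 first, then VSCR
 *	}
 */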

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vmx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been loaded yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_interrupt_return

#endif /* CONFIG_VSX */
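/*
 * In outline (a C-flavoured sketch, not the real handler; load_up_fpu and
 * load_up_altivec are not actually C-callable like this): make sure both
 * halves of the VSX register set are live, mark VSX as used, and return
 * with MSR_VSX set in the interrupted context's MSR.
 *
 *	// msr is the interrupted context's MSR (r12 above)
 *	if (!(msr & MSR_FP))
 *		load_up_fpu();		// FPRs = low halves of VSRs 0-31
 *	if (!(msr & MSR_VEC))
 *		load_up_altivec();	// VRs = VSRs 32-63
 *	current->thread.used_vsr = 1;
 *	regs->msr |= MSR_VSX;
 */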


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
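/*
 * A hypothetical caller, to make that constraint concrete (the prototype
 * and the wrapper name below are assumptions for illustration, not taken
 * from this file):
 *
 *	#include <linux/preempt.h>
 *
 *	extern void vaddfp(void *dst, const void *a, const void *b);
 *
 *	static void emulate_one_vaddfp(void *dst, const void *a, const void *b)
 *	{
 *		preempt_disable();	// the helpers below use the FPU directly
 *		vaddfp(dst, a, b);
 *		preempt_enable();
 *	}
 */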
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr
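/*
 * Calling convention for the helpers below, summarized from the code above:
 * a caller does "mflr r12; bl fpenable", works in fr0/fr1 (saving any extra
 * FPRs it needs at 32(r1) and up within the 64-byte frame), and finishes
 * with "b fpdisable", which restores FPSCR, fr31/fr1/fr0, the original MSR
 * and stack pointer, then returns through r12.
 */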

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable
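/*
 * Per lane, the loop above is just four IEEE single-precision additions,
 * performed with FPSCR cleared by fpenable. A C model of the computation
 * (illustrative only):
 *
 *	static void vaddfp_model(float dst[4], const float a[4], const float b[4])
 *	{
 *		int i;
 *
 *		for (i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];
 *	}
 */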

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
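/*
 * The refinement above is the standard Newton-Raphson step for
 * y = 1/sqrt(s): y' = y + 0.5 * y * (1 - s * y * y). A C model of one
 * lane (illustrative only; the hardware frsqrte estimate that seeds the
 * iteration is taken as a parameter here):
 *
 *	static float rsqrte_refine(float s, float y)
 *	{
 *		int i;
 *
 *		for (i = 0; i < 2; i++)		// two steps, as in the asm
 *			y = y + 0.5f * y * (1.0f - s * y * y);
 *		return y;
 *	}
 */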