xref: /openbmc/linux/arch/powerpc/kernel/vector.S (revision 7dd65feb)
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
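/*
 * Roughly, in C-like pseudocode (an illustrative sketch only; the helper
 * names save_vmx()/load_vmx() are made up, everything else mirrors the
 * code below):
 *
 *	msr |= MSR_VEC;				    // let the kernel touch VMX
 *	if (last_task_used_altivec)		    // UP lazy switching only
 *		save_vmx(last_task_used_altivec);   // vr0..vr31 + VSCR
 *	load_vmx(current);			    // restore VSCR and vr0..vr31
 *	regs->msr |= MSR_VEC;			    // user regains VMX on return
 */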
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on the vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
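/*
 * The !CONFIG_SMP block below is, in illustrative C-like pseudocode
 * (the helper name save_vmx() is made up):
 *
 *	prev = last_task_used_altivec;
 *	if (prev) {
 *		save_vmx(&prev->thread);	    // SAVE_32VRS + mfvscr/stvx
 *		prev->thread.regs->msr &= ~MSR_VEC; // prev faults on next VMX use
 *	}
 */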
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
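	/* In C terms (illustrative): if (mfspr(SPRN_VRSAVE) == 0) mtspr(SPRN_VRSAVE, -1); */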
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
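/*
 * Typical C-side usage looks roughly like this (an illustrative sketch,
 * not the exact caller):
 *
 *	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 *		giveup_altivec(prev);	// saves vr0..vr31 + VSCR, clears MSR_VEC
 */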
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the FP and AltiVec load paths (load_up_fpu/load_up_altivec),
 * but first check whether that state has already been loaded.
 */
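/*
 * Sketch of the logic below (illustrative C, assuming "regs" is the saved
 * pt_regs whose MSR value is cached in r12):
 *
 *	if (!(regs->msr & MSR_FP))
 *		load_up_fpu();		// FP half of the VSX register set
 *	if (!(regs->msr & MSR_VEC))
 *		load_up_altivec();	// vector half
 *	current->thread.used_vsr = 1;
 *	regs->msr |= MSR_VSX;
 */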
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
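/*
 * Illustrative C equivalent (field names other than MSR_VSX are assumed,
 * not taken from this file):
 *
 *	if (tsk && tsk->thread.regs)
 *		tsk->thread.regs->msr &= ~MSR_VSX;	// no registers saved here
 */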
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
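/*
 * For orientation, callers (e.g. the AltiVec emulation code) use them
 * roughly as below; this is an assumed sketch, the exact declarations
 * live with the caller:
 *
 *	void vaddfp(void *dst, void *a, void *b);	     // dst[i] = a[i] + b[i]
 *	void vsubfp(void *dst, void *a, void *b);	     // dst[i] = a[i] - b[i]
 *	void vmaddfp(void *dst, void *a, void *b, void *c);  // dst[i] = a[i]*c[i] + b[i]
 *	void vnmsubfp(void *dst, void *a, void *b, void *c); // dst[i] = -(a[i]*c[i] - b[i])
 *	void vrefp(void *dst, void *src);		     // dst[i] = 1.0f / src[i]
 *	void vrsqrtefp(void *dst, void *src);		     // dst[i] = 1.0f / sqrtf(src[i])
 *
 * each operating on four 32-bit floats.
 */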
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
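/*
 * Calling convention used here (private, not the ABI): the caller saves LR
 * in r12, does "bl fpenable", performs its FP work on fr0/fr1 (and fr2..fr5
 * if it saves them), then branches to fpdisable, which restores FPSCR,
 * fr31/fr1/fr0 and the original MSR kept in r10, and returns through r12.
 */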
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
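/*
 * The Newton-Raphson step for y = 1/sqrt(s) used below is
 *
 *	r' = r + 0.5 * r * (1 - s * r * r)
 *
 * applied twice to the frsqrte estimate; fpone and fphalf supply the
 * 1.0 and 0.5 constants.
 */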
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable
407