#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Wrapper to call load_up_altivec from C.
 * void do_load_up_altivec(struct pt_regs *regs);
 */
_GLOBAL(do_load_up_altivec)
	mflr	r0
	std	r0, 16(r1)
	stdu	r1, -112(r1)

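	/*
	 * The 112 bytes above are a minimal 64-bit stack frame.  The pt_regs
	 * offsets (_MSR and friends) already include STACK_FRAME_OVERHEAD,
	 * so backing r3 up by that amount below makes _MSR(r6) land on
	 * regs->msr.
	 */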
	subi	r6, r3, STACK_FRAME_OVERHEAD
	/* load_up_altivec expects r12=MSR, r13=PACA, and returns
	 * with r12 = new MSR.
	 */
	ld	r12,_MSR(r6)
	GET_PACA(r13)
	bl	load_up_altivec
	std	r12,_MSR(r6)

	ld	r0, 112+16(r1)
	addi	r1, r1, 112
	mtlr	r0
	blr

/* void do_load_up_transact_altivec(struct thread_struct *thread)
 *
 * This is similar to load_up_altivec but for the transactional version of the
 * vector regs.  It doesn't mess with the task MSR or valid flags.
 * Furthermore, VEC laziness is not supported with TM currently.
 */
_GLOBAL(do_load_up_transact_altivec)
	mfmsr	r6
	oris	r5,r6,MSR_VEC@h
	MTMSRD(r5)
	isync

	li	r4,1
	stw	r4,THREAD_USED_VR(r3)

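	/*
	 * VSCR has no direct GPR move; it can only be transferred through a
	 * vector register, hence the lvx/mtvscr pair below.
	 */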
	li	r10,THREAD_TRANSACT_VSCR
	lvx	vr0,r10,r3
	mtvscr	vr0
	REST_32VRS_TRANSACT(0,r4,r3)

	/* Disable VEC again. */
	MTMSRD(r6)
	isync

	blr
#endif

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another.  Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on the vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
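	/*
	 * VRSAVE is meant to be maintained by userspace as a bitmap of the
	 * vector registers actually in use (the earlier comment in this
	 * routine notes it could be used to trim the save/restore work).
	 * Treating an all-zero VRSAVE as "all registers live" is the safe
	 * fallback for applications that never set it.
	 */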
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
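	/*
	 * Either way, MSR_VEC is set in the MSR image that the exception
	 * return path will restore (r9 on 32-bit, the _MSR slot of the
	 * exception frame on 64-bit), so the interrupted context comes back
	 * with AltiVec enabled.
	 */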
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

_GLOBAL(giveup_altivec_notask)
	mfmsr	r3
	andis.	r4,r3,MSR_VEC@h
	bnelr				/* Already enabled? */
	oris	r3,r3,MSR_VEC@h
	SYNC
	MTMSRD(r3)			/* enable use of VMX now */
	isync
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
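/*
 * Clearing MSR_VEC (and MSR_VSX where the CPU has it) in the task's saved
 * MSR means its next AltiVec instruction will trap into load_up_altivec
 * again, which is how ownership of the unit is handed around.
 */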
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
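/*
 * There is no separate VSX register file to load: VSR0-31 overlay the
 * FPRs and VSR32-63 overlay the VRs, so pulling in the FP and AltiVec
 * state below covers all 64 VSX registers.
 */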
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
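/*
 * No VSX save is needed here because the VSRs alias the FP and VMX
 * register files; callers are expected to have saved those halves via
 * giveup_fpu()/giveup_altivec().  All this routine does is clear MSR_VSX
 * in the task's saved MSR.
 */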
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync

	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers.  These routines must be called
 * with preempt disabled.
 */
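/*
 * Each helper follows the same pattern: save LR in r12, call fpenable
 * (which makes a small stack frame, turns on MSR_FP, saves fr0/fr1/fr31
 * and the caller's FPSCR, then zeroes FPSCR so the arithmetic runs with
 * default rounding and no stale exception state), loop over the four
 * 32-bit elements of the vector with the count in CTR, and finally
 * tail-branch to fpdisable, which puts everything back and returns via
 * the saved LR.
 */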
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate.  We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
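/*
 * Computing the exact single-precision quotient is at least as accurate
 * as the estimate the hardware vrefp instruction would give, so callers
 * expecting vrefp semantics are still satisfied.
 */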
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
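/*
 * The Newton-Raphson step below comes from solving f(r) = 1/r^2 - s = 0:
 *
 *	r(n+1) = r(n) - f(r(n)) / f'(r(n))
 *	       = r(n) + (r(n) / 2) * (1 - s * r(n)^2)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop; each
 * iteration roughly doubles the number of correct bits in the estimate.
 */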
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable