xref: /openbmc/linux/arch/powerpc/kernel/vector.S (revision 31b90347)
1#include <asm/processor.h>
2#include <asm/ppc_asm.h>
3#include <asm/reg.h>
4#include <asm/asm-offsets.h>
5#include <asm/cputable.h>
6#include <asm/thread_info.h>
7#include <asm/page.h>
8#include <asm/ptrace.h>
9
10#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
11/* void do_load_up_transact_altivec(struct thread_struct *thread)
12 *
13 * This is similar to load_up_altivec but for the transactional version of the
14 * vector regs.  It doesn't mess with the task MSR or valid flags.
15 * Furthermore, VEC laziness is not supported with TM currently.
 *
 * In:  r3 = thread.
 * Writes r4, r5, r6, r10, VSCR and the vector registers.  The MSR value
 * read on entry is written back before returning.
 *
 * NOTE(review): the code does store 1 to THREAD_USED_VR(r3), so the
 * "valid flags" mentioned above presumably means something else -- confirm.
16 */
17_GLOBAL(do_load_up_transact_altivec)
18	mfmsr	r6			/* r6 = MSR on entry, restored at the end */
19	oris	r5,r6,MSR_VEC@h		/* r5 = entry MSR with MSR_VEC set */
20	MTMSRD(r5)			/* turn VEC on for the loads below */
21	isync
22
23	li	r4,1
24	stw	r4,THREAD_USED_VR(r3)	/* write 1 at THREAD_USED_VR(thread) */
25
	/* VSCR first: it is staged through vr0, which is reloaded afterwards */
26	li	r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
27	lvx	vr0,r10,r3
28	mtvscr	vr0
29	addi	r10,r3,THREAD_TRANSACT_VRSTATE	/* r10 = base of the transactional VR area */
	/* presumably reloads vr0..vr31 from (r10) with r4 as scratch -- confirm in ppc_asm.h */
30	REST_32VRS(0,r4,r10)
31
32	/* Disable VEC again. */
33	MTMSRD(r6)
34	isync
35
36	blr
37#endif
38
39/*
40 * Load state from memory into VMX registers including VSCR.
41 * Assumes the caller has enabled VMX in the MSR.
 *
 * In:  r3 = base address of the saved vector state; the VSCR image is
 *      read from offset VRSTATE_VSCR within it.
 * Writes r4, VSCR and the vector registers.
42 */
43_GLOBAL(load_vr_state)
44	li	r4,VRSTATE_VSCR
45	lvx	vr0,r4,r3		/* stage the saved VSCR image in vr0 */
46	mtvscr	vr0
	/* presumably reloads vr0..vr31 from (r3) with r4 as scratch -- confirm in ppc_asm.h */
47	REST_32VRS(0,r4,r3)
48	blr
49
50/*
51 * Store VMX state into memory, including VSCR.
52 * Assumes the caller has enabled VMX in the MSR.
 *
 * In:  r3 = base address of the area to fill; VSCR is written at offset
 *      VRSTATE_VSCR within it.
 * Writes r4 and vr0: once the registers have been stored, vr0 is reused
 * to carry VSCR to memory, so its live value is lost.
53 */
54_GLOBAL(store_vr_state)
	/* presumably stores vr0..vr31 to (r3) with r4 as scratch -- confirm in ppc_asm.h */
55	SAVE_32VRS(0, r4, r3)
56	mfvscr	vr0			/* vr0 now holds VSCR, not its old contents */
57	li	r4, VRSTATE_VSCR
58	stvx	vr0, r4, r3
59	blr
60
61/*
62 * Disable VMX for the task which had it previously,
63 * and save its vector registers in its thread_struct.
64 * Enables the VMX for use in the kernel on return.
65 * On SMP we know the VMX is free, since we give it up every
66 * switch (ie, no lazy save of the vector registers).
67 *
68 * Note that on 32-bit this can only use registers that will be
69 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 *
 * NOTE(review): the code ORs MSR_VEC into r9 (32-bit) or into r12 and
 * then stores r12 to _MSR(r1) (64-bit), so the caller presumably enters
 * with the interrupted context's MSR in that register and, on 64-bit,
 * an exception frame at r1 -- confirm against the exception entry code.
70 */
71_GLOBAL(load_up_altivec)
72	mfmsr	r5			/* grab the current MSR */
73	oris	r5,r5,MSR_VEC@h
74	MTMSRD(r5)			/* enable use of AltiVec now */
75	isync
76
77/*
78 * For SMP, we don't do lazy VMX switching because it just gets too
79 * horrendously complex, especially when a task switches from one CPU
80 * to another.  Instead we call giveup_altivec in switch_to.
81 * VRSAVE isn't dealt with here, that is done in the normal context
82 * switch code. Note that we could rely on vrsave value to eventually
83 * avoid saving all of the VREGs here...
84 */
85#ifndef CONFIG_SMP
	/* r3 keeps the address base of last_task_used_altivec until the
	 * final store at the bottom of this routine; do not reuse it. */
86	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
87	toreal(r3)			/* presumably virtual->real address fixup -- confirm */
88	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
89	PPC_LCMPI	0,r4,0
90	beq	1f			/* no previous owner: nothing to save */
91
92	/* Save VMX state to last_task_used_altivec's THREAD struct */
93	toreal(r4)
94	addi	r4,r4,THREAD		/* r4 = previous owner's THREAD */
95	addi	r6,r4,THREAD_VRSTATE	/* r6 = its vector save area */
96	SAVE_32VRS(0,r5,r6)
97	mfvscr	vr0			/* vr0 already saved; reuse it to carry VSCR */
98	li	r10,VRSTATE_VSCR
99	stvx	vr0,r10,r6
100	/* Disable VMX for last_task_used_altivec */
	/* NOTE(review): PT_REGS is not tested for zero here, whereas
	 * giveup_altivec skips the MSR update when it is zero. */
101	PPC_LL	r5,PT_REGS(r4)
102	toreal(r5)
103	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
104	lis	r10,MSR_VEC@h
105	andc	r4,r4,r10		/* clear MSR_VEC in the saved MSR image */
106	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1071:
108#endif /* CONFIG_SMP */
109
110	/* Hack: if we get an altivec unavailable trap with VRSAVE
111	 * set to all zeros, we assume this is a broken application
112	 * that fails to set it properly, and thus we switch it to
113	 * all 1's
114	 */
115	mfspr	r4,SPRN_VRSAVE
116	cmpwi	0,r4,0
117	bne+	1f
118	li	r4,-1
119	mtspr	SPRN_VRSAVE,r4
1201:
121	/* enable use of VMX after return */
122#ifdef CONFIG_PPC32
123	mfspr	r5,SPRN_SPRG_THREAD		/* current task's THREAD (phys) */
124	oris	r9,r9,MSR_VEC@h
125#else
126	ld	r4,PACACURRENT(r13)
127	addi	r5,r4,THREAD		/* Get THREAD */
128	oris	r12,r12,MSR_VEC@h
129	std	r12,_MSR(r1)
130#endif
	/* From here r5 = current THREAD on both 32-bit and 64-bit. */
131	addi	r6,r5,THREAD_VRSTATE
132	li	r4,1
133	li	r10,VRSTATE_VSCR
134	stw	r4,THREAD_USED_VR(r5)	/* write 1 at THREAD_USED_VR(current THREAD) */
135	lvx	vr0,r10,r6		/* VSCR is staged through vr0, then vr0 is reloaded */
136	mtvscr	vr0
137	REST_32VRS(0,r4,r6)
138#ifndef CONFIG_SMP
139	/* Update last_task_used_altivec to 'current' */
140	subi	r4,r5,THREAD		/* Back to 'current' */
141	fromreal(r4)
142	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
143#endif /* CONFIG_SMP */
144	/* restore registers and return */
145	blr
146
/*
 * giveup_altivec_notask()
 * Make sure MSR_VEC is set in the current MSR.  No task state is read
 * or written.  Returns straight away when the bit is already set.
 * Writes r3, r4 and cr0.
 */
147_GLOBAL(giveup_altivec_notask)
148	mfmsr	r3
149	andis.	r4,r3,MSR_VEC@h		/* cr0 reflects whether MSR_VEC is set */
150	bnelr				/* Already enabled? */
151	oris	r3,r3,MSR_VEC@h
152	SYNC
153	MTMSRD(r3)			/* enable use of VMX now */
154	isync
155	blr
156
157/*
158 * giveup_altivec(tsk)
159 * Disable VMX for the task given as the argument,
160 * and save the vector registers in its thread_struct.
161 * Enables the VMX for use in the kernel on return.
 *
 * In:  r3 = tsk, may be zero (then only the MSR is changed).
 * The registers are saved to the address held at THREAD_VRSAVEAREA when
 * that pointer is non-zero, otherwise to THREAD_VRSTATE inside the
 * thread struct.  Writes r3, r4, r5, r7, vr0 and cr0.
162 */
163_GLOBAL(giveup_altivec)
164	mfmsr	r5
165	oris	r5,r5,MSR_VEC@h
166	SYNC
167	MTMSRD(r5)			/* enable use of VMX now */
168	isync
169	PPC_LCMPI	0,r3,0
170	beqlr				/* if no previous owner, done */
171	addi	r3,r3,THREAD		/* want THREAD of task */
172	PPC_LL	r7,THREAD_VRSAVEAREA(r3)	/* r7 = alternate save-area pointer */
173	PPC_LL	r5,PT_REGS(r3)		/* r5 = task's saved regs pointer, may be zero */
174	PPC_LCMPI	0,r7,0
175	bne	2f			/* alternate area set: save there */
176	addi	r7,r3,THREAD_VRSTATE	/* else save into the thread struct */
	/* The compare below is consumed by "beq 1f" after the save, so the
	 * instructions in between are expected to leave cr0 untouched. */
1772:	PPC_LCMPI	0,r5,0
178	SAVE_32VRS(0,r4,r7)
179	mfvscr	vr0			/* vr0 already saved; reuse it to carry VSCR */
180	li	r4,VRSTATE_VSCR
181	stvx	vr0,r4,r7
182	beq	1f			/* no saved regs: skip the MSR update */
183	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
184#ifdef CONFIG_VSX
185BEGIN_FTR_SECTION
186	lis	r3,(MSR_VEC|MSR_VSX)@h
187FTR_SECTION_ELSE
188	lis	r3,MSR_VEC@h
189ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
190#else
191	lis	r3,MSR_VEC@h
192#endif
193	andc	r4,r4,r3		/* disable VMX (and VSX, if present) for previous task */
194	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1951:
196#ifndef CONFIG_SMP
	/* Nobody owns the vector unit any more: zero last_task_used_altivec */
197	li	r5,0
198	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
199	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
200#endif /* CONFIG_SMP */
201	blr
202
203#ifdef CONFIG_VSX
204
205#ifdef CONFIG_PPC32
206#error This asm code isn't ready for 32-bit kernels
207#endif
208
209/*
210 * load_up_vsx(unused, unused, tsk)
211 * Disable VSX for the task which had it previously,
212 * and save its vector registers in its thread_struct.
213 * Reuse the fp and vsx saves, but first check to see if they have
214 * been saved already.
 *
 * The MSR_FP / MSR_VEC tests and the final store are done on r12, and
 * r12 is written to _MSR(r1), so r12 presumably holds the interrupted
 * context's MSR and r1 an exception frame -- confirm against the caller.
 * Does not return to its caller: it leaves through fast_exception_return.
215 */
216_GLOBAL(load_up_vsx)
217/* Load FP and VSX registers if they haven't been done yet */
218	andi.	r5,r12,MSR_FP
219	beql+	load_up_fpu		/* skip if already loaded */
220	andis.	r5,r12,MSR_VEC@h
221	beql+	load_up_altivec		/* skip if already loaded */
222
223#ifndef CONFIG_SMP
	/* r3 keeps the address of last_task_used_vsx until the store at the
	 * bottom of this routine. */
224	ld	r3,last_task_used_vsx@got(r2)
225	ld	r4,0(r3)
226	cmpdi	0,r4,0
227	beq	1f			/* no previous owner */
228	/* Disable VSX for last_task_used_vsx */
229	addi	r4,r4,THREAD
230	ld	r5,PT_REGS(r4)
231	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
232	lis	r6,MSR_VSX@h
233	andc	r6,r4,r6		/* r6 = saved MSR image with MSR_VSX cleared */
234	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
2351:
236#endif /* CONFIG_SMP */
237	ld	r4,PACACURRENT(r13)
238	addi	r4,r4,THREAD		/* Get THREAD */
239	li	r6,1
240	stw	r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
241	/* enable use of VSX after return */
242	oris	r12,r12,MSR_VSX@h
243	std	r12,_MSR(r1)
244#ifndef CONFIG_SMP
245	/* Update last_task_used_vsx to 'current' */
246	ld	r4,PACACURRENT(r13)
247	std	r4,0(r3)
248#endif /* CONFIG_SMP */
249	b	fast_exception_return
250
251/*
252 * __giveup_vsx(tsk)
253 * Disable VSX for the task given as the argument.
254 * Does NOT save vsx registers.
255 * Enables the VSX for use in the kernel on return.
 *
 * In:  r3 = tsk, may be zero (then only the MSR is changed).
 * Writes r3, r4, r5 and cr0.
256 */
257_GLOBAL(__giveup_vsx)
258	mfmsr	r5
259	oris	r5,r5,MSR_VSX@h
260	mtmsrd	r5			/* enable use of VSX now */
261	isync
262
263	cmpdi	0,r3,0
264	beqlr-				/* if no previous owner, done */
265	addi	r3,r3,THREAD		/* want THREAD of task */
266	ld	r5,PT_REGS(r3)
267	cmpdi	0,r5,0
268	beq	1f			/* no saved regs: skip the MSR update */
269	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
270	lis	r3,MSR_VSX@h
271	andc	r4,r4,r3		/* disable VSX for previous task */
272	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
2731:
274#ifndef CONFIG_SMP
	/* Nobody owns VSX any more: zero last_task_used_vsx */
275	li	r5,0
276	ld	r4,last_task_used_vsx@got(r2)
277	std	r5,0(r4)
278#endif /* CONFIG_SMP */
279	blr
280
281#endif /* CONFIG_VSX */
282
283
284/*
285 * The routines below are in assembler so we can closely control the
286 * usage of floating-point registers.  These routines must be called
287 * with preempt disabled.
288 */
/*
 * Floating-point constants 0.0, 1.0 and 0.5 plus LDCONST(fr, name), which
 * loads one of them into an FP register.
 * 32-bit: single-precision words in .data, loaded with lis/lfs; this
 *         form of LDCONST overwrites r11.
 * 64-bit: double-precision TOC entries, loaded with lfd relative to r2.
 */
289#ifdef CONFIG_PPC32
290	.data
291fpzero:
292	.long	0
293fpone:
294	.long	0x3f800000	/* 1.0 in single-precision FP */
295fphalf:
296	.long	0x3f000000	/* 0.5 in single-precision FP */
297
298#define LDCONST(fr, name)	\
299	lis	r11,name@ha;	\
300	lfs	fr,name@l(r11)
301#else
302
303	.section ".toc","aw"
304fpzero:
305	.tc	FD_0_0[TC],0
306fpone:
307	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
308fphalf:
309	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */
310
311#define LDCONST(fr, name)	\
312	lfd	fr,name@toc(r2)
313#endif
314
315	.text
316/*
317 * Internal routine to enable floating point and set FPSCR to 0.
318 * Don't call it from C; it doesn't use the normal calling convention.
 *
 * Callers in this file do "mflr r12; bl fpenable" and later leave with
 * "b fpdisable", which undoes everything set up here.
 *
 * State left for fpdisable:
 *   r1    = new 64-byte frame; fr31 at 8(r1), fr1 at 16(r1), fr0 at 24(r1).
 *           Offsets 32..56 are used by callers for fr2..fr5.
 *   r10   = MSR as it was on entry
 *   fr31  = FPSCR as it was on entry
 *   fr1   = 0.0
 * Also writes r11.
319 */
320fpenable:
321#ifdef CONFIG_PPC32
322	stwu	r1,-64(r1)
323#else
324	stdu	r1,-64(r1)
325#endif
326	mfmsr	r10			/* kept until fpdisable restores it */
327	ori	r11,r10,MSR_FP
328	mtmsr	r11			/* FP on */
329	isync
330	stfd	fr0,24(r1)
331	stfd	fr1,16(r1)
332	stfd	fr31,8(r1)
333	LDCONST(fr1, fpzero)
334	mffs	fr31			/* fr31 = caller's FPSCR */
	/* presumably writes FPSCR from fr1 (0.0), per the header above --
	 * confirm MTFSF_L in ppc_asm.h */
335	MTFSF_L(fr1)
336	blr
337
/*
 * Internal exit path paired with fpenable; reached by "b fpdisable",
 * not by a call.  Expects r12 = caller's return address, r10 = MSR to
 * restore, fr31 = FPSCR image to restore, and r1 = the frame that
 * fpenable created.  Returns to the address that was in r12.
 */
338fpdisable:
339	mtlr	r12			/* return to whoever called the v*fp routine */
	/* presumably writes FPSCR back from fr31 -- confirm MTFSF_L in ppc_asm.h */
340	MTFSF_L(fr31)
341	lfd	fr31,8(r1)
342	lfd	fr1,16(r1)
343	lfd	fr0,24(r1)
344	mtmsr	r10			/* MSR as it was before fpenable */
345	isync
346	addi	r1,r1,64		/* drop the 64-byte frame */
347	blr
348
349/*
350 * Vector add, floating point.
 *
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of 4 single-precision elements: dst[i] = a[i] + b[i].
 * Writes r0, r6, r12, ctr; fr0/fr1 are saved and restored by
 * fpenable/fpdisable.
351 */
352_GLOBAL(vaddfp)
353	mflr	r12			/* fpdisable returns through r12 */
354	bl	fpenable
355	li	r0,4
356	mtctr	r0			/* 4 elements */
357	li	r6,0			/* r6 = byte offset of the current element */
3581:	lfsx	fr0,r4,r6
359	lfsx	fr1,r5,r6
360	fadds	fr0,fr0,fr1
361	stfsx	fr0,r3,r6
362	addi	r6,r6,4
363	bdnz	1b
364	b	fpdisable
365
366/*
367 * Vector subtract, floating point.
 *
 * r3 -> destination, r4 -> first source, r5 -> second source.
 * For each of 4 single-precision elements: dst[i] = a[i] - b[i].
 * Writes r0, r6, r12, ctr; fr0/fr1 are saved and restored by
 * fpenable/fpdisable.
368 */
369_GLOBAL(vsubfp)
370	mflr	r12			/* fpdisable returns through r12 */
371	bl	fpenable
372	li	r0,4
373	mtctr	r0			/* 4 elements */
374	li	r6,0			/* r6 = byte offset of the current element */
3751:	lfsx	fr0,r4,r6
376	lfsx	fr1,r5,r6
377	fsubs	fr0,fr0,fr1
378	stfsx	fr0,r3,r6
379	addi	r6,r6,4
380	bdnz	1b
381	b	fpdisable
382
383/*
384 * Vector multiply and add, floating point.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of 4 single-precision elements: dst[i] = a[i] * c[i] + b[i].
 * Writes r0, r7, r12, ctr.  fr2 is preserved at 32(r1) in the frame set
 * up by fpenable; fr0/fr1 are handled by fpenable/fpdisable.
385 */
386_GLOBAL(vmaddfp)
387	mflr	r12			/* fpdisable returns through r12 */
388	bl	fpenable
389	stfd	fr2,32(r1)
390	li	r0,4
391	mtctr	r0			/* 4 elements */
392	li	r7,0			/* r7 = byte offset of the current element */
3931:	lfsx	fr0,r4,r7
394	lfsx	fr1,r5,r7
395	lfsx	fr2,r6,r7
396	fmadds	fr0,fr0,fr2,fr1		/* fr0 = fr0 * fr2 + fr1 */
397	stfsx	fr0,r3,r7
398	addi	r7,r7,4
399	bdnz	1b
400	lfd	fr2,32(r1)
401	b	fpdisable
402
403/*
404 * Vector negative multiply and subtract, floating point.
 *
 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
 * For each of 4 single-precision elements: dst[i] = -(a[i] * c[i] - b[i]).
 * Writes r0, r7, r12, ctr.  fr2 is preserved at 32(r1) in the frame set
 * up by fpenable; fr0/fr1 are handled by fpenable/fpdisable.
405 */
406_GLOBAL(vnmsubfp)
407	mflr	r12			/* fpdisable returns through r12 */
408	bl	fpenable
409	stfd	fr2,32(r1)
410	li	r0,4
411	mtctr	r0			/* 4 elements */
412	li	r7,0			/* r7 = byte offset of the current element */
4131:	lfsx	fr0,r4,r7
414	lfsx	fr1,r5,r7
415	lfsx	fr2,r6,r7
416	fnmsubs	fr0,fr0,fr2,fr1		/* fr0 = -(fr0 * fr2 - fr1) */
417	stfsx	fr0,r3,r7
418	addi	r7,r7,4
419	bdnz	1b
420	lfd	fr2,32(r1)
421	b	fpdisable
422
423/*
424 * Vector reciprocal estimate.  We just compute 1.0/x.
425 * r3 -> destination, r4 -> source.
 *
 * Processes 4 single-precision elements with a full divide.
 * Writes r0, r6, r12, ctr (and r11 on 32-bit, through LDCONST).
426 */
427_GLOBAL(vrefp)
428	mflr	r12			/* fpdisable returns through r12 */
429	bl	fpenable
430	li	r0,4
431	LDCONST(fr1, fpone)		/* fr1 = 1.0 for the whole loop */
432	mtctr	r0			/* 4 elements */
433	li	r6,0			/* r6 = byte offset of the current element */
4341:	lfsx	fr0,r4,r6
435	fdivs	fr0,fr1,fr0		/* fr0 = 1.0 / x */
436	stfsx	fr0,r3,r6
437	addi	r6,r6,4
438	bdnz	1b
439	b	fpdisable
440
441/*
442 * Vector reciprocal square-root estimate, floating point.
443 * We use the frsqrte instruction for the initial estimate followed
444 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
445 * r3 -> destination, r4 -> source.
 *
 * Processes 4 single-precision elements.
 * Register use inside the loop: fr0 = s (input), fr1 = r (estimate),
 * fr2/fr3 = temporaries, fr4 = 1.0, fr5 = 0.5.
 * fr2..fr5 are preserved at 32..56(r1) in the frame set up by fpenable.
 * Writes r0, r6, r12, ctr (and r11 on 32-bit, through LDCONST).
446 */
447_GLOBAL(vrsqrtefp)
448	mflr	r12			/* fpdisable returns through r12 */
449	bl	fpenable
450	stfd	fr2,32(r1)
451	stfd	fr3,40(r1)
452	stfd	fr4,48(r1)
453	stfd	fr5,56(r1)
454	li	r0,4
455	LDCONST(fr4, fpone)
456	LDCONST(fr5, fphalf)
457	mtctr	r0			/* 4 elements */
458	li	r6,0			/* r6 = byte offset of the current element */
4591:	lfsx	fr0,r4,r6
460	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	/* first Newton-Raphson step */
461	fmuls	fr3,fr1,fr0	/* r * s */
462	fmuls	fr2,fr1,fr5	/* r * 0.5 */
463	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
464	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	/* second Newton-Raphson step, same sequence on the refined r */
465	fmuls	fr3,fr1,fr0	/* r * s */
466	fmuls	fr2,fr1,fr5	/* r * 0.5 */
467	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
468	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
469	stfsx	fr1,r3,r6
470	addi	r6,r6,4
471	bdnz	1b
472	lfd	fr5,56(r1)
473	lfd	fr4,48(r1)
474	lfd	fr3,40(r1)
475	lfd	fr2,32(r1)
476	b	fpdisable
477