xref: /openbmc/linux/arch/powerpc/kernel/vector.S (revision 87c2ce3b)
1#include <linux/config.h>
2#include <asm/ppc_asm.h>
3#include <asm/reg.h>
4
5/*
6 * The routines below are in assembler so we can closely control the
7 * usage of floating-point registers.  These routines must be called
8 * with preempt disabled.
9 */
10#ifdef CONFIG_PPC32
/* 32-bit build: the constants are single-precision words placed in .data. */
11	.data
12fpzero:
13	.long	0		/* 0.0 in single-precision FP */
14fpone:
15	.long	0x3f800000	/* 1.0 in single-precision FP */
16fphalf:
17	.long	0x3f000000	/* 0.5 in single-precision FP */
18
/*
 * LDCONST(fr, name): load the single-precision constant 'name' into FP
 * register 'fr'.  Uses r11 to hold the high half of the address, so r11
 * is clobbered.
 */
19#define LDCONST(fr, name)	\
20	lis	r11,name@ha;	\
21	lfs	fr,name@l(r11)
22#else
23
/* Otherwise: the constants are double-precision entries in the TOC. */
24	.section ".toc","aw"
25fpzero:
26	.tc	FD_0_0[TC],0	/* 0.0 */
27fpone:
28	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
29fphalf:
30	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */
31
/*
 * LDCONST(fr, name): load the double-precision TOC constant 'name' into FP
 * register 'fr', addressed relative to r2.  No GPR is modified.
 */
32#define LDCONST(fr, name)	\
33	lfd	fr,name@toc(r2)
34#endif
35
36	.text
37/*
38 * Internal routine to enable floating point and set FPSCR to 0.
39 * Don't call it from C; it doesn't use the normal calling convention.
40 */
41fpenable:
	/*
	 * Entered with "bl fpenable", which overwrites LR; every caller in
	 * this file copies its own LR into r12 first.
	 *
	 * State on return:
	 *	r1   -> new 64-byte frame holding fr31 @8, fr1 @16, fr0 @24
	 *	r10  =  MSR as it was on entry (consumed by fpdisable)
	 *	r11  =  clobbered
	 *	fr31 =  FPSCR as it was on entry (consumed by fpdisable)
	 *	fr1  =  0.0
	 *	FPSCR = 0, MSR_FP set
	 * Frame offsets 32..63 are left free for callers that need to save
	 * more FP registers.
	 */
42#ifdef CONFIG_PPC32
43	stwu	r1,-64(r1)	/* push frame: store old r1 at r1-64, r1 -= 64 */
44#else
45	stdu	r1,-64(r1)	/* same, storing a doubleword */
46#endif
47	mfmsr	r10		/* r10 = original MSR, kept for fpdisable */
48	ori	r11,r10,MSR_FP
49	mtmsr	r11		/* MSR with MSR_FP set */
50	isync			/* MSR update must be in effect before the FP accesses below */
51	stfd	fr0,24(r1)	/* save the FP registers the vector routines use as scratch */
52	stfd	fr1,16(r1)
53	stfd	fr31,8(r1)
54	LDCONST(fr1, fpzero)	/* fr1 = 0.0 */
55	mffs	fr31		/* fr31 = current FPSCR */
56	mtfsf	0xff,fr1	/* write all eight FPSCR fields from fr1 (zero) */
57	blr
58
59fpdisable:
	/*
	 * Common exit path, reached by a plain branch ("b fpdisable") from
	 * the vector routines in this file; undoes what fpenable did.
	 * Expects: r12 = return address of the vector routine's caller,
	 *	    r10 = saved MSR, fr31 = saved FPSCR,
	 *	    r1 -> the 64-byte frame pushed by fpenable.
	 * The final blr returns directly to the vector routine's caller.
	 */
60	mtlr	r12		/* LR was overwritten by "bl fpenable" */
61	mtfsf	0xff,fr31	/* restore FPSCR; must precede the reload of fr31 */
62	lfd	fr31,8(r1)	/* reload the FP registers saved by fpenable */
63	lfd	fr1,16(r1)
64	lfd	fr0,24(r1)
65	mtmsr	r10		/* put back the MSR value captured on entry to fpenable */
66	isync
67	addi	r1,r1,64	/* pop the frame */
68	blr
69
70/*
71 * Vector add, floating point.
72 */
_GLOBAL(vaddfp)
	/*
	 * r3 -> destination, r4 -> first source, r5 -> second source.
	 * For each of four single-precision elements:
	 *	dst[i] = a[i] + b[i]		(a = r4, b = r5)
	 * r12 carries our return address to fpdisable, which returns for us.
	 */
	mflr	r12
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvaddfp_next:
	lfsx	fr1,r5,r6		/* fr1 = b[i] */
	lfsx	fr0,r4,r6		/* fr0 = a[i] */
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6		/* dst[i] = fr0 */
	addi	r6,r6,4			/* step to the next 4-byte element */
	bdnz	.Lvaddfp_next
	b	fpdisable
86
87/*
88 * Vector subtract, floating point.
89 */
_GLOBAL(vsubfp)
	/*
	 * r3 -> destination, r4 -> minuend source, r5 -> subtrahend source.
	 * For each of four single-precision elements:
	 *	dst[i] = a[i] - b[i]		(a = r4, b = r5)
	 * r12 carries our return address to fpdisable, which returns for us.
	 */
	mflr	r12
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvsubfp_next:
	lfsx	fr1,r5,r6		/* fr1 = b[i] */
	lfsx	fr0,r4,r6		/* fr0 = a[i] */
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6		/* dst[i] = fr0 */
	addi	r6,r6,4			/* step to the next 4-byte element */
	bdnz	.Lvsubfp_next
	b	fpdisable
103
104/*
105 * Vector multiply and add, floating point.
106 */
_GLOBAL(vmaddfp)
	/*
	 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
	 * For each of four single-precision elements:
	 *	dst[i] = a[i] * c[i] + b[i]
	 * fr2 is needed as a third scratch register, so it is saved in the
	 * frame pushed by fpenable and reloaded before leaving.
	 */
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r7,0			/* r7 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvmaddfp_next:
	lfsx	fr2,r6,r7		/* fr2 = c[i] */
	lfsx	fr1,r5,r7		/* fr1 = b[i] */
	lfsx	fr0,r4,r7		/* fr0 = a[i] */
	fmadds	fr0,fr0,fr2,fr1		/* fr0 = fr0 * fr2 + fr1 */
	stfsx	fr0,r3,r7		/* dst[i] = fr0 */
	addi	r7,r7,4			/* step to the next 4-byte element */
	bdnz	.Lvmaddfp_next
	lfd	fr2,32(r1)
	b	fpdisable
123
124/*
125 * Vector negative multiply and subtract, floating point.
126 */
_GLOBAL(vnmsubfp)
	/*
	 * r3 -> destination, r4 -> a, r5 -> b, r6 -> c.
	 * For each of four single-precision elements:
	 *	dst[i] = -(a[i] * c[i] - b[i])
	 * fr2 is needed as a third scratch register, so it is saved in the
	 * frame pushed by fpenable and reloaded before leaving.
	 */
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r7,0			/* r7 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvnmsubfp_next:
	lfsx	fr2,r6,r7		/* fr2 = c[i] */
	lfsx	fr1,r5,r7		/* fr1 = b[i] */
	lfsx	fr0,r4,r7		/* fr0 = a[i] */
	fnmsubs	fr0,fr0,fr2,fr1		/* fr0 = -(fr0 * fr2 - fr1) */
	stfsx	fr0,r3,r7		/* dst[i] = fr0 */
	addi	r7,r7,4			/* step to the next 4-byte element */
	bdnz	.Lvnmsubfp_next
	lfd	fr2,32(r1)
	b	fpdisable
143
144/*
145 * Vector reciprocal estimate.  We just compute 1.0/x.
146 * r3 -> destination, r4 -> source.
147 */
_GLOBAL(vrefp)
	/*
	 * For each of four single-precision elements:
	 *	dst[i] = 1.0 / src[i]		(dst = r3, src = r4)
	 * fr1 holds the constant 1.0 for the whole loop; fpenable has already
	 * saved the caller's fr0/fr1.
	 */
	mflr	r12
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0 */
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvrefp_next:
	lfsx	fr0,r4,r6		/* fr0 = src[i] */
	fdivs	fr0,fr1,fr0		/* fr0 = fr1 / fr0 */
	stfsx	fr0,r3,r6		/* dst[i] = fr0 */
	addi	r6,r6,4			/* step to the next 4-byte element */
	bdnz	.Lvrefp_next
	b	fpdisable
161
162/*
163 * Vector reciprocal square-root estimate, floating point.
164 * We use the frsqrte instruction for the initial estimate followed
165 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
166 * r3 -> destination, r4 -> source.
167 */
_GLOBAL(vrsqrtefp)
	/*
	 * For each of four single-precision elements s = src[i] (src = r4):
	 *	r = frsqrte(s)
	 *	r = r + 0.5 * r * (1 - s * r * r)	(applied twice)
	 *	dst[i] = r				(dst = r3)
	 *
	 * Register roles inside the loop:
	 *	fr0 = s, fr1 = r, fr2 = 0.5 * r, fr3 = temporary,
	 *	fr4 = 1.0, fr5 = 0.5
	 * fr2..fr5 are saved in the frame pushed by fpenable (offsets 32..56)
	 * and reloaded before leaving.
	 */
	mflr	r12
	bl	fpenable
	stfd	fr5,56(r1)
	stfd	fr4,48(r1)
	stfd	fr3,40(r1)
	stfd	fr2,32(r1)
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* CTR = number of elements left */
.Lvrsqrtefp_next:
	lfsx	fr0,r4,r6		/* s = src[i] */
	frsqrte	fr1,fr0			/* r = initial estimate */

	/* first Newton-Raphson step */
	fmuls	fr2,fr1,fr5		/* 0.5 * r */
	fmuls	fr3,fr1,fr0		/* r * s */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - r * (r * s) */
	fmadds	fr1,fr2,fr3,fr1		/* r += (0.5 * r) * (1 - s * r * r) */

	/* second Newton-Raphson step, same sequence on the refined r */
	fmuls	fr2,fr1,fr5		/* 0.5 * r */
	fmuls	fr3,fr1,fr0		/* r * s */
	fnmsubs	fr3,fr1,fr3,fr4		/* 1 - r * (r * s) */
	fmadds	fr1,fr2,fr3,fr1		/* r += (0.5 * r) * (1 - s * r * r) */

	stfsx	fr1,r3,r6		/* dst[i] = r */
	addi	r6,r6,4			/* step to the next 4-byte element */
	bdnz	.Lvrsqrtefp_next
	lfd	fr2,32(r1)
	lfd	fr3,40(r1)
	lfd	fr4,48(r1)
	lfd	fr5,56(r1)
	b	fpdisable
198