xref: /openbmc/linux/arch/powerpc/kernel/vecemu.c (revision 9cfc5c90)
1 /*
2  * Routines to emulate some Altivec/VMX instructions, specifically
3  * those that can trap when given denormalized operands in Java mode.
4  */
5 #include <linux/kernel.h>
6 #include <linux/errno.h>
7 #include <linux/sched.h>
8 #include <asm/ptrace.h>
9 #include <asm/processor.h>
10 #include <asm/uaccess.h>
11 
/*
 * Functions in vector.S.
 *
 * Each helper takes pointers to vector128 operands; by the parameter names
 * in these prototypes the first argument is the destination and the
 * remaining ones are the sources.
 *
 * NOTE(review): vexptep is declared here but is not called anywhere in the
 * code visible in this file; the vexptefp instruction is emulated in C via
 * eexp2() instead.  Confirm whether this prototype is still needed.
 */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vrefp(vector128 *dst, vector128 *src);
extern void vrsqrtefp(vector128 *dst, vector128 *src);
extern void vexptep(vector128 *dst, vector128 *src);
20 
/*
 * Mantissas of 2^(k/8) for k = 0..7, scaled by 2^23 (so entry 0 is 1.0).
 * Indexed by the top three fraction bits of the exponent argument.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/* High 32 bits of the unsigned 64-bit product a * b. */
static unsigned int eexp2_mulhi32(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the single-precision bit pattern of the estimate.
 *
 * Special cases: NaN -> QNaN, large positive (including +Inf) -> +Inf,
 * large negative (including -Inf) -> +0, |x| < 2^-23 -> 1.0.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;		/* exp <= 7, so this stays below 2^31 */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * The arithmetic right shift gives floor() for negative pwr, so the
	 * remaining fraction bits are always non-negative.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;	/* overflow -> +Inf */
	if (exp < -23)
		return 0;		/* below the smallest denormal */

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction.
	 * 0x172b83ff is (2^0.125 - 1) * 2^32.  The shift is done on an
	 * unsigned value because pwr may be negative or have high bits set.
	 */
	frac = eexp2_mulhi32((unsigned int)pwr << 12, 0x172b83ff);
	frac = eexp2_mulhi32(frac, mant);
	mant += frac;

	/* mant carries the implicit 1 in bit 23, which bumps exp to exp + 1 */
	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift right with rounding */
	exp = -exp;
	mant += 1u << (exp - 1);
	return mant >> exp;
}
86 
/* High 32 bits of the unsigned 64-bit product a * b. */
static unsigned int elog2_mulhi32(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the single-precision bit pattern of the estimate.
 *
 * Special cases: NaN -> QNaN, +Inf -> +Inf, +/-0 -> -Inf, and any
 * negative non-zero input (including -Inf) -> QNaN, since the logarithm
 * of a negative number is undefined.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* +Inf -> +Inf, -Inf -> QNaN */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* negative, non-zero */
		return 0x7fc00000;	/* return QNaN */

	if (exp == 0) {
		/* denormalized: mant != 0 here, so clz is well defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;	/* move leading 1 to bit 23 */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * exp now holds the integer part of the result in 9.23 fixed point
	 * and mant is in [1.0, 2.0) scaled by 2^23.  Peel off the 1/2, 1/4
	 * and 1/8 fraction bits by successively dividing mant down.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi32(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi32(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi32(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi32((unsigned int)(mant - 0x800000) << 1,
				     0xb0c7cd3a);
		exp += frac;
	}

	/* convert the signed 9.23 fixed-point value in exp to float format */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		/* normalize so that the leading 1 sits in bit 23 */
		lz = 8 - __builtin_clz((unsigned int)exp);
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* the leading 1 in bit 23 carries into the exponent field */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
151 
#define VSCR_SAT	1

/*
 * Convert the single-precision value with bit pattern `x', multiplied by
 * 2^scale, to a signed 32-bit integer, rounding towards zero.
 *
 * NaN converts to 0.  Values too large in magnitude are clamped to
 * 0x7fffffff / 0x80000000 and VSCR_SAT is set in *vscrp, except when the
 * scaled value is exactly -2^31, which is representable.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	int biased = (x >> 23) & 0xff;
	int frac = x & 0x7fffff;
	int power, magnitude;

	/* NaN -> 0 */
	if (biased == 255 && frac != 0)
		return 0;

	power = biased - 127 + scale;

	/* magnitude below 1.0: round towards zero */
	if (power < 0)
		return 0;

	if (power >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		if (negative)
			return 0x80000000;
		return 0x7fffffff;
	}

	/* put the leading 1 in bit 30, then shift the fraction bits out */
	magnitude = ((frac | 0x800000) << 7) >> (30 - power);
	if (negative)
		return -magnitude;
	return magnitude;
}
175 
176 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
177 {
178 	int exp;
179 	unsigned int mant;
180 
181 	exp = (x >> 23) & 0xff;
182 	mant = x & 0x7fffff;
183 	if (exp == 255 && mant != 0)
184 		return 0;		/* NaN -> 0 */
185 	exp = exp - 127 + scale;
186 	if (exp < 0)
187 		return 0;		/* round towards zero */
188 	if (x & 0x80000000) {
189 		/* negative => saturate to 0 */
190 		*vscrp |= VSCR_SAT;
191 		return 0;
192 	}
193 	if (exp >= 32) {
194 		/* saturate */
195 		*vscrp |= VSCR_SAT;
196 		return 0xffffffff;
197 	}
198 	mant |= 0x800000;
199 	mant = (mant << 8) >> (31 - exp);
200 	return mant;
201 }
202 
/*
 * Round to floating integer, towards 0.
 * Takes and returns single-precision bit patterns; NaNs are returned
 * as QNaNs.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int biased = (x >> 23) & 0xff;

	/* NaN -> make it a QNaN */
	if (biased == 255 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* exponent >= 23: it's an integer already (or Inf) */
	if (biased >= 127 + 23)
		return x;

	/* |x| < 1.0 rounds to a zero of the same sign */
	if (biased < 127)
		return x & 0x80000000;

	/* clear the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffu >> (biased - 127));
}
217 
/*
 * Round to floating integer, towards +/- Inf (i.e. away from zero).
 * Takes and returns single-precision bit patterns; NaNs are returned
 * as QNaNs.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int biased = (x >> 23) & 0xff;
	unsigned int below_point;

	/* NaN -> make it a QNaN */
	if (biased == 255 && (x & 0x7fffff) != 0)
		return x | 0x400000;

	/* exponent >= 23: it's an integer already (or Inf) */
	if (biased >= 127 + 23)
		return x;

	/* +/-0 -> +/-0 */
	if ((x & 0x7fffffff) == 0)
		return x;

	/* 0 < |x| < 1.0 rounds to +/- 1.0 */
	if (biased < 127)
		return (x & 0x80000000) | 0x3f800000;

	/*
	 * Bump past the next integer unless x already is one, then drop the
	 * fraction.  The mantissa may overflow into the exponent - that's
	 * OK, it can't overflow into the sign bit.
	 */
	below_point = 0x7fffffu >> (biased - 127);
	return (x + below_point) & ~below_point;
}
238 
/*
 * Round to floating integer, to nearest, with ties going to the even
 * integer (IEEE round-to-nearest).
 * Takes and returns single-precision bit patterns; NaNs are returned
 * as QNaNs.
 */
static unsigned int rfin(unsigned int x)
{
	int exp;
	unsigned int frac_mask, half, rounded;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even one */
		if ((x & 0x7fffff) == 0)
			return x & 0x80000000;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	}

	frac_mask = 0x7fffffu >> exp;	/* mantissa bits below the point */
	half = 0x400000u >> exp;	/* the bit worth 0.5 */

	/* add 0.5 to the magnitude and chop off the fraction bits */
	rounded = (x + half) & ~frac_mask;

	/*
	 * On an exact tie the step above rounded away from zero.  If that
	 * produced an odd integer, clearing the units bit (frac_mask + 1)
	 * steps back to the even neighbour; if it produced an even integer
	 * the units bit is already clear and this is a no-op.
	 */
	if ((x & frac_mask) == half)
		rounded &= ~(frac_mask + 1);

	return rounded;
}
258 
259 int emulate_altivec(struct pt_regs *regs)
260 {
261 	unsigned int instr, i;
262 	unsigned int va, vb, vc, vd;
263 	vector128 *vrs;
264 
265 	if (get_user(instr, (unsigned int __user *) regs->nip))
266 		return -EFAULT;
267 	if ((instr >> 26) != 4)
268 		return -EINVAL;		/* not an altivec instruction */
269 	vd = (instr >> 21) & 0x1f;
270 	va = (instr >> 16) & 0x1f;
271 	vb = (instr >> 11) & 0x1f;
272 	vc = (instr >> 6) & 0x1f;
273 
274 	vrs = current->thread.vr_state.vr;
275 	switch (instr & 0x3f) {
276 	case 10:
277 		switch (vc) {
278 		case 0:	/* vaddfp */
279 			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
280 			break;
281 		case 1:	/* vsubfp */
282 			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
283 			break;
284 		case 4:	/* vrefp */
285 			vrefp(&vrs[vd], &vrs[vb]);
286 			break;
287 		case 5:	/* vrsqrtefp */
288 			vrsqrtefp(&vrs[vd], &vrs[vb]);
289 			break;
290 		case 6:	/* vexptefp */
291 			for (i = 0; i < 4; ++i)
292 				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
293 			break;
294 		case 7:	/* vlogefp */
295 			for (i = 0; i < 4; ++i)
296 				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
297 			break;
298 		case 8:		/* vrfin */
299 			for (i = 0; i < 4; ++i)
300 				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
301 			break;
302 		case 9:		/* vrfiz */
303 			for (i = 0; i < 4; ++i)
304 				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
305 			break;
306 		case 10:	/* vrfip */
307 			for (i = 0; i < 4; ++i) {
308 				u32 x = vrs[vb].u[i];
309 				x = (x & 0x80000000)? rfiz(x): rfii(x);
310 				vrs[vd].u[i] = x;
311 			}
312 			break;
313 		case 11:	/* vrfim */
314 			for (i = 0; i < 4; ++i) {
315 				u32 x = vrs[vb].u[i];
316 				x = (x & 0x80000000)? rfii(x): rfiz(x);
317 				vrs[vd].u[i] = x;
318 			}
319 			break;
320 		case 14:	/* vctuxs */
321 			for (i = 0; i < 4; ++i)
322 				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
323 					&current->thread.vr_state.vscr.u[3]);
324 			break;
325 		case 15:	/* vctsxs */
326 			for (i = 0; i < 4; ++i)
327 				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
328 					&current->thread.vr_state.vscr.u[3]);
329 			break;
330 		default:
331 			return -EINVAL;
332 		}
333 		break;
334 	case 46:	/* vmaddfp */
335 		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
336 		break;
337 	case 47:	/* vnmsubfp */
338 		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
339 		break;
340 	default:
341 		return -EINVAL;
342 	}
343 
344 	return 0;
345 }
346