1 /* 2 * Routines to emulate some Altivec/VMX instructions, specifically 3 * those that can trap when given denormalized operands in Java mode. 4 */ 5 #include <linux/kernel.h> 6 #include <linux/errno.h> 7 #include <linux/sched.h> 8 #include <asm/ptrace.h> 9 #include <asm/processor.h> 10 #include <asm/uaccess.h> 11 12 /* Functions in vector.S */ 13 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); 14 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); 15 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 16 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 17 extern void vrefp(vector128 *dst, vector128 *src); 18 extern void vrsqrtefp(vector128 *dst, vector128 *src); 19 extern void vexptep(vector128 *dst, vector128 *src); 20 21 static unsigned int exp2s[8] = { 22 0x800000, 23 0x8b95c2, 24 0x9837f0, 25 0xa5fed7, 26 0xb504f3, 27 0xc5672a, 28 0xd744fd, 29 0xeac0c7 30 }; 31 32 /* 33 * Computes an estimate of 2^x. The `s' argument is the 32-bit 34 * single-precision floating-point representation of x. 35 */ 36 static unsigned int eexp2(unsigned int s) 37 { 38 int exp, pwr; 39 unsigned int mant, frac; 40 41 /* extract exponent field from input */ 42 exp = ((s >> 23) & 0xff) - 127; 43 if (exp > 7) { 44 /* check for NaN input */ 45 if (exp == 128 && (s & 0x7fffff) != 0) 46 return s | 0x400000; /* return QNaN */ 47 /* 2^-big = 0, 2^+big = +Inf */ 48 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ 49 } 50 if (exp < -23) 51 return 0x3f800000; /* 1.0 */ 52 53 /* convert to fixed point integer in 9.23 representation */ 54 pwr = (s & 0x7fffff) | 0x800000; 55 if (exp > 0) 56 pwr <<= exp; 57 else 58 pwr >>= -exp; 59 if (s & 0x80000000) 60 pwr = -pwr; 61 62 /* extract integer part, which becomes exponent part of result */ 63 exp = (pwr >> 23) + 126; 64 if (exp >= 254) 65 return 0x7f800000; 66 if (exp < -23) 67 return 0; 68 69 /* table lookup on top 3 bits of fraction to get mantissa */ 70 mant = exp2s[(pwr >> 20) & 7]; 71 72 /* linear interpolation using remaining 20 bits of fraction */ 73 asm("mulhwu %0,%1,%2" : "=r" (frac) 74 : "r" (pwr << 12), "r" (0x172b83ff)); 75 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); 76 mant += frac; 77 78 if (exp >= 0) 79 return mant + (exp << 23); 80 81 /* denormalized result */ 82 exp = -exp; 83 mant += 1 << (exp - 1); 84 return mant >> exp; 85 } 86 87 /* 88 * Computes an estimate of log_2(x). The `s' argument is the 32-bit 89 * single-precision floating-point representation of x. 90 */ 91 static unsigned int elog2(unsigned int s) 92 { 93 int exp, mant, lz, frac; 94 95 exp = s & 0x7f800000; 96 mant = s & 0x7fffff; 97 if (exp == 0x7f800000) { /* Inf or NaN */ 98 if (mant != 0) 99 s |= 0x400000; /* turn NaN into QNaN */ 100 return s; 101 } 102 if ((exp | mant) == 0) /* +0 or -0 */ 103 return 0xff800000; /* return -Inf */ 104 105 if (exp == 0) { 106 /* denormalized */ 107 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); 108 mant <<= lz - 8; 109 exp = (-118 - lz) << 23; 110 } else { 111 mant |= 0x800000; 112 exp -= 127 << 23; 113 } 114 115 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ 116 exp |= 0x400000; /* 0.5 * 2^23 */ 117 asm("mulhwu %0,%1,%2" : "=r" (mant) 118 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ 119 } 120 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ 121 exp |= 0x200000; /* 0.25 * 2^23 */ 122 asm("mulhwu %0,%1,%2" : "=r" (mant) 123 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ 124 } 125 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ 126 exp |= 0x100000; /* 0.125 * 2^23 */ 127 asm("mulhwu %0,%1,%2" : "=r" (mant) 128 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ 129 } 130 if (mant > 0x800000) { /* 1.0 * 2^23 */ 131 /* calculate (mant - 1) * 1.381097463 */ 132 /* 1.381097463 == 0.125 / (2^0.125 - 1) */ 133 asm("mulhwu %0,%1,%2" : "=r" (frac) 134 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); 135 exp += frac; 136 } 137 s = exp & 0x80000000; 138 if (exp != 0) { 139 if (s) 140 exp = -exp; 141 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); 142 lz = 8 - lz; 143 if (lz > 0) 144 exp >>= lz; 145 else if (lz < 0) 146 exp <<= -lz; 147 s += ((lz + 126) << 23) + exp; 148 } 149 return s; 150 } 151 152 #define VSCR_SAT 1 153 154 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) 155 { 156 int exp, mant; 157 158 exp = (x >> 23) & 0xff; 159 mant = x & 0x7fffff; 160 if (exp == 255 && mant != 0) 161 return 0; /* NaN -> 0 */ 162 exp = exp - 127 + scale; 163 if (exp < 0) 164 return 0; /* round towards zero */ 165 if (exp >= 31) { 166 /* saturate, unless the result would be -2^31 */ 167 if (x + (scale << 23) != 0xcf000000) 168 *vscrp |= VSCR_SAT; 169 return (x & 0x80000000)? 0x80000000: 0x7fffffff; 170 } 171 mant |= 0x800000; 172 mant = (mant << 7) >> (30 - exp); 173 return (x & 0x80000000)? -mant: mant; 174 } 175 176 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) 177 { 178 int exp; 179 unsigned int mant; 180 181 exp = (x >> 23) & 0xff; 182 mant = x & 0x7fffff; 183 if (exp == 255 && mant != 0) 184 return 0; /* NaN -> 0 */ 185 exp = exp - 127 + scale; 186 if (exp < 0) 187 return 0; /* round towards zero */ 188 if (x & 0x80000000) { 189 /* negative => saturate to 0 */ 190 *vscrp |= VSCR_SAT; 191 return 0; 192 } 193 if (exp >= 32) { 194 /* saturate */ 195 *vscrp |= VSCR_SAT; 196 return 0xffffffff; 197 } 198 mant |= 0x800000; 199 mant = (mant << 8) >> (31 - exp); 200 return mant; 201 } 202 203 /* Round to floating integer, towards 0 */ 204 static unsigned int rfiz(unsigned int x) 205 { 206 int exp; 207 208 exp = ((x >> 23) & 0xff) - 127; 209 if (exp == 128 && (x & 0x7fffff) != 0) 210 return x | 0x400000; /* NaN -> make it a QNaN */ 211 if (exp >= 23) 212 return x; /* it's an integer already (or Inf) */ 213 if (exp < 0) 214 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ 215 return x & ~(0x7fffff >> exp); 216 } 217 218 /* Round to floating integer, towards +/- Inf */ 219 static unsigned int rfii(unsigned int x) 220 { 221 int exp, mask; 222 223 exp = ((x >> 23) & 0xff) - 127; 224 if (exp == 128 && (x & 0x7fffff) != 0) 225 return x | 0x400000; /* NaN -> make it a QNaN */ 226 if (exp >= 23) 227 return x; /* it's an integer already (or Inf) */ 228 if ((x & 0x7fffffff) == 0) 229 return x; /* +/-0 -> +/-0 */ 230 if (exp < 0) 231 /* 0 < |x| < 1.0 rounds to +/- 1.0 */ 232 return (x & 0x80000000) | 0x3f800000; 233 mask = 0x7fffff >> exp; 234 /* mantissa overflows into exponent - that's OK, 235 it can't overflow into the sign bit */ 236 return (x + mask) & ~mask; 237 } 238 239 /* Round to floating integer, to nearest */ 240 static unsigned int rfin(unsigned int x) 241 { 242 int exp, half; 243 244 exp = ((x >> 23) & 0xff) - 127; 245 if (exp == 128 && (x & 0x7fffff) != 0) 246 return x | 0x400000; /* NaN -> make it a QNaN */ 247 if (exp >= 23) 248 return x; /* it's an integer already (or Inf) */ 249 if (exp < -1) 250 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ 251 if (exp == -1) 252 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ 253 return (x & 0x80000000) | 0x3f800000; 254 half = 0x400000 >> exp; 255 /* add 0.5 to the magnitude and chop off the fraction bits */ 256 return (x + half) & ~(0x7fffff >> exp); 257 } 258 259 int emulate_altivec(struct pt_regs *regs) 260 { 261 unsigned int instr, i; 262 unsigned int va, vb, vc, vd; 263 vector128 *vrs; 264 265 if (get_user(instr, (unsigned int __user *) regs->nip)) 266 return -EFAULT; 267 if ((instr >> 26) != 4) 268 return -EINVAL; /* not an altivec instruction */ 269 vd = (instr >> 21) & 0x1f; 270 va = (instr >> 16) & 0x1f; 271 vb = (instr >> 11) & 0x1f; 272 vc = (instr >> 6) & 0x1f; 273 274 vrs = current->thread.vr_state.vr; 275 switch (instr & 0x3f) { 276 case 10: 277 switch (vc) { 278 case 0: /* vaddfp */ 279 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); 280 break; 281 case 1: /* vsubfp */ 282 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); 283 break; 284 case 4: /* vrefp */ 285 vrefp(&vrs[vd], &vrs[vb]); 286 break; 287 case 5: /* vrsqrtefp */ 288 vrsqrtefp(&vrs[vd], &vrs[vb]); 289 break; 290 case 6: /* vexptefp */ 291 for (i = 0; i < 4; ++i) 292 vrs[vd].u[i] = eexp2(vrs[vb].u[i]); 293 break; 294 case 7: /* vlogefp */ 295 for (i = 0; i < 4; ++i) 296 vrs[vd].u[i] = elog2(vrs[vb].u[i]); 297 break; 298 case 8: /* vrfin */ 299 for (i = 0; i < 4; ++i) 300 vrs[vd].u[i] = rfin(vrs[vb].u[i]); 301 break; 302 case 9: /* vrfiz */ 303 for (i = 0; i < 4; ++i) 304 vrs[vd].u[i] = rfiz(vrs[vb].u[i]); 305 break; 306 case 10: /* vrfip */ 307 for (i = 0; i < 4; ++i) { 308 u32 x = vrs[vb].u[i]; 309 x = (x & 0x80000000)? rfiz(x): rfii(x); 310 vrs[vd].u[i] = x; 311 } 312 break; 313 case 11: /* vrfim */ 314 for (i = 0; i < 4; ++i) { 315 u32 x = vrs[vb].u[i]; 316 x = (x & 0x80000000)? rfii(x): rfiz(x); 317 vrs[vd].u[i] = x; 318 } 319 break; 320 case 14: /* vctuxs */ 321 for (i = 0; i < 4; ++i) 322 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, 323 ¤t->thread.vr_state.vscr.u[3]); 324 break; 325 case 15: /* vctsxs */ 326 for (i = 0; i < 4; ++i) 327 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, 328 ¤t->thread.vr_state.vscr.u[3]); 329 break; 330 default: 331 return -EINVAL; 332 } 333 break; 334 case 46: /* vmaddfp */ 335 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 336 break; 337 case 47: /* vnmsubfp */ 338 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 339 break; 340 default: 341 return -EINVAL; 342 } 343 344 return 0; 345 } 346