1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Routines to emulate some Altivec/VMX instructions, specifically 4 * those that can trap when given denormalized operands in Java mode. 5 */ 6 #include <linux/kernel.h> 7 #include <linux/errno.h> 8 #include <linux/sched.h> 9 #include <asm/ptrace.h> 10 #include <asm/processor.h> 11 #include <linux/uaccess.h> 12 13 /* Functions in vector.S */ 14 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b); 15 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b); 16 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 17 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c); 18 extern void vrefp(vector128 *dst, vector128 *src); 19 extern void vrsqrtefp(vector128 *dst, vector128 *src); 20 extern void vexptep(vector128 *dst, vector128 *src); 21 22 static unsigned int exp2s[8] = { 23 0x800000, 24 0x8b95c2, 25 0x9837f0, 26 0xa5fed7, 27 0xb504f3, 28 0xc5672a, 29 0xd744fd, 30 0xeac0c7 31 }; 32 33 /* 34 * Computes an estimate of 2^x. The `s' argument is the 32-bit 35 * single-precision floating-point representation of x. 36 */ 37 static unsigned int eexp2(unsigned int s) 38 { 39 int exp, pwr; 40 unsigned int mant, frac; 41 42 /* extract exponent field from input */ 43 exp = ((s >> 23) & 0xff) - 127; 44 if (exp > 7) { 45 /* check for NaN input */ 46 if (exp == 128 && (s & 0x7fffff) != 0) 47 return s | 0x400000; /* return QNaN */ 48 /* 2^-big = 0, 2^+big = +Inf */ 49 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */ 50 } 51 if (exp < -23) 52 return 0x3f800000; /* 1.0 */ 53 54 /* convert to fixed point integer in 9.23 representation */ 55 pwr = (s & 0x7fffff) | 0x800000; 56 if (exp > 0) 57 pwr <<= exp; 58 else 59 pwr >>= -exp; 60 if (s & 0x80000000) 61 pwr = -pwr; 62 63 /* extract integer part, which becomes exponent part of result */ 64 exp = (pwr >> 23) + 126; 65 if (exp >= 254) 66 return 0x7f800000; 67 if (exp < -23) 68 return 0; 69 70 /* table lookup on top 3 bits of fraction to get mantissa */ 71 mant = exp2s[(pwr >> 20) & 7]; 72 73 /* linear interpolation using remaining 20 bits of fraction */ 74 asm("mulhwu %0,%1,%2" : "=r" (frac) 75 : "r" (pwr << 12), "r" (0x172b83ff)); 76 asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant)); 77 mant += frac; 78 79 if (exp >= 0) 80 return mant + (exp << 23); 81 82 /* denormalized result */ 83 exp = -exp; 84 mant += 1 << (exp - 1); 85 return mant >> exp; 86 } 87 88 /* 89 * Computes an estimate of log_2(x). The `s' argument is the 32-bit 90 * single-precision floating-point representation of x. 91 */ 92 static unsigned int elog2(unsigned int s) 93 { 94 int exp, mant, lz, frac; 95 96 exp = s & 0x7f800000; 97 mant = s & 0x7fffff; 98 if (exp == 0x7f800000) { /* Inf or NaN */ 99 if (mant != 0) 100 s |= 0x400000; /* turn NaN into QNaN */ 101 return s; 102 } 103 if ((exp | mant) == 0) /* +0 or -0 */ 104 return 0xff800000; /* return -Inf */ 105 106 if (exp == 0) { 107 /* denormalized */ 108 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant)); 109 mant <<= lz - 8; 110 exp = (-118 - lz) << 23; 111 } else { 112 mant |= 0x800000; 113 exp -= 127 << 23; 114 } 115 116 if (mant >= 0xb504f3) { /* 2^0.5 * 2^23 */ 117 exp |= 0x400000; /* 0.5 * 2^23 */ 118 asm("mulhwu %0,%1,%2" : "=r" (mant) 119 : "r" (mant), "r" (0xb504f334)); /* 2^-0.5 * 2^32 */ 120 } 121 if (mant >= 0x9837f0) { /* 2^0.25 * 2^23 */ 122 exp |= 0x200000; /* 0.25 * 2^23 */ 123 asm("mulhwu %0,%1,%2" : "=r" (mant) 124 : "r" (mant), "r" (0xd744fccb)); /* 2^-0.25 * 2^32 */ 125 } 126 if (mant >= 0x8b95c2) { /* 2^0.125 * 2^23 */ 127 exp |= 0x100000; /* 0.125 * 2^23 */ 128 asm("mulhwu %0,%1,%2" : "=r" (mant) 129 : "r" (mant), "r" (0xeac0c6e8)); /* 2^-0.125 * 2^32 */ 130 } 131 if (mant > 0x800000) { /* 1.0 * 2^23 */ 132 /* calculate (mant - 1) * 1.381097463 */ 133 /* 1.381097463 == 0.125 / (2^0.125 - 1) */ 134 asm("mulhwu %0,%1,%2" : "=r" (frac) 135 : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a)); 136 exp += frac; 137 } 138 s = exp & 0x80000000; 139 if (exp != 0) { 140 if (s) 141 exp = -exp; 142 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp)); 143 lz = 8 - lz; 144 if (lz > 0) 145 exp >>= lz; 146 else if (lz < 0) 147 exp <<= -lz; 148 s += ((lz + 126) << 23) + exp; 149 } 150 return s; 151 } 152 153 #define VSCR_SAT 1 154 155 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp) 156 { 157 int exp, mant; 158 159 exp = (x >> 23) & 0xff; 160 mant = x & 0x7fffff; 161 if (exp == 255 && mant != 0) 162 return 0; /* NaN -> 0 */ 163 exp = exp - 127 + scale; 164 if (exp < 0) 165 return 0; /* round towards zero */ 166 if (exp >= 31) { 167 /* saturate, unless the result would be -2^31 */ 168 if (x + (scale << 23) != 0xcf000000) 169 *vscrp |= VSCR_SAT; 170 return (x & 0x80000000)? 0x80000000: 0x7fffffff; 171 } 172 mant |= 0x800000; 173 mant = (mant << 7) >> (30 - exp); 174 return (x & 0x80000000)? -mant: mant; 175 } 176 177 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp) 178 { 179 int exp; 180 unsigned int mant; 181 182 exp = (x >> 23) & 0xff; 183 mant = x & 0x7fffff; 184 if (exp == 255 && mant != 0) 185 return 0; /* NaN -> 0 */ 186 exp = exp - 127 + scale; 187 if (exp < 0) 188 return 0; /* round towards zero */ 189 if (x & 0x80000000) { 190 /* negative => saturate to 0 */ 191 *vscrp |= VSCR_SAT; 192 return 0; 193 } 194 if (exp >= 32) { 195 /* saturate */ 196 *vscrp |= VSCR_SAT; 197 return 0xffffffff; 198 } 199 mant |= 0x800000; 200 mant = (mant << 8) >> (31 - exp); 201 return mant; 202 } 203 204 /* Round to floating integer, towards 0 */ 205 static unsigned int rfiz(unsigned int x) 206 { 207 int exp; 208 209 exp = ((x >> 23) & 0xff) - 127; 210 if (exp == 128 && (x & 0x7fffff) != 0) 211 return x | 0x400000; /* NaN -> make it a QNaN */ 212 if (exp >= 23) 213 return x; /* it's an integer already (or Inf) */ 214 if (exp < 0) 215 return x & 0x80000000; /* |x| < 1.0 rounds to 0 */ 216 return x & ~(0x7fffff >> exp); 217 } 218 219 /* Round to floating integer, towards +/- Inf */ 220 static unsigned int rfii(unsigned int x) 221 { 222 int exp, mask; 223 224 exp = ((x >> 23) & 0xff) - 127; 225 if (exp == 128 && (x & 0x7fffff) != 0) 226 return x | 0x400000; /* NaN -> make it a QNaN */ 227 if (exp >= 23) 228 return x; /* it's an integer already (or Inf) */ 229 if ((x & 0x7fffffff) == 0) 230 return x; /* +/-0 -> +/-0 */ 231 if (exp < 0) 232 /* 0 < |x| < 1.0 rounds to +/- 1.0 */ 233 return (x & 0x80000000) | 0x3f800000; 234 mask = 0x7fffff >> exp; 235 /* mantissa overflows into exponent - that's OK, 236 it can't overflow into the sign bit */ 237 return (x + mask) & ~mask; 238 } 239 240 /* Round to floating integer, to nearest */ 241 static unsigned int rfin(unsigned int x) 242 { 243 int exp, half; 244 245 exp = ((x >> 23) & 0xff) - 127; 246 if (exp == 128 && (x & 0x7fffff) != 0) 247 return x | 0x400000; /* NaN -> make it a QNaN */ 248 if (exp >= 23) 249 return x; /* it's an integer already (or Inf) */ 250 if (exp < -1) 251 return x & 0x80000000; /* |x| < 0.5 -> +/-0 */ 252 if (exp == -1) 253 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */ 254 return (x & 0x80000000) | 0x3f800000; 255 half = 0x400000 >> exp; 256 /* add 0.5 to the magnitude and chop off the fraction bits */ 257 return (x + half) & ~(0x7fffff >> exp); 258 } 259 260 int emulate_altivec(struct pt_regs *regs) 261 { 262 unsigned int instr, i; 263 unsigned int va, vb, vc, vd; 264 vector128 *vrs; 265 266 if (get_user(instr, (unsigned int __user *) regs->nip)) 267 return -EFAULT; 268 if ((instr >> 26) != 4) 269 return -EINVAL; /* not an altivec instruction */ 270 vd = (instr >> 21) & 0x1f; 271 va = (instr >> 16) & 0x1f; 272 vb = (instr >> 11) & 0x1f; 273 vc = (instr >> 6) & 0x1f; 274 275 vrs = current->thread.vr_state.vr; 276 switch (instr & 0x3f) { 277 case 10: 278 switch (vc) { 279 case 0: /* vaddfp */ 280 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]); 281 break; 282 case 1: /* vsubfp */ 283 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]); 284 break; 285 case 4: /* vrefp */ 286 vrefp(&vrs[vd], &vrs[vb]); 287 break; 288 case 5: /* vrsqrtefp */ 289 vrsqrtefp(&vrs[vd], &vrs[vb]); 290 break; 291 case 6: /* vexptefp */ 292 for (i = 0; i < 4; ++i) 293 vrs[vd].u[i] = eexp2(vrs[vb].u[i]); 294 break; 295 case 7: /* vlogefp */ 296 for (i = 0; i < 4; ++i) 297 vrs[vd].u[i] = elog2(vrs[vb].u[i]); 298 break; 299 case 8: /* vrfin */ 300 for (i = 0; i < 4; ++i) 301 vrs[vd].u[i] = rfin(vrs[vb].u[i]); 302 break; 303 case 9: /* vrfiz */ 304 for (i = 0; i < 4; ++i) 305 vrs[vd].u[i] = rfiz(vrs[vb].u[i]); 306 break; 307 case 10: /* vrfip */ 308 for (i = 0; i < 4; ++i) { 309 u32 x = vrs[vb].u[i]; 310 x = (x & 0x80000000)? rfiz(x): rfii(x); 311 vrs[vd].u[i] = x; 312 } 313 break; 314 case 11: /* vrfim */ 315 for (i = 0; i < 4; ++i) { 316 u32 x = vrs[vb].u[i]; 317 x = (x & 0x80000000)? rfii(x): rfiz(x); 318 vrs[vd].u[i] = x; 319 } 320 break; 321 case 14: /* vctuxs */ 322 for (i = 0; i < 4; ++i) 323 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va, 324 ¤t->thread.vr_state.vscr.u[3]); 325 break; 326 case 15: /* vctsxs */ 327 for (i = 0; i < 4; ++i) 328 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va, 329 ¤t->thread.vr_state.vscr.u[3]); 330 break; 331 default: 332 return -EINVAL; 333 } 334 break; 335 case 46: /* vmaddfp */ 336 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 337 break; 338 case 47: /* vnmsubfp */ 339 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]); 340 break; 341 default: 342 return -EINVAL; 343 } 344 345 return 0; 346 } 347