/*
 *  linux/arch/arm/vfp/vfpsingle.c
 *
 * This code is derived in part from John R. Hauser's SoftFloat library, which
 * carries the following notice:
 *
 * ===========================================================================
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
 * ===========================================================================
 */
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <asm/ptrace.h>
#include <asm/vfp.h>

#include "vfpinstr.h"
#include "vfp.h"

static struct vfp_single vfp_single_default_qnan = {
	.exponent	= 255,
	.sign		= 0,
	.significand	= VFP_SINGLE_SIGNIFICAND_QNAN,
};

static void vfp_single_dump(const char *str, struct vfp_single *s)
{
	pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
		 str, s->sign != 0, s->exponent, s->significand);
}

static void vfp_single_normalise_denormal(struct vfp_single *vs)
{
	int bits = 31 - fls(vs->significand);

	vfp_single_dump("normalise_denormal: in", vs);

	if (bits) {
		vs->exponent -= bits - 1;
		vs->significand <<= bits;
	}

	vfp_single_dump("normalise_denormal: out", vs);
}

#ifndef DEBUG
#define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
#else
u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
#endif
{
	u32 significand, incr, rmode;
	int exponent, shift, underflow;

	vfp_single_dump("pack: in", vs);

	/*
	 * Infinities and NaNs are a special case.
	 */
	if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
		goto pack;

	/*
	 * Special-case zero.
	 */
	if (vs->significand == 0) {
		vs->exponent = 0;
		goto pack;
	}

	exponent = vs->exponent;
	significand = vs->significand;

	/*
	 * Normalise first.  Note that we shift the significand up to
	 * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
	 * significant bit.
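	 *
	 * Illustrative worked example (assuming VFP_SINGLE_LOW_BITS is
	 * 7, i.e. 32 - VFP_SINGLE_MANTISSA_BITS - 2 for a 23-bit
	 * mantissa): a significand of 0x40000000 has fls() == 31,
	 * giving shift == 1, so it becomes 0x80000000 and the exponent
	 * drops by one.  Bits [7:0] then sit below the eventual
	 * mantissa and drive the rounding decisions below.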
	 */
	shift = 32 - fls(significand);
	if (shift < 32 && shift) {
		exponent -= shift;
		significand <<= shift;
	}

#ifdef DEBUG
	vs->exponent = exponent;
	vs->significand = significand;
	vfp_single_dump("pack: normalised", vs);
#endif

	/*
	 * Tiny number?
	 */
	underflow = exponent < 0;
	if (underflow) {
		significand = vfp_shiftright32jamming(significand, -exponent);
		exponent = 0;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: tiny number", vs);
#endif
		if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
			underflow = 0;
	}

	/*
	 * Select rounding increment.
	 */
	incr = 0;
	rmode = fpscr & FPSCR_RMODE_MASK;

	if (rmode == FPSCR_ROUND_NEAREST) {
		incr = 1 << VFP_SINGLE_LOW_BITS;
		if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
			incr -= 1;
	} else if (rmode == FPSCR_ROUND_TOZERO) {
		incr = 0;
	} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
		incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

	pr_debug("VFP: rounding increment = 0x%08x\n", incr);

	/*
	 * Is our rounding going to overflow?
	 */
	if ((significand + incr) < significand) {
		exponent += 1;
		significand = (significand >> 1) | (significand & 1);
		incr >>= 1;
#ifdef DEBUG
		vs->exponent = exponent;
		vs->significand = significand;
		vfp_single_dump("pack: overflow", vs);
#endif
	}

	/*
	 * If any of the low bits (which will be shifted out of the
	 * number) are non-zero, the result is inexact.
	 */
	if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
		exceptions |= FPSCR_IXC;

	/*
	 * Do our rounding.
	 */
	significand += incr;

	/*
	 * Infinity?
	 */
	if (exponent >= 254) {
		exceptions |= FPSCR_OFC | FPSCR_IXC;
		if (incr == 0) {
			vs->exponent = 253;
			vs->significand = 0x7fffffff;
		} else {
			vs->exponent = 255;		/* infinity */
			vs->significand = 0;
		}
	} else {
		if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
			exponent = 0;
		if (exponent || significand > 0x80000000)
			underflow = 0;
		if (underflow)
			exceptions |= FPSCR_UFC;
		vs->exponent = exponent;
		vs->significand = significand >> 1;
	}

 pack:
	vfp_single_dump("pack: final", vs);
	{
		s32 d = vfp_single_pack(vs);
		pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
			 sd, d, exceptions);
		vfp_put_float(sd, d);
	}

	return exceptions & ~VFP_NAN_FLAG;
}

/*
 * Propagate the NaN, setting exceptions if it is signalling.
 * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 */
static u32
vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
		  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *nan;
	int tn, tm = 0;

	tn = vfp_single_type(vsn);

	if (vsm)
		tm = vfp_single_type(vsm);

	if (fpscr & FPSCR_DEFAULT_NAN)
		/*
		 * Default NaN mode - always returns a quiet NaN
		 */
		nan = &vfp_single_default_qnan;
	else {
		/*
		 * Contemporary mode - select the first signalling
		 * NAN, or if neither are signalling, the first
		 * quiet NAN.
		 */
		if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
			nan = vsn;
		else
			nan = vsm;
		/*
		 * Make the NaN quiet.
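		 *
		 * Quieting works by setting the most significant
		 * mantissa bit, which is what distinguishes a quiet
		 * NaN from a signalling one: e.g. the packed SNaN
		 * 0x7f800001 would propagate as the QNaN 0x7fc00001
		 * (illustrative values).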
		 */
		nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
	}

	*vsd = *nan;

	/*
	 * If one was a signalling NAN, raise invalid operation.
	 */
	return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
}

/*
 * Extended operations
 */
static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_abs(m));
	return 0;
}

static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, m);
	return 0;
}

static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
{
	vfp_put_float(sd, vfp_single_packed_negate(m));
	return 0;
}

static const u16 sqrt_oddadjust[] = {
	0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
	0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};

static const u16 sqrt_evenadjust[] = {
	0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
	0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};

u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
{
	int index;
	u32 z, a;

	if ((significand & 0xc0000000) != 0x40000000) {
		printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
	}

	a = significand << 1;
	index = (a >> 27) & 15;
	if (exponent & 1) {
		z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
		z = ((a / z) << 14) + (z << 15);
		a >>= 1;
	} else {
		z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
		z = a / z + z;
		z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
		if (z <= a)
			return (s32)a >> 1;
	}
	return (u32)(((u64)a << 31) / z) + (z >> 1);
}

static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm, vsd;
	int ret, tm;

	vfp_single_unpack(&vsm, m);
	tm = vfp_single_type(&vsm);
	if (tm & (VFP_NAN|VFP_INFINITY)) {
		struct vfp_single *vsp = &vsd;

		if (tm & VFP_NAN)
			ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
		else if (vsm.sign == 0) {
 sqrt_copy:
			vsp = &vsm;
			ret = 0;
		} else {
 sqrt_invalid:
			vsp = &vfp_single_default_qnan;
			ret = FPSCR_IOC;
		}
		vfp_put_float(sd, vfp_single_pack(vsp));
		return ret;
	}

	/*
	 * sqrt(+/- 0) == +/- 0
	 */
	if (tm & VFP_ZERO)
		goto sqrt_copy;

	/*
	 * Normalise a denormalised number
	 */
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * sqrt(<0) = invalid
	 */
	if (vsm.sign)
		goto sqrt_invalid;

	vfp_single_dump("sqrt", &vsm);

	/*
	 * Estimate the square root.
	 */
	vsd.sign = 0;
	vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
	vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

	vfp_single_dump("sqrt estimate", &vsd);

	/*
	 * And now adjust.
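	 *
	 * The estimate can be off by a few units in the last place, so
	 * when the low bits sit close to a rounding boundary the result
	 * is corrected exactly: with rem = m - z^2, decrementing z
	 * lowers z^2 by 2*z_new + 1, hence rem += 2*z_new + 1 in the
	 * loop, and any non-zero remainder is folded in as a sticky bit
	 * (a hedged reading of the code below, not an independent
	 * proof).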
	 */
	if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
		if (vsd.significand < 2) {
			vsd.significand = 0xffffffff;
		} else {
			u64 term;
			s64 rem;
			vsm.significand <<= !(vsm.exponent & 1);
			term = (u64)vsd.significand * vsd.significand;
			rem = ((u64)vsm.significand << 32) - term;

			pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

			while (rem < 0) {
				vsd.significand -= 1;
				rem += ((u64)vsd.significand << 1) | 1;
			}
			vsd.significand |= rem != 0;
		}
	}
	vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}

/*
 * Equal	:= ZC
 * Less than	:= N
 * Greater than	:= C
 * Unordered	:= CV
 */
static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
{
	s32 d;
	u32 ret = 0;

	d = vfp_get_float(sd);
	if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
		ret |= FPSCR_C | FPSCR_V;
		if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
			/*
			 * Signalling NaN, or signalling on quiet NaN
			 */
			ret |= FPSCR_IOC;
	}

	if (ret == 0) {
		if (d == m || vfp_single_packed_abs(d | m) == 0) {
			/*
			 * equal
			 */
			ret |= FPSCR_Z | FPSCR_C;
		} else if (vfp_single_packed_sign(d ^ m)) {
			/*
			 * different signs
			 */
			if (vfp_single_packed_sign(d))
				/*
				 * d is negative, so d < m
				 */
				ret |= FPSCR_N;
			else
				/*
				 * d is positive, so d > m
				 */
				ret |= FPSCR_C;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
			/*
			 * d < m
			 */
			ret |= FPSCR_N;
		} else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
			/*
			 * d > m
			 */
			ret |= FPSCR_C;
		}
	}
	return ret;
}

static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, m, fpscr);
}

static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, m, fpscr);
}

static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 0, 0, fpscr);
}

static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_compare(sd, 1, 0, fpscr);
}

static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	struct vfp_double vdd;
	int tm;
	u32 exceptions = 0;

	vfp_single_unpack(&vsm, m);

	tm = vfp_single_type(&vsm);

	/*
	 * If we have a signalling NaN, signal invalid operation.
	 */
	if (tm == VFP_SNAN)
		exceptions = FPSCR_IOC;

	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	vdd.sign = vsm.sign;
	vdd.significand = (u64)vsm.significand << 32;

	/*
	 * If we have an infinity or NaN, the exponent must be 2047.
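	 *
	 * Otherwise the exponent is rebased from the single-precision
	 * bias to the double-precision one: e.g. 1.0f carries a biased
	 * exponent of 127, which becomes 127 + (1023 - 127) = 1023 in
	 * the double (illustrative).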
	 */
	if (tm & (VFP_INFINITY|VFP_NAN)) {
		vdd.exponent = 2047;
		if (tm & VFP_NAN)
			vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
		goto pack_nan;
	} else if (tm & VFP_ZERO)
		vdd.exponent = 0;
	else
		vdd.exponent = vsm.exponent + (1023 - 127);

	/*
	 * Technically, if bit 0 of dd is set, this is an invalid
	 * instruction.  However, we ignore this for efficiency.
	 */
	return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");

 pack_nan:
	vfp_put_double(dd, vfp_double_pack(&vdd));
	return exceptions;
}

static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = 0;
	vs.exponent = 127 + 31 - 1;
	vs.significand = (u32)m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
}

static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vs;

	vs.sign = (m & 0x80000000) >> 16;
	vs.exponent = 127 + 31 - 1;
	vs.significand = vs.sign ? -m : m;

	return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
}

static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;
	int tm;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
	 */
	tm = vfp_single_type(&vsm);
	if (tm & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (tm & VFP_NAN)
		vsm.sign = 0;

	if (vsm.exponent >= 127 + 32) {
		d = vsm.sign ? 0 : 0xffffffff;
		exceptions = FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/*
		 * 2^0 <= m < 2^32-2^8
		 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem) {
			if (d < 0xffffffff)
				d += 1;
			else
				exceptions |= FPSCR_IOC;
		}

		if (d && vsm.sign) {
			d = 0;
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
				d = 0;
				exceptions |= FPSCR_IOC;
			}
		}
	}

	pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, d);

	return exceptions;
}

static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
	struct vfp_single vsm;
	u32 d, exceptions = 0;
	int rmode = fpscr & FPSCR_RMODE_MASK;

	vfp_single_unpack(&vsm, m);
	vfp_single_dump("VSM", &vsm);

	/*
	 * Do we have a denormalised number?
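	 *
	 * If so, FPSCR_IDC flags the denormal input.  A denormal's
	 * unpacked exponent is far below 127 - 1, so its magnitude
	 * falls through to the final else branch below and converts
	 * to 0, 1 or -1 depending on the rounding mode (explanatory
	 * note).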
	 */
	if (vfp_single_type(&vsm) & VFP_DENORMAL)
		exceptions |= FPSCR_IDC;

	if (vsm.exponent >= 127 + 32) {
		/*
		 * m >= 2^31-2^7: invalid
		 */
		d = 0x7fffffff;
		if (vsm.sign)
			d = ~d;
		exceptions |= FPSCR_IOC;
	} else if (vsm.exponent >= 127 - 1) {
		int shift = 127 + 31 - vsm.exponent;
		u32 rem, incr = 0;

		/* 2^0 <= m <= 2^31-2^7 */
		d = (vsm.significand << 1) >> shift;
		rem = vsm.significand << (33 - shift);

		if (rmode == FPSCR_ROUND_NEAREST) {
			incr = 0x80000000;
			if ((d & 1) == 0)
				incr -= 1;
		} else if (rmode == FPSCR_ROUND_TOZERO) {
			incr = 0;
		} else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
			incr = ~0;
		}

		if ((rem + incr) < rem && d < 0xffffffff)
			d += 1;
		if (d > 0x7fffffff + (vsm.sign != 0)) {
			d = 0x7fffffff + (vsm.sign != 0);
			exceptions |= FPSCR_IOC;
		} else if (rem)
			exceptions |= FPSCR_IXC;

		if (vsm.sign)
			d = -d;
	} else {
		d = 0;
		if (vsm.exponent | vsm.significand) {
			exceptions |= FPSCR_IXC;
			if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
				d = 1;
			else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
				d = -1;
		}
	}

	pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

	vfp_put_float(sd, (s32)d);

	return exceptions;
}

static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
{
	return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
}

static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
	[FEXT_TO_IDX(FEXT_FCPY)]	= vfp_single_fcpy,
	[FEXT_TO_IDX(FEXT_FABS)]	= vfp_single_fabs,
	[FEXT_TO_IDX(FEXT_FNEG)]	= vfp_single_fneg,
	[FEXT_TO_IDX(FEXT_FSQRT)]	= vfp_single_fsqrt,
	[FEXT_TO_IDX(FEXT_FCMP)]	= vfp_single_fcmp,
	[FEXT_TO_IDX(FEXT_FCMPE)]	= vfp_single_fcmpe,
	[FEXT_TO_IDX(FEXT_FCMPZ)]	= vfp_single_fcmpz,
	[FEXT_TO_IDX(FEXT_FCMPEZ)]	= vfp_single_fcmpez,
	[FEXT_TO_IDX(FEXT_FCVT)]	= vfp_single_fcvtd,
	[FEXT_TO_IDX(FEXT_FUITO)]	= vfp_single_fuito,
	[FEXT_TO_IDX(FEXT_FSITO)]	= vfp_single_fsito,
	[FEXT_TO_IDX(FEXT_FTOUI)]	= vfp_single_ftoui,
	[FEXT_TO_IDX(FEXT_FTOUIZ)]	= vfp_single_ftouiz,
	[FEXT_TO_IDX(FEXT_FTOSI)]	= vfp_single_ftosi,
	[FEXT_TO_IDX(FEXT_FTOSIZ)]	= vfp_single_ftosiz,
};

static u32
vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
			  struct vfp_single *vsm, u32 fpscr)
{
	struct vfp_single *vsp;
	u32 exceptions = 0;
	int tn, tm;

	tn = vfp_single_type(vsn);
	tm = vfp_single_type(vsm);

	if (tn & tm & VFP_INFINITY) {
		/*
		 * Two infinities.  Are they different signs?
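		 *
		 * For example, (+Inf) + (-Inf) has no well-defined
		 * result, so it raises FPSCR_IOC and yields the
		 * default quiet NaN, whereas (+Inf) + (+Inf) is
		 * simply +Inf (illustrative).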
		 */
		if (vsn->sign ^ vsm->sign) {
			/*
			 * different signs -> invalid
			 */
			exceptions = FPSCR_IOC;
			vsp = &vfp_single_default_qnan;
		} else {
			/*
			 * same signs -> valid
			 */
			vsp = vsn;
		}
	} else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
		/*
		 * One infinity and one number -> infinity
		 */
		vsp = vsn;
	} else {
		/*
		 * 'n' is a NaN of some type
		 */
		return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
	}
	*vsd = *vsp;
	return exceptions;
}

static u32
vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
	       struct vfp_single *vsm, u32 fpscr)
{
	u32 exp_diff, m_sig;

	if (vsn->significand & 0x80000000 ||
	    vsm->significand & 0x80000000) {
		pr_info("VFP: bad FP values in %s\n", __func__);
		vfp_single_dump("VSN", vsn);
		vfp_single_dump("VSM", vsm);
	}

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
	}

	/*
	 * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
	 * infinity or a NaN here.
	 */
	if (vsn->exponent == 255)
		return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);

	/*
	 * We have two proper numbers, where 'vsn' is the larger magnitude.
	 *
	 * Copy 'n' to 'd' before doing the arithmetic.
	 */
	*vsd = *vsn;

	/*
	 * Align both numbers.
	 */
	exp_diff = vsn->exponent - vsm->exponent;
	m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);

	/*
	 * If the signs are different, we are really subtracting.
	 */
	if (vsn->sign ^ vsm->sign) {
		m_sig = vsn->significand - m_sig;
		if ((s32)m_sig < 0) {
			vsd->sign = vfp_sign_negate(vsd->sign);
			m_sig = -m_sig;
		} else if (m_sig == 0) {
			vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
		}
	} else {
		m_sig = vsn->significand + m_sig;
	}
	vsd->significand = m_sig;

	return 0;
}

static u32
vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
{
	vfp_single_dump("VSN", vsn);
	vfp_single_dump("VSM", vsm);

	/*
	 * Ensure that 'n' is the largest magnitude number.  Note that
	 * if 'n' and 'm' have equal exponents, we do not swap them.
	 * This ensures that NaN propagation works correctly.
	 */
	if (vsn->exponent < vsm->exponent) {
		struct vfp_single *t = vsn;
		vsn = vsm;
		vsm = t;
		pr_debug("VFP: swapping M <-> N\n");
	}

	vsd->sign = vsn->sign ^ vsm->sign;

	/*
	 * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
	 */
	if (vsn->exponent == 255) {
		if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
			return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
		if ((vsm->exponent | vsm->significand) == 0) {
			*vsd = vfp_single_default_qnan;
			return FPSCR_IOC;
		}
		vsd->exponent = vsn->exponent;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * If 'm' is zero, the result is always zero.  In this case,
	 * 'n' may be zero or a number, but it doesn't matter which.
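	 *
	 * (An infinite 'n' cannot reach this point: the exponent == 255
	 * case above already turned Inf * 0 into an invalid-operation
	 * default NaN, so only finite * 0 = 0 remains here - explanatory
	 * note.)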
	 */
	if ((vsm->exponent | vsm->significand) == 0) {
		vsd->exponent = 0;
		vsd->significand = 0;
		return 0;
	}

	/*
	 * We add 2 to the destination exponent for the same reason as
	 * the addition case - though this time we have +1 from each
	 * input operand.
	 */
	vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
	vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);

	vfp_single_dump("VSD", vsd);
	return 0;
}

#define NEG_MULTIPLY	(1 << 0)
#define NEG_SUBTRACT	(1 << 1)

static u32
vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
{
	struct vfp_single vsd, vsp, vsn, vsm;
	u32 exceptions;
	s32 v;

	v = vfp_get_float(sn);
	pr_debug("VFP: s%u = %08x\n", sn, v);
	vfp_single_unpack(&vsn, v);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
	if (negate & NEG_MULTIPLY)
		vsp.sign = vfp_sign_negate(vsp.sign);

	v = vfp_get_float(sd);
	pr_debug("VFP: s%u = %08x\n", sd, v);
	vfp_single_unpack(&vsn, v);
	if (negate & NEG_SUBTRACT)
		vsn.sign = vfp_sign_negate(vsn.sign);

	exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
}

/*
 * Standard operations
 */

/*
 * sd = sd + (sn * sm)
 */
static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
}

/*
 * sd = sd - (sn * sm)
 */
static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
}

/*
 * sd = -sd + (sn * sm)
 */
static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
}

/*
 * sd = -sd - (sn * sm)
 */
static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
{
	return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
}

/*
 * sd = sn * sm
 */
static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
}

/*
 * sd = -(sn * sm)
 */
static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

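	/*
	 * Identical to fmul up to this point; negating the product's
	 * sign afterwards is all that distinguishes fnmul, i.e.
	 * sd = -(sn * sm) (explanatory note).
	 */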
	exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
	vsd.sign = vfp_sign_negate(vsd.sign);
	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
}

/*
 * sd = sn + sm
 */
static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions;
	s32 n = vfp_get_float(sn);

	pr_debug("VFP: s%u = %08x\n", sn, n);

	/*
	 * Unpack and normalise denormals.
	 */
	vfp_single_unpack(&vsn, n);
	if (vsn.exponent == 0 && vsn.significand)
		vfp_single_normalise_denormal(&vsn);

	vfp_single_unpack(&vsm, m);
	if (vsm.exponent == 0 && vsm.significand)
		vfp_single_normalise_denormal(&vsm);

	exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);

	return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
}

/*
 * sd = sn - sm
 */
static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
{
	/*
	 * Subtraction is addition with one sign inverted.
	 */
	return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
}

/*
 * sd = sn / sm
 */
static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
{
	struct vfp_single vsd, vsn, vsm;
	u32 exceptions = 0;
	s32 n = vfp_get_float(sn);
	int tm, tn;

	pr_debug("VFP: s%u = %08x\n", sn, n);

	vfp_single_unpack(&vsn, n);
	vfp_single_unpack(&vsm, m);

	vsd.sign = vsn.sign ^ vsm.sign;

	tn = vfp_single_type(&vsn);
	tm = vfp_single_type(&vsm);

	/*
	 * Is n a NAN?
	 */
	if (tn & VFP_NAN)
		goto vsn_nan;

	/*
	 * Is m a NAN?
	 */
	if (tm & VFP_NAN)
		goto vsm_nan;

	/*
	 * If n and m are infinity, the result is invalid
	 * If n and m are zero, the result is invalid
	 */
	if (tm & tn & (VFP_INFINITY|VFP_ZERO))
		goto invalid;

	/*
	 * If n is infinity, the result is infinity
	 */
	if (tn & VFP_INFINITY)
		goto infinity;

	/*
	 * If m is zero, raise div0 exception
	 */
	if (tm & VFP_ZERO)
		goto divzero;

	/*
	 * If m is infinity, or n is zero, the result is zero
	 */
	if (tm & VFP_INFINITY || tn & VFP_ZERO)
		goto zero;

	if (tn & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsn);
	if (tm & VFP_DENORMAL)
		vfp_single_normalise_denormal(&vsm);

	/*
	 * Ok, we have two numbers, we can perform division.
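	 *
	 * Both significands carry their leading one at bit 30, so the
	 * true quotient n/m lies in (0.5, 2).  Shifting 'm' up one bit
	 * and, when n/m >= 1, halving 'n' while bumping the exponent
	 * keeps the 32-bit result of ((u64)n << 32) / m in normalised
	 * range; the final OR folds any lost remainder in as a sticky
	 * bit for rounding (a hedged reading of the code below).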
	 */
	vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
	vsm.significand <<= 1;
	if (vsm.significand <= (2 * vsn.significand)) {
		vsn.significand >>= 1;
		vsd.exponent++;
	}
	vsd.significand = ((u64)vsn.significand << 32) / vsm.significand;
	if ((vsd.significand & 0x3f) == 0)
		vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);

	return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");

 vsn_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
 pack:
	vfp_put_float(sd, vfp_single_pack(&vsd));
	return exceptions;

 vsm_nan:
	exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
	goto pack;

 zero:
	vsd.exponent = 0;
	vsd.significand = 0;
	goto pack;

 divzero:
	exceptions = FPSCR_DZC;
 infinity:
	vsd.exponent = 255;
	vsd.significand = 0;
	goto pack;

 invalid:
	vfp_put_float(sd, vfp_single_pack(&vfp_single_default_qnan));
	return FPSCR_IOC;
}

static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
	[FOP_TO_IDX(FOP_FMAC)]	= vfp_single_fmac,
	[FOP_TO_IDX(FOP_FNMAC)]	= vfp_single_fnmac,
	[FOP_TO_IDX(FOP_FMSC)]	= vfp_single_fmsc,
	[FOP_TO_IDX(FOP_FNMSC)]	= vfp_single_fnmsc,
	[FOP_TO_IDX(FOP_FMUL)]	= vfp_single_fmul,
	[FOP_TO_IDX(FOP_FNMUL)]	= vfp_single_fnmul,
	[FOP_TO_IDX(FOP_FADD)]	= vfp_single_fadd,
	[FOP_TO_IDX(FOP_FSUB)]	= vfp_single_fsub,
	[FOP_TO_IDX(FOP_FDIV)]	= vfp_single_fdiv,
};

#define FREG_BANK(x)	((x) & 0x18)
#define FREG_IDX(x)	((x) & 7)

u32 vfp_single_cpdo(u32 inst, u32 fpscr)
{
	u32 op = inst & FOP_MASK;
	u32 exceptions = 0;
	unsigned int sd = vfp_get_sd(inst);
	unsigned int sn = vfp_get_sn(inst);
	unsigned int sm = vfp_get_sm(inst);
	unsigned int vecitr, veclen, vecstride;
	u32 (*fop)(int, int, s32, u32);

	veclen = fpscr & FPSCR_LENGTH_MASK;
	vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);

	/*
	 * If destination bank is zero, vector length is always '1'.
	 * ARM DDI0100F C5.1.3, C5.3.2.
	 */
	if (FREG_BANK(sd) == 0)
		veclen = 0;

	pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
		 (veclen >> FPSCR_LENGTH_BIT) + 1);

	fop = (op == FOP_EXT) ? fop_extfns[sn] : fop_fns[FOP_TO_IDX(op)];
	if (!fop)
		goto invalid;

	for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
		s32 m = vfp_get_float(sm);
		u32 except;

		if (op == FOP_EXT)
			pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn, sm, m);
		else
			pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
				 vecitr >> FPSCR_LENGTH_BIT, sd, sn,
				 FOP_TO_IDX(op), sm, m);

		except = fop(sd, sn, m, fpscr);
		pr_debug("VFP: itr%d: exceptions=%08x\n",
			 vecitr >> FPSCR_LENGTH_BIT, except);

		exceptions |= except;

		/*
		 * This ensures that comparisons only operate on scalars;
		 * comparisons always return with one FPSCR status bit set.
		 */
		if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
			break;

		/*
		 * CHECK: It appears to be undefined whether we stop when
		 * we encounter an exception.  We continue.
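		 *
		 * The register stepping below wraps within an
		 * 8-register bank: e.g. with vecstride == 1 and
		 * sd == s9, successive iterations write s9, s10, ...,
		 * s15, then s8 (illustrative).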
		 */

		sd = FREG_BANK(sd) + ((FREG_IDX(sd) + vecstride) & 7);
		sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
		if (FREG_BANK(sm) != 0)
			sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
	}
	return exceptions;

 invalid:
	return (u32)-1;
}