/*
 * Floating point emulation support for subnormalised numbers on SH4
 * architecture. This file is derived from the SoftFloat IEC/IEEE
 * Floating-point Arithmetic Package, Release 2, the original license of
 * which is reproduced below.
 *
 * ========================================================================
 *
 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
 * Arithmetic Package, Release 2.
 *
 * Written by John R. Hauser.  This work was made possible in part by the
 * International Computer Science Institute, located at Suite 600, 1947 Center
 * Street, Berkeley, California 94704.  Funding was partially provided by the
 * National Science Foundation under grant MIP-9311980.  The original version
 * of this code was written as part of a project to build a fixed-point vector
 * processor in collaboration with the University of California at Berkeley,
 * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
 * arithmetic/softfloat.html'.
 *
 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
 * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
 *
 * Derivative works are acceptable, even for commercial purposes, so long as
 * (1) they include prominent notice that the work is derivative, and (2) they
 * include prominent notice akin to these three paragraphs for those parts of
 * this code that are retained.
32 * 33 * ======================================================================== 34 * 35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com> 36 * and Kamel Khelifi <kamel.khelifi@st.com> 37 */ 38 #include <linux/kernel.h> 39 #include <cpu/fpu.h> 40 41 #define LIT64( a ) a##LL 42 43 typedef char flag; 44 typedef unsigned char uint8; 45 typedef signed char int8; 46 typedef int uint16; 47 typedef int int16; 48 typedef unsigned int uint32; 49 typedef signed int int32; 50 51 typedef unsigned long long int bits64; 52 typedef signed long long int sbits64; 53 54 typedef unsigned char bits8; 55 typedef signed char sbits8; 56 typedef unsigned short int bits16; 57 typedef signed short int sbits16; 58 typedef unsigned int bits32; 59 typedef signed int sbits32; 60 61 typedef unsigned long long int uint64; 62 typedef signed long long int int64; 63 64 typedef unsigned long int float32; 65 typedef unsigned long long float64; 66 67 extern void float_raise(unsigned int flags); /* in fpu.c */ 68 extern int float_rounding_mode(void); /* in fpu.c */ 69 70 inline bits64 extractFloat64Frac(float64 a); 71 inline flag extractFloat64Sign(float64 a); 72 inline int16 extractFloat64Exp(float64 a); 73 inline int16 extractFloat32Exp(float32 a); 74 inline flag extractFloat32Sign(float32 a); 75 inline bits32 extractFloat32Frac(float32 a); 76 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig); 77 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr); 78 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig); 79 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr); 80 float64 float64_sub(float64 a, float64 b); 81 float32 float32_sub(float32 a, float32 b); 82 float32 float32_add(float32 a, float32 b); 83 float64 float64_add(float64 a, float64 b); 84 float64 float64_div(float64 a, float64 b); 85 float32 float32_div(float32 a, float32 b); 86 float32 float32_mul(float32 a, float32 b); 87 float64 float64_mul(float64 a, float64 b); 88 
float32 float64_to_float32(float64 a); 89 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 90 bits64 * z1Ptr); 91 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 92 bits64 * z1Ptr); 93 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr); 94 95 static int8 countLeadingZeros32(bits32 a); 96 static int8 countLeadingZeros64(bits64 a); 97 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, 98 bits64 zSig); 99 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign); 100 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign); 101 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig); 102 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, 103 bits32 zSig); 104 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig); 105 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign); 106 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign); 107 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, 108 bits64 * zSigPtr); 109 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b); 110 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, 111 bits32 * zSigPtr); 112 113 inline bits64 extractFloat64Frac(float64 a) 114 { 115 return a & LIT64(0x000FFFFFFFFFFFFF); 116 } 117 118 inline flag extractFloat64Sign(float64 a) 119 { 120 return a >> 63; 121 } 122 123 inline int16 extractFloat64Exp(float64 a) 124 { 125 return (a >> 52) & 0x7FF; 126 } 127 128 inline int16 extractFloat32Exp(float32 a) 129 { 130 return (a >> 23) & 0xFF; 131 } 132 133 inline flag extractFloat32Sign(float32 a) 134 { 135 return a >> 31; 136 } 137 138 inline bits32 extractFloat32Frac(float32 a) 139 { 140 return a & 0x007FFFFF; 141 } 142 143 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig) 144 { 145 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig; 146 } 147 148 inline 
void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr) 149 { 150 bits64 z; 151 152 if (count == 0) { 153 z = a; 154 } else if (count < 64) { 155 z = (a >> count) | ((a << ((-count) & 63)) != 0); 156 } else { 157 z = (a != 0); 158 } 159 *zPtr = z; 160 } 161 162 static int8 countLeadingZeros32(bits32 a) 163 { 164 static const int8 countLeadingZerosHigh[] = { 165 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 166 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 181 }; 182 int8 shiftCount; 183 184 shiftCount = 0; 185 if (a < 0x10000) { 186 shiftCount += 16; 187 a <<= 16; 188 } 189 if (a < 0x1000000) { 190 shiftCount += 8; 191 a <<= 8; 192 } 193 shiftCount += countLeadingZerosHigh[a >> 24]; 194 return shiftCount; 195 196 } 197 198 static int8 countLeadingZeros64(bits64 a) 199 { 200 int8 shiftCount; 201 202 shiftCount = 0; 203 if (a < ((bits64) 1) << 32) { 204 shiftCount += 32; 205 } else { 206 a >>= 32; 207 } 208 shiftCount += countLeadingZeros32(a); 209 return shiftCount; 210 211 } 212 213 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) 214 { 215 int8 shiftCount; 216 217 shiftCount = countLeadingZeros64(zSig) - 1; 218 return roundAndPackFloat64(zSign, zExp - shiftCount, 219 zSig << shiftCount); 220 221 } 222 223 
static float64 subFloat64Sigs(float64 a, float64 b, flag zSign) 224 { 225 int16 aExp, bExp, zExp; 226 bits64 aSig, bSig, zSig; 227 int16 expDiff; 228 229 aSig = extractFloat64Frac(a); 230 aExp = extractFloat64Exp(a); 231 bSig = extractFloat64Frac(b); 232 bExp = extractFloat64Exp(b); 233 expDiff = aExp - bExp; 234 aSig <<= 10; 235 bSig <<= 10; 236 if (0 < expDiff) 237 goto aExpBigger; 238 if (expDiff < 0) 239 goto bExpBigger; 240 if (aExp == 0) { 241 aExp = 1; 242 bExp = 1; 243 } 244 if (bSig < aSig) 245 goto aBigger; 246 if (aSig < bSig) 247 goto bBigger; 248 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); 249 bExpBigger: 250 if (bExp == 0x7FF) { 251 return packFloat64(zSign ^ 1, 0x7FF, 0); 252 } 253 if (aExp == 0) { 254 ++expDiff; 255 } else { 256 aSig |= LIT64(0x4000000000000000); 257 } 258 shift64RightJamming(aSig, -expDiff, &aSig); 259 bSig |= LIT64(0x4000000000000000); 260 bBigger: 261 zSig = bSig - aSig; 262 zExp = bExp; 263 zSign ^= 1; 264 goto normalizeRoundAndPack; 265 aExpBigger: 266 if (aExp == 0x7FF) { 267 return a; 268 } 269 if (bExp == 0) { 270 --expDiff; 271 } else { 272 bSig |= LIT64(0x4000000000000000); 273 } 274 shift64RightJamming(bSig, expDiff, &bSig); 275 aSig |= LIT64(0x4000000000000000); 276 aBigger: 277 zSig = aSig - bSig; 278 zExp = aExp; 279 normalizeRoundAndPack: 280 --zExp; 281 return normalizeRoundAndPackFloat64(zSign, zExp, zSig); 282 283 } 284 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign) 285 { 286 int16 aExp, bExp, zExp; 287 bits64 aSig, bSig, zSig; 288 int16 expDiff; 289 290 aSig = extractFloat64Frac(a); 291 aExp = extractFloat64Exp(a); 292 bSig = extractFloat64Frac(b); 293 bExp = extractFloat64Exp(b); 294 expDiff = aExp - bExp; 295 aSig <<= 9; 296 bSig <<= 9; 297 if (0 < expDiff) { 298 if (aExp == 0x7FF) { 299 return a; 300 } 301 if (bExp == 0) { 302 --expDiff; 303 } else { 304 bSig |= LIT64(0x2000000000000000); 305 } 306 shift64RightJamming(bSig, expDiff, &bSig); 307 zExp = aExp; 308 } else if 
(expDiff < 0) { 309 if (bExp == 0x7FF) { 310 return packFloat64(zSign, 0x7FF, 0); 311 } 312 if (aExp == 0) { 313 ++expDiff; 314 } else { 315 aSig |= LIT64(0x2000000000000000); 316 } 317 shift64RightJamming(aSig, -expDiff, &aSig); 318 zExp = bExp; 319 } else { 320 if (aExp == 0x7FF) { 321 return a; 322 } 323 if (aExp == 0) 324 return packFloat64(zSign, 0, (aSig + bSig) >> 9); 325 zSig = LIT64(0x4000000000000000) + aSig + bSig; 326 zExp = aExp; 327 goto roundAndPack; 328 } 329 aSig |= LIT64(0x2000000000000000); 330 zSig = (aSig + bSig) << 1; 331 --zExp; 332 if ((sbits64) zSig < 0) { 333 zSig = aSig + bSig; 334 ++zExp; 335 } 336 roundAndPack: 337 return roundAndPackFloat64(zSign, zExp, zSig); 338 339 } 340 341 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig) 342 { 343 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig; 344 } 345 346 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr) 347 { 348 bits32 z; 349 if (count == 0) { 350 z = a; 351 } else if (count < 32) { 352 z = (a >> count) | ((a << ((-count) & 31)) != 0); 353 } else { 354 z = (a != 0); 355 } 356 *zPtr = z; 357 } 358 359 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) 360 { 361 flag roundNearestEven; 362 int8 roundIncrement, roundBits; 363 flag isTiny; 364 365 /* SH4 has only 2 rounding modes - round to nearest and round to zero */ 366 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); 367 roundIncrement = 0x40; 368 if (!roundNearestEven) { 369 roundIncrement = 0; 370 } 371 roundBits = zSig & 0x7F; 372 if (0xFD <= (bits16) zExp) { 373 if ((0xFD < zExp) 374 || ((zExp == 0xFD) 375 && ((sbits32) (zSig + roundIncrement) < 0)) 376 ) { 377 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); 378 return packFloat32(zSign, 0xFF, 379 0) - (roundIncrement == 0); 380 } 381 if (zExp < 0) { 382 isTiny = (zExp < -1) 383 || (zSig + roundIncrement < 0x80000000); 384 shift32RightJamming(zSig, -zExp, &zSig); 385 zExp = 0; 386 roundBits = 
zSig & 0x7F; 387 if (isTiny && roundBits) 388 float_raise(FPSCR_CAUSE_UNDERFLOW); 389 } 390 } 391 if (roundBits) 392 float_raise(FPSCR_CAUSE_INEXACT); 393 zSig = (zSig + roundIncrement) >> 7; 394 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); 395 if (zSig == 0) 396 zExp = 0; 397 return packFloat32(zSign, zExp, zSig); 398 399 } 400 401 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig) 402 { 403 int8 shiftCount; 404 405 shiftCount = countLeadingZeros32(zSig) - 1; 406 return roundAndPackFloat32(zSign, zExp - shiftCount, 407 zSig << shiftCount); 408 } 409 410 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig) 411 { 412 flag roundNearestEven; 413 int16 roundIncrement, roundBits; 414 flag isTiny; 415 416 /* SH4 has only 2 rounding modes - round to nearest and round to zero */ 417 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST); 418 roundIncrement = 0x200; 419 if (!roundNearestEven) { 420 roundIncrement = 0; 421 } 422 roundBits = zSig & 0x3FF; 423 if (0x7FD <= (bits16) zExp) { 424 if ((0x7FD < zExp) 425 || ((zExp == 0x7FD) 426 && ((sbits64) (zSig + roundIncrement) < 0)) 427 ) { 428 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT); 429 return packFloat64(zSign, 0x7FF, 430 0) - (roundIncrement == 0); 431 } 432 if (zExp < 0) { 433 isTiny = (zExp < -1) 434 || (zSig + roundIncrement < 435 LIT64(0x8000000000000000)); 436 shift64RightJamming(zSig, -zExp, &zSig); 437 zExp = 0; 438 roundBits = zSig & 0x3FF; 439 if (isTiny && roundBits) 440 float_raise(FPSCR_CAUSE_UNDERFLOW); 441 } 442 } 443 if (roundBits) 444 float_raise(FPSCR_CAUSE_INEXACT); 445 zSig = (zSig + roundIncrement) >> 10; 446 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven); 447 if (zSig == 0) 448 zExp = 0; 449 return packFloat64(zSign, zExp, zSig); 450 451 } 452 453 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign) 454 { 455 int16 aExp, bExp, zExp; 456 bits32 aSig, bSig, zSig; 457 int16 expDiff; 458 459 aSig = 
extractFloat32Frac(a); 460 aExp = extractFloat32Exp(a); 461 bSig = extractFloat32Frac(b); 462 bExp = extractFloat32Exp(b); 463 expDiff = aExp - bExp; 464 aSig <<= 7; 465 bSig <<= 7; 466 if (0 < expDiff) 467 goto aExpBigger; 468 if (expDiff < 0) 469 goto bExpBigger; 470 if (aExp == 0) { 471 aExp = 1; 472 bExp = 1; 473 } 474 if (bSig < aSig) 475 goto aBigger; 476 if (aSig < bSig) 477 goto bBigger; 478 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0); 479 bExpBigger: 480 if (bExp == 0xFF) { 481 return packFloat32(zSign ^ 1, 0xFF, 0); 482 } 483 if (aExp == 0) { 484 ++expDiff; 485 } else { 486 aSig |= 0x40000000; 487 } 488 shift32RightJamming(aSig, -expDiff, &aSig); 489 bSig |= 0x40000000; 490 bBigger: 491 zSig = bSig - aSig; 492 zExp = bExp; 493 zSign ^= 1; 494 goto normalizeRoundAndPack; 495 aExpBigger: 496 if (aExp == 0xFF) { 497 return a; 498 } 499 if (bExp == 0) { 500 --expDiff; 501 } else { 502 bSig |= 0x40000000; 503 } 504 shift32RightJamming(bSig, expDiff, &bSig); 505 aSig |= 0x40000000; 506 aBigger: 507 zSig = aSig - bSig; 508 zExp = aExp; 509 normalizeRoundAndPack: 510 --zExp; 511 return normalizeRoundAndPackFloat32(zSign, zExp, zSig); 512 513 } 514 515 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign) 516 { 517 int16 aExp, bExp, zExp; 518 bits32 aSig, bSig, zSig; 519 int16 expDiff; 520 521 aSig = extractFloat32Frac(a); 522 aExp = extractFloat32Exp(a); 523 bSig = extractFloat32Frac(b); 524 bExp = extractFloat32Exp(b); 525 expDiff = aExp - bExp; 526 aSig <<= 6; 527 bSig <<= 6; 528 if (0 < expDiff) { 529 if (aExp == 0xFF) { 530 return a; 531 } 532 if (bExp == 0) { 533 --expDiff; 534 } else { 535 bSig |= 0x20000000; 536 } 537 shift32RightJamming(bSig, expDiff, &bSig); 538 zExp = aExp; 539 } else if (expDiff < 0) { 540 if (bExp == 0xFF) { 541 return packFloat32(zSign, 0xFF, 0); 542 } 543 if (aExp == 0) { 544 ++expDiff; 545 } else { 546 aSig |= 0x20000000; 547 } 548 shift32RightJamming(aSig, -expDiff, &aSig); 549 zExp = bExp; 550 } 
else { 551 if (aExp == 0xFF) { 552 return a; 553 } 554 if (aExp == 0) 555 return packFloat32(zSign, 0, (aSig + bSig) >> 6); 556 zSig = 0x40000000 + aSig + bSig; 557 zExp = aExp; 558 goto roundAndPack; 559 } 560 aSig |= 0x20000000; 561 zSig = (aSig + bSig) << 1; 562 --zExp; 563 if ((sbits32) zSig < 0) { 564 zSig = aSig + bSig; 565 ++zExp; 566 } 567 roundAndPack: 568 return roundAndPackFloat32(zSign, zExp, zSig); 569 570 } 571 572 float64 float64_sub(float64 a, float64 b) 573 { 574 flag aSign, bSign; 575 576 aSign = extractFloat64Sign(a); 577 bSign = extractFloat64Sign(b); 578 if (aSign == bSign) { 579 return subFloat64Sigs(a, b, aSign); 580 } else { 581 return addFloat64Sigs(a, b, aSign); 582 } 583 584 } 585 586 float32 float32_sub(float32 a, float32 b) 587 { 588 flag aSign, bSign; 589 590 aSign = extractFloat32Sign(a); 591 bSign = extractFloat32Sign(b); 592 if (aSign == bSign) { 593 return subFloat32Sigs(a, b, aSign); 594 } else { 595 return addFloat32Sigs(a, b, aSign); 596 } 597 598 } 599 600 float32 float32_add(float32 a, float32 b) 601 { 602 flag aSign, bSign; 603 604 aSign = extractFloat32Sign(a); 605 bSign = extractFloat32Sign(b); 606 if (aSign == bSign) { 607 return addFloat32Sigs(a, b, aSign); 608 } else { 609 return subFloat32Sigs(a, b, aSign); 610 } 611 612 } 613 614 float64 float64_add(float64 a, float64 b) 615 { 616 flag aSign, bSign; 617 618 aSign = extractFloat64Sign(a); 619 bSign = extractFloat64Sign(b); 620 if (aSign == bSign) { 621 return addFloat64Sigs(a, b, aSign); 622 } else { 623 return subFloat64Sigs(a, b, aSign); 624 } 625 } 626 627 static void 628 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr) 629 { 630 int8 shiftCount; 631 632 shiftCount = countLeadingZeros64(aSig) - 11; 633 *zSigPtr = aSig << shiftCount; 634 *zExpPtr = 1 - shiftCount; 635 } 636 637 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 638 bits64 * z1Ptr) 639 { 640 bits64 z1; 641 642 z1 = a1 + b1; 643 *z1Ptr = z1; 644 
*z0Ptr = a0 + b0 + (z1 < a1); 645 } 646 647 inline void 648 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr, 649 bits64 * z1Ptr) 650 { 651 *z1Ptr = a1 - b1; 652 *z0Ptr = a0 - b0 - (a1 < b1); 653 } 654 655 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b) 656 { 657 bits64 b0, b1; 658 bits64 rem0, rem1, term0, term1; 659 bits64 z; 660 if (b <= a0) 661 return LIT64(0xFFFFFFFFFFFFFFFF); 662 b0 = b >> 32; 663 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32; 664 mul64To128(b, z, &term0, &term1); 665 sub128(a0, a1, term0, term1, &rem0, &rem1); 666 while (((sbits64) rem0) < 0) { 667 z -= LIT64(0x100000000); 668 b1 = b << 32; 669 add128(rem0, rem1, b0, b1, &rem0, &rem1); 670 } 671 rem0 = (rem0 << 32) | (rem1 >> 32); 672 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0; 673 return z; 674 } 675 676 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr) 677 { 678 bits32 aHigh, aLow, bHigh, bLow; 679 bits64 z0, zMiddleA, zMiddleB, z1; 680 681 aLow = a; 682 aHigh = a >> 32; 683 bLow = b; 684 bHigh = b >> 32; 685 z1 = ((bits64) aLow) * bLow; 686 zMiddleA = ((bits64) aLow) * bHigh; 687 zMiddleB = ((bits64) aHigh) * bLow; 688 z0 = ((bits64) aHigh) * bHigh; 689 zMiddleA += zMiddleB; 690 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32); 691 zMiddleA <<= 32; 692 z1 += zMiddleA; 693 z0 += (z1 < zMiddleA); 694 *z1Ptr = z1; 695 *z0Ptr = z0; 696 697 } 698 699 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr, 700 bits32 * zSigPtr) 701 { 702 int8 shiftCount; 703 704 shiftCount = countLeadingZeros32(aSig) - 8; 705 *zSigPtr = aSig << shiftCount; 706 *zExpPtr = 1 - shiftCount; 707 708 } 709 710 float64 float64_div(float64 a, float64 b) 711 { 712 flag aSign, bSign, zSign; 713 int16 aExp, bExp, zExp; 714 bits64 aSig, bSig, zSig; 715 bits64 rem0, rem1; 716 bits64 term0, term1; 717 718 aSig = extractFloat64Frac(a); 719 aExp = extractFloat64Exp(a); 720 aSign = extractFloat64Sign(a); 721 bSig = 
extractFloat64Frac(b); 722 bExp = extractFloat64Exp(b); 723 bSign = extractFloat64Sign(b); 724 zSign = aSign ^ bSign; 725 if (aExp == 0x7FF) { 726 if (bExp == 0x7FF) { 727 } 728 return packFloat64(zSign, 0x7FF, 0); 729 } 730 if (bExp == 0x7FF) { 731 return packFloat64(zSign, 0, 0); 732 } 733 if (bExp == 0) { 734 if (bSig == 0) { 735 if ((aExp | aSig) == 0) { 736 float_raise(FPSCR_CAUSE_INVALID); 737 } 738 return packFloat64(zSign, 0x7FF, 0); 739 } 740 normalizeFloat64Subnormal(bSig, &bExp, &bSig); 741 } 742 if (aExp == 0) { 743 if (aSig == 0) 744 return packFloat64(zSign, 0, 0); 745 normalizeFloat64Subnormal(aSig, &aExp, &aSig); 746 } 747 zExp = aExp - bExp + 0x3FD; 748 aSig = (aSig | LIT64(0x0010000000000000)) << 10; 749 bSig = (bSig | LIT64(0x0010000000000000)) << 11; 750 if (bSig <= (aSig + aSig)) { 751 aSig >>= 1; 752 ++zExp; 753 } 754 zSig = estimateDiv128To64(aSig, 0, bSig); 755 if ((zSig & 0x1FF) <= 2) { 756 mul64To128(bSig, zSig, &term0, &term1); 757 sub128(aSig, 0, term0, term1, &rem0, &rem1); 758 while ((sbits64) rem0 < 0) { 759 --zSig; 760 add128(rem0, rem1, 0, bSig, &rem0, &rem1); 761 } 762 zSig |= (rem1 != 0); 763 } 764 return roundAndPackFloat64(zSign, zExp, zSig); 765 766 } 767 768 float32 float32_div(float32 a, float32 b) 769 { 770 flag aSign, bSign, zSign; 771 int16 aExp, bExp, zExp; 772 bits32 aSig, bSig, zSig; 773 774 aSig = extractFloat32Frac(a); 775 aExp = extractFloat32Exp(a); 776 aSign = extractFloat32Sign(a); 777 bSig = extractFloat32Frac(b); 778 bExp = extractFloat32Exp(b); 779 bSign = extractFloat32Sign(b); 780 zSign = aSign ^ bSign; 781 if (aExp == 0xFF) { 782 if (bExp == 0xFF) { 783 } 784 return packFloat32(zSign, 0xFF, 0); 785 } 786 if (bExp == 0xFF) { 787 return packFloat32(zSign, 0, 0); 788 } 789 if (bExp == 0) { 790 if (bSig == 0) { 791 return packFloat32(zSign, 0xFF, 0); 792 } 793 normalizeFloat32Subnormal(bSig, &bExp, &bSig); 794 } 795 if (aExp == 0) { 796 if (aSig == 0) 797 return packFloat32(zSign, 0, 0); 798 
normalizeFloat32Subnormal(aSig, &aExp, &aSig); 799 } 800 zExp = aExp - bExp + 0x7D; 801 aSig = (aSig | 0x00800000) << 7; 802 bSig = (bSig | 0x00800000) << 8; 803 if (bSig <= (aSig + aSig)) { 804 aSig >>= 1; 805 ++zExp; 806 } 807 zSig = (((bits64) aSig) << 32) / bSig; 808 if ((zSig & 0x3F) == 0) { 809 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32); 810 } 811 return roundAndPackFloat32(zSign, zExp, zSig); 812 813 } 814 815 float32 float32_mul(float32 a, float32 b) 816 { 817 char aSign, bSign, zSign; 818 int aExp, bExp, zExp; 819 unsigned int aSig, bSig; 820 unsigned long long zSig64; 821 unsigned int zSig; 822 823 aSig = extractFloat32Frac(a); 824 aExp = extractFloat32Exp(a); 825 aSign = extractFloat32Sign(a); 826 bSig = extractFloat32Frac(b); 827 bExp = extractFloat32Exp(b); 828 bSign = extractFloat32Sign(b); 829 zSign = aSign ^ bSign; 830 if (aExp == 0) { 831 if (aSig == 0) 832 return packFloat32(zSign, 0, 0); 833 normalizeFloat32Subnormal(aSig, &aExp, &aSig); 834 } 835 if (bExp == 0) { 836 if (bSig == 0) 837 return packFloat32(zSign, 0, 0); 838 normalizeFloat32Subnormal(bSig, &bExp, &bSig); 839 } 840 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0)) 841 return roundAndPackFloat32(zSign, 0xff, 0); 842 843 zExp = aExp + bExp - 0x7F; 844 aSig = (aSig | 0x00800000) << 7; 845 bSig = (bSig | 0x00800000) << 8; 846 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64); 847 zSig = zSig64; 848 if (0 <= (signed int)(zSig << 1)) { 849 zSig <<= 1; 850 --zExp; 851 } 852 return roundAndPackFloat32(zSign, zExp, zSig); 853 854 } 855 856 float64 float64_mul(float64 a, float64 b) 857 { 858 char aSign, bSign, zSign; 859 int aExp, bExp, zExp; 860 unsigned long long int aSig, bSig, zSig0, zSig1; 861 862 aSig = extractFloat64Frac(a); 863 aExp = extractFloat64Exp(a); 864 aSign = extractFloat64Sign(a); 865 bSig = extractFloat64Frac(b); 866 bExp = extractFloat64Exp(b); 867 bSign = extractFloat64Sign(b); 868 zSign = aSign ^ bSign; 869 870 if (aExp == 
0) { 871 if (aSig == 0) 872 return packFloat64(zSign, 0, 0); 873 normalizeFloat64Subnormal(aSig, &aExp, &aSig); 874 } 875 if (bExp == 0) { 876 if (bSig == 0) 877 return packFloat64(zSign, 0, 0); 878 normalizeFloat64Subnormal(bSig, &bExp, &bSig); 879 } 880 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0)) 881 return roundAndPackFloat64(zSign, 0x7ff, 0); 882 883 zExp = aExp + bExp - 0x3FF; 884 aSig = (aSig | 0x0010000000000000LL) << 10; 885 bSig = (bSig | 0x0010000000000000LL) << 11; 886 mul64To128(aSig, bSig, &zSig0, &zSig1); 887 zSig0 |= (zSig1 != 0); 888 if (0 <= (signed long long int)(zSig0 << 1)) { 889 zSig0 <<= 1; 890 --zExp; 891 } 892 return roundAndPackFloat64(zSign, zExp, zSig0); 893 } 894 895 /* 896 * ------------------------------------------------------------------------------- 897 * Returns the result of converting the double-precision floating-point value 898 * `a' to the single-precision floating-point format. The conversion is 899 * performed according to the IEC/IEEE Standard for Binary Floating-point 900 * Arithmetic. 901 * ------------------------------------------------------------------------------- 902 * */ 903 float32 float64_to_float32(float64 a) 904 { 905 flag aSign; 906 int16 aExp; 907 bits64 aSig; 908 bits32 zSig; 909 910 aSig = extractFloat64Frac( a ); 911 aExp = extractFloat64Exp( a ); 912 aSign = extractFloat64Sign( a ); 913 914 shift64RightJamming( aSig, 22, &aSig ); 915 zSig = aSig; 916 if ( aExp || zSig ) { 917 zSig |= 0x40000000; 918 aExp -= 0x381; 919 } 920 return roundAndPackFloat32(aSign, aExp, zSig); 921 } 922