1 /* 2 * Linux/PA-RISC Project (http://www.parisc-linux.org/) 3 * 4 * Floating-point emulation code 5 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2, or (at your option) 10 * any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 /* 22 * BEGIN_DESC 23 * 24 * File: 25 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $ 26 * 27 * Purpose: 28 * Double Floating-point Multiply Fused Add 29 * Double Floating-point Multiply Negate Fused Add 30 * Single Floating-point Multiply Fused Add 31 * Single Floating-point Multiply Negate Fused Add 32 * 33 * External Interfaces: 34 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 35 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 36 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 37 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 38 * 39 * Internal Interfaces: 40 * 41 * Theory: 42 * <<please update with a overview of the operation of this file>> 43 * 44 * END_DESC 45 */ 46 47 48 #include "float.h" 49 #include "sgl_float.h" 50 #include "dbl_float.h" 51 52 53 /* 54 * Double Floating-point Multiply Fused Add 55 */ 56 57 int 58 dbl_fmpyfadd( 59 dbl_floating_point *src1ptr, 60 dbl_floating_point *src2ptr, 61 dbl_floating_point *src3ptr, 62 unsigned int *status, 63 dbl_floating_point *dstptr) 64 { 65 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; 66 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; 67 unsigned int rightp1, rightp2, rightp3, rightp4; 68 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; 69 register int mpy_exponent, add_exponent, count; 70 boolean inexact = FALSE, is_tiny = FALSE; 71 72 unsigned int signlessleft1, signlessright1, save; 73 register int result_exponent, diff_exponent; 74 int sign_save, jumpsize; 75 76 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); 77 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); 78 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); 79 80 /* 81 * set sign bit of result of multiply 82 */ 83 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 84 Dbl_setnegativezerop1(resultp1); 85 else Dbl_setzerop1(resultp1); 86 87 /* 88 * Generate multiply exponent 89 */ 90 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; 91 92 /* 93 * check first operand for NaN's or infinity 94 */ 95 if (Dbl_isinfinity_exponent(opnd1p1)) { 96 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 97 if (Dbl_isnotnan(opnd2p1,opnd2p2) && 98 Dbl_isnotnan(opnd3p1,opnd3p2)) { 99 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { 100 /* 101 * invalid since operands are infinity 102 * and zero 103 */ 104 if (Is_invalidtrap_enabled()) 105 return(OPC_2E_INVALIDEXCEPTION); 106 Set_invalidflag(); 107 Dbl_makequietnan(resultp1,resultp2); 108 Dbl_copytoptr(resultp1,resultp2,dstptr); 109 return(NOEXCEPTION); 110 } 111 /* 112 * Check third operand for infinity with a 113 * sign opposite of the multiply result 114 */ 115 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 116 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 117 /* 118 * invalid since attempting a magnitude 119 * subtraction of infinities 120 */ 121 if (Is_invalidtrap_enabled()) 122 return(OPC_2E_INVALIDEXCEPTION); 123 Set_invalidflag(); 124 Dbl_makequietnan(resultp1,resultp2); 125 Dbl_copytoptr(resultp1,resultp2,dstptr); 126 return(NOEXCEPTION); 127 } 128 129 /* 130 * return infinity 131 */ 132 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 133 Dbl_copytoptr(resultp1,resultp2,dstptr); 134 return(NOEXCEPTION); 135 } 136 } 137 else { 138 /* 139 * is NaN; signaling or quiet? 140 */ 141 if (Dbl_isone_signaling(opnd1p1)) { 142 /* trap if INVALIDTRAP enabled */ 143 if (Is_invalidtrap_enabled()) 144 return(OPC_2E_INVALIDEXCEPTION); 145 /* make NaN quiet */ 146 Set_invalidflag(); 147 Dbl_set_quiet(opnd1p1); 148 } 149 /* 150 * is second operand a signaling NaN? 151 */ 152 else if (Dbl_is_signalingnan(opnd2p1)) { 153 /* trap if INVALIDTRAP enabled */ 154 if (Is_invalidtrap_enabled()) 155 return(OPC_2E_INVALIDEXCEPTION); 156 /* make NaN quiet */ 157 Set_invalidflag(); 158 Dbl_set_quiet(opnd2p1); 159 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 160 return(NOEXCEPTION); 161 } 162 /* 163 * is third operand a signaling NaN? 164 */ 165 else if (Dbl_is_signalingnan(opnd3p1)) { 166 /* trap if INVALIDTRAP enabled */ 167 if (Is_invalidtrap_enabled()) 168 return(OPC_2E_INVALIDEXCEPTION); 169 /* make NaN quiet */ 170 Set_invalidflag(); 171 Dbl_set_quiet(opnd3p1); 172 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 173 return(NOEXCEPTION); 174 } 175 /* 176 * return quiet NaN 177 */ 178 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); 179 return(NOEXCEPTION); 180 } 181 } 182 183 /* 184 * check second operand for NaN's or infinity 185 */ 186 if (Dbl_isinfinity_exponent(opnd2p1)) { 187 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 188 if (Dbl_isnotnan(opnd3p1,opnd3p2)) { 189 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { 190 /* 191 * invalid since multiply operands are 192 * zero & infinity 193 */ 194 if (Is_invalidtrap_enabled()) 195 return(OPC_2E_INVALIDEXCEPTION); 196 Set_invalidflag(); 197 Dbl_makequietnan(opnd2p1,opnd2p2); 198 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 199 return(NOEXCEPTION); 200 } 201 202 /* 203 * Check third operand for infinity with a 204 * sign opposite of the multiply result 205 */ 206 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 207 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 208 /* 209 * invalid since attempting a magnitude 210 * subtraction of infinities 211 */ 212 if (Is_invalidtrap_enabled()) 213 return(OPC_2E_INVALIDEXCEPTION); 214 Set_invalidflag(); 215 Dbl_makequietnan(resultp1,resultp2); 216 Dbl_copytoptr(resultp1,resultp2,dstptr); 217 return(NOEXCEPTION); 218 } 219 220 /* 221 * return infinity 222 */ 223 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 224 Dbl_copytoptr(resultp1,resultp2,dstptr); 225 return(NOEXCEPTION); 226 } 227 } 228 else { 229 /* 230 * is NaN; signaling or quiet? 231 */ 232 if (Dbl_isone_signaling(opnd2p1)) { 233 /* trap if INVALIDTRAP enabled */ 234 if (Is_invalidtrap_enabled()) 235 return(OPC_2E_INVALIDEXCEPTION); 236 /* make NaN quiet */ 237 Set_invalidflag(); 238 Dbl_set_quiet(opnd2p1); 239 } 240 /* 241 * is third operand a signaling NaN? 242 */ 243 else if (Dbl_is_signalingnan(opnd3p1)) { 244 /* trap if INVALIDTRAP enabled */ 245 if (Is_invalidtrap_enabled()) 246 return(OPC_2E_INVALIDEXCEPTION); 247 /* make NaN quiet */ 248 Set_invalidflag(); 249 Dbl_set_quiet(opnd3p1); 250 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 251 return(NOEXCEPTION); 252 } 253 /* 254 * return quiet NaN 255 */ 256 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 257 return(NOEXCEPTION); 258 } 259 } 260 261 /* 262 * check third operand for NaN's or infinity 263 */ 264 if (Dbl_isinfinity_exponent(opnd3p1)) { 265 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 266 /* return infinity */ 267 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 268 return(NOEXCEPTION); 269 } else { 270 /* 271 * is NaN; signaling or quiet? 272 */ 273 if (Dbl_isone_signaling(opnd3p1)) { 274 /* trap if INVALIDTRAP enabled */ 275 if (Is_invalidtrap_enabled()) 276 return(OPC_2E_INVALIDEXCEPTION); 277 /* make NaN quiet */ 278 Set_invalidflag(); 279 Dbl_set_quiet(opnd3p1); 280 } 281 /* 282 * return quiet NaN 283 */ 284 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 285 return(NOEXCEPTION); 286 } 287 } 288 289 /* 290 * Generate multiply mantissa 291 */ 292 if (Dbl_isnotzero_exponent(opnd1p1)) { 293 /* set hidden bit */ 294 Dbl_clear_signexponent_set_hidden(opnd1p1); 295 } 296 else { 297 /* check for zero */ 298 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 299 /* 300 * Perform the add opnd3 with zero here. 301 */ 302 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 303 if (Is_rounding_mode(ROUNDMINUS)) { 304 Dbl_or_signs(opnd3p1,resultp1); 305 } else { 306 Dbl_and_signs(opnd3p1,resultp1); 307 } 308 } 309 /* 310 * Now let's check for trapped underflow case. 311 */ 312 else if (Dbl_iszero_exponent(opnd3p1) && 313 Is_underflowtrap_enabled()) { 314 /* need to normalize results mantissa */ 315 sign_save = Dbl_signextendedsign(opnd3p1); 316 result_exponent = 0; 317 Dbl_leftshiftby1(opnd3p1,opnd3p2); 318 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 319 Dbl_set_sign(opnd3p1,/*using*/sign_save); 320 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 321 unfl); 322 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 323 /* inexact = FALSE */ 324 return(OPC_2E_UNDERFLOWEXCEPTION); 325 } 326 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 327 return(NOEXCEPTION); 328 } 329 /* is denormalized, adjust exponent */ 330 Dbl_clear_signexponent(opnd1p1); 331 Dbl_leftshiftby1(opnd1p1,opnd1p2); 332 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); 333 } 334 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 335 if (Dbl_isnotzero_exponent(opnd2p1)) { 336 Dbl_clear_signexponent_set_hidden(opnd2p1); 337 } 338 else { 339 /* check for zero */ 340 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 341 /* 342 * Perform the add opnd3 with zero here. 343 */ 344 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 345 if (Is_rounding_mode(ROUNDMINUS)) { 346 Dbl_or_signs(opnd3p1,resultp1); 347 } else { 348 Dbl_and_signs(opnd3p1,resultp1); 349 } 350 } 351 /* 352 * Now let's check for trapped underflow case. 353 */ 354 else if (Dbl_iszero_exponent(opnd3p1) && 355 Is_underflowtrap_enabled()) { 356 /* need to normalize results mantissa */ 357 sign_save = Dbl_signextendedsign(opnd3p1); 358 result_exponent = 0; 359 Dbl_leftshiftby1(opnd3p1,opnd3p2); 360 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 361 Dbl_set_sign(opnd3p1,/*using*/sign_save); 362 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 363 unfl); 364 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 365 /* inexact = FALSE */ 366 return(OPC_2E_UNDERFLOWEXCEPTION); 367 } 368 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 369 return(NOEXCEPTION); 370 } 371 /* is denormalized; want to normalize */ 372 Dbl_clear_signexponent(opnd2p1); 373 Dbl_leftshiftby1(opnd2p1,opnd2p2); 374 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); 375 } 376 377 /* Multiply the first two source mantissas together */ 378 379 /* 380 * The intermediate result will be kept in tmpres, 381 * which needs enough room for 106 bits of mantissa, 382 * so lets call it a Double extended. 383 */ 384 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 385 386 /* 387 * Four bits at a time are inspected in each loop, and a 388 * simple shift and add multiply algorithm is used. 389 */ 390 for (count = DBL_P-1; count >= 0; count -= 4) { 391 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 392 if (Dbit28p2(opnd1p2)) { 393 /* Fourword_add should be an ADD followed by 3 ADDC's */ 394 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 395 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); 396 } 397 if (Dbit29p2(opnd1p2)) { 398 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 399 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); 400 } 401 if (Dbit30p2(opnd1p2)) { 402 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 403 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); 404 } 405 if (Dbit31p2(opnd1p2)) { 406 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 407 opnd2p1, opnd2p2, 0, 0); 408 } 409 Dbl_rightshiftby4(opnd1p1,opnd1p2); 410 } 411 if (Is_dexthiddenoverflow(tmpresp1)) { 412 /* result mantissa >= 2 (mantissa overflow) */ 413 mpy_exponent++; 414 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 415 } 416 417 /* 418 * Restore the sign of the mpy result which was saved in resultp1. 419 * The exponent will continue to be kept in mpy_exponent. 420 */ 421 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); 422 423 /* 424 * No rounding is required, since the result of the multiply 425 * is exact in the extended format. 426 */ 427 428 /* 429 * Now we are ready to perform the add portion of the operation. 430 * 431 * The exponents need to be kept as integers for now, since the 432 * multiply result might not fit into the exponent field. We 433 * can't overflow or underflow because of this yet, since the 434 * add could bring the final result back into range. 435 */ 436 add_exponent = Dbl_exponent(opnd3p1); 437 438 /* 439 * Check for denormalized or zero add operand. 440 */ 441 if (add_exponent == 0) { 442 /* check for zero */ 443 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 444 /* right is zero */ 445 /* Left can't be zero and must be result. 446 * 447 * The final result is now in tmpres and mpy_exponent, 448 * and needs to be rounded and squeezed back into 449 * double precision format from double extended. 450 */ 451 result_exponent = mpy_exponent; 452 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 453 resultp1,resultp2,resultp3,resultp4); 454 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ 455 goto round; 456 } 457 458 /* 459 * Neither are zeroes. 460 * Adjust exponent and normalize add operand. 461 */ 462 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ 463 Dbl_clear_signexponent(opnd3p1); 464 Dbl_leftshiftby1(opnd3p1,opnd3p2); 465 Dbl_normalize(opnd3p1,opnd3p2,add_exponent); 466 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ 467 } else { 468 Dbl_clear_exponent_set_hidden(opnd3p1); 469 } 470 /* 471 * Copy opnd3 to the double extended variable called right. 472 */ 473 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); 474 475 /* 476 * A zero "save" helps discover equal operands (for later), 477 * and is used in swapping operands (if needed). 478 */ 479 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); 480 481 /* 482 * Compare magnitude of operands. 483 */ 484 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); 485 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); 486 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 487 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ 488 /* 489 * Set the left operand to the larger one by XOR swap. 490 * First finish the first word "save". 491 */ 492 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); 493 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 494 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, 495 rightp2,rightp3,rightp4); 496 /* also setup exponents used in rest of routine */ 497 diff_exponent = add_exponent - mpy_exponent; 498 result_exponent = add_exponent; 499 } else { 500 /* also setup exponents used in rest of routine */ 501 diff_exponent = mpy_exponent - add_exponent; 502 result_exponent = mpy_exponent; 503 } 504 /* Invariant: left is not smaller than right. */ 505 506 /* 507 * Special case alignment of operands that would force alignment 508 * beyond the extent of the extension. A further optimization 509 * could special case this but only reduces the path length for 510 * this infrequent case. 511 */ 512 if (diff_exponent > DBLEXT_THRESHOLD) { 513 diff_exponent = DBLEXT_THRESHOLD; 514 } 515 516 /* Align right operand by shifting it to the right */ 517 Dblext_clear_sign(rightp1); 518 Dblext_right_align(rightp1,rightp2,rightp3,rightp4, 519 /*shifted by*/diff_exponent); 520 521 /* Treat sum and difference of the operands separately. */ 522 if ((int)save < 0) { 523 /* 524 * Difference of the two operands. Overflow can occur if the 525 * multiply overflowed. A borrow can occur out of the hidden 526 * bit and force a post normalization phase. 527 */ 528 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 529 rightp1,rightp2,rightp3,rightp4, 530 resultp1,resultp2,resultp3,resultp4); 531 sign_save = Dbl_signextendedsign(resultp1); 532 if (Dbl_iszero_hidden(resultp1)) { 533 /* Handle normalization */ 534 /* A straightforward algorithm would now shift the 535 * result and extension left until the hidden bit 536 * becomes one. Not all of the extension bits need 537 * participate in the shift. Only the two most 538 * significant bits (round and guard) are needed. 539 * If only a single shift is needed then the guard 540 * bit becomes a significant low order bit and the 541 * extension must participate in the rounding. 542 * If more than a single shift is needed, then all 543 * bits to the right of the guard bit are zeros, 544 * and the guard bit may or may not be zero. */ 545 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 546 resultp4); 547 548 /* Need to check for a zero result. The sign and 549 * exponent fields have already been zeroed. The more 550 * efficient test of the full object can be used. 551 */ 552 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){ 553 /* Must have been "x-x" or "x+(-x)". */ 554 if (Is_rounding_mode(ROUNDMINUS)) 555 Dbl_setone_sign(resultp1); 556 Dbl_copytoptr(resultp1,resultp2,dstptr); 557 return(NOEXCEPTION); 558 } 559 result_exponent--; 560 561 /* Look to see if normalization is finished. */ 562 if (Dbl_isone_hidden(resultp1)) { 563 /* No further normalization is needed */ 564 goto round; 565 } 566 567 /* Discover first one bit to determine shift amount. 568 * Use a modified binary search. We have already 569 * shifted the result one position right and still 570 * not found a one so the remainder of the extension 571 * must be zero and simplifies rounding. */ 572 /* Scan bytes */ 573 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { 574 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); 575 result_exponent -= 8; 576 } 577 /* Now narrow it down to the nibble */ 578 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { 579 /* The lower nibble contains the 580 * normalizing one */ 581 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); 582 result_exponent -= 4; 583 } 584 /* Select case where first bit is set (already 585 * normalized) otherwise select the proper shift. */ 586 jumpsize = Dbl_hiddenhigh3mantissa(resultp1); 587 if (jumpsize <= 7) switch(jumpsize) { 588 case 1: 589 Dblext_leftshiftby3(resultp1,resultp2,resultp3, 590 resultp4); 591 result_exponent -= 3; 592 break; 593 case 2: 594 case 3: 595 Dblext_leftshiftby2(resultp1,resultp2,resultp3, 596 resultp4); 597 result_exponent -= 2; 598 break; 599 case 4: 600 case 5: 601 case 6: 602 case 7: 603 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 604 resultp4); 605 result_exponent -= 1; 606 break; 607 } 608 } /* end if (hidden...)... */ 609 /* Fall through and round */ 610 } /* end if (save < 0)... */ 611 else { 612 /* Add magnitudes */ 613 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 614 rightp1,rightp2,rightp3,rightp4, 615 /*to*/resultp1,resultp2,resultp3,resultp4); 616 sign_save = Dbl_signextendedsign(resultp1); 617 if (Dbl_isone_hiddenoverflow(resultp1)) { 618 /* Prenormalization required. */ 619 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, 620 resultp4); 621 result_exponent++; 622 } /* end if hiddenoverflow... */ 623 } /* end else ...add magnitudes... */ 624 625 /* Round the result. If the extension and lower two words are 626 * all zeros, then the result is exact. Otherwise round in the 627 * correct direction. Underflow is possible. If a postnormalization 628 * is necessary, then the mantissa is all zeros so no shift is needed. 629 */ 630 round: 631 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 632 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, 633 result_exponent,is_tiny); 634 } 635 Dbl_set_sign(resultp1,/*using*/sign_save); 636 if (Dblext_isnotzero_mantissap3(resultp3) || 637 Dblext_isnotzero_mantissap4(resultp4)) { 638 inexact = TRUE; 639 switch(Rounding_mode()) { 640 case ROUNDNEAREST: /* The default. */ 641 if (Dblext_isone_highp3(resultp3)) { 642 /* at least 1/2 ulp */ 643 if (Dblext_isnotzero_low31p3(resultp3) || 644 Dblext_isnotzero_mantissap4(resultp4) || 645 Dblext_isone_lowp2(resultp2)) { 646 /* either exactly half way and odd or 647 * more than 1/2ulp */ 648 Dbl_increment(resultp1,resultp2); 649 } 650 } 651 break; 652 653 case ROUNDPLUS: 654 if (Dbl_iszero_sign(resultp1)) { 655 /* Round up positive results */ 656 Dbl_increment(resultp1,resultp2); 657 } 658 break; 659 660 case ROUNDMINUS: 661 if (Dbl_isone_sign(resultp1)) { 662 /* Round down negative results */ 663 Dbl_increment(resultp1,resultp2); 664 } 665 666 case ROUNDZERO:; 667 /* truncate is simple */ 668 } /* end switch... */ 669 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; 670 } 671 if (result_exponent >= DBL_INFINITY_EXPONENT) { 672 /* trap if OVERFLOWTRAP enabled */ 673 if (Is_overflowtrap_enabled()) { 674 /* 675 * Adjust bias of result 676 */ 677 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); 678 Dbl_copytoptr(resultp1,resultp2,dstptr); 679 if (inexact) 680 if (Is_inexacttrap_enabled()) 681 return (OPC_2E_OVERFLOWEXCEPTION | 682 OPC_2E_INEXACTEXCEPTION); 683 else Set_inexactflag(); 684 return (OPC_2E_OVERFLOWEXCEPTION); 685 } 686 inexact = TRUE; 687 Set_overflowflag(); 688 /* set result to infinity or largest number */ 689 Dbl_setoverflow(resultp1,resultp2); 690 691 } else if (result_exponent <= 0) { /* underflow case */ 692 if (Is_underflowtrap_enabled()) { 693 /* 694 * Adjust bias of result 695 */ 696 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); 697 Dbl_copytoptr(resultp1,resultp2,dstptr); 698 if (inexact) 699 if (Is_inexacttrap_enabled()) 700 return (OPC_2E_UNDERFLOWEXCEPTION | 701 OPC_2E_INEXACTEXCEPTION); 702 else Set_inexactflag(); 703 return(OPC_2E_UNDERFLOWEXCEPTION); 704 } 705 else if (inexact && is_tiny) Set_underflowflag(); 706 } 707 else Dbl_set_exponent(resultp1,result_exponent); 708 Dbl_copytoptr(resultp1,resultp2,dstptr); 709 if (inexact) 710 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 711 else Set_inexactflag(); 712 return(NOEXCEPTION); 713 } 714 715 /* 716 * Double Floating-point Multiply Negate Fused Add 717 */ 718 719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 720 721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 722 unsigned int *status; 723 { 724 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; 725 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; 726 unsigned int rightp1, rightp2, rightp3, rightp4; 727 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; 728 register int mpy_exponent, add_exponent, count; 729 boolean inexact = FALSE, is_tiny = FALSE; 730 731 unsigned int signlessleft1, signlessright1, save; 732 register int result_exponent, diff_exponent; 733 int sign_save, jumpsize; 734 735 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); 736 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); 737 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); 738 739 /* 740 * set sign bit of result of multiply 741 */ 742 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 743 Dbl_setzerop1(resultp1); 744 else 745 Dbl_setnegativezerop1(resultp1); 746 747 /* 748 * Generate multiply exponent 749 */ 750 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; 751 752 /* 753 * check first operand for NaN's or infinity 754 */ 755 if (Dbl_isinfinity_exponent(opnd1p1)) { 756 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 757 if (Dbl_isnotnan(opnd2p1,opnd2p2) && 758 Dbl_isnotnan(opnd3p1,opnd3p2)) { 759 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { 760 /* 761 * invalid since operands are infinity 762 * and zero 763 */ 764 if (Is_invalidtrap_enabled()) 765 return(OPC_2E_INVALIDEXCEPTION); 766 Set_invalidflag(); 767 Dbl_makequietnan(resultp1,resultp2); 768 Dbl_copytoptr(resultp1,resultp2,dstptr); 769 return(NOEXCEPTION); 770 } 771 /* 772 * Check third operand for infinity with a 773 * sign opposite of the multiply result 774 */ 775 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 776 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 777 /* 778 * invalid since attempting a magnitude 779 * subtraction of infinities 780 */ 781 if (Is_invalidtrap_enabled()) 782 return(OPC_2E_INVALIDEXCEPTION); 783 Set_invalidflag(); 784 Dbl_makequietnan(resultp1,resultp2); 785 Dbl_copytoptr(resultp1,resultp2,dstptr); 786 return(NOEXCEPTION); 787 } 788 789 /* 790 * return infinity 791 */ 792 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 793 Dbl_copytoptr(resultp1,resultp2,dstptr); 794 return(NOEXCEPTION); 795 } 796 } 797 else { 798 /* 799 * is NaN; signaling or quiet? 800 */ 801 if (Dbl_isone_signaling(opnd1p1)) { 802 /* trap if INVALIDTRAP enabled */ 803 if (Is_invalidtrap_enabled()) 804 return(OPC_2E_INVALIDEXCEPTION); 805 /* make NaN quiet */ 806 Set_invalidflag(); 807 Dbl_set_quiet(opnd1p1); 808 } 809 /* 810 * is second operand a signaling NaN? 811 */ 812 else if (Dbl_is_signalingnan(opnd2p1)) { 813 /* trap if INVALIDTRAP enabled */ 814 if (Is_invalidtrap_enabled()) 815 return(OPC_2E_INVALIDEXCEPTION); 816 /* make NaN quiet */ 817 Set_invalidflag(); 818 Dbl_set_quiet(opnd2p1); 819 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 820 return(NOEXCEPTION); 821 } 822 /* 823 * is third operand a signaling NaN? 824 */ 825 else if (Dbl_is_signalingnan(opnd3p1)) { 826 /* trap if INVALIDTRAP enabled */ 827 if (Is_invalidtrap_enabled()) 828 return(OPC_2E_INVALIDEXCEPTION); 829 /* make NaN quiet */ 830 Set_invalidflag(); 831 Dbl_set_quiet(opnd3p1); 832 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 833 return(NOEXCEPTION); 834 } 835 /* 836 * return quiet NaN 837 */ 838 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); 839 return(NOEXCEPTION); 840 } 841 } 842 843 /* 844 * check second operand for NaN's or infinity 845 */ 846 if (Dbl_isinfinity_exponent(opnd2p1)) { 847 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 848 if (Dbl_isnotnan(opnd3p1,opnd3p2)) { 849 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { 850 /* 851 * invalid since multiply operands are 852 * zero & infinity 853 */ 854 if (Is_invalidtrap_enabled()) 855 return(OPC_2E_INVALIDEXCEPTION); 856 Set_invalidflag(); 857 Dbl_makequietnan(opnd2p1,opnd2p2); 858 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 859 return(NOEXCEPTION); 860 } 861 862 /* 863 * Check third operand for infinity with a 864 * sign opposite of the multiply result 865 */ 866 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 867 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 868 /* 869 * invalid since attempting a magnitude 870 * subtraction of infinities 871 */ 872 if (Is_invalidtrap_enabled()) 873 return(OPC_2E_INVALIDEXCEPTION); 874 Set_invalidflag(); 875 Dbl_makequietnan(resultp1,resultp2); 876 Dbl_copytoptr(resultp1,resultp2,dstptr); 877 return(NOEXCEPTION); 878 } 879 880 /* 881 * return infinity 882 */ 883 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 884 Dbl_copytoptr(resultp1,resultp2,dstptr); 885 return(NOEXCEPTION); 886 } 887 } 888 else { 889 /* 890 * is NaN; signaling or quiet? 891 */ 892 if (Dbl_isone_signaling(opnd2p1)) { 893 /* trap if INVALIDTRAP enabled */ 894 if (Is_invalidtrap_enabled()) 895 return(OPC_2E_INVALIDEXCEPTION); 896 /* make NaN quiet */ 897 Set_invalidflag(); 898 Dbl_set_quiet(opnd2p1); 899 } 900 /* 901 * is third operand a signaling NaN? 902 */ 903 else if (Dbl_is_signalingnan(opnd3p1)) { 904 /* trap if INVALIDTRAP enabled */ 905 if (Is_invalidtrap_enabled()) 906 return(OPC_2E_INVALIDEXCEPTION); 907 /* make NaN quiet */ 908 Set_invalidflag(); 909 Dbl_set_quiet(opnd3p1); 910 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 911 return(NOEXCEPTION); 912 } 913 /* 914 * return quiet NaN 915 */ 916 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 917 return(NOEXCEPTION); 918 } 919 } 920 921 /* 922 * check third operand for NaN's or infinity 923 */ 924 if (Dbl_isinfinity_exponent(opnd3p1)) { 925 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 926 /* return infinity */ 927 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 928 return(NOEXCEPTION); 929 } else { 930 /* 931 * is NaN; signaling or quiet? 932 */ 933 if (Dbl_isone_signaling(opnd3p1)) { 934 /* trap if INVALIDTRAP enabled */ 935 if (Is_invalidtrap_enabled()) 936 return(OPC_2E_INVALIDEXCEPTION); 937 /* make NaN quiet */ 938 Set_invalidflag(); 939 Dbl_set_quiet(opnd3p1); 940 } 941 /* 942 * return quiet NaN 943 */ 944 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 945 return(NOEXCEPTION); 946 } 947 } 948 949 /* 950 * Generate multiply mantissa 951 */ 952 if (Dbl_isnotzero_exponent(opnd1p1)) { 953 /* set hidden bit */ 954 Dbl_clear_signexponent_set_hidden(opnd1p1); 955 } 956 else { 957 /* check for zero */ 958 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 959 /* 960 * Perform the add opnd3 with zero here. 961 */ 962 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 963 if (Is_rounding_mode(ROUNDMINUS)) { 964 Dbl_or_signs(opnd3p1,resultp1); 965 } else { 966 Dbl_and_signs(opnd3p1,resultp1); 967 } 968 } 969 /* 970 * Now let's check for trapped underflow case. 971 */ 972 else if (Dbl_iszero_exponent(opnd3p1) && 973 Is_underflowtrap_enabled()) { 974 /* need to normalize results mantissa */ 975 sign_save = Dbl_signextendedsign(opnd3p1); 976 result_exponent = 0; 977 Dbl_leftshiftby1(opnd3p1,opnd3p2); 978 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 979 Dbl_set_sign(opnd3p1,/*using*/sign_save); 980 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 981 unfl); 982 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 983 /* inexact = FALSE */ 984 return(OPC_2E_UNDERFLOWEXCEPTION); 985 } 986 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 987 return(NOEXCEPTION); 988 } 989 /* is denormalized, adjust exponent */ 990 Dbl_clear_signexponent(opnd1p1); 991 Dbl_leftshiftby1(opnd1p1,opnd1p2); 992 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); 993 } 994 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 995 if (Dbl_isnotzero_exponent(opnd2p1)) { 996 Dbl_clear_signexponent_set_hidden(opnd2p1); 997 } 998 else { 999 /* check for zero */ 1000 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 1001 /* 1002 * Perform the add opnd3 with zero here. 1003 */ 1004 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 1005 if (Is_rounding_mode(ROUNDMINUS)) { 1006 Dbl_or_signs(opnd3p1,resultp1); 1007 } else { 1008 Dbl_and_signs(opnd3p1,resultp1); 1009 } 1010 } 1011 /* 1012 * Now let's check for trapped underflow case. 1013 */ 1014 else if (Dbl_iszero_exponent(opnd3p1) && 1015 Is_underflowtrap_enabled()) { 1016 /* need to normalize results mantissa */ 1017 sign_save = Dbl_signextendedsign(opnd3p1); 1018 result_exponent = 0; 1019 Dbl_leftshiftby1(opnd3p1,opnd3p2); 1020 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 1021 Dbl_set_sign(opnd3p1,/*using*/sign_save); 1022 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 1023 unfl); 1024 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 1025 /* inexact = FALSE */ 1026 return(OPC_2E_UNDERFLOWEXCEPTION); 1027 } 1028 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 1029 return(NOEXCEPTION); 1030 } 1031 /* is denormalized; want to normalize */ 1032 Dbl_clear_signexponent(opnd2p1); 1033 Dbl_leftshiftby1(opnd2p1,opnd2p2); 1034 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); 1035 } 1036 1037 /* Multiply the first two source mantissas together */ 1038 1039 /* 1040 * The intermediate result will be kept in tmpres, 1041 * which needs enough room for 106 bits of mantissa, 1042 * so lets call it a Double extended. 1043 */ 1044 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1045 1046 /* 1047 * Four bits at a time are inspected in each loop, and a 1048 * simple shift and add multiply algorithm is used. 1049 */ 1050 for (count = DBL_P-1; count >= 0; count -= 4) { 1051 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1052 if (Dbit28p2(opnd1p2)) { 1053 /* Fourword_add should be an ADD followed by 3 ADDC's */ 1054 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1055 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); 1056 } 1057 if (Dbit29p2(opnd1p2)) { 1058 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1059 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); 1060 } 1061 if (Dbit30p2(opnd1p2)) { 1062 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1063 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); 1064 } 1065 if (Dbit31p2(opnd1p2)) { 1066 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1067 opnd2p1, opnd2p2, 0, 0); 1068 } 1069 Dbl_rightshiftby4(opnd1p1,opnd1p2); 1070 } 1071 if (Is_dexthiddenoverflow(tmpresp1)) { 1072 /* result mantissa >= 2 (mantissa overflow) */ 1073 mpy_exponent++; 1074 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1075 } 1076 1077 /* 1078 * Restore the sign of the mpy result which was saved in resultp1. 1079 * The exponent will continue to be kept in mpy_exponent. 1080 */ 1081 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); 1082 1083 /* 1084 * No rounding is required, since the result of the multiply 1085 * is exact in the extended format. 1086 */ 1087 1088 /* 1089 * Now we are ready to perform the add portion of the operation. 1090 * 1091 * The exponents need to be kept as integers for now, since the 1092 * multiply result might not fit into the exponent field. We 1093 * can't overflow or underflow because of this yet, since the 1094 * add could bring the final result back into range. 1095 */ 1096 add_exponent = Dbl_exponent(opnd3p1); 1097 1098 /* 1099 * Check for denormalized or zero add operand. 1100 */ 1101 if (add_exponent == 0) { 1102 /* check for zero */ 1103 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 1104 /* right is zero */ 1105 /* Left can't be zero and must be result. 1106 * 1107 * The final result is now in tmpres and mpy_exponent, 1108 * and needs to be rounded and squeezed back into 1109 * double precision format from double extended. 1110 */ 1111 result_exponent = mpy_exponent; 1112 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1113 resultp1,resultp2,resultp3,resultp4); 1114 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ 1115 goto round; 1116 } 1117 1118 /* 1119 * Neither are zeroes. 1120 * Adjust exponent and normalize add operand. 1121 */ 1122 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ 1123 Dbl_clear_signexponent(opnd3p1); 1124 Dbl_leftshiftby1(opnd3p1,opnd3p2); 1125 Dbl_normalize(opnd3p1,opnd3p2,add_exponent); 1126 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ 1127 } else { 1128 Dbl_clear_exponent_set_hidden(opnd3p1); 1129 } 1130 /* 1131 * Copy opnd3 to the double extended variable called right. 1132 */ 1133 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); 1134 1135 /* 1136 * A zero "save" helps discover equal operands (for later), 1137 * and is used in swapping operands (if needed). 1138 */ 1139 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); 1140 1141 /* 1142 * Compare magnitude of operands. 1143 */ 1144 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); 1145 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); 1146 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 1147 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ 1148 /* 1149 * Set the left operand to the larger one by XOR swap. 1150 * First finish the first word "save". 1151 */ 1152 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); 1153 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 1154 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, 1155 rightp2,rightp3,rightp4); 1156 /* also setup exponents used in rest of routine */ 1157 diff_exponent = add_exponent - mpy_exponent; 1158 result_exponent = add_exponent; 1159 } else { 1160 /* also setup exponents used in rest of routine */ 1161 diff_exponent = mpy_exponent - add_exponent; 1162 result_exponent = mpy_exponent; 1163 } 1164 /* Invariant: left is not smaller than right. */ 1165 1166 /* 1167 * Special case alignment of operands that would force alignment 1168 * beyond the extent of the extension. A further optimization 1169 * could special case this but only reduces the path length for 1170 * this infrequent case. 1171 */ 1172 if (diff_exponent > DBLEXT_THRESHOLD) { 1173 diff_exponent = DBLEXT_THRESHOLD; 1174 } 1175 1176 /* Align right operand by shifting it to the right */ 1177 Dblext_clear_sign(rightp1); 1178 Dblext_right_align(rightp1,rightp2,rightp3,rightp4, 1179 /*shifted by*/diff_exponent); 1180 1181 /* Treat sum and difference of the operands separately. */ 1182 if ((int)save < 0) { 1183 /* 1184 * Difference of the two operands. Overflow can occur if the 1185 * multiply overflowed. A borrow can occur out of the hidden 1186 * bit and force a post normalization phase. 1187 */ 1188 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1189 rightp1,rightp2,rightp3,rightp4, 1190 resultp1,resultp2,resultp3,resultp4); 1191 sign_save = Dbl_signextendedsign(resultp1); 1192 if (Dbl_iszero_hidden(resultp1)) { 1193 /* Handle normalization */ 1194 /* A straightforward algorithm would now shift the 1195 * result and extension left until the hidden bit 1196 * becomes one. Not all of the extension bits need 1197 * participate in the shift. Only the two most 1198 * significant bits (round and guard) are needed. 1199 * If only a single shift is needed then the guard 1200 * bit becomes a significant low order bit and the 1201 * extension must participate in the rounding. 1202 * If more than a single shift is needed, then all 1203 * bits to the right of the guard bit are zeros, 1204 * and the guard bit may or may not be zero. */ 1205 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 1206 resultp4); 1207 1208 /* Need to check for a zero result. The sign and 1209 * exponent fields have already been zeroed. The more 1210 * efficient test of the full object can be used. 1211 */ 1212 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) { 1213 /* Must have been "x-x" or "x+(-x)". */ 1214 if (Is_rounding_mode(ROUNDMINUS)) 1215 Dbl_setone_sign(resultp1); 1216 Dbl_copytoptr(resultp1,resultp2,dstptr); 1217 return(NOEXCEPTION); 1218 } 1219 result_exponent--; 1220 1221 /* Look to see if normalization is finished. */ 1222 if (Dbl_isone_hidden(resultp1)) { 1223 /* No further normalization is needed */ 1224 goto round; 1225 } 1226 1227 /* Discover first one bit to determine shift amount. 1228 * Use a modified binary search. We have already 1229 * shifted the result one position right and still 1230 * not found a one so the remainder of the extension 1231 * must be zero and simplifies rounding. */ 1232 /* Scan bytes */ 1233 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { 1234 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); 1235 result_exponent -= 8; 1236 } 1237 /* Now narrow it down to the nibble */ 1238 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { 1239 /* The lower nibble contains the 1240 * normalizing one */ 1241 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); 1242 result_exponent -= 4; 1243 } 1244 /* Select case where first bit is set (already 1245 * normalized) otherwise select the proper shift. */ 1246 jumpsize = Dbl_hiddenhigh3mantissa(resultp1); 1247 if (jumpsize <= 7) switch(jumpsize) { 1248 case 1: 1249 Dblext_leftshiftby3(resultp1,resultp2,resultp3, 1250 resultp4); 1251 result_exponent -= 3; 1252 break; 1253 case 2: 1254 case 3: 1255 Dblext_leftshiftby2(resultp1,resultp2,resultp3, 1256 resultp4); 1257 result_exponent -= 2; 1258 break; 1259 case 4: 1260 case 5: 1261 case 6: 1262 case 7: 1263 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 1264 resultp4); 1265 result_exponent -= 1; 1266 break; 1267 } 1268 } /* end if (hidden...)... */ 1269 /* Fall through and round */ 1270 } /* end if (save < 0)... */ 1271 else { 1272 /* Add magnitudes */ 1273 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1274 rightp1,rightp2,rightp3,rightp4, 1275 /*to*/resultp1,resultp2,resultp3,resultp4); 1276 sign_save = Dbl_signextendedsign(resultp1); 1277 if (Dbl_isone_hiddenoverflow(resultp1)) { 1278 /* Prenormalization required. */ 1279 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, 1280 resultp4); 1281 result_exponent++; 1282 } /* end if hiddenoverflow... */ 1283 } /* end else ...add magnitudes... */ 1284 1285 /* Round the result. If the extension and lower two words are 1286 * all zeros, then the result is exact. Otherwise round in the 1287 * correct direction. Underflow is possible. If a postnormalization 1288 * is necessary, then the mantissa is all zeros so no shift is needed. 1289 */ 1290 round: 1291 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 1292 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, 1293 result_exponent,is_tiny); 1294 } 1295 Dbl_set_sign(resultp1,/*using*/sign_save); 1296 if (Dblext_isnotzero_mantissap3(resultp3) || 1297 Dblext_isnotzero_mantissap4(resultp4)) { 1298 inexact = TRUE; 1299 switch(Rounding_mode()) { 1300 case ROUNDNEAREST: /* The default. */ 1301 if (Dblext_isone_highp3(resultp3)) { 1302 /* at least 1/2 ulp */ 1303 if (Dblext_isnotzero_low31p3(resultp3) || 1304 Dblext_isnotzero_mantissap4(resultp4) || 1305 Dblext_isone_lowp2(resultp2)) { 1306 /* either exactly half way and odd or 1307 * more than 1/2ulp */ 1308 Dbl_increment(resultp1,resultp2); 1309 } 1310 } 1311 break; 1312 1313 case ROUNDPLUS: 1314 if (Dbl_iszero_sign(resultp1)) { 1315 /* Round up positive results */ 1316 Dbl_increment(resultp1,resultp2); 1317 } 1318 break; 1319 1320 case ROUNDMINUS: 1321 if (Dbl_isone_sign(resultp1)) { 1322 /* Round down negative results */ 1323 Dbl_increment(resultp1,resultp2); 1324 } 1325 1326 case ROUNDZERO:; 1327 /* truncate is simple */ 1328 } /* end switch... */ 1329 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; 1330 } 1331 if (result_exponent >= DBL_INFINITY_EXPONENT) { 1332 /* Overflow */ 1333 if (Is_overflowtrap_enabled()) { 1334 /* 1335 * Adjust bias of result 1336 */ 1337 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); 1338 Dbl_copytoptr(resultp1,resultp2,dstptr); 1339 if (inexact) 1340 if (Is_inexacttrap_enabled()) 1341 return (OPC_2E_OVERFLOWEXCEPTION | 1342 OPC_2E_INEXACTEXCEPTION); 1343 else Set_inexactflag(); 1344 return (OPC_2E_OVERFLOWEXCEPTION); 1345 } 1346 inexact = TRUE; 1347 Set_overflowflag(); 1348 Dbl_setoverflow(resultp1,resultp2); 1349 } else if (result_exponent <= 0) { /* underflow case */ 1350 if (Is_underflowtrap_enabled()) { 1351 /* 1352 * Adjust bias of result 1353 */ 1354 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); 1355 Dbl_copytoptr(resultp1,resultp2,dstptr); 1356 if (inexact) 1357 if (Is_inexacttrap_enabled()) 1358 return (OPC_2E_UNDERFLOWEXCEPTION | 1359 OPC_2E_INEXACTEXCEPTION); 1360 else Set_inexactflag(); 1361 return(OPC_2E_UNDERFLOWEXCEPTION); 1362 } 1363 else if (inexact && is_tiny) Set_underflowflag(); 1364 } 1365 else Dbl_set_exponent(resultp1,result_exponent); 1366 Dbl_copytoptr(resultp1,resultp2,dstptr); 1367 if (inexact) 1368 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 1369 else Set_inexactflag(); 1370 return(NOEXCEPTION); 1371 } 1372 1373 /* 1374 * Single Floating-point Multiply Fused Add 1375 */ 1376 1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 1378 1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 1380 unsigned int *status; 1381 { 1382 unsigned int opnd1, opnd2, opnd3; 1383 register unsigned int tmpresp1, tmpresp2; 1384 unsigned int rightp1, rightp2; 1385 unsigned int resultp1, resultp2 = 0; 1386 register int mpy_exponent, add_exponent, count; 1387 boolean inexact = FALSE, is_tiny = FALSE; 1388 1389 unsigned int signlessleft1, signlessright1, save; 1390 register int result_exponent, diff_exponent; 1391 int sign_save, jumpsize; 1392 1393 Sgl_copyfromptr(src1ptr,opnd1); 1394 Sgl_copyfromptr(src2ptr,opnd2); 1395 Sgl_copyfromptr(src3ptr,opnd3); 1396 1397 /* 1398 * set sign bit of result of multiply 1399 */ 1400 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 1401 Sgl_setnegativezero(resultp1); 1402 else Sgl_setzero(resultp1); 1403 1404 /* 1405 * Generate multiply exponent 1406 */ 1407 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; 1408 1409 /* 1410 * check first operand for NaN's or infinity 1411 */ 1412 if (Sgl_isinfinity_exponent(opnd1)) { 1413 if (Sgl_iszero_mantissa(opnd1)) { 1414 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { 1415 if (Sgl_iszero_exponentmantissa(opnd2)) { 1416 /* 1417 * invalid since operands are infinity 1418 * and zero 1419 */ 1420 if (Is_invalidtrap_enabled()) 1421 return(OPC_2E_INVALIDEXCEPTION); 1422 Set_invalidflag(); 1423 Sgl_makequietnan(resultp1); 1424 Sgl_copytoptr(resultp1,dstptr); 1425 return(NOEXCEPTION); 1426 } 1427 /* 1428 * Check third operand for infinity with a 1429 * sign opposite of the multiply result 1430 */ 1431 if (Sgl_isinfinity(opnd3) && 1432 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 1433 /* 1434 * invalid since attempting a magnitude 1435 * subtraction of infinities 1436 */ 1437 if (Is_invalidtrap_enabled()) 1438 return(OPC_2E_INVALIDEXCEPTION); 1439 Set_invalidflag(); 1440 Sgl_makequietnan(resultp1); 1441 Sgl_copytoptr(resultp1,dstptr); 1442 return(NOEXCEPTION); 1443 } 1444 1445 /* 1446 * return infinity 1447 */ 1448 Sgl_setinfinity_exponentmantissa(resultp1); 1449 Sgl_copytoptr(resultp1,dstptr); 1450 return(NOEXCEPTION); 1451 } 1452 } 1453 else { 1454 /* 1455 * is NaN; signaling or quiet? 1456 */ 1457 if (Sgl_isone_signaling(opnd1)) { 1458 /* trap if INVALIDTRAP enabled */ 1459 if (Is_invalidtrap_enabled()) 1460 return(OPC_2E_INVALIDEXCEPTION); 1461 /* make NaN quiet */ 1462 Set_invalidflag(); 1463 Sgl_set_quiet(opnd1); 1464 } 1465 /* 1466 * is second operand a signaling NaN? 1467 */ 1468 else if (Sgl_is_signalingnan(opnd2)) { 1469 /* trap if INVALIDTRAP enabled */ 1470 if (Is_invalidtrap_enabled()) 1471 return(OPC_2E_INVALIDEXCEPTION); 1472 /* make NaN quiet */ 1473 Set_invalidflag(); 1474 Sgl_set_quiet(opnd2); 1475 Sgl_copytoptr(opnd2,dstptr); 1476 return(NOEXCEPTION); 1477 } 1478 /* 1479 * is third operand a signaling NaN? 1480 */ 1481 else if (Sgl_is_signalingnan(opnd3)) { 1482 /* trap if INVALIDTRAP enabled */ 1483 if (Is_invalidtrap_enabled()) 1484 return(OPC_2E_INVALIDEXCEPTION); 1485 /* make NaN quiet */ 1486 Set_invalidflag(); 1487 Sgl_set_quiet(opnd3); 1488 Sgl_copytoptr(opnd3,dstptr); 1489 return(NOEXCEPTION); 1490 } 1491 /* 1492 * return quiet NaN 1493 */ 1494 Sgl_copytoptr(opnd1,dstptr); 1495 return(NOEXCEPTION); 1496 } 1497 } 1498 1499 /* 1500 * check second operand for NaN's or infinity 1501 */ 1502 if (Sgl_isinfinity_exponent(opnd2)) { 1503 if (Sgl_iszero_mantissa(opnd2)) { 1504 if (Sgl_isnotnan(opnd3)) { 1505 if (Sgl_iszero_exponentmantissa(opnd1)) { 1506 /* 1507 * invalid since multiply operands are 1508 * zero & infinity 1509 */ 1510 if (Is_invalidtrap_enabled()) 1511 return(OPC_2E_INVALIDEXCEPTION); 1512 Set_invalidflag(); 1513 Sgl_makequietnan(opnd2); 1514 Sgl_copytoptr(opnd2,dstptr); 1515 return(NOEXCEPTION); 1516 } 1517 1518 /* 1519 * Check third operand for infinity with a 1520 * sign opposite of the multiply result 1521 */ 1522 if (Sgl_isinfinity(opnd3) && 1523 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 1524 /* 1525 * invalid since attempting a magnitude 1526 * subtraction of infinities 1527 */ 1528 if (Is_invalidtrap_enabled()) 1529 return(OPC_2E_INVALIDEXCEPTION); 1530 Set_invalidflag(); 1531 Sgl_makequietnan(resultp1); 1532 Sgl_copytoptr(resultp1,dstptr); 1533 return(NOEXCEPTION); 1534 } 1535 1536 /* 1537 * return infinity 1538 */ 1539 Sgl_setinfinity_exponentmantissa(resultp1); 1540 Sgl_copytoptr(resultp1,dstptr); 1541 return(NOEXCEPTION); 1542 } 1543 } 1544 else { 1545 /* 1546 * is NaN; signaling or quiet? 1547 */ 1548 if (Sgl_isone_signaling(opnd2)) { 1549 /* trap if INVALIDTRAP enabled */ 1550 if (Is_invalidtrap_enabled()) 1551 return(OPC_2E_INVALIDEXCEPTION); 1552 /* make NaN quiet */ 1553 Set_invalidflag(); 1554 Sgl_set_quiet(opnd2); 1555 } 1556 /* 1557 * is third operand a signaling NaN? 1558 */ 1559 else if (Sgl_is_signalingnan(opnd3)) { 1560 /* trap if INVALIDTRAP enabled */ 1561 if (Is_invalidtrap_enabled()) 1562 return(OPC_2E_INVALIDEXCEPTION); 1563 /* make NaN quiet */ 1564 Set_invalidflag(); 1565 Sgl_set_quiet(opnd3); 1566 Sgl_copytoptr(opnd3,dstptr); 1567 return(NOEXCEPTION); 1568 } 1569 /* 1570 * return quiet NaN 1571 */ 1572 Sgl_copytoptr(opnd2,dstptr); 1573 return(NOEXCEPTION); 1574 } 1575 } 1576 1577 /* 1578 * check third operand for NaN's or infinity 1579 */ 1580 if (Sgl_isinfinity_exponent(opnd3)) { 1581 if (Sgl_iszero_mantissa(opnd3)) { 1582 /* return infinity */ 1583 Sgl_copytoptr(opnd3,dstptr); 1584 return(NOEXCEPTION); 1585 } else { 1586 /* 1587 * is NaN; signaling or quiet? 1588 */ 1589 if (Sgl_isone_signaling(opnd3)) { 1590 /* trap if INVALIDTRAP enabled */ 1591 if (Is_invalidtrap_enabled()) 1592 return(OPC_2E_INVALIDEXCEPTION); 1593 /* make NaN quiet */ 1594 Set_invalidflag(); 1595 Sgl_set_quiet(opnd3); 1596 } 1597 /* 1598 * return quiet NaN 1599 */ 1600 Sgl_copytoptr(opnd3,dstptr); 1601 return(NOEXCEPTION); 1602 } 1603 } 1604 1605 /* 1606 * Generate multiply mantissa 1607 */ 1608 if (Sgl_isnotzero_exponent(opnd1)) { 1609 /* set hidden bit */ 1610 Sgl_clear_signexponent_set_hidden(opnd1); 1611 } 1612 else { 1613 /* check for zero */ 1614 if (Sgl_iszero_mantissa(opnd1)) { 1615 /* 1616 * Perform the add opnd3 with zero here. 1617 */ 1618 if (Sgl_iszero_exponentmantissa(opnd3)) { 1619 if (Is_rounding_mode(ROUNDMINUS)) { 1620 Sgl_or_signs(opnd3,resultp1); 1621 } else { 1622 Sgl_and_signs(opnd3,resultp1); 1623 } 1624 } 1625 /* 1626 * Now let's check for trapped underflow case. 1627 */ 1628 else if (Sgl_iszero_exponent(opnd3) && 1629 Is_underflowtrap_enabled()) { 1630 /* need to normalize results mantissa */ 1631 sign_save = Sgl_signextendedsign(opnd3); 1632 result_exponent = 0; 1633 Sgl_leftshiftby1(opnd3); 1634 Sgl_normalize(opnd3,result_exponent); 1635 Sgl_set_sign(opnd3,/*using*/sign_save); 1636 Sgl_setwrapped_exponent(opnd3,result_exponent, 1637 unfl); 1638 Sgl_copytoptr(opnd3,dstptr); 1639 /* inexact = FALSE */ 1640 return(OPC_2E_UNDERFLOWEXCEPTION); 1641 } 1642 Sgl_copytoptr(opnd3,dstptr); 1643 return(NOEXCEPTION); 1644 } 1645 /* is denormalized, adjust exponent */ 1646 Sgl_clear_signexponent(opnd1); 1647 Sgl_leftshiftby1(opnd1); 1648 Sgl_normalize(opnd1,mpy_exponent); 1649 } 1650 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 1651 if (Sgl_isnotzero_exponent(opnd2)) { 1652 Sgl_clear_signexponent_set_hidden(opnd2); 1653 } 1654 else { 1655 /* check for zero */ 1656 if (Sgl_iszero_mantissa(opnd2)) { 1657 /* 1658 * Perform the add opnd3 with zero here. 1659 */ 1660 if (Sgl_iszero_exponentmantissa(opnd3)) { 1661 if (Is_rounding_mode(ROUNDMINUS)) { 1662 Sgl_or_signs(opnd3,resultp1); 1663 } else { 1664 Sgl_and_signs(opnd3,resultp1); 1665 } 1666 } 1667 /* 1668 * Now let's check for trapped underflow case. 1669 */ 1670 else if (Sgl_iszero_exponent(opnd3) && 1671 Is_underflowtrap_enabled()) { 1672 /* need to normalize results mantissa */ 1673 sign_save = Sgl_signextendedsign(opnd3); 1674 result_exponent = 0; 1675 Sgl_leftshiftby1(opnd3); 1676 Sgl_normalize(opnd3,result_exponent); 1677 Sgl_set_sign(opnd3,/*using*/sign_save); 1678 Sgl_setwrapped_exponent(opnd3,result_exponent, 1679 unfl); 1680 Sgl_copytoptr(opnd3,dstptr); 1681 /* inexact = FALSE */ 1682 return(OPC_2E_UNDERFLOWEXCEPTION); 1683 } 1684 Sgl_copytoptr(opnd3,dstptr); 1685 return(NOEXCEPTION); 1686 } 1687 /* is denormalized; want to normalize */ 1688 Sgl_clear_signexponent(opnd2); 1689 Sgl_leftshiftby1(opnd2); 1690 Sgl_normalize(opnd2,mpy_exponent); 1691 } 1692 1693 /* Multiply the first two source mantissas together */ 1694 1695 /* 1696 * The intermediate result will be kept in tmpres, 1697 * which needs enough room for 106 bits of mantissa, 1698 * so lets call it a Double extended. 1699 */ 1700 Sglext_setzero(tmpresp1,tmpresp2); 1701 1702 /* 1703 * Four bits at a time are inspected in each loop, and a 1704 * simple shift and add multiply algorithm is used. 1705 */ 1706 for (count = SGL_P-1; count >= 0; count -= 4) { 1707 Sglext_rightshiftby4(tmpresp1,tmpresp2); 1708 if (Sbit28(opnd1)) { 1709 /* Twoword_add should be an ADD followed by 2 ADDC's */ 1710 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); 1711 } 1712 if (Sbit29(opnd1)) { 1713 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); 1714 } 1715 if (Sbit30(opnd1)) { 1716 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); 1717 } 1718 if (Sbit31(opnd1)) { 1719 Twoword_add(tmpresp1, tmpresp2, opnd2, 0); 1720 } 1721 Sgl_rightshiftby4(opnd1); 1722 } 1723 if (Is_sexthiddenoverflow(tmpresp1)) { 1724 /* result mantissa >= 2 (mantissa overflow) */ 1725 mpy_exponent++; 1726 Sglext_rightshiftby4(tmpresp1,tmpresp2); 1727 } else { 1728 Sglext_rightshiftby3(tmpresp1,tmpresp2); 1729 } 1730 1731 /* 1732 * Restore the sign of the mpy result which was saved in resultp1. 1733 * The exponent will continue to be kept in mpy_exponent. 1734 */ 1735 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); 1736 1737 /* 1738 * No rounding is required, since the result of the multiply 1739 * is exact in the extended format. 1740 */ 1741 1742 /* 1743 * Now we are ready to perform the add portion of the operation. 1744 * 1745 * The exponents need to be kept as integers for now, since the 1746 * multiply result might not fit into the exponent field. We 1747 * can't overflow or underflow because of this yet, since the 1748 * add could bring the final result back into range. 1749 */ 1750 add_exponent = Sgl_exponent(opnd3); 1751 1752 /* 1753 * Check for denormalized or zero add operand. 1754 */ 1755 if (add_exponent == 0) { 1756 /* check for zero */ 1757 if (Sgl_iszero_mantissa(opnd3)) { 1758 /* right is zero */ 1759 /* Left can't be zero and must be result. 1760 * 1761 * The final result is now in tmpres and mpy_exponent, 1762 * and needs to be rounded and squeezed back into 1763 * double precision format from double extended. 1764 */ 1765 result_exponent = mpy_exponent; 1766 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); 1767 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ 1768 goto round; 1769 } 1770 1771 /* 1772 * Neither are zeroes. 1773 * Adjust exponent and normalize add operand. 1774 */ 1775 sign_save = Sgl_signextendedsign(opnd3); /* save sign */ 1776 Sgl_clear_signexponent(opnd3); 1777 Sgl_leftshiftby1(opnd3); 1778 Sgl_normalize(opnd3,add_exponent); 1779 Sgl_set_sign(opnd3,sign_save); /* restore sign */ 1780 } else { 1781 Sgl_clear_exponent_set_hidden(opnd3); 1782 } 1783 /* 1784 * Copy opnd3 to the double extended variable called right. 1785 */ 1786 Sgl_copyto_sglext(opnd3,rightp1,rightp2); 1787 1788 /* 1789 * A zero "save" helps discover equal operands (for later), 1790 * and is used in swapping operands (if needed). 1791 */ 1792 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); 1793 1794 /* 1795 * Compare magnitude of operands. 1796 */ 1797 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); 1798 Sglext_copytoint_exponentmantissa(rightp1,signlessright1); 1799 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 1800 Sglext_ismagnitudeless(signlessleft1,signlessright1)) { 1801 /* 1802 * Set the left operand to the larger one by XOR swap. 1803 * First finish the first word "save". 1804 */ 1805 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); 1806 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 1807 Sglext_swap_lower(tmpresp2,rightp2); 1808 /* also setup exponents used in rest of routine */ 1809 diff_exponent = add_exponent - mpy_exponent; 1810 result_exponent = add_exponent; 1811 } else { 1812 /* also setup exponents used in rest of routine */ 1813 diff_exponent = mpy_exponent - add_exponent; 1814 result_exponent = mpy_exponent; 1815 } 1816 /* Invariant: left is not smaller than right. */ 1817 1818 /* 1819 * Special case alignment of operands that would force alignment 1820 * beyond the extent of the extension. A further optimization 1821 * could special case this but only reduces the path length for 1822 * this infrequent case. 1823 */ 1824 if (diff_exponent > SGLEXT_THRESHOLD) { 1825 diff_exponent = SGLEXT_THRESHOLD; 1826 } 1827 1828 /* Align right operand by shifting it to the right */ 1829 Sglext_clear_sign(rightp1); 1830 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); 1831 1832 /* Treat sum and difference of the operands separately. */ 1833 if ((int)save < 0) { 1834 /* 1835 * Difference of the two operands. Overflow can occur if the 1836 * multiply overflowed. A borrow can occur out of the hidden 1837 * bit and force a post normalization phase. 1838 */ 1839 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, 1840 resultp1,resultp2); 1841 sign_save = Sgl_signextendedsign(resultp1); 1842 if (Sgl_iszero_hidden(resultp1)) { 1843 /* Handle normalization */ 1844 /* A straightforward algorithm would now shift the 1845 * result and extension left until the hidden bit 1846 * becomes one. Not all of the extension bits need 1847 * participate in the shift. Only the two most 1848 * significant bits (round and guard) are needed. 1849 * If only a single shift is needed then the guard 1850 * bit becomes a significant low order bit and the 1851 * extension must participate in the rounding. 1852 * If more than a single shift is needed, then all 1853 * bits to the right of the guard bit are zeros, 1854 * and the guard bit may or may not be zero. */ 1855 Sglext_leftshiftby1(resultp1,resultp2); 1856 1857 /* Need to check for a zero result. The sign and 1858 * exponent fields have already been zeroed. The more 1859 * efficient test of the full object can be used. 1860 */ 1861 if (Sglext_iszero(resultp1,resultp2)) { 1862 /* Must have been "x-x" or "x+(-x)". */ 1863 if (Is_rounding_mode(ROUNDMINUS)) 1864 Sgl_setone_sign(resultp1); 1865 Sgl_copytoptr(resultp1,dstptr); 1866 return(NOEXCEPTION); 1867 } 1868 result_exponent--; 1869 1870 /* Look to see if normalization is finished. */ 1871 if (Sgl_isone_hidden(resultp1)) { 1872 /* No further normalization is needed */ 1873 goto round; 1874 } 1875 1876 /* Discover first one bit to determine shift amount. 1877 * Use a modified binary search. We have already 1878 * shifted the result one position right and still 1879 * not found a one so the remainder of the extension 1880 * must be zero and simplifies rounding. */ 1881 /* Scan bytes */ 1882 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { 1883 Sglext_leftshiftby8(resultp1,resultp2); 1884 result_exponent -= 8; 1885 } 1886 /* Now narrow it down to the nibble */ 1887 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { 1888 /* The lower nibble contains the 1889 * normalizing one */ 1890 Sglext_leftshiftby4(resultp1,resultp2); 1891 result_exponent -= 4; 1892 } 1893 /* Select case where first bit is set (already 1894 * normalized) otherwise select the proper shift. */ 1895 jumpsize = Sgl_hiddenhigh3mantissa(resultp1); 1896 if (jumpsize <= 7) switch(jumpsize) { 1897 case 1: 1898 Sglext_leftshiftby3(resultp1,resultp2); 1899 result_exponent -= 3; 1900 break; 1901 case 2: 1902 case 3: 1903 Sglext_leftshiftby2(resultp1,resultp2); 1904 result_exponent -= 2; 1905 break; 1906 case 4: 1907 case 5: 1908 case 6: 1909 case 7: 1910 Sglext_leftshiftby1(resultp1,resultp2); 1911 result_exponent -= 1; 1912 break; 1913 } 1914 } /* end if (hidden...)... */ 1915 /* Fall through and round */ 1916 } /* end if (save < 0)... */ 1917 else { 1918 /* Add magnitudes */ 1919 Sglext_addition(tmpresp1,tmpresp2, 1920 rightp1,rightp2, /*to*/resultp1,resultp2); 1921 sign_save = Sgl_signextendedsign(resultp1); 1922 if (Sgl_isone_hiddenoverflow(resultp1)) { 1923 /* Prenormalization required. */ 1924 Sglext_arithrightshiftby1(resultp1,resultp2); 1925 result_exponent++; 1926 } /* end if hiddenoverflow... */ 1927 } /* end else ...add magnitudes... */ 1928 1929 /* Round the result. If the extension and lower two words are 1930 * all zeros, then the result is exact. Otherwise round in the 1931 * correct direction. Underflow is possible. If a postnormalization 1932 * is necessary, then the mantissa is all zeros so no shift is needed. 1933 */ 1934 round: 1935 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 1936 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); 1937 } 1938 Sgl_set_sign(resultp1,/*using*/sign_save); 1939 if (Sglext_isnotzero_mantissap2(resultp2)) { 1940 inexact = TRUE; 1941 switch(Rounding_mode()) { 1942 case ROUNDNEAREST: /* The default. */ 1943 if (Sglext_isone_highp2(resultp2)) { 1944 /* at least 1/2 ulp */ 1945 if (Sglext_isnotzero_low31p2(resultp2) || 1946 Sglext_isone_lowp1(resultp1)) { 1947 /* either exactly half way and odd or 1948 * more than 1/2ulp */ 1949 Sgl_increment(resultp1); 1950 } 1951 } 1952 break; 1953 1954 case ROUNDPLUS: 1955 if (Sgl_iszero_sign(resultp1)) { 1956 /* Round up positive results */ 1957 Sgl_increment(resultp1); 1958 } 1959 break; 1960 1961 case ROUNDMINUS: 1962 if (Sgl_isone_sign(resultp1)) { 1963 /* Round down negative results */ 1964 Sgl_increment(resultp1); 1965 } 1966 1967 case ROUNDZERO:; 1968 /* truncate is simple */ 1969 } /* end switch... */ 1970 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; 1971 } 1972 if (result_exponent >= SGL_INFINITY_EXPONENT) { 1973 /* Overflow */ 1974 if (Is_overflowtrap_enabled()) { 1975 /* 1976 * Adjust bias of result 1977 */ 1978 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); 1979 Sgl_copytoptr(resultp1,dstptr); 1980 if (inexact) 1981 if (Is_inexacttrap_enabled()) 1982 return (OPC_2E_OVERFLOWEXCEPTION | 1983 OPC_2E_INEXACTEXCEPTION); 1984 else Set_inexactflag(); 1985 return (OPC_2E_OVERFLOWEXCEPTION); 1986 } 1987 inexact = TRUE; 1988 Set_overflowflag(); 1989 Sgl_setoverflow(resultp1); 1990 } else if (result_exponent <= 0) { /* underflow case */ 1991 if (Is_underflowtrap_enabled()) { 1992 /* 1993 * Adjust bias of result 1994 */ 1995 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); 1996 Sgl_copytoptr(resultp1,dstptr); 1997 if (inexact) 1998 if (Is_inexacttrap_enabled()) 1999 return (OPC_2E_UNDERFLOWEXCEPTION | 2000 OPC_2E_INEXACTEXCEPTION); 2001 else Set_inexactflag(); 2002 return(OPC_2E_UNDERFLOWEXCEPTION); 2003 } 2004 else if (inexact && is_tiny) Set_underflowflag(); 2005 } 2006 else Sgl_set_exponent(resultp1,result_exponent); 2007 Sgl_copytoptr(resultp1,dstptr); 2008 if (inexact) 2009 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 2010 else Set_inexactflag(); 2011 return(NOEXCEPTION); 2012 } 2013 2014 /* 2015 * Single Floating-point Multiply Negate Fused Add 2016 */ 2017 2018 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 2019 2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 2021 unsigned int *status; 2022 { 2023 unsigned int opnd1, opnd2, opnd3; 2024 register unsigned int tmpresp1, tmpresp2; 2025 unsigned int rightp1, rightp2; 2026 unsigned int resultp1, resultp2 = 0; 2027 register int mpy_exponent, add_exponent, count; 2028 boolean inexact = FALSE, is_tiny = FALSE; 2029 2030 unsigned int signlessleft1, signlessright1, save; 2031 register int result_exponent, diff_exponent; 2032 int sign_save, jumpsize; 2033 2034 Sgl_copyfromptr(src1ptr,opnd1); 2035 Sgl_copyfromptr(src2ptr,opnd2); 2036 Sgl_copyfromptr(src3ptr,opnd3); 2037 2038 /* 2039 * set sign bit of result of multiply 2040 */ 2041 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 2042 Sgl_setzero(resultp1); 2043 else 2044 Sgl_setnegativezero(resultp1); 2045 2046 /* 2047 * Generate multiply exponent 2048 */ 2049 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; 2050 2051 /* 2052 * check first operand for NaN's or infinity 2053 */ 2054 if (Sgl_isinfinity_exponent(opnd1)) { 2055 if (Sgl_iszero_mantissa(opnd1)) { 2056 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { 2057 if (Sgl_iszero_exponentmantissa(opnd2)) { 2058 /* 2059 * invalid since operands are infinity 2060 * and zero 2061 */ 2062 if (Is_invalidtrap_enabled()) 2063 return(OPC_2E_INVALIDEXCEPTION); 2064 Set_invalidflag(); 2065 Sgl_makequietnan(resultp1); 2066 Sgl_copytoptr(resultp1,dstptr); 2067 return(NOEXCEPTION); 2068 } 2069 /* 2070 * Check third operand for infinity with a 2071 * sign opposite of the multiply result 2072 */ 2073 if (Sgl_isinfinity(opnd3) && 2074 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 2075 /* 2076 * invalid since attempting a magnitude 2077 * subtraction of infinities 2078 */ 2079 if (Is_invalidtrap_enabled()) 2080 return(OPC_2E_INVALIDEXCEPTION); 2081 Set_invalidflag(); 2082 Sgl_makequietnan(resultp1); 2083 Sgl_copytoptr(resultp1,dstptr); 2084 return(NOEXCEPTION); 2085 } 2086 2087 /* 2088 * return infinity 2089 */ 2090 Sgl_setinfinity_exponentmantissa(resultp1); 2091 Sgl_copytoptr(resultp1,dstptr); 2092 return(NOEXCEPTION); 2093 } 2094 } 2095 else { 2096 /* 2097 * is NaN; signaling or quiet? 2098 */ 2099 if (Sgl_isone_signaling(opnd1)) { 2100 /* trap if INVALIDTRAP enabled */ 2101 if (Is_invalidtrap_enabled()) 2102 return(OPC_2E_INVALIDEXCEPTION); 2103 /* make NaN quiet */ 2104 Set_invalidflag(); 2105 Sgl_set_quiet(opnd1); 2106 } 2107 /* 2108 * is second operand a signaling NaN? 2109 */ 2110 else if (Sgl_is_signalingnan(opnd2)) { 2111 /* trap if INVALIDTRAP enabled */ 2112 if (Is_invalidtrap_enabled()) 2113 return(OPC_2E_INVALIDEXCEPTION); 2114 /* make NaN quiet */ 2115 Set_invalidflag(); 2116 Sgl_set_quiet(opnd2); 2117 Sgl_copytoptr(opnd2,dstptr); 2118 return(NOEXCEPTION); 2119 } 2120 /* 2121 * is third operand a signaling NaN? 2122 */ 2123 else if (Sgl_is_signalingnan(opnd3)) { 2124 /* trap if INVALIDTRAP enabled */ 2125 if (Is_invalidtrap_enabled()) 2126 return(OPC_2E_INVALIDEXCEPTION); 2127 /* make NaN quiet */ 2128 Set_invalidflag(); 2129 Sgl_set_quiet(opnd3); 2130 Sgl_copytoptr(opnd3,dstptr); 2131 return(NOEXCEPTION); 2132 } 2133 /* 2134 * return quiet NaN 2135 */ 2136 Sgl_copytoptr(opnd1,dstptr); 2137 return(NOEXCEPTION); 2138 } 2139 } 2140 2141 /* 2142 * check second operand for NaN's or infinity 2143 */ 2144 if (Sgl_isinfinity_exponent(opnd2)) { 2145 if (Sgl_iszero_mantissa(opnd2)) { 2146 if (Sgl_isnotnan(opnd3)) { 2147 if (Sgl_iszero_exponentmantissa(opnd1)) { 2148 /* 2149 * invalid since multiply operands are 2150 * zero & infinity 2151 */ 2152 if (Is_invalidtrap_enabled()) 2153 return(OPC_2E_INVALIDEXCEPTION); 2154 Set_invalidflag(); 2155 Sgl_makequietnan(opnd2); 2156 Sgl_copytoptr(opnd2,dstptr); 2157 return(NOEXCEPTION); 2158 } 2159 2160 /* 2161 * Check third operand for infinity with a 2162 * sign opposite of the multiply result 2163 */ 2164 if (Sgl_isinfinity(opnd3) && 2165 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 2166 /* 2167 * invalid since attempting a magnitude 2168 * subtraction of infinities 2169 */ 2170 if (Is_invalidtrap_enabled()) 2171 return(OPC_2E_INVALIDEXCEPTION); 2172 Set_invalidflag(); 2173 Sgl_makequietnan(resultp1); 2174 Sgl_copytoptr(resultp1,dstptr); 2175 return(NOEXCEPTION); 2176 } 2177 2178 /* 2179 * return infinity 2180 */ 2181 Sgl_setinfinity_exponentmantissa(resultp1); 2182 Sgl_copytoptr(resultp1,dstptr); 2183 return(NOEXCEPTION); 2184 } 2185 } 2186 else { 2187 /* 2188 * is NaN; signaling or quiet? 2189 */ 2190 if (Sgl_isone_signaling(opnd2)) { 2191 /* trap if INVALIDTRAP enabled */ 2192 if (Is_invalidtrap_enabled()) 2193 return(OPC_2E_INVALIDEXCEPTION); 2194 /* make NaN quiet */ 2195 Set_invalidflag(); 2196 Sgl_set_quiet(opnd2); 2197 } 2198 /* 2199 * is third operand a signaling NaN? 2200 */ 2201 else if (Sgl_is_signalingnan(opnd3)) { 2202 /* trap if INVALIDTRAP enabled */ 2203 if (Is_invalidtrap_enabled()) 2204 return(OPC_2E_INVALIDEXCEPTION); 2205 /* make NaN quiet */ 2206 Set_invalidflag(); 2207 Sgl_set_quiet(opnd3); 2208 Sgl_copytoptr(opnd3,dstptr); 2209 return(NOEXCEPTION); 2210 } 2211 /* 2212 * return quiet NaN 2213 */ 2214 Sgl_copytoptr(opnd2,dstptr); 2215 return(NOEXCEPTION); 2216 } 2217 } 2218 2219 /* 2220 * check third operand for NaN's or infinity 2221 */ 2222 if (Sgl_isinfinity_exponent(opnd3)) { 2223 if (Sgl_iszero_mantissa(opnd3)) { 2224 /* return infinity */ 2225 Sgl_copytoptr(opnd3,dstptr); 2226 return(NOEXCEPTION); 2227 } else { 2228 /* 2229 * is NaN; signaling or quiet? 2230 */ 2231 if (Sgl_isone_signaling(opnd3)) { 2232 /* trap if INVALIDTRAP enabled */ 2233 if (Is_invalidtrap_enabled()) 2234 return(OPC_2E_INVALIDEXCEPTION); 2235 /* make NaN quiet */ 2236 Set_invalidflag(); 2237 Sgl_set_quiet(opnd3); 2238 } 2239 /* 2240 * return quiet NaN 2241 */ 2242 Sgl_copytoptr(opnd3,dstptr); 2243 return(NOEXCEPTION); 2244 } 2245 } 2246 2247 /* 2248 * Generate multiply mantissa 2249 */ 2250 if (Sgl_isnotzero_exponent(opnd1)) { 2251 /* set hidden bit */ 2252 Sgl_clear_signexponent_set_hidden(opnd1); 2253 } 2254 else { 2255 /* check for zero */ 2256 if (Sgl_iszero_mantissa(opnd1)) { 2257 /* 2258 * Perform the add opnd3 with zero here. 2259 */ 2260 if (Sgl_iszero_exponentmantissa(opnd3)) { 2261 if (Is_rounding_mode(ROUNDMINUS)) { 2262 Sgl_or_signs(opnd3,resultp1); 2263 } else { 2264 Sgl_and_signs(opnd3,resultp1); 2265 } 2266 } 2267 /* 2268 * Now let's check for trapped underflow case. 2269 */ 2270 else if (Sgl_iszero_exponent(opnd3) && 2271 Is_underflowtrap_enabled()) { 2272 /* need to normalize results mantissa */ 2273 sign_save = Sgl_signextendedsign(opnd3); 2274 result_exponent = 0; 2275 Sgl_leftshiftby1(opnd3); 2276 Sgl_normalize(opnd3,result_exponent); 2277 Sgl_set_sign(opnd3,/*using*/sign_save); 2278 Sgl_setwrapped_exponent(opnd3,result_exponent, 2279 unfl); 2280 Sgl_copytoptr(opnd3,dstptr); 2281 /* inexact = FALSE */ 2282 return(OPC_2E_UNDERFLOWEXCEPTION); 2283 } 2284 Sgl_copytoptr(opnd3,dstptr); 2285 return(NOEXCEPTION); 2286 } 2287 /* is denormalized, adjust exponent */ 2288 Sgl_clear_signexponent(opnd1); 2289 Sgl_leftshiftby1(opnd1); 2290 Sgl_normalize(opnd1,mpy_exponent); 2291 } 2292 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 2293 if (Sgl_isnotzero_exponent(opnd2)) { 2294 Sgl_clear_signexponent_set_hidden(opnd2); 2295 } 2296 else { 2297 /* check for zero */ 2298 if (Sgl_iszero_mantissa(opnd2)) { 2299 /* 2300 * Perform the add opnd3 with zero here. 2301 */ 2302 if (Sgl_iszero_exponentmantissa(opnd3)) { 2303 if (Is_rounding_mode(ROUNDMINUS)) { 2304 Sgl_or_signs(opnd3,resultp1); 2305 } else { 2306 Sgl_and_signs(opnd3,resultp1); 2307 } 2308 } 2309 /* 2310 * Now let's check for trapped underflow case. 2311 */ 2312 else if (Sgl_iszero_exponent(opnd3) && 2313 Is_underflowtrap_enabled()) { 2314 /* need to normalize results mantissa */ 2315 sign_save = Sgl_signextendedsign(opnd3); 2316 result_exponent = 0; 2317 Sgl_leftshiftby1(opnd3); 2318 Sgl_normalize(opnd3,result_exponent); 2319 Sgl_set_sign(opnd3,/*using*/sign_save); 2320 Sgl_setwrapped_exponent(opnd3,result_exponent, 2321 unfl); 2322 Sgl_copytoptr(opnd3,dstptr); 2323 /* inexact = FALSE */ 2324 return(OPC_2E_UNDERFLOWEXCEPTION); 2325 } 2326 Sgl_copytoptr(opnd3,dstptr); 2327 return(NOEXCEPTION); 2328 } 2329 /* is denormalized; want to normalize */ 2330 Sgl_clear_signexponent(opnd2); 2331 Sgl_leftshiftby1(opnd2); 2332 Sgl_normalize(opnd2,mpy_exponent); 2333 } 2334 2335 /* Multiply the first two source mantissas together */ 2336 2337 /* 2338 * The intermediate result will be kept in tmpres, 2339 * which needs enough room for 106 bits of mantissa, 2340 * so lets call it a Double extended. 2341 */ 2342 Sglext_setzero(tmpresp1,tmpresp2); 2343 2344 /* 2345 * Four bits at a time are inspected in each loop, and a 2346 * simple shift and add multiply algorithm is used. 2347 */ 2348 for (count = SGL_P-1; count >= 0; count -= 4) { 2349 Sglext_rightshiftby4(tmpresp1,tmpresp2); 2350 if (Sbit28(opnd1)) { 2351 /* Twoword_add should be an ADD followed by 2 ADDC's */ 2352 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); 2353 } 2354 if (Sbit29(opnd1)) { 2355 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); 2356 } 2357 if (Sbit30(opnd1)) { 2358 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); 2359 } 2360 if (Sbit31(opnd1)) { 2361 Twoword_add(tmpresp1, tmpresp2, opnd2, 0); 2362 } 2363 Sgl_rightshiftby4(opnd1); 2364 } 2365 if (Is_sexthiddenoverflow(tmpresp1)) { 2366 /* result mantissa >= 2 (mantissa overflow) */ 2367 mpy_exponent++; 2368 Sglext_rightshiftby4(tmpresp1,tmpresp2); 2369 } else { 2370 Sglext_rightshiftby3(tmpresp1,tmpresp2); 2371 } 2372 2373 /* 2374 * Restore the sign of the mpy result which was saved in resultp1. 2375 * The exponent will continue to be kept in mpy_exponent. 2376 */ 2377 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); 2378 2379 /* 2380 * No rounding is required, since the result of the multiply 2381 * is exact in the extended format. 2382 */ 2383 2384 /* 2385 * Now we are ready to perform the add portion of the operation. 2386 * 2387 * The exponents need to be kept as integers for now, since the 2388 * multiply result might not fit into the exponent field. We 2389 * can't overflow or underflow because of this yet, since the 2390 * add could bring the final result back into range. 2391 */ 2392 add_exponent = Sgl_exponent(opnd3); 2393 2394 /* 2395 * Check for denormalized or zero add operand. 2396 */ 2397 if (add_exponent == 0) { 2398 /* check for zero */ 2399 if (Sgl_iszero_mantissa(opnd3)) { 2400 /* right is zero */ 2401 /* Left can't be zero and must be result. 2402 * 2403 * The final result is now in tmpres and mpy_exponent, 2404 * and needs to be rounded and squeezed back into 2405 * double precision format from double extended. 2406 */ 2407 result_exponent = mpy_exponent; 2408 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); 2409 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ 2410 goto round; 2411 } 2412 2413 /* 2414 * Neither are zeroes. 2415 * Adjust exponent and normalize add operand. 2416 */ 2417 sign_save = Sgl_signextendedsign(opnd3); /* save sign */ 2418 Sgl_clear_signexponent(opnd3); 2419 Sgl_leftshiftby1(opnd3); 2420 Sgl_normalize(opnd3,add_exponent); 2421 Sgl_set_sign(opnd3,sign_save); /* restore sign */ 2422 } else { 2423 Sgl_clear_exponent_set_hidden(opnd3); 2424 } 2425 /* 2426 * Copy opnd3 to the double extended variable called right. 2427 */ 2428 Sgl_copyto_sglext(opnd3,rightp1,rightp2); 2429 2430 /* 2431 * A zero "save" helps discover equal operands (for later), 2432 * and is used in swapping operands (if needed). 2433 */ 2434 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); 2435 2436 /* 2437 * Compare magnitude of operands. 2438 */ 2439 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); 2440 Sglext_copytoint_exponentmantissa(rightp1,signlessright1); 2441 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 2442 Sglext_ismagnitudeless(signlessleft1,signlessright1)) { 2443 /* 2444 * Set the left operand to the larger one by XOR swap. 2445 * First finish the first word "save". 2446 */ 2447 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); 2448 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 2449 Sglext_swap_lower(tmpresp2,rightp2); 2450 /* also setup exponents used in rest of routine */ 2451 diff_exponent = add_exponent - mpy_exponent; 2452 result_exponent = add_exponent; 2453 } else { 2454 /* also setup exponents used in rest of routine */ 2455 diff_exponent = mpy_exponent - add_exponent; 2456 result_exponent = mpy_exponent; 2457 } 2458 /* Invariant: left is not smaller than right. */ 2459 2460 /* 2461 * Special case alignment of operands that would force alignment 2462 * beyond the extent of the extension. A further optimization 2463 * could special case this but only reduces the path length for 2464 * this infrequent case. 2465 */ 2466 if (diff_exponent > SGLEXT_THRESHOLD) { 2467 diff_exponent = SGLEXT_THRESHOLD; 2468 } 2469 2470 /* Align right operand by shifting it to the right */ 2471 Sglext_clear_sign(rightp1); 2472 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); 2473 2474 /* Treat sum and difference of the operands separately. */ 2475 if ((int)save < 0) { 2476 /* 2477 * Difference of the two operands. Overflow can occur if the 2478 * multiply overflowed. A borrow can occur out of the hidden 2479 * bit and force a post normalization phase. 2480 */ 2481 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, 2482 resultp1,resultp2); 2483 sign_save = Sgl_signextendedsign(resultp1); 2484 if (Sgl_iszero_hidden(resultp1)) { 2485 /* Handle normalization */ 2486 /* A straightforward algorithm would now shift the 2487 * result and extension left until the hidden bit 2488 * becomes one. Not all of the extension bits need 2489 * participate in the shift. Only the two most 2490 * significant bits (round and guard) are needed. 2491 * If only a single shift is needed then the guard 2492 * bit becomes a significant low order bit and the 2493 * extension must participate in the rounding. 2494 * If more than a single shift is needed, then all 2495 * bits to the right of the guard bit are zeros, 2496 * and the guard bit may or may not be zero. */ 2497 Sglext_leftshiftby1(resultp1,resultp2); 2498 2499 /* Need to check for a zero result. The sign and 2500 * exponent fields have already been zeroed. The more 2501 * efficient test of the full object can be used. 2502 */ 2503 if (Sglext_iszero(resultp1,resultp2)) { 2504 /* Must have been "x-x" or "x+(-x)". */ 2505 if (Is_rounding_mode(ROUNDMINUS)) 2506 Sgl_setone_sign(resultp1); 2507 Sgl_copytoptr(resultp1,dstptr); 2508 return(NOEXCEPTION); 2509 } 2510 result_exponent--; 2511 2512 /* Look to see if normalization is finished. */ 2513 if (Sgl_isone_hidden(resultp1)) { 2514 /* No further normalization is needed */ 2515 goto round; 2516 } 2517 2518 /* Discover first one bit to determine shift amount. 2519 * Use a modified binary search. We have already 2520 * shifted the result one position right and still 2521 * not found a one so the remainder of the extension 2522 * must be zero and simplifies rounding. */ 2523 /* Scan bytes */ 2524 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { 2525 Sglext_leftshiftby8(resultp1,resultp2); 2526 result_exponent -= 8; 2527 } 2528 /* Now narrow it down to the nibble */ 2529 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { 2530 /* The lower nibble contains the 2531 * normalizing one */ 2532 Sglext_leftshiftby4(resultp1,resultp2); 2533 result_exponent -= 4; 2534 } 2535 /* Select case where first bit is set (already 2536 * normalized) otherwise select the proper shift. */ 2537 jumpsize = Sgl_hiddenhigh3mantissa(resultp1); 2538 if (jumpsize <= 7) switch(jumpsize) { 2539 case 1: 2540 Sglext_leftshiftby3(resultp1,resultp2); 2541 result_exponent -= 3; 2542 break; 2543 case 2: 2544 case 3: 2545 Sglext_leftshiftby2(resultp1,resultp2); 2546 result_exponent -= 2; 2547 break; 2548 case 4: 2549 case 5: 2550 case 6: 2551 case 7: 2552 Sglext_leftshiftby1(resultp1,resultp2); 2553 result_exponent -= 1; 2554 break; 2555 } 2556 } /* end if (hidden...)... */ 2557 /* Fall through and round */ 2558 } /* end if (save < 0)... */ 2559 else { 2560 /* Add magnitudes */ 2561 Sglext_addition(tmpresp1,tmpresp2, 2562 rightp1,rightp2, /*to*/resultp1,resultp2); 2563 sign_save = Sgl_signextendedsign(resultp1); 2564 if (Sgl_isone_hiddenoverflow(resultp1)) { 2565 /* Prenormalization required. */ 2566 Sglext_arithrightshiftby1(resultp1,resultp2); 2567 result_exponent++; 2568 } /* end if hiddenoverflow... */ 2569 } /* end else ...add magnitudes... */ 2570 2571 /* Round the result. If the extension and lower two words are 2572 * all zeros, then the result is exact. Otherwise round in the 2573 * correct direction. Underflow is possible. If a postnormalization 2574 * is necessary, then the mantissa is all zeros so no shift is needed. 2575 */ 2576 round: 2577 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 2578 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); 2579 } 2580 Sgl_set_sign(resultp1,/*using*/sign_save); 2581 if (Sglext_isnotzero_mantissap2(resultp2)) { 2582 inexact = TRUE; 2583 switch(Rounding_mode()) { 2584 case ROUNDNEAREST: /* The default. */ 2585 if (Sglext_isone_highp2(resultp2)) { 2586 /* at least 1/2 ulp */ 2587 if (Sglext_isnotzero_low31p2(resultp2) || 2588 Sglext_isone_lowp1(resultp1)) { 2589 /* either exactly half way and odd or 2590 * more than 1/2ulp */ 2591 Sgl_increment(resultp1); 2592 } 2593 } 2594 break; 2595 2596 case ROUNDPLUS: 2597 if (Sgl_iszero_sign(resultp1)) { 2598 /* Round up positive results */ 2599 Sgl_increment(resultp1); 2600 } 2601 break; 2602 2603 case ROUNDMINUS: 2604 if (Sgl_isone_sign(resultp1)) { 2605 /* Round down negative results */ 2606 Sgl_increment(resultp1); 2607 } 2608 2609 case ROUNDZERO:; 2610 /* truncate is simple */ 2611 } /* end switch... */ 2612 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; 2613 } 2614 if (result_exponent >= SGL_INFINITY_EXPONENT) { 2615 /* Overflow */ 2616 if (Is_overflowtrap_enabled()) { 2617 /* 2618 * Adjust bias of result 2619 */ 2620 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); 2621 Sgl_copytoptr(resultp1,dstptr); 2622 if (inexact) 2623 if (Is_inexacttrap_enabled()) 2624 return (OPC_2E_OVERFLOWEXCEPTION | 2625 OPC_2E_INEXACTEXCEPTION); 2626 else Set_inexactflag(); 2627 return (OPC_2E_OVERFLOWEXCEPTION); 2628 } 2629 inexact = TRUE; 2630 Set_overflowflag(); 2631 Sgl_setoverflow(resultp1); 2632 } else if (result_exponent <= 0) { /* underflow case */ 2633 if (Is_underflowtrap_enabled()) { 2634 /* 2635 * Adjust bias of result 2636 */ 2637 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); 2638 Sgl_copytoptr(resultp1,dstptr); 2639 if (inexact) 2640 if (Is_inexacttrap_enabled()) 2641 return (OPC_2E_UNDERFLOWEXCEPTION | 2642 OPC_2E_INEXACTEXCEPTION); 2643 else Set_inexactflag(); 2644 return(OPC_2E_UNDERFLOWEXCEPTION); 2645 } 2646 else if (inexact && is_tiny) Set_underflowflag(); 2647 } 2648 else Sgl_set_exponent(resultp1,result_exponent); 2649 Sgl_copytoptr(resultp1,dstptr); 2650 if (inexact) 2651 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 2652 else Set_inexactflag(); 2653 return(NOEXCEPTION); 2654 } 2655 2656