1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Linux/PA-RISC Project (http://www.parisc-linux.org/) 4 * 5 * Floating-point emulation code 6 * Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org> 7 */ 8 /* 9 * BEGIN_DESC 10 * 11 * File: 12 * @(#) pa/spmath/fmpyfadd.c $Revision: 1.1 $ 13 * 14 * Purpose: 15 * Double Floating-point Multiply Fused Add 16 * Double Floating-point Multiply Negate Fused Add 17 * Single Floating-point Multiply Fused Add 18 * Single Floating-point Multiply Negate Fused Add 19 * 20 * External Interfaces: 21 * dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 22 * dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 23 * sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 24 * sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 25 * 26 * Internal Interfaces: 27 * 28 * Theory: 29 * <<please update with a overview of the operation of this file>> 30 * 31 * END_DESC 32 */ 33 34 35 #include "float.h" 36 #include "sgl_float.h" 37 #include "dbl_float.h" 38 39 40 /* 41 * Double Floating-point Multiply Fused Add 42 */ 43 44 int 45 dbl_fmpyfadd( 46 dbl_floating_point *src1ptr, 47 dbl_floating_point *src2ptr, 48 dbl_floating_point *src3ptr, 49 unsigned int *status, 50 dbl_floating_point *dstptr) 51 { 52 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; 53 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; 54 unsigned int rightp1, rightp2, rightp3, rightp4; 55 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; 56 register int mpy_exponent, add_exponent, count; 57 boolean inexact = FALSE, is_tiny = FALSE; 58 59 unsigned int signlessleft1, signlessright1, save; 60 register int result_exponent, diff_exponent; 61 int sign_save, jumpsize; 62 63 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); 64 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); 65 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); 66 67 /* 68 * set sign bit of result of multiply 69 */ 70 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 71 Dbl_setnegativezerop1(resultp1); 72 else Dbl_setzerop1(resultp1); 73 74 /* 75 * Generate multiply exponent 76 */ 77 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; 78 79 /* 80 * check first operand for NaN's or infinity 81 */ 82 if (Dbl_isinfinity_exponent(opnd1p1)) { 83 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 84 if (Dbl_isnotnan(opnd2p1,opnd2p2) && 85 Dbl_isnotnan(opnd3p1,opnd3p2)) { 86 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { 87 /* 88 * invalid since operands are infinity 89 * and zero 90 */ 91 if (Is_invalidtrap_enabled()) 92 return(OPC_2E_INVALIDEXCEPTION); 93 Set_invalidflag(); 94 Dbl_makequietnan(resultp1,resultp2); 95 Dbl_copytoptr(resultp1,resultp2,dstptr); 96 return(NOEXCEPTION); 97 } 98 /* 99 * Check third operand for infinity with a 100 * sign opposite of the multiply result 101 */ 102 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 103 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 104 /* 105 * invalid since attempting a magnitude 106 * subtraction of infinities 107 */ 108 if (Is_invalidtrap_enabled()) 109 return(OPC_2E_INVALIDEXCEPTION); 110 Set_invalidflag(); 111 Dbl_makequietnan(resultp1,resultp2); 112 Dbl_copytoptr(resultp1,resultp2,dstptr); 113 return(NOEXCEPTION); 114 } 115 116 /* 117 * return infinity 118 */ 119 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 120 Dbl_copytoptr(resultp1,resultp2,dstptr); 121 return(NOEXCEPTION); 122 } 123 } 124 else { 125 /* 126 * is NaN; signaling or quiet? 127 */ 128 if (Dbl_isone_signaling(opnd1p1)) { 129 /* trap if INVALIDTRAP enabled */ 130 if (Is_invalidtrap_enabled()) 131 return(OPC_2E_INVALIDEXCEPTION); 132 /* make NaN quiet */ 133 Set_invalidflag(); 134 Dbl_set_quiet(opnd1p1); 135 } 136 /* 137 * is second operand a signaling NaN? 138 */ 139 else if (Dbl_is_signalingnan(opnd2p1)) { 140 /* trap if INVALIDTRAP enabled */ 141 if (Is_invalidtrap_enabled()) 142 return(OPC_2E_INVALIDEXCEPTION); 143 /* make NaN quiet */ 144 Set_invalidflag(); 145 Dbl_set_quiet(opnd2p1); 146 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 147 return(NOEXCEPTION); 148 } 149 /* 150 * is third operand a signaling NaN? 151 */ 152 else if (Dbl_is_signalingnan(opnd3p1)) { 153 /* trap if INVALIDTRAP enabled */ 154 if (Is_invalidtrap_enabled()) 155 return(OPC_2E_INVALIDEXCEPTION); 156 /* make NaN quiet */ 157 Set_invalidflag(); 158 Dbl_set_quiet(opnd3p1); 159 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 160 return(NOEXCEPTION); 161 } 162 /* 163 * return quiet NaN 164 */ 165 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); 166 return(NOEXCEPTION); 167 } 168 } 169 170 /* 171 * check second operand for NaN's or infinity 172 */ 173 if (Dbl_isinfinity_exponent(opnd2p1)) { 174 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 175 if (Dbl_isnotnan(opnd3p1,opnd3p2)) { 176 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { 177 /* 178 * invalid since multiply operands are 179 * zero & infinity 180 */ 181 if (Is_invalidtrap_enabled()) 182 return(OPC_2E_INVALIDEXCEPTION); 183 Set_invalidflag(); 184 Dbl_makequietnan(opnd2p1,opnd2p2); 185 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 186 return(NOEXCEPTION); 187 } 188 189 /* 190 * Check third operand for infinity with a 191 * sign opposite of the multiply result 192 */ 193 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 194 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 195 /* 196 * invalid since attempting a magnitude 197 * subtraction of infinities 198 */ 199 if (Is_invalidtrap_enabled()) 200 return(OPC_2E_INVALIDEXCEPTION); 201 Set_invalidflag(); 202 Dbl_makequietnan(resultp1,resultp2); 203 Dbl_copytoptr(resultp1,resultp2,dstptr); 204 return(NOEXCEPTION); 205 } 206 207 /* 208 * return infinity 209 */ 210 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 211 Dbl_copytoptr(resultp1,resultp2,dstptr); 212 return(NOEXCEPTION); 213 } 214 } 215 else { 216 /* 217 * is NaN; signaling or quiet? 218 */ 219 if (Dbl_isone_signaling(opnd2p1)) { 220 /* trap if INVALIDTRAP enabled */ 221 if (Is_invalidtrap_enabled()) 222 return(OPC_2E_INVALIDEXCEPTION); 223 /* make NaN quiet */ 224 Set_invalidflag(); 225 Dbl_set_quiet(opnd2p1); 226 } 227 /* 228 * is third operand a signaling NaN? 229 */ 230 else if (Dbl_is_signalingnan(opnd3p1)) { 231 /* trap if INVALIDTRAP enabled */ 232 if (Is_invalidtrap_enabled()) 233 return(OPC_2E_INVALIDEXCEPTION); 234 /* make NaN quiet */ 235 Set_invalidflag(); 236 Dbl_set_quiet(opnd3p1); 237 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 238 return(NOEXCEPTION); 239 } 240 /* 241 * return quiet NaN 242 */ 243 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 244 return(NOEXCEPTION); 245 } 246 } 247 248 /* 249 * check third operand for NaN's or infinity 250 */ 251 if (Dbl_isinfinity_exponent(opnd3p1)) { 252 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 253 /* return infinity */ 254 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 255 return(NOEXCEPTION); 256 } else { 257 /* 258 * is NaN; signaling or quiet? 259 */ 260 if (Dbl_isone_signaling(opnd3p1)) { 261 /* trap if INVALIDTRAP enabled */ 262 if (Is_invalidtrap_enabled()) 263 return(OPC_2E_INVALIDEXCEPTION); 264 /* make NaN quiet */ 265 Set_invalidflag(); 266 Dbl_set_quiet(opnd3p1); 267 } 268 /* 269 * return quiet NaN 270 */ 271 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 272 return(NOEXCEPTION); 273 } 274 } 275 276 /* 277 * Generate multiply mantissa 278 */ 279 if (Dbl_isnotzero_exponent(opnd1p1)) { 280 /* set hidden bit */ 281 Dbl_clear_signexponent_set_hidden(opnd1p1); 282 } 283 else { 284 /* check for zero */ 285 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 286 /* 287 * Perform the add opnd3 with zero here. 288 */ 289 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 290 if (Is_rounding_mode(ROUNDMINUS)) { 291 Dbl_or_signs(opnd3p1,resultp1); 292 } else { 293 Dbl_and_signs(opnd3p1,resultp1); 294 } 295 } 296 /* 297 * Now let's check for trapped underflow case. 298 */ 299 else if (Dbl_iszero_exponent(opnd3p1) && 300 Is_underflowtrap_enabled()) { 301 /* need to normalize results mantissa */ 302 sign_save = Dbl_signextendedsign(opnd3p1); 303 result_exponent = 0; 304 Dbl_leftshiftby1(opnd3p1,opnd3p2); 305 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 306 Dbl_set_sign(opnd3p1,/*using*/sign_save); 307 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 308 unfl); 309 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 310 /* inexact = FALSE */ 311 return(OPC_2E_UNDERFLOWEXCEPTION); 312 } 313 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 314 return(NOEXCEPTION); 315 } 316 /* is denormalized, adjust exponent */ 317 Dbl_clear_signexponent(opnd1p1); 318 Dbl_leftshiftby1(opnd1p1,opnd1p2); 319 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); 320 } 321 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 322 if (Dbl_isnotzero_exponent(opnd2p1)) { 323 Dbl_clear_signexponent_set_hidden(opnd2p1); 324 } 325 else { 326 /* check for zero */ 327 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 328 /* 329 * Perform the add opnd3 with zero here. 330 */ 331 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 332 if (Is_rounding_mode(ROUNDMINUS)) { 333 Dbl_or_signs(opnd3p1,resultp1); 334 } else { 335 Dbl_and_signs(opnd3p1,resultp1); 336 } 337 } 338 /* 339 * Now let's check for trapped underflow case. 340 */ 341 else if (Dbl_iszero_exponent(opnd3p1) && 342 Is_underflowtrap_enabled()) { 343 /* need to normalize results mantissa */ 344 sign_save = Dbl_signextendedsign(opnd3p1); 345 result_exponent = 0; 346 Dbl_leftshiftby1(opnd3p1,opnd3p2); 347 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 348 Dbl_set_sign(opnd3p1,/*using*/sign_save); 349 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 350 unfl); 351 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 352 /* inexact = FALSE */ 353 return(OPC_2E_UNDERFLOWEXCEPTION); 354 } 355 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 356 return(NOEXCEPTION); 357 } 358 /* is denormalized; want to normalize */ 359 Dbl_clear_signexponent(opnd2p1); 360 Dbl_leftshiftby1(opnd2p1,opnd2p2); 361 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); 362 } 363 364 /* Multiply the first two source mantissas together */ 365 366 /* 367 * The intermediate result will be kept in tmpres, 368 * which needs enough room for 106 bits of mantissa, 369 * so lets call it a Double extended. 370 */ 371 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 372 373 /* 374 * Four bits at a time are inspected in each loop, and a 375 * simple shift and add multiply algorithm is used. 376 */ 377 for (count = DBL_P-1; count >= 0; count -= 4) { 378 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 379 if (Dbit28p2(opnd1p2)) { 380 /* Fourword_add should be an ADD followed by 3 ADDC's */ 381 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 382 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); 383 } 384 if (Dbit29p2(opnd1p2)) { 385 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 386 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); 387 } 388 if (Dbit30p2(opnd1p2)) { 389 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 390 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); 391 } 392 if (Dbit31p2(opnd1p2)) { 393 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 394 opnd2p1, opnd2p2, 0, 0); 395 } 396 Dbl_rightshiftby4(opnd1p1,opnd1p2); 397 } 398 if (Is_dexthiddenoverflow(tmpresp1)) { 399 /* result mantissa >= 2 (mantissa overflow) */ 400 mpy_exponent++; 401 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 402 } 403 404 /* 405 * Restore the sign of the mpy result which was saved in resultp1. 406 * The exponent will continue to be kept in mpy_exponent. 407 */ 408 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); 409 410 /* 411 * No rounding is required, since the result of the multiply 412 * is exact in the extended format. 413 */ 414 415 /* 416 * Now we are ready to perform the add portion of the operation. 417 * 418 * The exponents need to be kept as integers for now, since the 419 * multiply result might not fit into the exponent field. We 420 * can't overflow or underflow because of this yet, since the 421 * add could bring the final result back into range. 422 */ 423 add_exponent = Dbl_exponent(opnd3p1); 424 425 /* 426 * Check for denormalized or zero add operand. 427 */ 428 if (add_exponent == 0) { 429 /* check for zero */ 430 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 431 /* right is zero */ 432 /* Left can't be zero and must be result. 433 * 434 * The final result is now in tmpres and mpy_exponent, 435 * and needs to be rounded and squeezed back into 436 * double precision format from double extended. 437 */ 438 result_exponent = mpy_exponent; 439 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 440 resultp1,resultp2,resultp3,resultp4); 441 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ 442 goto round; 443 } 444 445 /* 446 * Neither are zeroes. 447 * Adjust exponent and normalize add operand. 448 */ 449 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ 450 Dbl_clear_signexponent(opnd3p1); 451 Dbl_leftshiftby1(opnd3p1,opnd3p2); 452 Dbl_normalize(opnd3p1,opnd3p2,add_exponent); 453 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ 454 } else { 455 Dbl_clear_exponent_set_hidden(opnd3p1); 456 } 457 /* 458 * Copy opnd3 to the double extended variable called right. 459 */ 460 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); 461 462 /* 463 * A zero "save" helps discover equal operands (for later), 464 * and is used in swapping operands (if needed). 465 */ 466 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); 467 468 /* 469 * Compare magnitude of operands. 470 */ 471 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); 472 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); 473 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 474 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ 475 /* 476 * Set the left operand to the larger one by XOR swap. 477 * First finish the first word "save". 478 */ 479 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); 480 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 481 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, 482 rightp2,rightp3,rightp4); 483 /* also setup exponents used in rest of routine */ 484 diff_exponent = add_exponent - mpy_exponent; 485 result_exponent = add_exponent; 486 } else { 487 /* also setup exponents used in rest of routine */ 488 diff_exponent = mpy_exponent - add_exponent; 489 result_exponent = mpy_exponent; 490 } 491 /* Invariant: left is not smaller than right. */ 492 493 /* 494 * Special case alignment of operands that would force alignment 495 * beyond the extent of the extension. A further optimization 496 * could special case this but only reduces the path length for 497 * this infrequent case. 498 */ 499 if (diff_exponent > DBLEXT_THRESHOLD) { 500 diff_exponent = DBLEXT_THRESHOLD; 501 } 502 503 /* Align right operand by shifting it to the right */ 504 Dblext_clear_sign(rightp1); 505 Dblext_right_align(rightp1,rightp2,rightp3,rightp4, 506 /*shifted by*/diff_exponent); 507 508 /* Treat sum and difference of the operands separately. */ 509 if ((int)save < 0) { 510 /* 511 * Difference of the two operands. Overflow can occur if the 512 * multiply overflowed. A borrow can occur out of the hidden 513 * bit and force a post normalization phase. 514 */ 515 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 516 rightp1,rightp2,rightp3,rightp4, 517 resultp1,resultp2,resultp3,resultp4); 518 sign_save = Dbl_signextendedsign(resultp1); 519 if (Dbl_iszero_hidden(resultp1)) { 520 /* Handle normalization */ 521 /* A straightforward algorithm would now shift the 522 * result and extension left until the hidden bit 523 * becomes one. Not all of the extension bits need 524 * participate in the shift. Only the two most 525 * significant bits (round and guard) are needed. 526 * If only a single shift is needed then the guard 527 * bit becomes a significant low order bit and the 528 * extension must participate in the rounding. 529 * If more than a single shift is needed, then all 530 * bits to the right of the guard bit are zeros, 531 * and the guard bit may or may not be zero. */ 532 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 533 resultp4); 534 535 /* Need to check for a zero result. The sign and 536 * exponent fields have already been zeroed. The more 537 * efficient test of the full object can be used. 538 */ 539 if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){ 540 /* Must have been "x-x" or "x+(-x)". */ 541 if (Is_rounding_mode(ROUNDMINUS)) 542 Dbl_setone_sign(resultp1); 543 Dbl_copytoptr(resultp1,resultp2,dstptr); 544 return(NOEXCEPTION); 545 } 546 result_exponent--; 547 548 /* Look to see if normalization is finished. */ 549 if (Dbl_isone_hidden(resultp1)) { 550 /* No further normalization is needed */ 551 goto round; 552 } 553 554 /* Discover first one bit to determine shift amount. 555 * Use a modified binary search. We have already 556 * shifted the result one position right and still 557 * not found a one so the remainder of the extension 558 * must be zero and simplifies rounding. */ 559 /* Scan bytes */ 560 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { 561 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); 562 result_exponent -= 8; 563 } 564 /* Now narrow it down to the nibble */ 565 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { 566 /* The lower nibble contains the 567 * normalizing one */ 568 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); 569 result_exponent -= 4; 570 } 571 /* Select case where first bit is set (already 572 * normalized) otherwise select the proper shift. */ 573 jumpsize = Dbl_hiddenhigh3mantissa(resultp1); 574 if (jumpsize <= 7) switch(jumpsize) { 575 case 1: 576 Dblext_leftshiftby3(resultp1,resultp2,resultp3, 577 resultp4); 578 result_exponent -= 3; 579 break; 580 case 2: 581 case 3: 582 Dblext_leftshiftby2(resultp1,resultp2,resultp3, 583 resultp4); 584 result_exponent -= 2; 585 break; 586 case 4: 587 case 5: 588 case 6: 589 case 7: 590 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 591 resultp4); 592 result_exponent -= 1; 593 break; 594 } 595 } /* end if (hidden...)... */ 596 /* Fall through and round */ 597 } /* end if (save < 0)... */ 598 else { 599 /* Add magnitudes */ 600 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 601 rightp1,rightp2,rightp3,rightp4, 602 /*to*/resultp1,resultp2,resultp3,resultp4); 603 sign_save = Dbl_signextendedsign(resultp1); 604 if (Dbl_isone_hiddenoverflow(resultp1)) { 605 /* Prenormalization required. */ 606 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, 607 resultp4); 608 result_exponent++; 609 } /* end if hiddenoverflow... */ 610 } /* end else ...add magnitudes... */ 611 612 /* Round the result. If the extension and lower two words are 613 * all zeros, then the result is exact. Otherwise round in the 614 * correct direction. Underflow is possible. If a postnormalization 615 * is necessary, then the mantissa is all zeros so no shift is needed. 616 */ 617 round: 618 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 619 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, 620 result_exponent,is_tiny); 621 } 622 Dbl_set_sign(resultp1,/*using*/sign_save); 623 if (Dblext_isnotzero_mantissap3(resultp3) || 624 Dblext_isnotzero_mantissap4(resultp4)) { 625 inexact = TRUE; 626 switch(Rounding_mode()) { 627 case ROUNDNEAREST: /* The default. */ 628 if (Dblext_isone_highp3(resultp3)) { 629 /* at least 1/2 ulp */ 630 if (Dblext_isnotzero_low31p3(resultp3) || 631 Dblext_isnotzero_mantissap4(resultp4) || 632 Dblext_isone_lowp2(resultp2)) { 633 /* either exactly half way and odd or 634 * more than 1/2ulp */ 635 Dbl_increment(resultp1,resultp2); 636 } 637 } 638 break; 639 640 case ROUNDPLUS: 641 if (Dbl_iszero_sign(resultp1)) { 642 /* Round up positive results */ 643 Dbl_increment(resultp1,resultp2); 644 } 645 break; 646 647 case ROUNDMINUS: 648 if (Dbl_isone_sign(resultp1)) { 649 /* Round down negative results */ 650 Dbl_increment(resultp1,resultp2); 651 } 652 653 case ROUNDZERO:; 654 /* truncate is simple */ 655 } /* end switch... */ 656 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; 657 } 658 if (result_exponent >= DBL_INFINITY_EXPONENT) { 659 /* trap if OVERFLOWTRAP enabled */ 660 if (Is_overflowtrap_enabled()) { 661 /* 662 * Adjust bias of result 663 */ 664 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); 665 Dbl_copytoptr(resultp1,resultp2,dstptr); 666 if (inexact) 667 if (Is_inexacttrap_enabled()) 668 return (OPC_2E_OVERFLOWEXCEPTION | 669 OPC_2E_INEXACTEXCEPTION); 670 else Set_inexactflag(); 671 return (OPC_2E_OVERFLOWEXCEPTION); 672 } 673 inexact = TRUE; 674 Set_overflowflag(); 675 /* set result to infinity or largest number */ 676 Dbl_setoverflow(resultp1,resultp2); 677 678 } else if (result_exponent <= 0) { /* underflow case */ 679 if (Is_underflowtrap_enabled()) { 680 /* 681 * Adjust bias of result 682 */ 683 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); 684 Dbl_copytoptr(resultp1,resultp2,dstptr); 685 if (inexact) 686 if (Is_inexacttrap_enabled()) 687 return (OPC_2E_UNDERFLOWEXCEPTION | 688 OPC_2E_INEXACTEXCEPTION); 689 else Set_inexactflag(); 690 return(OPC_2E_UNDERFLOWEXCEPTION); 691 } 692 else if (inexact && is_tiny) Set_underflowflag(); 693 } 694 else Dbl_set_exponent(resultp1,result_exponent); 695 Dbl_copytoptr(resultp1,resultp2,dstptr); 696 if (inexact) 697 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 698 else Set_inexactflag(); 699 return(NOEXCEPTION); 700 } 701 702 /* 703 * Double Floating-point Multiply Negate Fused Add 704 */ 705 706 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 707 708 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 709 unsigned int *status; 710 { 711 unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2; 712 register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4; 713 unsigned int rightp1, rightp2, rightp3, rightp4; 714 unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0; 715 register int mpy_exponent, add_exponent, count; 716 boolean inexact = FALSE, is_tiny = FALSE; 717 718 unsigned int signlessleft1, signlessright1, save; 719 register int result_exponent, diff_exponent; 720 int sign_save, jumpsize; 721 722 Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2); 723 Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2); 724 Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2); 725 726 /* 727 * set sign bit of result of multiply 728 */ 729 if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 730 Dbl_setzerop1(resultp1); 731 else 732 Dbl_setnegativezerop1(resultp1); 733 734 /* 735 * Generate multiply exponent 736 */ 737 mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS; 738 739 /* 740 * check first operand for NaN's or infinity 741 */ 742 if (Dbl_isinfinity_exponent(opnd1p1)) { 743 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 744 if (Dbl_isnotnan(opnd2p1,opnd2p2) && 745 Dbl_isnotnan(opnd3p1,opnd3p2)) { 746 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) { 747 /* 748 * invalid since operands are infinity 749 * and zero 750 */ 751 if (Is_invalidtrap_enabled()) 752 return(OPC_2E_INVALIDEXCEPTION); 753 Set_invalidflag(); 754 Dbl_makequietnan(resultp1,resultp2); 755 Dbl_copytoptr(resultp1,resultp2,dstptr); 756 return(NOEXCEPTION); 757 } 758 /* 759 * Check third operand for infinity with a 760 * sign opposite of the multiply result 761 */ 762 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 763 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 764 /* 765 * invalid since attempting a magnitude 766 * subtraction of infinities 767 */ 768 if (Is_invalidtrap_enabled()) 769 return(OPC_2E_INVALIDEXCEPTION); 770 Set_invalidflag(); 771 Dbl_makequietnan(resultp1,resultp2); 772 Dbl_copytoptr(resultp1,resultp2,dstptr); 773 return(NOEXCEPTION); 774 } 775 776 /* 777 * return infinity 778 */ 779 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 780 Dbl_copytoptr(resultp1,resultp2,dstptr); 781 return(NOEXCEPTION); 782 } 783 } 784 else { 785 /* 786 * is NaN; signaling or quiet? 787 */ 788 if (Dbl_isone_signaling(opnd1p1)) { 789 /* trap if INVALIDTRAP enabled */ 790 if (Is_invalidtrap_enabled()) 791 return(OPC_2E_INVALIDEXCEPTION); 792 /* make NaN quiet */ 793 Set_invalidflag(); 794 Dbl_set_quiet(opnd1p1); 795 } 796 /* 797 * is second operand a signaling NaN? 798 */ 799 else if (Dbl_is_signalingnan(opnd2p1)) { 800 /* trap if INVALIDTRAP enabled */ 801 if (Is_invalidtrap_enabled()) 802 return(OPC_2E_INVALIDEXCEPTION); 803 /* make NaN quiet */ 804 Set_invalidflag(); 805 Dbl_set_quiet(opnd2p1); 806 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 807 return(NOEXCEPTION); 808 } 809 /* 810 * is third operand a signaling NaN? 811 */ 812 else if (Dbl_is_signalingnan(opnd3p1)) { 813 /* trap if INVALIDTRAP enabled */ 814 if (Is_invalidtrap_enabled()) 815 return(OPC_2E_INVALIDEXCEPTION); 816 /* make NaN quiet */ 817 Set_invalidflag(); 818 Dbl_set_quiet(opnd3p1); 819 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 820 return(NOEXCEPTION); 821 } 822 /* 823 * return quiet NaN 824 */ 825 Dbl_copytoptr(opnd1p1,opnd1p2,dstptr); 826 return(NOEXCEPTION); 827 } 828 } 829 830 /* 831 * check second operand for NaN's or infinity 832 */ 833 if (Dbl_isinfinity_exponent(opnd2p1)) { 834 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 835 if (Dbl_isnotnan(opnd3p1,opnd3p2)) { 836 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) { 837 /* 838 * invalid since multiply operands are 839 * zero & infinity 840 */ 841 if (Is_invalidtrap_enabled()) 842 return(OPC_2E_INVALIDEXCEPTION); 843 Set_invalidflag(); 844 Dbl_makequietnan(opnd2p1,opnd2p2); 845 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 846 return(NOEXCEPTION); 847 } 848 849 /* 850 * Check third operand for infinity with a 851 * sign opposite of the multiply result 852 */ 853 if (Dbl_isinfinity(opnd3p1,opnd3p2) && 854 (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) { 855 /* 856 * invalid since attempting a magnitude 857 * subtraction of infinities 858 */ 859 if (Is_invalidtrap_enabled()) 860 return(OPC_2E_INVALIDEXCEPTION); 861 Set_invalidflag(); 862 Dbl_makequietnan(resultp1,resultp2); 863 Dbl_copytoptr(resultp1,resultp2,dstptr); 864 return(NOEXCEPTION); 865 } 866 867 /* 868 * return infinity 869 */ 870 Dbl_setinfinity_exponentmantissa(resultp1,resultp2); 871 Dbl_copytoptr(resultp1,resultp2,dstptr); 872 return(NOEXCEPTION); 873 } 874 } 875 else { 876 /* 877 * is NaN; signaling or quiet? 878 */ 879 if (Dbl_isone_signaling(opnd2p1)) { 880 /* trap if INVALIDTRAP enabled */ 881 if (Is_invalidtrap_enabled()) 882 return(OPC_2E_INVALIDEXCEPTION); 883 /* make NaN quiet */ 884 Set_invalidflag(); 885 Dbl_set_quiet(opnd2p1); 886 } 887 /* 888 * is third operand a signaling NaN? 889 */ 890 else if (Dbl_is_signalingnan(opnd3p1)) { 891 /* trap if INVALIDTRAP enabled */ 892 if (Is_invalidtrap_enabled()) 893 return(OPC_2E_INVALIDEXCEPTION); 894 /* make NaN quiet */ 895 Set_invalidflag(); 896 Dbl_set_quiet(opnd3p1); 897 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 898 return(NOEXCEPTION); 899 } 900 /* 901 * return quiet NaN 902 */ 903 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr); 904 return(NOEXCEPTION); 905 } 906 } 907 908 /* 909 * check third operand for NaN's or infinity 910 */ 911 if (Dbl_isinfinity_exponent(opnd3p1)) { 912 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 913 /* return infinity */ 914 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 915 return(NOEXCEPTION); 916 } else { 917 /* 918 * is NaN; signaling or quiet? 919 */ 920 if (Dbl_isone_signaling(opnd3p1)) { 921 /* trap if INVALIDTRAP enabled */ 922 if (Is_invalidtrap_enabled()) 923 return(OPC_2E_INVALIDEXCEPTION); 924 /* make NaN quiet */ 925 Set_invalidflag(); 926 Dbl_set_quiet(opnd3p1); 927 } 928 /* 929 * return quiet NaN 930 */ 931 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 932 return(NOEXCEPTION); 933 } 934 } 935 936 /* 937 * Generate multiply mantissa 938 */ 939 if (Dbl_isnotzero_exponent(opnd1p1)) { 940 /* set hidden bit */ 941 Dbl_clear_signexponent_set_hidden(opnd1p1); 942 } 943 else { 944 /* check for zero */ 945 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) { 946 /* 947 * Perform the add opnd3 with zero here. 948 */ 949 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 950 if (Is_rounding_mode(ROUNDMINUS)) { 951 Dbl_or_signs(opnd3p1,resultp1); 952 } else { 953 Dbl_and_signs(opnd3p1,resultp1); 954 } 955 } 956 /* 957 * Now let's check for trapped underflow case. 958 */ 959 else if (Dbl_iszero_exponent(opnd3p1) && 960 Is_underflowtrap_enabled()) { 961 /* need to normalize results mantissa */ 962 sign_save = Dbl_signextendedsign(opnd3p1); 963 result_exponent = 0; 964 Dbl_leftshiftby1(opnd3p1,opnd3p2); 965 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 966 Dbl_set_sign(opnd3p1,/*using*/sign_save); 967 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 968 unfl); 969 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 970 /* inexact = FALSE */ 971 return(OPC_2E_UNDERFLOWEXCEPTION); 972 } 973 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 974 return(NOEXCEPTION); 975 } 976 /* is denormalized, adjust exponent */ 977 Dbl_clear_signexponent(opnd1p1); 978 Dbl_leftshiftby1(opnd1p1,opnd1p2); 979 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent); 980 } 981 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 982 if (Dbl_isnotzero_exponent(opnd2p1)) { 983 Dbl_clear_signexponent_set_hidden(opnd2p1); 984 } 985 else { 986 /* check for zero */ 987 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) { 988 /* 989 * Perform the add opnd3 with zero here. 990 */ 991 if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) { 992 if (Is_rounding_mode(ROUNDMINUS)) { 993 Dbl_or_signs(opnd3p1,resultp1); 994 } else { 995 Dbl_and_signs(opnd3p1,resultp1); 996 } 997 } 998 /* 999 * Now let's check for trapped underflow case. 1000 */ 1001 else if (Dbl_iszero_exponent(opnd3p1) && 1002 Is_underflowtrap_enabled()) { 1003 /* need to normalize results mantissa */ 1004 sign_save = Dbl_signextendedsign(opnd3p1); 1005 result_exponent = 0; 1006 Dbl_leftshiftby1(opnd3p1,opnd3p2); 1007 Dbl_normalize(opnd3p1,opnd3p2,result_exponent); 1008 Dbl_set_sign(opnd3p1,/*using*/sign_save); 1009 Dbl_setwrapped_exponent(opnd3p1,result_exponent, 1010 unfl); 1011 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 1012 /* inexact = FALSE */ 1013 return(OPC_2E_UNDERFLOWEXCEPTION); 1014 } 1015 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr); 1016 return(NOEXCEPTION); 1017 } 1018 /* is denormalized; want to normalize */ 1019 Dbl_clear_signexponent(opnd2p1); 1020 Dbl_leftshiftby1(opnd2p1,opnd2p2); 1021 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent); 1022 } 1023 1024 /* Multiply the first two source mantissas together */ 1025 1026 /* 1027 * The intermediate result will be kept in tmpres, 1028 * which needs enough room for 106 bits of mantissa, 1029 * so lets call it a Double extended. 1030 */ 1031 Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1032 1033 /* 1034 * Four bits at a time are inspected in each loop, and a 1035 * simple shift and add multiply algorithm is used. 1036 */ 1037 for (count = DBL_P-1; count >= 0; count -= 4) { 1038 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1039 if (Dbit28p2(opnd1p2)) { 1040 /* Fourword_add should be an ADD followed by 3 ADDC's */ 1041 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1042 opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0); 1043 } 1044 if (Dbit29p2(opnd1p2)) { 1045 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1046 opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0); 1047 } 1048 if (Dbit30p2(opnd1p2)) { 1049 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1050 opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0); 1051 } 1052 if (Dbit31p2(opnd1p2)) { 1053 Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 1054 opnd2p1, opnd2p2, 0, 0); 1055 } 1056 Dbl_rightshiftby4(opnd1p1,opnd1p2); 1057 } 1058 if (Is_dexthiddenoverflow(tmpresp1)) { 1059 /* result mantissa >= 2 (mantissa overflow) */ 1060 mpy_exponent++; 1061 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4); 1062 } 1063 1064 /* 1065 * Restore the sign of the mpy result which was saved in resultp1. 1066 * The exponent will continue to be kept in mpy_exponent. 1067 */ 1068 Dblext_set_sign(tmpresp1,Dbl_sign(resultp1)); 1069 1070 /* 1071 * No rounding is required, since the result of the multiply 1072 * is exact in the extended format. 1073 */ 1074 1075 /* 1076 * Now we are ready to perform the add portion of the operation. 1077 * 1078 * The exponents need to be kept as integers for now, since the 1079 * multiply result might not fit into the exponent field. We 1080 * can't overflow or underflow because of this yet, since the 1081 * add could bring the final result back into range. 1082 */ 1083 add_exponent = Dbl_exponent(opnd3p1); 1084 1085 /* 1086 * Check for denormalized or zero add operand. 1087 */ 1088 if (add_exponent == 0) { 1089 /* check for zero */ 1090 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) { 1091 /* right is zero */ 1092 /* Left can't be zero and must be result. 1093 * 1094 * The final result is now in tmpres and mpy_exponent, 1095 * and needs to be rounded and squeezed back into 1096 * double precision format from double extended. 1097 */ 1098 result_exponent = mpy_exponent; 1099 Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1100 resultp1,resultp2,resultp3,resultp4); 1101 sign_save = Dbl_signextendedsign(resultp1);/*save sign*/ 1102 goto round; 1103 } 1104 1105 /* 1106 * Neither are zeroes. 1107 * Adjust exponent and normalize add operand. 1108 */ 1109 sign_save = Dbl_signextendedsign(opnd3p1); /* save sign */ 1110 Dbl_clear_signexponent(opnd3p1); 1111 Dbl_leftshiftby1(opnd3p1,opnd3p2); 1112 Dbl_normalize(opnd3p1,opnd3p2,add_exponent); 1113 Dbl_set_sign(opnd3p1,sign_save); /* restore sign */ 1114 } else { 1115 Dbl_clear_exponent_set_hidden(opnd3p1); 1116 } 1117 /* 1118 * Copy opnd3 to the double extended variable called right. 1119 */ 1120 Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4); 1121 1122 /* 1123 * A zero "save" helps discover equal operands (for later), 1124 * and is used in swapping operands (if needed). 1125 */ 1126 Dblext_xortointp1(tmpresp1,rightp1,/*to*/save); 1127 1128 /* 1129 * Compare magnitude of operands. 1130 */ 1131 Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1); 1132 Dblext_copytoint_exponentmantissap1(rightp1,signlessright1); 1133 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 1134 Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){ 1135 /* 1136 * Set the left operand to the larger one by XOR swap. 1137 * First finish the first word "save". 1138 */ 1139 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1); 1140 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 1141 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4, 1142 rightp2,rightp3,rightp4); 1143 /* also setup exponents used in rest of routine */ 1144 diff_exponent = add_exponent - mpy_exponent; 1145 result_exponent = add_exponent; 1146 } else { 1147 /* also setup exponents used in rest of routine */ 1148 diff_exponent = mpy_exponent - add_exponent; 1149 result_exponent = mpy_exponent; 1150 } 1151 /* Invariant: left is not smaller than right. */ 1152 1153 /* 1154 * Special case alignment of operands that would force alignment 1155 * beyond the extent of the extension. A further optimization 1156 * could special case this but only reduces the path length for 1157 * this infrequent case. 1158 */ 1159 if (diff_exponent > DBLEXT_THRESHOLD) { 1160 diff_exponent = DBLEXT_THRESHOLD; 1161 } 1162 1163 /* Align right operand by shifting it to the right */ 1164 Dblext_clear_sign(rightp1); 1165 Dblext_right_align(rightp1,rightp2,rightp3,rightp4, 1166 /*shifted by*/diff_exponent); 1167 1168 /* Treat sum and difference of the operands separately. */ 1169 if ((int)save < 0) { 1170 /* 1171 * Difference of the two operands. Overflow can occur if the 1172 * multiply overflowed. A borrow can occur out of the hidden 1173 * bit and force a post normalization phase. 1174 */ 1175 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1176 rightp1,rightp2,rightp3,rightp4, 1177 resultp1,resultp2,resultp3,resultp4); 1178 sign_save = Dbl_signextendedsign(resultp1); 1179 if (Dbl_iszero_hidden(resultp1)) { 1180 /* Handle normalization */ 1181 /* A straightforward algorithm would now shift the 1182 * result and extension left until the hidden bit 1183 * becomes one. Not all of the extension bits need 1184 * participate in the shift. Only the two most 1185 * significant bits (round and guard) are needed. 1186 * If only a single shift is needed then the guard 1187 * bit becomes a significant low order bit and the 1188 * extension must participate in the rounding. 1189 * If more than a single shift is needed, then all 1190 * bits to the right of the guard bit are zeros, 1191 * and the guard bit may or may not be zero. */ 1192 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 1193 resultp4); 1194 1195 /* Need to check for a zero result. The sign and 1196 * exponent fields have already been zeroed. The more 1197 * efficient test of the full object can be used. 1198 */ 1199 if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) { 1200 /* Must have been "x-x" or "x+(-x)". */ 1201 if (Is_rounding_mode(ROUNDMINUS)) 1202 Dbl_setone_sign(resultp1); 1203 Dbl_copytoptr(resultp1,resultp2,dstptr); 1204 return(NOEXCEPTION); 1205 } 1206 result_exponent--; 1207 1208 /* Look to see if normalization is finished. */ 1209 if (Dbl_isone_hidden(resultp1)) { 1210 /* No further normalization is needed */ 1211 goto round; 1212 } 1213 1214 /* Discover first one bit to determine shift amount. 1215 * Use a modified binary search. We have already 1216 * shifted the result one position right and still 1217 * not found a one so the remainder of the extension 1218 * must be zero and simplifies rounding. */ 1219 /* Scan bytes */ 1220 while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) { 1221 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4); 1222 result_exponent -= 8; 1223 } 1224 /* Now narrow it down to the nibble */ 1225 if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) { 1226 /* The lower nibble contains the 1227 * normalizing one */ 1228 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4); 1229 result_exponent -= 4; 1230 } 1231 /* Select case where first bit is set (already 1232 * normalized) otherwise select the proper shift. */ 1233 jumpsize = Dbl_hiddenhigh3mantissa(resultp1); 1234 if (jumpsize <= 7) switch(jumpsize) { 1235 case 1: 1236 Dblext_leftshiftby3(resultp1,resultp2,resultp3, 1237 resultp4); 1238 result_exponent -= 3; 1239 break; 1240 case 2: 1241 case 3: 1242 Dblext_leftshiftby2(resultp1,resultp2,resultp3, 1243 resultp4); 1244 result_exponent -= 2; 1245 break; 1246 case 4: 1247 case 5: 1248 case 6: 1249 case 7: 1250 Dblext_leftshiftby1(resultp1,resultp2,resultp3, 1251 resultp4); 1252 result_exponent -= 1; 1253 break; 1254 } 1255 } /* end if (hidden...)... */ 1256 /* Fall through and round */ 1257 } /* end if (save < 0)... */ 1258 else { 1259 /* Add magnitudes */ 1260 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4, 1261 rightp1,rightp2,rightp3,rightp4, 1262 /*to*/resultp1,resultp2,resultp3,resultp4); 1263 sign_save = Dbl_signextendedsign(resultp1); 1264 if (Dbl_isone_hiddenoverflow(resultp1)) { 1265 /* Prenormalization required. */ 1266 Dblext_arithrightshiftby1(resultp1,resultp2,resultp3, 1267 resultp4); 1268 result_exponent++; 1269 } /* end if hiddenoverflow... */ 1270 } /* end else ...add magnitudes... */ 1271 1272 /* Round the result. If the extension and lower two words are 1273 * all zeros, then the result is exact. Otherwise round in the 1274 * correct direction. Underflow is possible. If a postnormalization 1275 * is necessary, then the mantissa is all zeros so no shift is needed. 1276 */ 1277 round: 1278 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 1279 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4, 1280 result_exponent,is_tiny); 1281 } 1282 Dbl_set_sign(resultp1,/*using*/sign_save); 1283 if (Dblext_isnotzero_mantissap3(resultp3) || 1284 Dblext_isnotzero_mantissap4(resultp4)) { 1285 inexact = TRUE; 1286 switch(Rounding_mode()) { 1287 case ROUNDNEAREST: /* The default. */ 1288 if (Dblext_isone_highp3(resultp3)) { 1289 /* at least 1/2 ulp */ 1290 if (Dblext_isnotzero_low31p3(resultp3) || 1291 Dblext_isnotzero_mantissap4(resultp4) || 1292 Dblext_isone_lowp2(resultp2)) { 1293 /* either exactly half way and odd or 1294 * more than 1/2ulp */ 1295 Dbl_increment(resultp1,resultp2); 1296 } 1297 } 1298 break; 1299 1300 case ROUNDPLUS: 1301 if (Dbl_iszero_sign(resultp1)) { 1302 /* Round up positive results */ 1303 Dbl_increment(resultp1,resultp2); 1304 } 1305 break; 1306 1307 case ROUNDMINUS: 1308 if (Dbl_isone_sign(resultp1)) { 1309 /* Round down negative results */ 1310 Dbl_increment(resultp1,resultp2); 1311 } 1312 1313 case ROUNDZERO:; 1314 /* truncate is simple */ 1315 } /* end switch... */ 1316 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++; 1317 } 1318 if (result_exponent >= DBL_INFINITY_EXPONENT) { 1319 /* Overflow */ 1320 if (Is_overflowtrap_enabled()) { 1321 /* 1322 * Adjust bias of result 1323 */ 1324 Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl); 1325 Dbl_copytoptr(resultp1,resultp2,dstptr); 1326 if (inexact) 1327 if (Is_inexacttrap_enabled()) 1328 return (OPC_2E_OVERFLOWEXCEPTION | 1329 OPC_2E_INEXACTEXCEPTION); 1330 else Set_inexactflag(); 1331 return (OPC_2E_OVERFLOWEXCEPTION); 1332 } 1333 inexact = TRUE; 1334 Set_overflowflag(); 1335 Dbl_setoverflow(resultp1,resultp2); 1336 } else if (result_exponent <= 0) { /* underflow case */ 1337 if (Is_underflowtrap_enabled()) { 1338 /* 1339 * Adjust bias of result 1340 */ 1341 Dbl_setwrapped_exponent(resultp1,result_exponent,unfl); 1342 Dbl_copytoptr(resultp1,resultp2,dstptr); 1343 if (inexact) 1344 if (Is_inexacttrap_enabled()) 1345 return (OPC_2E_UNDERFLOWEXCEPTION | 1346 OPC_2E_INEXACTEXCEPTION); 1347 else Set_inexactflag(); 1348 return(OPC_2E_UNDERFLOWEXCEPTION); 1349 } 1350 else if (inexact && is_tiny) Set_underflowflag(); 1351 } 1352 else Dbl_set_exponent(resultp1,result_exponent); 1353 Dbl_copytoptr(resultp1,resultp2,dstptr); 1354 if (inexact) 1355 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 1356 else Set_inexactflag(); 1357 return(NOEXCEPTION); 1358 } 1359 1360 /* 1361 * Single Floating-point Multiply Fused Add 1362 */ 1363 1364 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 1365 1366 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 1367 unsigned int *status; 1368 { 1369 unsigned int opnd1, opnd2, opnd3; 1370 register unsigned int tmpresp1, tmpresp2; 1371 unsigned int rightp1, rightp2; 1372 unsigned int resultp1, resultp2 = 0; 1373 register int mpy_exponent, add_exponent, count; 1374 boolean inexact = FALSE, is_tiny = FALSE; 1375 1376 unsigned int signlessleft1, signlessright1, save; 1377 register int result_exponent, diff_exponent; 1378 int sign_save, jumpsize; 1379 1380 Sgl_copyfromptr(src1ptr,opnd1); 1381 Sgl_copyfromptr(src2ptr,opnd2); 1382 Sgl_copyfromptr(src3ptr,opnd3); 1383 1384 /* 1385 * set sign bit of result of multiply 1386 */ 1387 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 1388 Sgl_setnegativezero(resultp1); 1389 else Sgl_setzero(resultp1); 1390 1391 /* 1392 * Generate multiply exponent 1393 */ 1394 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; 1395 1396 /* 1397 * check first operand for NaN's or infinity 1398 */ 1399 if (Sgl_isinfinity_exponent(opnd1)) { 1400 if (Sgl_iszero_mantissa(opnd1)) { 1401 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { 1402 if (Sgl_iszero_exponentmantissa(opnd2)) { 1403 /* 1404 * invalid since operands are infinity 1405 * and zero 1406 */ 1407 if (Is_invalidtrap_enabled()) 1408 return(OPC_2E_INVALIDEXCEPTION); 1409 Set_invalidflag(); 1410 Sgl_makequietnan(resultp1); 1411 Sgl_copytoptr(resultp1,dstptr); 1412 return(NOEXCEPTION); 1413 } 1414 /* 1415 * Check third operand for infinity with a 1416 * sign opposite of the multiply result 1417 */ 1418 if (Sgl_isinfinity(opnd3) && 1419 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 1420 /* 1421 * invalid since attempting a magnitude 1422 * subtraction of infinities 1423 */ 1424 if (Is_invalidtrap_enabled()) 1425 return(OPC_2E_INVALIDEXCEPTION); 1426 Set_invalidflag(); 1427 Sgl_makequietnan(resultp1); 1428 Sgl_copytoptr(resultp1,dstptr); 1429 return(NOEXCEPTION); 1430 } 1431 1432 /* 1433 * return infinity 1434 */ 1435 Sgl_setinfinity_exponentmantissa(resultp1); 1436 Sgl_copytoptr(resultp1,dstptr); 1437 return(NOEXCEPTION); 1438 } 1439 } 1440 else { 1441 /* 1442 * is NaN; signaling or quiet? 1443 */ 1444 if (Sgl_isone_signaling(opnd1)) { 1445 /* trap if INVALIDTRAP enabled */ 1446 if (Is_invalidtrap_enabled()) 1447 return(OPC_2E_INVALIDEXCEPTION); 1448 /* make NaN quiet */ 1449 Set_invalidflag(); 1450 Sgl_set_quiet(opnd1); 1451 } 1452 /* 1453 * is second operand a signaling NaN? 1454 */ 1455 else if (Sgl_is_signalingnan(opnd2)) { 1456 /* trap if INVALIDTRAP enabled */ 1457 if (Is_invalidtrap_enabled()) 1458 return(OPC_2E_INVALIDEXCEPTION); 1459 /* make NaN quiet */ 1460 Set_invalidflag(); 1461 Sgl_set_quiet(opnd2); 1462 Sgl_copytoptr(opnd2,dstptr); 1463 return(NOEXCEPTION); 1464 } 1465 /* 1466 * is third operand a signaling NaN? 1467 */ 1468 else if (Sgl_is_signalingnan(opnd3)) { 1469 /* trap if INVALIDTRAP enabled */ 1470 if (Is_invalidtrap_enabled()) 1471 return(OPC_2E_INVALIDEXCEPTION); 1472 /* make NaN quiet */ 1473 Set_invalidflag(); 1474 Sgl_set_quiet(opnd3); 1475 Sgl_copytoptr(opnd3,dstptr); 1476 return(NOEXCEPTION); 1477 } 1478 /* 1479 * return quiet NaN 1480 */ 1481 Sgl_copytoptr(opnd1,dstptr); 1482 return(NOEXCEPTION); 1483 } 1484 } 1485 1486 /* 1487 * check second operand for NaN's or infinity 1488 */ 1489 if (Sgl_isinfinity_exponent(opnd2)) { 1490 if (Sgl_iszero_mantissa(opnd2)) { 1491 if (Sgl_isnotnan(opnd3)) { 1492 if (Sgl_iszero_exponentmantissa(opnd1)) { 1493 /* 1494 * invalid since multiply operands are 1495 * zero & infinity 1496 */ 1497 if (Is_invalidtrap_enabled()) 1498 return(OPC_2E_INVALIDEXCEPTION); 1499 Set_invalidflag(); 1500 Sgl_makequietnan(opnd2); 1501 Sgl_copytoptr(opnd2,dstptr); 1502 return(NOEXCEPTION); 1503 } 1504 1505 /* 1506 * Check third operand for infinity with a 1507 * sign opposite of the multiply result 1508 */ 1509 if (Sgl_isinfinity(opnd3) && 1510 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 1511 /* 1512 * invalid since attempting a magnitude 1513 * subtraction of infinities 1514 */ 1515 if (Is_invalidtrap_enabled()) 1516 return(OPC_2E_INVALIDEXCEPTION); 1517 Set_invalidflag(); 1518 Sgl_makequietnan(resultp1); 1519 Sgl_copytoptr(resultp1,dstptr); 1520 return(NOEXCEPTION); 1521 } 1522 1523 /* 1524 * return infinity 1525 */ 1526 Sgl_setinfinity_exponentmantissa(resultp1); 1527 Sgl_copytoptr(resultp1,dstptr); 1528 return(NOEXCEPTION); 1529 } 1530 } 1531 else { 1532 /* 1533 * is NaN; signaling or quiet? 1534 */ 1535 if (Sgl_isone_signaling(opnd2)) { 1536 /* trap if INVALIDTRAP enabled */ 1537 if (Is_invalidtrap_enabled()) 1538 return(OPC_2E_INVALIDEXCEPTION); 1539 /* make NaN quiet */ 1540 Set_invalidflag(); 1541 Sgl_set_quiet(opnd2); 1542 } 1543 /* 1544 * is third operand a signaling NaN? 1545 */ 1546 else if (Sgl_is_signalingnan(opnd3)) { 1547 /* trap if INVALIDTRAP enabled */ 1548 if (Is_invalidtrap_enabled()) 1549 return(OPC_2E_INVALIDEXCEPTION); 1550 /* make NaN quiet */ 1551 Set_invalidflag(); 1552 Sgl_set_quiet(opnd3); 1553 Sgl_copytoptr(opnd3,dstptr); 1554 return(NOEXCEPTION); 1555 } 1556 /* 1557 * return quiet NaN 1558 */ 1559 Sgl_copytoptr(opnd2,dstptr); 1560 return(NOEXCEPTION); 1561 } 1562 } 1563 1564 /* 1565 * check third operand for NaN's or infinity 1566 */ 1567 if (Sgl_isinfinity_exponent(opnd3)) { 1568 if (Sgl_iszero_mantissa(opnd3)) { 1569 /* return infinity */ 1570 Sgl_copytoptr(opnd3,dstptr); 1571 return(NOEXCEPTION); 1572 } else { 1573 /* 1574 * is NaN; signaling or quiet? 1575 */ 1576 if (Sgl_isone_signaling(opnd3)) { 1577 /* trap if INVALIDTRAP enabled */ 1578 if (Is_invalidtrap_enabled()) 1579 return(OPC_2E_INVALIDEXCEPTION); 1580 /* make NaN quiet */ 1581 Set_invalidflag(); 1582 Sgl_set_quiet(opnd3); 1583 } 1584 /* 1585 * return quiet NaN 1586 */ 1587 Sgl_copytoptr(opnd3,dstptr); 1588 return(NOEXCEPTION); 1589 } 1590 } 1591 1592 /* 1593 * Generate multiply mantissa 1594 */ 1595 if (Sgl_isnotzero_exponent(opnd1)) { 1596 /* set hidden bit */ 1597 Sgl_clear_signexponent_set_hidden(opnd1); 1598 } 1599 else { 1600 /* check for zero */ 1601 if (Sgl_iszero_mantissa(opnd1)) { 1602 /* 1603 * Perform the add opnd3 with zero here. 1604 */ 1605 if (Sgl_iszero_exponentmantissa(opnd3)) { 1606 if (Is_rounding_mode(ROUNDMINUS)) { 1607 Sgl_or_signs(opnd3,resultp1); 1608 } else { 1609 Sgl_and_signs(opnd3,resultp1); 1610 } 1611 } 1612 /* 1613 * Now let's check for trapped underflow case. 1614 */ 1615 else if (Sgl_iszero_exponent(opnd3) && 1616 Is_underflowtrap_enabled()) { 1617 /* need to normalize results mantissa */ 1618 sign_save = Sgl_signextendedsign(opnd3); 1619 result_exponent = 0; 1620 Sgl_leftshiftby1(opnd3); 1621 Sgl_normalize(opnd3,result_exponent); 1622 Sgl_set_sign(opnd3,/*using*/sign_save); 1623 Sgl_setwrapped_exponent(opnd3,result_exponent, 1624 unfl); 1625 Sgl_copytoptr(opnd3,dstptr); 1626 /* inexact = FALSE */ 1627 return(OPC_2E_UNDERFLOWEXCEPTION); 1628 } 1629 Sgl_copytoptr(opnd3,dstptr); 1630 return(NOEXCEPTION); 1631 } 1632 /* is denormalized, adjust exponent */ 1633 Sgl_clear_signexponent(opnd1); 1634 Sgl_leftshiftby1(opnd1); 1635 Sgl_normalize(opnd1,mpy_exponent); 1636 } 1637 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 1638 if (Sgl_isnotzero_exponent(opnd2)) { 1639 Sgl_clear_signexponent_set_hidden(opnd2); 1640 } 1641 else { 1642 /* check for zero */ 1643 if (Sgl_iszero_mantissa(opnd2)) { 1644 /* 1645 * Perform the add opnd3 with zero here. 1646 */ 1647 if (Sgl_iszero_exponentmantissa(opnd3)) { 1648 if (Is_rounding_mode(ROUNDMINUS)) { 1649 Sgl_or_signs(opnd3,resultp1); 1650 } else { 1651 Sgl_and_signs(opnd3,resultp1); 1652 } 1653 } 1654 /* 1655 * Now let's check for trapped underflow case. 1656 */ 1657 else if (Sgl_iszero_exponent(opnd3) && 1658 Is_underflowtrap_enabled()) { 1659 /* need to normalize results mantissa */ 1660 sign_save = Sgl_signextendedsign(opnd3); 1661 result_exponent = 0; 1662 Sgl_leftshiftby1(opnd3); 1663 Sgl_normalize(opnd3,result_exponent); 1664 Sgl_set_sign(opnd3,/*using*/sign_save); 1665 Sgl_setwrapped_exponent(opnd3,result_exponent, 1666 unfl); 1667 Sgl_copytoptr(opnd3,dstptr); 1668 /* inexact = FALSE */ 1669 return(OPC_2E_UNDERFLOWEXCEPTION); 1670 } 1671 Sgl_copytoptr(opnd3,dstptr); 1672 return(NOEXCEPTION); 1673 } 1674 /* is denormalized; want to normalize */ 1675 Sgl_clear_signexponent(opnd2); 1676 Sgl_leftshiftby1(opnd2); 1677 Sgl_normalize(opnd2,mpy_exponent); 1678 } 1679 1680 /* Multiply the first two source mantissas together */ 1681 1682 /* 1683 * The intermediate result will be kept in tmpres, 1684 * which needs enough room for 106 bits of mantissa, 1685 * so lets call it a Double extended. 1686 */ 1687 Sglext_setzero(tmpresp1,tmpresp2); 1688 1689 /* 1690 * Four bits at a time are inspected in each loop, and a 1691 * simple shift and add multiply algorithm is used. 1692 */ 1693 for (count = SGL_P-1; count >= 0; count -= 4) { 1694 Sglext_rightshiftby4(tmpresp1,tmpresp2); 1695 if (Sbit28(opnd1)) { 1696 /* Twoword_add should be an ADD followed by 2 ADDC's */ 1697 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); 1698 } 1699 if (Sbit29(opnd1)) { 1700 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); 1701 } 1702 if (Sbit30(opnd1)) { 1703 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); 1704 } 1705 if (Sbit31(opnd1)) { 1706 Twoword_add(tmpresp1, tmpresp2, opnd2, 0); 1707 } 1708 Sgl_rightshiftby4(opnd1); 1709 } 1710 if (Is_sexthiddenoverflow(tmpresp1)) { 1711 /* result mantissa >= 2 (mantissa overflow) */ 1712 mpy_exponent++; 1713 Sglext_rightshiftby4(tmpresp1,tmpresp2); 1714 } else { 1715 Sglext_rightshiftby3(tmpresp1,tmpresp2); 1716 } 1717 1718 /* 1719 * Restore the sign of the mpy result which was saved in resultp1. 1720 * The exponent will continue to be kept in mpy_exponent. 1721 */ 1722 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); 1723 1724 /* 1725 * No rounding is required, since the result of the multiply 1726 * is exact in the extended format. 1727 */ 1728 1729 /* 1730 * Now we are ready to perform the add portion of the operation. 1731 * 1732 * The exponents need to be kept as integers for now, since the 1733 * multiply result might not fit into the exponent field. We 1734 * can't overflow or underflow because of this yet, since the 1735 * add could bring the final result back into range. 1736 */ 1737 add_exponent = Sgl_exponent(opnd3); 1738 1739 /* 1740 * Check for denormalized or zero add operand. 1741 */ 1742 if (add_exponent == 0) { 1743 /* check for zero */ 1744 if (Sgl_iszero_mantissa(opnd3)) { 1745 /* right is zero */ 1746 /* Left can't be zero and must be result. 1747 * 1748 * The final result is now in tmpres and mpy_exponent, 1749 * and needs to be rounded and squeezed back into 1750 * double precision format from double extended. 1751 */ 1752 result_exponent = mpy_exponent; 1753 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); 1754 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ 1755 goto round; 1756 } 1757 1758 /* 1759 * Neither are zeroes. 1760 * Adjust exponent and normalize add operand. 1761 */ 1762 sign_save = Sgl_signextendedsign(opnd3); /* save sign */ 1763 Sgl_clear_signexponent(opnd3); 1764 Sgl_leftshiftby1(opnd3); 1765 Sgl_normalize(opnd3,add_exponent); 1766 Sgl_set_sign(opnd3,sign_save); /* restore sign */ 1767 } else { 1768 Sgl_clear_exponent_set_hidden(opnd3); 1769 } 1770 /* 1771 * Copy opnd3 to the double extended variable called right. 1772 */ 1773 Sgl_copyto_sglext(opnd3,rightp1,rightp2); 1774 1775 /* 1776 * A zero "save" helps discover equal operands (for later), 1777 * and is used in swapping operands (if needed). 1778 */ 1779 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); 1780 1781 /* 1782 * Compare magnitude of operands. 1783 */ 1784 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); 1785 Sglext_copytoint_exponentmantissa(rightp1,signlessright1); 1786 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 1787 Sglext_ismagnitudeless(signlessleft1,signlessright1)) { 1788 /* 1789 * Set the left operand to the larger one by XOR swap. 1790 * First finish the first word "save". 1791 */ 1792 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); 1793 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 1794 Sglext_swap_lower(tmpresp2,rightp2); 1795 /* also setup exponents used in rest of routine */ 1796 diff_exponent = add_exponent - mpy_exponent; 1797 result_exponent = add_exponent; 1798 } else { 1799 /* also setup exponents used in rest of routine */ 1800 diff_exponent = mpy_exponent - add_exponent; 1801 result_exponent = mpy_exponent; 1802 } 1803 /* Invariant: left is not smaller than right. */ 1804 1805 /* 1806 * Special case alignment of operands that would force alignment 1807 * beyond the extent of the extension. A further optimization 1808 * could special case this but only reduces the path length for 1809 * this infrequent case. 1810 */ 1811 if (diff_exponent > SGLEXT_THRESHOLD) { 1812 diff_exponent = SGLEXT_THRESHOLD; 1813 } 1814 1815 /* Align right operand by shifting it to the right */ 1816 Sglext_clear_sign(rightp1); 1817 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); 1818 1819 /* Treat sum and difference of the operands separately. */ 1820 if ((int)save < 0) { 1821 /* 1822 * Difference of the two operands. Overflow can occur if the 1823 * multiply overflowed. A borrow can occur out of the hidden 1824 * bit and force a post normalization phase. 1825 */ 1826 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, 1827 resultp1,resultp2); 1828 sign_save = Sgl_signextendedsign(resultp1); 1829 if (Sgl_iszero_hidden(resultp1)) { 1830 /* Handle normalization */ 1831 /* A straightforward algorithm would now shift the 1832 * result and extension left until the hidden bit 1833 * becomes one. Not all of the extension bits need 1834 * participate in the shift. Only the two most 1835 * significant bits (round and guard) are needed. 1836 * If only a single shift is needed then the guard 1837 * bit becomes a significant low order bit and the 1838 * extension must participate in the rounding. 1839 * If more than a single shift is needed, then all 1840 * bits to the right of the guard bit are zeros, 1841 * and the guard bit may or may not be zero. */ 1842 Sglext_leftshiftby1(resultp1,resultp2); 1843 1844 /* Need to check for a zero result. The sign and 1845 * exponent fields have already been zeroed. The more 1846 * efficient test of the full object can be used. 1847 */ 1848 if (Sglext_iszero(resultp1,resultp2)) { 1849 /* Must have been "x-x" or "x+(-x)". */ 1850 if (Is_rounding_mode(ROUNDMINUS)) 1851 Sgl_setone_sign(resultp1); 1852 Sgl_copytoptr(resultp1,dstptr); 1853 return(NOEXCEPTION); 1854 } 1855 result_exponent--; 1856 1857 /* Look to see if normalization is finished. */ 1858 if (Sgl_isone_hidden(resultp1)) { 1859 /* No further normalization is needed */ 1860 goto round; 1861 } 1862 1863 /* Discover first one bit to determine shift amount. 1864 * Use a modified binary search. We have already 1865 * shifted the result one position right and still 1866 * not found a one so the remainder of the extension 1867 * must be zero and simplifies rounding. */ 1868 /* Scan bytes */ 1869 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { 1870 Sglext_leftshiftby8(resultp1,resultp2); 1871 result_exponent -= 8; 1872 } 1873 /* Now narrow it down to the nibble */ 1874 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { 1875 /* The lower nibble contains the 1876 * normalizing one */ 1877 Sglext_leftshiftby4(resultp1,resultp2); 1878 result_exponent -= 4; 1879 } 1880 /* Select case where first bit is set (already 1881 * normalized) otherwise select the proper shift. */ 1882 jumpsize = Sgl_hiddenhigh3mantissa(resultp1); 1883 if (jumpsize <= 7) switch(jumpsize) { 1884 case 1: 1885 Sglext_leftshiftby3(resultp1,resultp2); 1886 result_exponent -= 3; 1887 break; 1888 case 2: 1889 case 3: 1890 Sglext_leftshiftby2(resultp1,resultp2); 1891 result_exponent -= 2; 1892 break; 1893 case 4: 1894 case 5: 1895 case 6: 1896 case 7: 1897 Sglext_leftshiftby1(resultp1,resultp2); 1898 result_exponent -= 1; 1899 break; 1900 } 1901 } /* end if (hidden...)... */ 1902 /* Fall through and round */ 1903 } /* end if (save < 0)... */ 1904 else { 1905 /* Add magnitudes */ 1906 Sglext_addition(tmpresp1,tmpresp2, 1907 rightp1,rightp2, /*to*/resultp1,resultp2); 1908 sign_save = Sgl_signextendedsign(resultp1); 1909 if (Sgl_isone_hiddenoverflow(resultp1)) { 1910 /* Prenormalization required. */ 1911 Sglext_arithrightshiftby1(resultp1,resultp2); 1912 result_exponent++; 1913 } /* end if hiddenoverflow... */ 1914 } /* end else ...add magnitudes... */ 1915 1916 /* Round the result. If the extension and lower two words are 1917 * all zeros, then the result is exact. Otherwise round in the 1918 * correct direction. Underflow is possible. If a postnormalization 1919 * is necessary, then the mantissa is all zeros so no shift is needed. 1920 */ 1921 round: 1922 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 1923 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); 1924 } 1925 Sgl_set_sign(resultp1,/*using*/sign_save); 1926 if (Sglext_isnotzero_mantissap2(resultp2)) { 1927 inexact = TRUE; 1928 switch(Rounding_mode()) { 1929 case ROUNDNEAREST: /* The default. */ 1930 if (Sglext_isone_highp2(resultp2)) { 1931 /* at least 1/2 ulp */ 1932 if (Sglext_isnotzero_low31p2(resultp2) || 1933 Sglext_isone_lowp1(resultp1)) { 1934 /* either exactly half way and odd or 1935 * more than 1/2ulp */ 1936 Sgl_increment(resultp1); 1937 } 1938 } 1939 break; 1940 1941 case ROUNDPLUS: 1942 if (Sgl_iszero_sign(resultp1)) { 1943 /* Round up positive results */ 1944 Sgl_increment(resultp1); 1945 } 1946 break; 1947 1948 case ROUNDMINUS: 1949 if (Sgl_isone_sign(resultp1)) { 1950 /* Round down negative results */ 1951 Sgl_increment(resultp1); 1952 } 1953 1954 case ROUNDZERO:; 1955 /* truncate is simple */ 1956 } /* end switch... */ 1957 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; 1958 } 1959 if (result_exponent >= SGL_INFINITY_EXPONENT) { 1960 /* Overflow */ 1961 if (Is_overflowtrap_enabled()) { 1962 /* 1963 * Adjust bias of result 1964 */ 1965 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); 1966 Sgl_copytoptr(resultp1,dstptr); 1967 if (inexact) 1968 if (Is_inexacttrap_enabled()) 1969 return (OPC_2E_OVERFLOWEXCEPTION | 1970 OPC_2E_INEXACTEXCEPTION); 1971 else Set_inexactflag(); 1972 return (OPC_2E_OVERFLOWEXCEPTION); 1973 } 1974 inexact = TRUE; 1975 Set_overflowflag(); 1976 Sgl_setoverflow(resultp1); 1977 } else if (result_exponent <= 0) { /* underflow case */ 1978 if (Is_underflowtrap_enabled()) { 1979 /* 1980 * Adjust bias of result 1981 */ 1982 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); 1983 Sgl_copytoptr(resultp1,dstptr); 1984 if (inexact) 1985 if (Is_inexacttrap_enabled()) 1986 return (OPC_2E_UNDERFLOWEXCEPTION | 1987 OPC_2E_INEXACTEXCEPTION); 1988 else Set_inexactflag(); 1989 return(OPC_2E_UNDERFLOWEXCEPTION); 1990 } 1991 else if (inexact && is_tiny) Set_underflowflag(); 1992 } 1993 else Sgl_set_exponent(resultp1,result_exponent); 1994 Sgl_copytoptr(resultp1,dstptr); 1995 if (inexact) 1996 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 1997 else Set_inexactflag(); 1998 return(NOEXCEPTION); 1999 } 2000 2001 /* 2002 * Single Floating-point Multiply Negate Fused Add 2003 */ 2004 2005 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr) 2006 2007 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr; 2008 unsigned int *status; 2009 { 2010 unsigned int opnd1, opnd2, opnd3; 2011 register unsigned int tmpresp1, tmpresp2; 2012 unsigned int rightp1, rightp2; 2013 unsigned int resultp1, resultp2 = 0; 2014 register int mpy_exponent, add_exponent, count; 2015 boolean inexact = FALSE, is_tiny = FALSE; 2016 2017 unsigned int signlessleft1, signlessright1, save; 2018 register int result_exponent, diff_exponent; 2019 int sign_save, jumpsize; 2020 2021 Sgl_copyfromptr(src1ptr,opnd1); 2022 Sgl_copyfromptr(src2ptr,opnd2); 2023 Sgl_copyfromptr(src3ptr,opnd3); 2024 2025 /* 2026 * set sign bit of result of multiply 2027 */ 2028 if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 2029 Sgl_setzero(resultp1); 2030 else 2031 Sgl_setnegativezero(resultp1); 2032 2033 /* 2034 * Generate multiply exponent 2035 */ 2036 mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS; 2037 2038 /* 2039 * check first operand for NaN's or infinity 2040 */ 2041 if (Sgl_isinfinity_exponent(opnd1)) { 2042 if (Sgl_iszero_mantissa(opnd1)) { 2043 if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) { 2044 if (Sgl_iszero_exponentmantissa(opnd2)) { 2045 /* 2046 * invalid since operands are infinity 2047 * and zero 2048 */ 2049 if (Is_invalidtrap_enabled()) 2050 return(OPC_2E_INVALIDEXCEPTION); 2051 Set_invalidflag(); 2052 Sgl_makequietnan(resultp1); 2053 Sgl_copytoptr(resultp1,dstptr); 2054 return(NOEXCEPTION); 2055 } 2056 /* 2057 * Check third operand for infinity with a 2058 * sign opposite of the multiply result 2059 */ 2060 if (Sgl_isinfinity(opnd3) && 2061 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 2062 /* 2063 * invalid since attempting a magnitude 2064 * subtraction of infinities 2065 */ 2066 if (Is_invalidtrap_enabled()) 2067 return(OPC_2E_INVALIDEXCEPTION); 2068 Set_invalidflag(); 2069 Sgl_makequietnan(resultp1); 2070 Sgl_copytoptr(resultp1,dstptr); 2071 return(NOEXCEPTION); 2072 } 2073 2074 /* 2075 * return infinity 2076 */ 2077 Sgl_setinfinity_exponentmantissa(resultp1); 2078 Sgl_copytoptr(resultp1,dstptr); 2079 return(NOEXCEPTION); 2080 } 2081 } 2082 else { 2083 /* 2084 * is NaN; signaling or quiet? 2085 */ 2086 if (Sgl_isone_signaling(opnd1)) { 2087 /* trap if INVALIDTRAP enabled */ 2088 if (Is_invalidtrap_enabled()) 2089 return(OPC_2E_INVALIDEXCEPTION); 2090 /* make NaN quiet */ 2091 Set_invalidflag(); 2092 Sgl_set_quiet(opnd1); 2093 } 2094 /* 2095 * is second operand a signaling NaN? 2096 */ 2097 else if (Sgl_is_signalingnan(opnd2)) { 2098 /* trap if INVALIDTRAP enabled */ 2099 if (Is_invalidtrap_enabled()) 2100 return(OPC_2E_INVALIDEXCEPTION); 2101 /* make NaN quiet */ 2102 Set_invalidflag(); 2103 Sgl_set_quiet(opnd2); 2104 Sgl_copytoptr(opnd2,dstptr); 2105 return(NOEXCEPTION); 2106 } 2107 /* 2108 * is third operand a signaling NaN? 2109 */ 2110 else if (Sgl_is_signalingnan(opnd3)) { 2111 /* trap if INVALIDTRAP enabled */ 2112 if (Is_invalidtrap_enabled()) 2113 return(OPC_2E_INVALIDEXCEPTION); 2114 /* make NaN quiet */ 2115 Set_invalidflag(); 2116 Sgl_set_quiet(opnd3); 2117 Sgl_copytoptr(opnd3,dstptr); 2118 return(NOEXCEPTION); 2119 } 2120 /* 2121 * return quiet NaN 2122 */ 2123 Sgl_copytoptr(opnd1,dstptr); 2124 return(NOEXCEPTION); 2125 } 2126 } 2127 2128 /* 2129 * check second operand for NaN's or infinity 2130 */ 2131 if (Sgl_isinfinity_exponent(opnd2)) { 2132 if (Sgl_iszero_mantissa(opnd2)) { 2133 if (Sgl_isnotnan(opnd3)) { 2134 if (Sgl_iszero_exponentmantissa(opnd1)) { 2135 /* 2136 * invalid since multiply operands are 2137 * zero & infinity 2138 */ 2139 if (Is_invalidtrap_enabled()) 2140 return(OPC_2E_INVALIDEXCEPTION); 2141 Set_invalidflag(); 2142 Sgl_makequietnan(opnd2); 2143 Sgl_copytoptr(opnd2,dstptr); 2144 return(NOEXCEPTION); 2145 } 2146 2147 /* 2148 * Check third operand for infinity with a 2149 * sign opposite of the multiply result 2150 */ 2151 if (Sgl_isinfinity(opnd3) && 2152 (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) { 2153 /* 2154 * invalid since attempting a magnitude 2155 * subtraction of infinities 2156 */ 2157 if (Is_invalidtrap_enabled()) 2158 return(OPC_2E_INVALIDEXCEPTION); 2159 Set_invalidflag(); 2160 Sgl_makequietnan(resultp1); 2161 Sgl_copytoptr(resultp1,dstptr); 2162 return(NOEXCEPTION); 2163 } 2164 2165 /* 2166 * return infinity 2167 */ 2168 Sgl_setinfinity_exponentmantissa(resultp1); 2169 Sgl_copytoptr(resultp1,dstptr); 2170 return(NOEXCEPTION); 2171 } 2172 } 2173 else { 2174 /* 2175 * is NaN; signaling or quiet? 2176 */ 2177 if (Sgl_isone_signaling(opnd2)) { 2178 /* trap if INVALIDTRAP enabled */ 2179 if (Is_invalidtrap_enabled()) 2180 return(OPC_2E_INVALIDEXCEPTION); 2181 /* make NaN quiet */ 2182 Set_invalidflag(); 2183 Sgl_set_quiet(opnd2); 2184 } 2185 /* 2186 * is third operand a signaling NaN? 2187 */ 2188 else if (Sgl_is_signalingnan(opnd3)) { 2189 /* trap if INVALIDTRAP enabled */ 2190 if (Is_invalidtrap_enabled()) 2191 return(OPC_2E_INVALIDEXCEPTION); 2192 /* make NaN quiet */ 2193 Set_invalidflag(); 2194 Sgl_set_quiet(opnd3); 2195 Sgl_copytoptr(opnd3,dstptr); 2196 return(NOEXCEPTION); 2197 } 2198 /* 2199 * return quiet NaN 2200 */ 2201 Sgl_copytoptr(opnd2,dstptr); 2202 return(NOEXCEPTION); 2203 } 2204 } 2205 2206 /* 2207 * check third operand for NaN's or infinity 2208 */ 2209 if (Sgl_isinfinity_exponent(opnd3)) { 2210 if (Sgl_iszero_mantissa(opnd3)) { 2211 /* return infinity */ 2212 Sgl_copytoptr(opnd3,dstptr); 2213 return(NOEXCEPTION); 2214 } else { 2215 /* 2216 * is NaN; signaling or quiet? 2217 */ 2218 if (Sgl_isone_signaling(opnd3)) { 2219 /* trap if INVALIDTRAP enabled */ 2220 if (Is_invalidtrap_enabled()) 2221 return(OPC_2E_INVALIDEXCEPTION); 2222 /* make NaN quiet */ 2223 Set_invalidflag(); 2224 Sgl_set_quiet(opnd3); 2225 } 2226 /* 2227 * return quiet NaN 2228 */ 2229 Sgl_copytoptr(opnd3,dstptr); 2230 return(NOEXCEPTION); 2231 } 2232 } 2233 2234 /* 2235 * Generate multiply mantissa 2236 */ 2237 if (Sgl_isnotzero_exponent(opnd1)) { 2238 /* set hidden bit */ 2239 Sgl_clear_signexponent_set_hidden(opnd1); 2240 } 2241 else { 2242 /* check for zero */ 2243 if (Sgl_iszero_mantissa(opnd1)) { 2244 /* 2245 * Perform the add opnd3 with zero here. 2246 */ 2247 if (Sgl_iszero_exponentmantissa(opnd3)) { 2248 if (Is_rounding_mode(ROUNDMINUS)) { 2249 Sgl_or_signs(opnd3,resultp1); 2250 } else { 2251 Sgl_and_signs(opnd3,resultp1); 2252 } 2253 } 2254 /* 2255 * Now let's check for trapped underflow case. 2256 */ 2257 else if (Sgl_iszero_exponent(opnd3) && 2258 Is_underflowtrap_enabled()) { 2259 /* need to normalize results mantissa */ 2260 sign_save = Sgl_signextendedsign(opnd3); 2261 result_exponent = 0; 2262 Sgl_leftshiftby1(opnd3); 2263 Sgl_normalize(opnd3,result_exponent); 2264 Sgl_set_sign(opnd3,/*using*/sign_save); 2265 Sgl_setwrapped_exponent(opnd3,result_exponent, 2266 unfl); 2267 Sgl_copytoptr(opnd3,dstptr); 2268 /* inexact = FALSE */ 2269 return(OPC_2E_UNDERFLOWEXCEPTION); 2270 } 2271 Sgl_copytoptr(opnd3,dstptr); 2272 return(NOEXCEPTION); 2273 } 2274 /* is denormalized, adjust exponent */ 2275 Sgl_clear_signexponent(opnd1); 2276 Sgl_leftshiftby1(opnd1); 2277 Sgl_normalize(opnd1,mpy_exponent); 2278 } 2279 /* opnd2 needs to have hidden bit set with msb in hidden bit */ 2280 if (Sgl_isnotzero_exponent(opnd2)) { 2281 Sgl_clear_signexponent_set_hidden(opnd2); 2282 } 2283 else { 2284 /* check for zero */ 2285 if (Sgl_iszero_mantissa(opnd2)) { 2286 /* 2287 * Perform the add opnd3 with zero here. 2288 */ 2289 if (Sgl_iszero_exponentmantissa(opnd3)) { 2290 if (Is_rounding_mode(ROUNDMINUS)) { 2291 Sgl_or_signs(opnd3,resultp1); 2292 } else { 2293 Sgl_and_signs(opnd3,resultp1); 2294 } 2295 } 2296 /* 2297 * Now let's check for trapped underflow case. 2298 */ 2299 else if (Sgl_iszero_exponent(opnd3) && 2300 Is_underflowtrap_enabled()) { 2301 /* need to normalize results mantissa */ 2302 sign_save = Sgl_signextendedsign(opnd3); 2303 result_exponent = 0; 2304 Sgl_leftshiftby1(opnd3); 2305 Sgl_normalize(opnd3,result_exponent); 2306 Sgl_set_sign(opnd3,/*using*/sign_save); 2307 Sgl_setwrapped_exponent(opnd3,result_exponent, 2308 unfl); 2309 Sgl_copytoptr(opnd3,dstptr); 2310 /* inexact = FALSE */ 2311 return(OPC_2E_UNDERFLOWEXCEPTION); 2312 } 2313 Sgl_copytoptr(opnd3,dstptr); 2314 return(NOEXCEPTION); 2315 } 2316 /* is denormalized; want to normalize */ 2317 Sgl_clear_signexponent(opnd2); 2318 Sgl_leftshiftby1(opnd2); 2319 Sgl_normalize(opnd2,mpy_exponent); 2320 } 2321 2322 /* Multiply the first two source mantissas together */ 2323 2324 /* 2325 * The intermediate result will be kept in tmpres, 2326 * which needs enough room for 106 bits of mantissa, 2327 * so lets call it a Double extended. 2328 */ 2329 Sglext_setzero(tmpresp1,tmpresp2); 2330 2331 /* 2332 * Four bits at a time are inspected in each loop, and a 2333 * simple shift and add multiply algorithm is used. 2334 */ 2335 for (count = SGL_P-1; count >= 0; count -= 4) { 2336 Sglext_rightshiftby4(tmpresp1,tmpresp2); 2337 if (Sbit28(opnd1)) { 2338 /* Twoword_add should be an ADD followed by 2 ADDC's */ 2339 Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0); 2340 } 2341 if (Sbit29(opnd1)) { 2342 Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0); 2343 } 2344 if (Sbit30(opnd1)) { 2345 Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0); 2346 } 2347 if (Sbit31(opnd1)) { 2348 Twoword_add(tmpresp1, tmpresp2, opnd2, 0); 2349 } 2350 Sgl_rightshiftby4(opnd1); 2351 } 2352 if (Is_sexthiddenoverflow(tmpresp1)) { 2353 /* result mantissa >= 2 (mantissa overflow) */ 2354 mpy_exponent++; 2355 Sglext_rightshiftby4(tmpresp1,tmpresp2); 2356 } else { 2357 Sglext_rightshiftby3(tmpresp1,tmpresp2); 2358 } 2359 2360 /* 2361 * Restore the sign of the mpy result which was saved in resultp1. 2362 * The exponent will continue to be kept in mpy_exponent. 2363 */ 2364 Sglext_set_sign(tmpresp1,Sgl_sign(resultp1)); 2365 2366 /* 2367 * No rounding is required, since the result of the multiply 2368 * is exact in the extended format. 2369 */ 2370 2371 /* 2372 * Now we are ready to perform the add portion of the operation. 2373 * 2374 * The exponents need to be kept as integers for now, since the 2375 * multiply result might not fit into the exponent field. We 2376 * can't overflow or underflow because of this yet, since the 2377 * add could bring the final result back into range. 2378 */ 2379 add_exponent = Sgl_exponent(opnd3); 2380 2381 /* 2382 * Check for denormalized or zero add operand. 2383 */ 2384 if (add_exponent == 0) { 2385 /* check for zero */ 2386 if (Sgl_iszero_mantissa(opnd3)) { 2387 /* right is zero */ 2388 /* Left can't be zero and must be result. 2389 * 2390 * The final result is now in tmpres and mpy_exponent, 2391 * and needs to be rounded and squeezed back into 2392 * double precision format from double extended. 2393 */ 2394 result_exponent = mpy_exponent; 2395 Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2); 2396 sign_save = Sgl_signextendedsign(resultp1);/*save sign*/ 2397 goto round; 2398 } 2399 2400 /* 2401 * Neither are zeroes. 2402 * Adjust exponent and normalize add operand. 2403 */ 2404 sign_save = Sgl_signextendedsign(opnd3); /* save sign */ 2405 Sgl_clear_signexponent(opnd3); 2406 Sgl_leftshiftby1(opnd3); 2407 Sgl_normalize(opnd3,add_exponent); 2408 Sgl_set_sign(opnd3,sign_save); /* restore sign */ 2409 } else { 2410 Sgl_clear_exponent_set_hidden(opnd3); 2411 } 2412 /* 2413 * Copy opnd3 to the double extended variable called right. 2414 */ 2415 Sgl_copyto_sglext(opnd3,rightp1,rightp2); 2416 2417 /* 2418 * A zero "save" helps discover equal operands (for later), 2419 * and is used in swapping operands (if needed). 2420 */ 2421 Sglext_xortointp1(tmpresp1,rightp1,/*to*/save); 2422 2423 /* 2424 * Compare magnitude of operands. 2425 */ 2426 Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1); 2427 Sglext_copytoint_exponentmantissa(rightp1,signlessright1); 2428 if (mpy_exponent < add_exponent || mpy_exponent == add_exponent && 2429 Sglext_ismagnitudeless(signlessleft1,signlessright1)) { 2430 /* 2431 * Set the left operand to the larger one by XOR swap. 2432 * First finish the first word "save". 2433 */ 2434 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1); 2435 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1); 2436 Sglext_swap_lower(tmpresp2,rightp2); 2437 /* also setup exponents used in rest of routine */ 2438 diff_exponent = add_exponent - mpy_exponent; 2439 result_exponent = add_exponent; 2440 } else { 2441 /* also setup exponents used in rest of routine */ 2442 diff_exponent = mpy_exponent - add_exponent; 2443 result_exponent = mpy_exponent; 2444 } 2445 /* Invariant: left is not smaller than right. */ 2446 2447 /* 2448 * Special case alignment of operands that would force alignment 2449 * beyond the extent of the extension. A further optimization 2450 * could special case this but only reduces the path length for 2451 * this infrequent case. 2452 */ 2453 if (diff_exponent > SGLEXT_THRESHOLD) { 2454 diff_exponent = SGLEXT_THRESHOLD; 2455 } 2456 2457 /* Align right operand by shifting it to the right */ 2458 Sglext_clear_sign(rightp1); 2459 Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent); 2460 2461 /* Treat sum and difference of the operands separately. */ 2462 if ((int)save < 0) { 2463 /* 2464 * Difference of the two operands. Overflow can occur if the 2465 * multiply overflowed. A borrow can occur out of the hidden 2466 * bit and force a post normalization phase. 2467 */ 2468 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2, 2469 resultp1,resultp2); 2470 sign_save = Sgl_signextendedsign(resultp1); 2471 if (Sgl_iszero_hidden(resultp1)) { 2472 /* Handle normalization */ 2473 /* A straightforward algorithm would now shift the 2474 * result and extension left until the hidden bit 2475 * becomes one. Not all of the extension bits need 2476 * participate in the shift. Only the two most 2477 * significant bits (round and guard) are needed. 2478 * If only a single shift is needed then the guard 2479 * bit becomes a significant low order bit and the 2480 * extension must participate in the rounding. 2481 * If more than a single shift is needed, then all 2482 * bits to the right of the guard bit are zeros, 2483 * and the guard bit may or may not be zero. */ 2484 Sglext_leftshiftby1(resultp1,resultp2); 2485 2486 /* Need to check for a zero result. The sign and 2487 * exponent fields have already been zeroed. The more 2488 * efficient test of the full object can be used. 2489 */ 2490 if (Sglext_iszero(resultp1,resultp2)) { 2491 /* Must have been "x-x" or "x+(-x)". */ 2492 if (Is_rounding_mode(ROUNDMINUS)) 2493 Sgl_setone_sign(resultp1); 2494 Sgl_copytoptr(resultp1,dstptr); 2495 return(NOEXCEPTION); 2496 } 2497 result_exponent--; 2498 2499 /* Look to see if normalization is finished. */ 2500 if (Sgl_isone_hidden(resultp1)) { 2501 /* No further normalization is needed */ 2502 goto round; 2503 } 2504 2505 /* Discover first one bit to determine shift amount. 2506 * Use a modified binary search. We have already 2507 * shifted the result one position right and still 2508 * not found a one so the remainder of the extension 2509 * must be zero and simplifies rounding. */ 2510 /* Scan bytes */ 2511 while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) { 2512 Sglext_leftshiftby8(resultp1,resultp2); 2513 result_exponent -= 8; 2514 } 2515 /* Now narrow it down to the nibble */ 2516 if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) { 2517 /* The lower nibble contains the 2518 * normalizing one */ 2519 Sglext_leftshiftby4(resultp1,resultp2); 2520 result_exponent -= 4; 2521 } 2522 /* Select case where first bit is set (already 2523 * normalized) otherwise select the proper shift. */ 2524 jumpsize = Sgl_hiddenhigh3mantissa(resultp1); 2525 if (jumpsize <= 7) switch(jumpsize) { 2526 case 1: 2527 Sglext_leftshiftby3(resultp1,resultp2); 2528 result_exponent -= 3; 2529 break; 2530 case 2: 2531 case 3: 2532 Sglext_leftshiftby2(resultp1,resultp2); 2533 result_exponent -= 2; 2534 break; 2535 case 4: 2536 case 5: 2537 case 6: 2538 case 7: 2539 Sglext_leftshiftby1(resultp1,resultp2); 2540 result_exponent -= 1; 2541 break; 2542 } 2543 } /* end if (hidden...)... */ 2544 /* Fall through and round */ 2545 } /* end if (save < 0)... */ 2546 else { 2547 /* Add magnitudes */ 2548 Sglext_addition(tmpresp1,tmpresp2, 2549 rightp1,rightp2, /*to*/resultp1,resultp2); 2550 sign_save = Sgl_signextendedsign(resultp1); 2551 if (Sgl_isone_hiddenoverflow(resultp1)) { 2552 /* Prenormalization required. */ 2553 Sglext_arithrightshiftby1(resultp1,resultp2); 2554 result_exponent++; 2555 } /* end if hiddenoverflow... */ 2556 } /* end else ...add magnitudes... */ 2557 2558 /* Round the result. If the extension and lower two words are 2559 * all zeros, then the result is exact. Otherwise round in the 2560 * correct direction. Underflow is possible. If a postnormalization 2561 * is necessary, then the mantissa is all zeros so no shift is needed. 2562 */ 2563 round: 2564 if (result_exponent <= 0 && !Is_underflowtrap_enabled()) { 2565 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny); 2566 } 2567 Sgl_set_sign(resultp1,/*using*/sign_save); 2568 if (Sglext_isnotzero_mantissap2(resultp2)) { 2569 inexact = TRUE; 2570 switch(Rounding_mode()) { 2571 case ROUNDNEAREST: /* The default. */ 2572 if (Sglext_isone_highp2(resultp2)) { 2573 /* at least 1/2 ulp */ 2574 if (Sglext_isnotzero_low31p2(resultp2) || 2575 Sglext_isone_lowp1(resultp1)) { 2576 /* either exactly half way and odd or 2577 * more than 1/2ulp */ 2578 Sgl_increment(resultp1); 2579 } 2580 } 2581 break; 2582 2583 case ROUNDPLUS: 2584 if (Sgl_iszero_sign(resultp1)) { 2585 /* Round up positive results */ 2586 Sgl_increment(resultp1); 2587 } 2588 break; 2589 2590 case ROUNDMINUS: 2591 if (Sgl_isone_sign(resultp1)) { 2592 /* Round down negative results */ 2593 Sgl_increment(resultp1); 2594 } 2595 2596 case ROUNDZERO:; 2597 /* truncate is simple */ 2598 } /* end switch... */ 2599 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++; 2600 } 2601 if (result_exponent >= SGL_INFINITY_EXPONENT) { 2602 /* Overflow */ 2603 if (Is_overflowtrap_enabled()) { 2604 /* 2605 * Adjust bias of result 2606 */ 2607 Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl); 2608 Sgl_copytoptr(resultp1,dstptr); 2609 if (inexact) 2610 if (Is_inexacttrap_enabled()) 2611 return (OPC_2E_OVERFLOWEXCEPTION | 2612 OPC_2E_INEXACTEXCEPTION); 2613 else Set_inexactflag(); 2614 return (OPC_2E_OVERFLOWEXCEPTION); 2615 } 2616 inexact = TRUE; 2617 Set_overflowflag(); 2618 Sgl_setoverflow(resultp1); 2619 } else if (result_exponent <= 0) { /* underflow case */ 2620 if (Is_underflowtrap_enabled()) { 2621 /* 2622 * Adjust bias of result 2623 */ 2624 Sgl_setwrapped_exponent(resultp1,result_exponent,unfl); 2625 Sgl_copytoptr(resultp1,dstptr); 2626 if (inexact) 2627 if (Is_inexacttrap_enabled()) 2628 return (OPC_2E_UNDERFLOWEXCEPTION | 2629 OPC_2E_INEXACTEXCEPTION); 2630 else Set_inexactflag(); 2631 return(OPC_2E_UNDERFLOWEXCEPTION); 2632 } 2633 else if (inexact && is_tiny) Set_underflowflag(); 2634 } 2635 else Sgl_set_exponent(resultp1,result_exponent); 2636 Sgl_copytoptr(resultp1,dstptr); 2637 if (inexact) 2638 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION); 2639 else Set_inexactflag(); 2640 return(NOEXCEPTION); 2641 } 2642 2643