.file	"reg_round.S"
/*
 * reg_round.S
 *
 * Rounding/truncation/etc for FPU basic arithmetic functions.
 *
 * Copyright (C) 1993,1995,1997
 *                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
 *                       Australia.  E-mail   billm@suburbia.net
 *
 * This code has four possible entry points.
 * The following must be entered by a jmp instruction:
 *   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.
 *
 * NOTE(review): no fpu_reg_round_sqrt label is defined in this file as
 * shown; the references to it in these header comments may be stale.
 * Confirm before relying on them.
 *
 * The FPU_round entry point is intended to be used by C code.
 * From C, call as:
 *   int FPU_round(FPU_REG *arg, unsigned int extent, unsigned int control_w)
 *
 * NOTE(review): the code below reads the control word from PARAM4 and the
 * sign (compared against SIGN_POS as a byte) from PARAM5, while only PARAM1
 * and PARAM2 are used for arg and extent.  The real C prototype therefore
 * presumably has five parameters rather than the three shown above.
 * Confirm against the C declaration.
 *
 * Return value is the tag of the answer, or-ed with FPU_Exception if
 * one was raised, or -1 on internal error.
 *
 * For correct "up" and "down" rounding, the argument must have the correct
 * sign.
 */

/*
 * Four entry points.
 *
 * Needed by both the fpu_reg_round and fpu_reg_round_sqrt entry points:
 *   %eax:%ebx  64 bit significand
 *   %edx       32 bit extension of the significand
 *   %edi       pointer to an FPU_REG for the result to be stored
 *   stack      calling function must have set up a C stack frame and
 *              pushed %esi, %edi, and %ebx
 *
 * Needed just for the fpu_reg_round_sqrt entry point:
 *   %cx  A control word in the same format as the FPU control word.
 * Otherwise, PARAM4 must give such a value.
 *
 *
 * The significand and its extension are assumed to be exact in the
 * following sense:
 *   If the significand by itself is the exact result then the significand
 *   extension (%edx) must contain 0, otherwise the significand extension
 *   must be non-zero.
 *   If the significand extension is non-zero then the significand is
 *   smaller than the magnitude of the correct exact result by an amount
 *   greater than zero and less than one ls bit of the significand.
 *   The significand extension is only required to have three possible
 *   non-zero values:
 *       less than 0x80000000  <=> the significand is less than 1/2 an ls
 *                                 bit smaller than the magnitude of the
 *                                 true exact result.
 *         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit
 *                                 smaller than the magnitude of the true
 *                                 exact result.
 *    greater than 0x80000000  <=> the significand is more than 1/2 an ls
 *                                 bit smaller than the magnitude of the
 *                                 true exact result.
 */

/*
 * The code in this module has become quite complex, but it should handle
 * all of the FPU flags which are set at this stage of the basic arithmetic
 * computations.
 * There are a few rare cases where the results are not set identically to
 * a real FPU. These require a bit more thought because at this stage the
 * results of the code here appear to be more consistent...
 * This may be changed in a future version.
 */


#include "fpu_emu.h"
#include "exception.h"
#include "control_w.h"

/* Flags for FPU_bits_lost */
#define	LOST_DOWN	$1
#define	LOST_UP		$2

/* Flags for FPU_denormal */
#define	DENORMAL	$1
#define	UNMASKED_UNDERFLOW $2


#ifndef NON_REENTRANT_FPU
/*	Make the code re-entrant by putting
	local storage on the stack: */
/*	Both bytes live in the 4-byte slot reserved by the "pushl %ebx"
	at fpu_reg_round.  They are %esp-relative, so they are only
	addressable while nothing else is pushed on top of that slot. */
#define FPU_bits_lost	(%esp)
#define FPU_denormal	1(%esp)

#else
/*	Not re-entrant, so we can gain speed by putting
	local storage in a static area: */
.data
	.align 4,0
FPU_bits_lost:
	.byte	0
FPU_denormal:
	.byte	0
#endif /* NON_REENTRANT_FPU */


.text
.globl fpu_reg_round
.globl fpu_Arith_exit

/* Entry point when called from C */
/*
 * Builds a frame, saves %esi/%edi/%ebx, then loads the register interface
 * expected by fpu_reg_round:
 *   %edi     = PARAM1 (pointer to the FPU_REG, also the destination)
 *   %eax:%ebx = SIGH:SIGL of that register
 *   %edx     = PARAM2 (significand extension)
 * and falls through into fpu_reg_round.
 */
ENTRY(FPU_round)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%edi
	movl	SIGH(%edi),%eax
	movl	SIGL(%edi),%ebx
	movl	PARAM2,%edx

fpu_reg_round:			/* Normal entry point */
	movl	PARAM4,%ecx	/* %ecx = control word */

#ifndef NON_REENTRANT_FPU
	/* The value pushed is irrelevant; this only reserves the scratch
	   slot addressed by FPU_bits_lost / FPU_denormal. */
	pushl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

#ifdef PARANOID
/* Cannot use this here yet */
/*	orl	%eax,%eax */
/*	jns	L_entry_bugged */
#endif /* PARANOID */

	/* Signed compare: exponent <= EXP_UNDER means denormal handling. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	L_Make_denorm			/* The number is a de-normal */

	movb	$0,FPU_denormal			/* 0 -> not a de-normal */

/*
 * Common continuation.  On arrival %ecx holds the control word; the
 * denormal paths restore it with popl before jumping here.
 */
Denorm_done:
	movb	$0,FPU_bits_lost		/* No bits yet lost in rounding */

	movl	%ecx,%esi	/* keep the full control word for the RC test */
	andl	CW_PC,%ecx	/* isolate the precision control field */
	cmpl	PR_64_BITS,%ecx
	je	LRound_To_64

	cmpl	PR_53_BITS,%ecx
	je	LRound_To_53

	cmpl	PR_24_BITS,%ecx
	je	LRound_To_24

#ifdef PECULIAR_486
/* With the precision control bits set to 01 "(reserved)", a real 80486
   behaves as if the precision control bits were set to 11 "64 bits" */
	cmpl	PR_RESERVED_BITS,%ecx
	je	LRound_To_64
#ifdef PARANOID
	jmp	L_bugged_denorm_486
#endif /* PARANOID */
#else
#ifdef PARANOID
	jmp	L_bugged_denorm	/* There is no bug, just a bad control word */
#endif /* PARANOID */
#endif /* PECULIAR_486 */
/* Without PARANOID, an unmatched precision value falls through into the
   24 bit rounding code below. */


/* Round etc to 24 bit precision */
/*
 * The 24 bit result occupies the top 24 bits of %eax.  Everything below
 * that (low 8 bits of %eax, all of %ebx, and the extension %edx) is the
 * discarded part that decides the rounding.
 */
LRound_To_24:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_24

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_24

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_24

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_24

#ifdef PARANOID
	jmp	L_bugged_round24
#endif /* PARANOID */
/* Without PARANOID, execution falls through into LUp_24 if none of the
   four compares matched. */

LUp_24:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_24	/* If negative then  up==truncate */

	jmp	LCheck_24_round_up

LDown_24:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_24	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set. */
LCheck_24_round_up:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jnz	LDo_24_round_up
	jmp	L_Re_normalise

LRound_nearest_24:
	/* Do rounding of the 24th bit if needed (nearest or even) */
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	cmpl	$0x00000080,%ecx	/* 0x80 = half an ls bit at 24 bits */
	jc	LCheck_truncate_24	/* less than half, no increment needed */

	jne	LGreater_Half_24	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%ebx,%ebx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	orl	%edx,%edx
	jnz	LGreater_Half_24	/* greater than half, increment needed */

	/* Exactly half, increment only if 24th bit is 1 (round to even) */
	testl	$0x00000100,%eax
	jz	LDo_truncate_24

LGreater_Half_24:			/* Rounding: increment at the 24th bit */
LDo_24_round_up:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_UP,FPU_bits_lost
	addl	$0x00000100,%eax
	/* jmp leaves the flags alone: the carry out of the addl above is
	   what LCheck_Round_Overflow tests. */
	jmp	LCheck_Round_Overflow

LCheck_truncate_24:
	movl	%eax,%ecx
	andl	$0x000000ff,%ecx
	orl	%ebx,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise		/* No truncation needed */

LDo_truncate_24:
	andl	$0xffffff00,%eax	/* Truncate to 24 bits */
	xorl	%ebx,%ebx
	movb	LOST_DOWN,FPU_bits_lost
	jmp	L_Re_normalise


/* Round etc to 53 bit precision */
/*
 * The 53 bit result occupies %eax and the top 21 bits of %ebx.  The low
 * 11 bits of %ebx (mask 0x7ff) and the extension %edx are the discarded
 * part; 0x400 is half an ls bit and 0x800 is the ls bit that is kept.
 */
LRound_To_53:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_53

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_53

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_53

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_53

#ifdef PARANOID
	jmp	L_bugged_round53
#endif /* PARANOID */
/* Without PARANOID, execution falls through into LUp_53 if none of the
   four compares matched. */

LUp_53:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_53	/* If negative then  up==truncate */

	jmp	LCheck_53_round_up

LDown_53:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_53	/* If positive then  down==truncate */

/* Directed rounding away from zero: increment if any discarded bit is set. */
LCheck_53_round_up:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jnz	LDo_53_round_up
	jmp	L_Re_normalise

LRound_nearest_53:
	/* Do rounding of the 53rd bit if needed (nearest or even) */
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	cmpl	$0x00000400,%ecx
	jc	LCheck_truncate_53	/* less than half, no increment needed */

	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Possibly half, we need to check the ls bits */
	orl	%edx,%edx
	jnz	LGreater_Half_53	/* greater than half, increment needed */

	/* Exactly half, increment only if 53rd bit is 1 (round to even) */
	testl	$0x00000800,%ebx
	jz	LTruncate_53

LGreater_Half_53:			/* Rounding: increment at the 53rd bit */
LDo_53_round_up:
	movb	LOST_UP,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	addl	$0x00000800,%ebx
	adcl	$0,%eax
	/* The carry out of the adcl is tested at LCheck_Round_Overflow;
	   jmp does not modify the flags. */
	jmp	LCheck_Round_Overflow

LCheck_truncate_53:
	movl	%ebx,%ecx
	andl	$0x000007ff,%ecx
	orl	%edx,%ecx
	jz	L_Re_normalise

LTruncate_53:
	movb	LOST_DOWN,FPU_bits_lost
	andl	$0xfffff800,%ebx	/* Truncate to 53 bits */
	jmp	L_Re_normalise


/* Round etc to 64 bit precision */
/*
 * The whole of %eax:%ebx is kept; only the extension %edx is discarded,
 * so %edx alone decides the rounding.
 */
LRound_To_64:
	movl	%esi,%ecx
	andl	CW_RC,%ecx	/* isolate the rounding control field */
	cmpl	RC_RND,%ecx
	je	LRound_nearest_64

	cmpl	RC_CHOP,%ecx
	je	LCheck_truncate_64

	cmpl	RC_UP,%ecx		/* Towards +infinity */
	je	LUp_64

	cmpl	RC_DOWN,%ecx		/* Towards -infinity */
	je	LDown_64

#ifdef PARANOID
	jmp	L_bugged_round64
#endif /* PARANOID */
/* Without PARANOID, execution falls through into LUp_64 if none of the
   four compares matched. */

LUp_64:
	cmpb	SIGN_POS,PARAM5
	jne	LCheck_truncate_64	/* If negative then  up==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LDown_64:
	cmpb	SIGN_POS,PARAM5
	je	LCheck_truncate_64	/* If positive then  down==truncate */

	orl	%edx,%edx
	jnz	LDo_64_round_up
	jmp	L_Re_normalise

LRound_nearest_64:
	cmpl	$0x80000000,%edx	/* compare the extension with 1/2 ls bit */
	jc	LCheck_truncate_64	/* less than half */

	jne	LDo_64_round_up		/* more than half */

	/* Now test for round-to-even */
	testb	$1,%bl
	jz	LCheck_truncate_64

LDo_64_round_up:
	movb	LOST_UP,FPU_bits_lost
	addl	$1,%ebx
	adcl	$0,%eax

/*
 * Shared by the 24, 53 and 64 bit round-up paths.  Entered with CF set
 * when the increment carried out of the top of %eax.
 */
LCheck_Round_Overflow:
	jnc	L_Re_normalise

	/* Overflow, adjust the result (significand to 1.0) */
	/* CF is still set here, so the first rcrl rotates it into the
	   ms bit of %eax. */
	rcrl	$1,%eax
	rcrl	$1,%ebx
	incw	EXP(%edi)
	jmp	L_Re_normalise

LCheck_truncate_64:
	orl	%edx,%edx
	jz	L_Re_normalise

LTruncate_64:
	movb	LOST_DOWN,FPU_bits_lost

/* Rounding is complete.  A non-zero FPU_denormal means the entry code took
   the L_Make_denorm path and Normalise_result must finish the job. */
L_Re_normalise:
	testb	$0xff,FPU_denormal
	jnz	Normalise_result

L_Normalised:
	movl	TAG_Valid,%edx

/* Entered with %edx = tag for the result. */
L_deNormalised:
	cmpb	LOST_UP,FPU_bits_lost
	je	L_precision_lost_up

	cmpb	LOST_DOWN,FPU_bits_lost
	je	L_precision_lost_down

L_no_precision_loss:
	/* store the result */

L_Store_significand:
	movl	%eax,SIGH(%edi)
	movl	%ebx,SIGL(%edi)

	cmpw	EXP_OVER,EXP(%edi)
	jge	L_overflow

	movl	%edx,%eax	/* the tag becomes the return value */

	/* Convert the exponent to 80x87 form. */
	addw	EXTENDED_Ebias,EXP(%edi)
	andw	$0x7fff,EXP(%edi)

fpu_reg_round_signed_special_exit:

	cmpb	SIGN_POS,PARAM5
	je	fpu_reg_round_special_exit

	orw	$0x8000,EXP(%edi)	/* Negative sign for the result. */

fpu_reg_round_special_exit:

#ifndef NON_REENTRANT_FPU
	/* Discards the scratch slot; %ebx is reloaded by the next pop. */
	popl	%ebx		/* adjust the stack pointer */
#endif /* NON_REENTRANT_FPU */

/* Common epilogue: undo the pushes made by the caller-side frame setup
   (see ENTRY(FPU_round)) and return with the result in %eax. */
fpu_Arith_exit:
	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret


/*
 * Set the FPU status flags to represent precision loss due to
 * round-up.
 */
/* %edx (tag) and %eax (ms significand word) are saved around the call. */
L_precision_lost_up:
	push	%edx
	push	%eax
	call	set_precision_flag_up
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss

/*
 * Set the FPU status flags to represent precision loss due to
 * truncation.
 */
/* %edx (tag) and %eax (ms significand word) are saved around the call. */
L_precision_lost_down:
	push	%edx
	push	%eax
	call	set_precision_flag_down
	popl	%eax
	popl	%edx
	jmp	L_no_precision_loss


/*
 * The number is a denormal (which might get rounded up to a normal)
 * Shift the number right the required number of bits, which will
 * have to be undone later...
 */
L_Make_denorm:
	/* The action to be taken depends upon whether the underflow
	   exception is masked */
	testb	CW_Underflow,%cl		/* Underflow mask. */
	jz	Unmasked_underflow		/* Do not make a denormal. */

	/* This store must come before the pushl below: in the re-entrant
	   build FPU_denormal is %esp-relative. */
	movb	DENORMAL,FPU_denormal

	pushl	%ecx		/* Save */
	/* %cx = (EXP_UNDER+1) - exponent = number of bits to shift right */
	movw	EXP_UNDER+1,%cx
	subw	EXP(%edi),%cx

	cmpw	$64,%cx	/* shifts of 64 or more are handled separately */
	jnc	Denorm_shift_more_than_63

	cmpw	$32,%cx	/* shrd only works for 0..31 bits */
	jnc	Denorm_shift_more_than_32

/*
 * We got here without jumps by assuming that the most common requirement
 *   is for a small de-normalising shift.
 * Shift by [1..31] bits
 */
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	orl	%edx,%edx	/* extension */
	setne	%ch		/* Save whether %edx is non-zero */
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx	/* bits shifted out of %ebx form the new extension */
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orb	%ch,%dl		/* keep the old extension as a sticky bit */
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

/* Shift by [32..63] bits */
/*
 * Shift by (count - 32) first, then move every word down by one
 * (%eax -> %ebx -> %edx).  Anything lost on the way is or-ed into the
 * ls bit of the new extension as a sticky bit.
 */
Denorm_shift_more_than_32:
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	subb	$32,%cl
	orl	%edx,%edx
	setne	%ch		/* %ch = old extension was non-zero */
	orb	%ch,%bl
	xorl	%edx,%edx
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%edx,%edx		/* test these 32 bits */
	setne	%cl
	orb	%ch,%bl
	orb	%cl,%bl
	movl	%ebx,%edx
	movl	%eax,%ebx
	xorl	%eax,%eax
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

/* Shift by [64..) bits */
Denorm_shift_more_than_63:
	cmpw	$64,%cx
	jne	Denorm_shift_more_than_64

/* Exactly 64 bit shift */
/* The old %eax becomes the extension; the old %ebx and %edx collapse into
   a sticky bit in its ls bit.  The significand itself becomes zero. */
	addw	%cx,EXP(%edi)	/* exponent becomes EXP_UNDER+1 */
	xorl	%ecx,%ecx
	orl	%edx,%edx
	setne	%cl
	orl	%ebx,%ebx
	setne	%ch
	orb	%ch,%cl
	orb	%cl,%al
	movl	%eax,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done

Denorm_shift_more_than_64:
	movw	EXP_UNDER+1,EXP(%edi)
/* This is easy, %eax must be non-zero, so.. */
	/* Everything is shifted out; leave only a small non-zero extension. */
	movl	$1,%edx
	xorl	%eax,%eax
	xorl	%ebx,%ebx
	popl	%ecx		/* restore the control word */
	jmp	Denorm_done


/* Underflow is unmasked: the significand is left unshifted and only the
   flag is recorded.  %ecx was never pushed on this path. */
Unmasked_underflow:
	movb	UNMASKED_UNDERFLOW,FPU_denormal
	jmp	Denorm_done


/* Undo the de-normalisation. */
Normalise_result:
	cmpb	UNMASKED_UNDERFLOW,FPU_denormal
	je	Signal_underflow

/* The number must be a denormal if we got here. */
#ifdef PARANOID
	/* But check it... just in case. */
	cmpw	EXP_UNDER+1,EXP(%edi)
	jne	L_norm_bugged
#endif /* PARANOID */

#ifdef PECULIAR_486
	/*
	 * This implements a special feature of 80486 behaviour.
	 * Underflow will be signalled even if the number is
	 * not a denormal after rounding.
	 * This difference occurs only for masked underflow, and not
	 * in the unmasked case.
	 * Actual 80486 behaviour differs from this in some circumstances.
	 */
	orl	%eax,%eax		/* ms bits */
	js	LPseudoDenormal		/* Will be masked underflow */
#else
	orl	%eax,%eax		/* ms bits */
	js	L_Normalised		/* No longer a denormal */
#endif /* PECULIAR_486 */

	/* Flags are still those of the orl above in either branch. */
	jnz	LDenormal_adj_exponent

	orl	%ebx,%ebx
	jz	L_underflow_to_zero	/* The contents are zero */

LDenormal_adj_exponent:
	decw	EXP(%edi)

LPseudoDenormal:
	testb	$0xff,FPU_bits_lost	/* bits lost == underflow */
	movl	TAG_Special,%edx	/* movl does not disturb the flags */
	jz	L_deNormalised

	/* There must be a masked underflow */
	push	%eax			/* save the ms significand word */
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the ms significand word */
	/* Reloaded after the call; presumably the callee may clobber %edx. */
	movl	TAG_Special,%edx
	jmp	L_deNormalised


/*
 * The operations resulted in a number too small to represent.
 * Masked response.
 */
/* Reached from Normalise_result with %eax and %ebx both zero. */
L_underflow_to_zero:
	push	%eax
	call	set_precision_flag_down
	popl	%eax

	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the saved %eax */

/* Reduce the exponent to EXP_UNDER */
	movw	EXP_UNDER,EXP(%edi)
	movl	TAG_Zero,%edx
	jmp	L_Store_significand


/* The operations resulted in a number too large to represent. */
/* NOTE(review): %eax is not set here after the call, so the value returned
   is presumably whatever arith_overflow left in %eax.  Confirm against
   arith_overflow. */
L_overflow:
	addw	EXTENDED_Ebias,EXP(%edi)	/* Set for unmasked response. */
	push	%edi
	call	arith_overflow
	pop	%edi
	jmp	fpu_reg_round_signed_special_exit


Signal_underflow:
	/* The number may have been changed to a non-denormal */
	/* by the rounding operations. */
	cmpw	EXP_UNDER,EXP(%edi)
	jle	Do_unmasked_underflow

	jmp	L_Normalised

Do_unmasked_underflow:
	/* Increase the exponent by the magic number */
	addw	$(3*(1<<13)),EXP(%edi)
	push	%eax
	pushl	EX_Underflow
	call	EXCEPTION
	popl	%eax			/* discard the EX_Underflow argument */
	popl	%eax			/* restore the ms significand word */
	jmp	L_Normalised


/*
 * Internal-error handlers, only built with PARANOID.  Each raises
 * EX_INTERNAL or-ed with a code identifying the failing check, drops the
 * argument with a pop, and returns -1 through L_exception_exit.
 */
#ifdef PARANOID
#ifdef PECULIAR_486
L_bugged_denorm_486:
	pushl	EX_INTERNAL|0x236
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#else
L_bugged_denorm:
	pushl	EX_INTERNAL|0x230
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit
#endif /* PECULIAR_486 */

L_bugged_round24:
	pushl	EX_INTERNAL|0x231
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round53:
	pushl	EX_INTERNAL|0x232
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_bugged_round64:
	pushl	EX_INTERNAL|0x233
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

L_norm_bugged:
	pushl	EX_INTERNAL|0x234
	call	EXCEPTION
	popl	%ebx
	jmp	L_exception_exit

/* Only referenced from the commented-out entry check near fpu_reg_round. */
L_entry_bugged:
	pushl	EX_INTERNAL|0x235
	call	EXCEPTION
	popl	%ebx
L_exception_exit:
	mov	$-1,%eax		/* -1 = internal error */
	jmp	fpu_reg_round_special_exit
#endif /* PARANOID */

ENDPROC(FPU_round)