|
|	round.sa 3.4 7/29/91
|
|	handle rounding and normalization tasks
|
|
|
|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

|
|	round --- round result according to precision/mode
|
|	Input:
|	a0		points to the operand, in the internal extended
|			format
|	d1 (high word)	rounding precision:
|				ext = $0000xxxx
|				sgl = $0001xxxx
|				dbl = $0002xxxx
|	d1 (low word)	rounding mode, used directly as the index into
|			mode_tab below:
|				RN  = $xxxx0000
|				RZ  = $xxxx0001
|				RM  = $xxxx0002
|				RP  = $xxxx0003
|	d0{31:29}	the g,r,s bits (extended)
|
|	Output:
|	The value pointed to by a0 is rounded in place; a0 itself is not
|	modified.  The result is still in the internal extended format.
|	According to the original header the result is not typed - the
|	tag field is invalid.
|
|	The INEX bits (inx2a_mask) are or-ed into USER_FPSR when any of
|	the g-r-s bits computed by ext_grs is set, i.e. when the rounded
|	result is inexact.
|
|	Register usage visible in this routine:
|	d0	consumed.  The original header states that the g-r-s bits
|		are cleared on return, but no path here clears d0
|		explicitly - do not rely on its value afterwards.
|	d1	returned with its two words swapped (every path executes
|		exactly one unpaired "swap %d1").
|	a1	scratch, used for the jump-table dispatch.
|

	.global	round
round:
|
| ext_grs folds everything below the selected precision into g,r,s.
| All three clear means the result is exact: leave INEX alone and just
| force the bits below the precision limit to zero.
|
	bsrs	ext_grs			|d0{31:29} = g,r,s for this precision
	tstl	%d0			|any bits below the l-bit?
	beqs	rnd_exact		|no - exact result

rnd_cont:
|
| Inexact: flag it, then use the rounding mode in d1.w as an index into
| the jump table.
|
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	lea	mode_tab,%a1
	movel	(%a1,%d1.w*4),%a1	|fetch handler for this mode
	jmp	(%a1)

rnd_exact:
	swap	%d1			|truncate wants the precision in d1.w
	bra	truncate

|
| Jump table indexed by rounding mode in d1.w.  Every handler below is
| entered with grs != 0.
|
mode_tab:
	.long	rnd_near
	.long	rnd_zero
	.long	rnd_mnus
	.long	rnd_plus
|
|	ROUND PLUS INFINITY
|
|	If sign of fp number = 0 (positive), then add 1 to l.
|
| Entered from mode_tab with grs != 0 and the rounding mode in d1.w.
| a1 is used as scratch for the add_to_l dispatch.
|
rnd_plus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bmi	truncate		|if negative then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add_* routines never clear the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|pick add routine by precision
	jmp	(%a1)
|
|	ROUND MINUS INFINITY
|
|	If sign of fp number = 1 (negative), then add 1 to l.
|
rnd_mnus:
	swap	%d1			|set up d1 for round prec.
	tstb	LOCAL_SGN(%a0)		|check for sign
	bpl	truncate		|if positive then truncate
	movel	#0xffffffff,%d0		|force g,r,s to be all f's so the
|					;add_* routines never clear the l-bit
	lea	add_to_l,%a1
	movel	(%a1,%d1.w*4),%a1	|pick add routine by precision
	jmp	(%a1)
|
|	ROUND ZERO
|
|	Always truncate.
rnd_zero:
	swap	%d1			|set up d1 for round prec.
	bra	truncate
|
|
|	ROUND NEAREST
|
|	If (g=1), then add 1 to l and if (r=s=0), then clear l
|	Note that this will round to even in case of a tie.
|
|	After the shift below d0 holds only r and s (in d0{31:30}); the
|	add_* routines test d0 for zero to detect the tie case.
|
rnd_near:
	swap	%d1			|set up d1 for round prec.
	asll	#1,%d0			|shift g-bit to c-bit
	bcc	truncate		|g=0: below half way, just truncate
	lea	add_to_l,%a1		|g=1: add 1 to the l-bit
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

|
|	ext_grs --- extract guard, round and sticky bits
|
| Input:	d1 =		PREC:ROUND
|		d0{31:29} =	incoming (extended) guard, round, sticky
|		a0 =		pointer to the operand (LOCAL_HI/LOCAL_LO read)
| Output:	d0{31:29}=	guard, round, sticky
|
| The ext_grs extract the guard/round/sticky bits according to the
| selected rounding precision. It is called by the round subroutine
| only.  All registers except d0 are kept intact. d0 becomes an
| updated guard,round,sticky in d0{31:29}
|
| For extended precision d0 is returned unchanged.  For single and
| double, g and r are taken from the two mantissa bits just below the
| precision limit, and sticky is the or of every mantissa bit below
| those together with the incoming d0.
|
| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
|	 prior to usage, and needs to restore d1 to original.
|
ext_grs:
	swap	%d1			|have d1.w point to round precision
	cmpiw	#0,%d1
	bnes	sgl_or_dbl
	bras	end_ext_grs		|ext prec: incoming g,r,s already right

sgl_or_dbl:
	moveml	%d2/%d3,-(%a7)		|make some temp registers
	cmpiw	#1,%d1
	bnes	grs_dbl			|anything but 1 is treated as double
grs_sgl:
	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|sgl prec. g-r are 2 bits right
	movel	#30,%d2			|of the sgl prec. limits
	lsll	%d2,%d3			|shift g-r bits to MSB of d3
	movel	LOCAL_HI(%a0),%d2	|get word 2 for s-bit test
	andil	#0x0000003f,%d2		|s bit is the or of all other
	bnes	st_stky			|bits to the right of g-r
	tstl	LOCAL_LO(%a0)		|test lower mantissa
	bnes	st_stky			|if any are set, set sticky
	tstl	%d0			|test original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if words 3 and 4 are clr, exit
grs_dbl:
	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|dbl-prec. g-r are 2 bits right
	movel	#30,%d2			|of the dbl prec. limits
	lsll	%d2,%d3			|shift g-r bits to the MSB of d3
	movel	LOCAL_LO(%a0),%d2	|get lower mantissa  for s-bit test
	andil	#0x000001ff,%d2		|s bit is the or-ing of all
	bnes	st_stky			|other bits to the right of g-r
	tstl	%d0			|test word original g,r,s
	bnes	st_stky			|if any are set, set sticky
	bras	end_sd			|if clear, exit
st_stky:
	bset	#rnd_stky_bit,%d3
end_sd:
	movel	%d3,%d0			|return grs to d0
	moveml	(%a7)+,%d2/%d3		|restore scratch registers
end_ext_grs:
	swap	%d1			|restore d1 to original
	rts

|*******************  Local Equates
	.set	ad_1_sgl,0x00000100	|  constant to add 1 to l-bit in sgl prec
	.set	ad_1_dbl,0x00000800	|  constant to add 1 to l-bit in dbl prec


|Jump table for adding 1 to the l-bit indexed by rnd prec
|(index 3 is not a documented precision; it is routed to add_dbl)

add_to_l:
	.long	add_ext
	.long	add_sgl
	.long	add_dbl
	.long	add_dbl
|
|	ADD SINGLE
|
| Adds one unit in the single-precision l-bit position of LOCAL_HI.
| If the add carries out of the mantissa, the carry is rotated back in
| from the top (roxrw shifts right through the extend bit, one word at
| a time) and the exponent is incremented.
|
add_sgl:
	addl	#ad_1_sgl,LOCAL_HI(%a0)
	bccs	scc_clr			|no mantissa overflow
	roxrw	LOCAL_HI(%a0)		|shift v-bit back in
	roxrw	LOCAL_HI+2(%a0)		|shift v-bit back in
	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
scc_clr:
	tstl	%d0			|test for rs = 0
	bnes	sgl_done
	andiw	#0xfe00,LOCAL_HI+2(%a0)	|tie: clear the l-bit (round to even)
sgl_done:
	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
	clrl	LOCAL_LO(%a0)		|clear the ls mantissa
	rts

|
|	ADD EXTENDED
|
| Adds one to the least significant bit of the 64-bit mantissa, with
| the same carry-out recovery as add_sgl spread over all four words.
|
add_ext:
	addql	#1,LOCAL_LO(%a0)	|add 1 to l-bit
	bccs	xcc_clr			|test for carry out
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	xcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
xcc_clr:
	tstl	%d0			|test rs = 0
	bnes	add_ext_done
	andib	#0xfe,LOCAL_LO+3(%a0)	|tie: clear the l bit (round to even)
add_ext_done:
	rts
|
|	ADD DOUBLE
|
| Adds one unit in the double-precision l-bit position of LOCAL_LO and
| propagates the carry into LOCAL_HI, with the same carry-out recovery
| as add_ext.
|
add_dbl:
	addl	#ad_1_dbl,LOCAL_LO(%a0)
	bccs	dcc_clr
	addql	#1,LOCAL_HI(%a0)	|propagate carry
	bccs	dcc_clr
	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
	roxrw	LOCAL_LO(%a0)
	roxrw	LOCAL_LO+2(%a0)
	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
dcc_clr:
	tstl	%d0			|test for rs = 0
	bnes	dbl_done
	andiw	#0xf000,LOCAL_LO+2(%a0)	|tie: clear the l-bit (round to even)

dbl_done:
	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
	rts

| NOTE(review): "error" is not referenced by any code visible in this file.
error:
	rts
|
| Truncate all other bits
|
| Jump table indexed by rounding precision in d1.w.  Extended needs no
| masking; single and double reuse the tails of add_sgl / add_dbl.
|
trunct:
	.long	end_rnd
	.long	sgl_done
	.long	dbl_done
	.long	dbl_done

| Entered with the rounding precision in d1.w (callers swap d1 first).
truncate:
	lea	trunct,%a1
	movel	(%a1,%d1.w*4),%a1
	jmp	(%a1)

end_rnd:
	rts

|
|	NORMALIZE
|
| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
| is done by shifting the mantissa left while decrementing the
| exponent.
|
| NRM_SET shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1).
|
| NRM_ZERO shifts and decrements until there is a 1 set in the integer
| bit of the mantissa (msb in d1) unless this would mean the exponent
| would go less than 0.  In that case the number becomes a denorm - the
| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
| normalized.
|
| Note that both routines have been optimized (for the worst case) and
| therefore do not have the easy to follow decrement/shift loop.
|
|	NRM_ZERO
|
|	Distance to first 1 bit in mantissa = X
|	Distance to 0 from exponent = Y
|	If X <= Y
|	Then
|	  nrm_set
|	Else
|	  shift mantissa by Y
|	  set exponent = 0
|
|input:
|	FP_SCR1 = exponent, ms mantissa part, ls mantissa part
|output:
|	L_SCR1{4} = fpte15 or ete15 bit
|
| NOTE(review): the code below takes its operand through a0
| (LOCAL_EX/LOCAL_HI/LOCAL_LO) and never references FP_SCR1 or L_SCR1;
| the input/output lines above look stale - confirm against callers.
|
| Register usage: d0 and d1 are clobbered.  d2/d3/d5/d6 are saved and
| restored on the paths that use them.
|
	.global	nrm_zero
nrm_zero:
	movew	LOCAL_EX(%a0),%d0
	cmpw	#64,%d0		|see if exp >= 64
	bmis	d0_less
	bsr	nrm_set		|a 64-bit mantissa needs at most 63 shifts,
|				;so the exponent cannot drop below 0
	rts
d0_less:
	moveml	%d2/%d3/%d5/%d6,-(%a7)
	movel	LOCAL_HI(%a0),%d1
	movel	LOCAL_LO(%a0),%d2

	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
|				;in ms mant
	beq	ms_clr		|branch if no bits were set
	cmpw	%d3,%d0		|if X>Y
	bmis	greater		|then exp will go past 0 (neg) if
|				;it is just shifted
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
| X > Y: shift the 64-bit mantissa d1:d2 left by Y (d0.w) and set the
| exponent to 0, leaving a denorm.
|
| Register shift counts are taken modulo 64, so a single 32-bit shift by
| 32 or more simply produces 0.  Y >= 32 can only happen when we came
| from ms_clr (ms mant = 0, X = 32 + position of first 1 in ls mant);
| shifting both halves by Y directly would then discard the whole ls
| mantissa.  Handle that case by moving the ls mant up one longword and
| shifting by the remaining Y - 32.  Because X > Y, no set bit is
| shifted out.
|
greater:
	cmpw	#32,%d0
	blts	nz_shift_lt32	|Y < 32: ordinary two-register shift
	subw	#32,%d0		|Y >= 32: ms mant is known to be zero
	movel	%d2,%d1		|ls mant becomes the ms mant ...
	lsll	%d0,%d1		|... shifted by the remaining count
	clrl	%d2		|nothing is left in the ls mant
	bras	nz_store
nz_shift_lt32:
	movel	%d2,%d6		|save ls mant in d6
	lsll	%d0,%d2		|shift ls mant by count
	lsll	%d0,%d1		|shift ms mant by count
	movel	#32,%d5
	subl	%d0,%d5		|make op a denorm by shifting bits
	lsrl	%d5,%d6		|by the number in the exp, then
|				;set exp = 0.
	orl	%d6,%d1		|shift the ls mant bits into the ms mant
nz_store:
	movel	#0,%d0		|same as if decremented exp to 0
|				;while shifting
	movew	%d0,LOCAL_EX(%a0)
	movel	%d1,LOCAL_HI(%a0)
	movel	%d2,LOCAL_LO(%a0)
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
ms_clr:
	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
	beqs	all_clr		|branch if none set
	addw	#32,%d3		|X counts from the top of the ms mant
	cmpw	%d3,%d0		|if X>Y
	bmi	greater		|then branch
	bsr	nrm_set		|else exp won't go past 0
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
all_clr:
	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
	moveml	(%a7)+,%d2/%d3/%d5/%d6
	rts
|
|	NRM_SET
|
| Shifts the 64-bit mantissa at LOCAL_HI:LOCAL_LO left until its most
| significant bit is set and subtracts the shift count from LOCAL_EX.
| No check is made for the exponent going below zero.
|
| Input:	a0 points to the operand
| Clobbers:	d0, d1 (d6 and d7 are saved and restored)
|
	.global	nrm_set
nrm_set:
	movel	%d7,-(%a7)
	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7)
	beqs	lower		|branch if ms mant is all 0's

	movel	%d6,-(%a7)

	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant

	lsll	%d7,%d0		|shift first 1 to j bit position
	movel	%d1,%d6		|copy ls mant into d6
	lsll	%d7,%d6		|shift ls mant by count
	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
	moveql	#32,%d6
	subl	%d7,%d6		|continue shift
	lsrl	%d6,%d1		|shift off all bits but those that will
|				;be shifted into ms mant
	orl	%d1,%d0		|shift the ls mant bits into the ms mant
	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
	moveml	(%a7)+,%d7/%d6	|restore registers
	rts

|
| We get here if ms mant was = 0, and we assume ls mant has bits
| set (otherwise this would have been tagged a zero not a denorm).
|
| Only d7 has been pushed at this point, so only d7 is popped.
|
lower:
	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
	subw	#32,%d0		|account for ms mant being all zeros
	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant to d7)
	subw	%d7,%d0		|subtract shift count from exp
	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant
	movew	%d0,LOCAL_EX(%a0)	|store exp
	movel	%d1,LOCAL_HI(%a0)	|store ms mant
	clrl	LOCAL_LO(%a0)	|clear ls mant
	movel	(%a7)+,%d7
	rts
|
|	denorm --- denormalize an intermediate result
|
|	Used by underflow.
|
|	Input:
|	a0	 points to the operand to be denormalized
|		 (in the internal extended format)
|
|	d0:	 rounding precision
|	Output:
|	a0	 points to the denormalized result
|		 (in the internal extended format)
|
|	d0	is guard,round,sticky
|
|	d0 comes into this routine with the rounding precision.
|	It is then loaded with the denormalized exponent threshold for the
|	rounding precision.
|
|	d1 is used as scratch (threshold, then the inexact flag returned
|	by dnrm_lp).  inx2a_mask is or-ed into USER_FPSR when bits are
|	lost.
|

	.global	denorm
denorm:
	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
	beqs	no_sgn_ext
	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
no_sgn_ext:

	cmpib	#0,%d0		|if 0 then extended precision
	bnes	not_ext		|else branch

	clrl	%d1		|load d1 with ext threshold
	clrl	%d0		|clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check for inex
	beq	no_inex		|if clr, no inex
	bra	dnrm_inex	|if set, set inex (not forced short: this
|				;is the longest forward branch to dnrm_inex)

not_ext:
	cmpil	#1,%d0		|if 1 then single precision
	beqs	load_sgl	|else must be 2, double prec

load_dbl:
	movew	#dbl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

load_sgl:
	movew	#sgl_thresh,%d1	|put copy of threshold in d1
	movel	%d1,%d0		|copy d1 into d0
	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
	bpls	chk_stky	|then branch (all bits would be
|				; shifted off in denorm routine)
	clrl	%d0		|else clear the sticky flag
	bsr	dnrm_lp		|denormalize the number
	tstb	%d1		|check flag
	beqs	no_inex		|if clr, no inex
	bras	dnrm_inex	|if set, set inex

|
| Every mantissa bit would be shifted out.  The result is a zero
| mantissa with the threshold exponent; it is inexact (sticky set) only
| if the mantissa had any bit set.
|
chk_stky:
	tstl	LOCAL_HI(%a0)	|check for any bits set
	bnes	set_stky
	tstl	LOCAL_LO(%a0)	|check for any bits set
	bnes	set_stky
	clrl	%d0		|mantissa was zero: return g,r,s = 0.
|				;d0 still held (threshold - exp) in its
|				;low word and the caller's old d1 high
|				;word on top, which is not a valid g,r,s
	bras	clr_mant
set_stky:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
	movel	#0x20000000,%d0	|set sticky bit in return value
clr_mant:
	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold
	movel	#0,LOCAL_HI(%a0)	|set d1 = 0 (ms mantissa)
	movel	#0,LOCAL_LO(%a0)		|set d2 = 0 (ls mantissa)
	rts
dnrm_inex:
	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
no_inex:
	rts

|
|	dnrm_lp --- normalize exponent/mantissa to specified threshold
|
| Input:
|	a0		points to the operand to be denormalized
|	d0{31:29}	initial guard,round,sticky
|	d1{15:0}	denormalization threshold
| Output:
|	a0		points to the denormalized operand
|	d0{31:29}	final guard,round,sticky
|	d1.b		inexact flag:  all ones means inexact result
|
| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
| so that bfext can be used to extract the new low part of the mantissa.
| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
| is no LOCAL_GRS scratch word following it on the fsave frame.
|
| When the E3 bit of E_BYTE is set, the incoming d0 is replaced by the
| three g,r,s bits taken from WBTEMP_GRS.
|
	.global	dnrm_lp
dnrm_lp:
	movel	%d2,-(%sp)		|save d2 for temp use
	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
	beqs	not_E3			|not type E3 exception
	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky  bit
	movel	#29,%d0
	lsll	%d0,%d2			|shift g,r,s to their positions
	movel	%d2,%d0
not_E3:
	movel	(%sp)+,%d2		|restore d2
	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
	movel	%d1,%d0			|copy the denorm threshold
	subw	LOCAL_EX(%a0),%d1	|d1 = threshold - uns exponent
	bles	no_lp			|d1 <= 0
	cmpw	#32,%d1
	blts	case_1			|0 < d1 < 32
	cmpw	#64,%d1
	blts	case_2			|32 <= d1 < 64
	bra	case_3			|d1 >= 64
|
| No normalization necessary
|
no_lp:
	clrb	%d1			|set no inex2 reported
	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
	rts
|
| case (0<d1<32)
|
| The mantissa moves right by d1 bits.  Each bfextu reads a 32-bit
| field that starts (32 - d1) bits into one longword and runs into the
| next, which yields the right-shifted longword directly.
|
case_1:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI
	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO
	clrb	%d1			|assume exact
	bftst	%d0{#2:#30}		|anything shifted out below g,r?
	beqs	c1nstky
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
c1nstky:
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2	|clear all but G,R,S
	tstl	%d2		|test if original G,R,S are clear
	beqs	grs_clear
	orl	#0x20000000,%d0	|set sticky bit in d0
grs_clear:
	andil	#0xe0000000,%d0	|clear all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| case (32<=d1<64)
|
| The ms mantissa becomes zero; the old ms mantissa supplies the new ls
| mantissa and g,r, and everything below that is folded into sticky.
|
case_2:
	movel	%d2,-(%sp)
	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold
	subw	#32,%d1			|d1 now between 0 and 32
	movel	#32,%d0
	subw	%d1,%d0			|d0 = 32 - d1
	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO
	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
	bftst	%d1{#2:#30}
	bnes	c2_sstky		|bra if sticky bit to be set
	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
	bnes	c2_sstky		|bra if sticky bit to be set
	movel	%d1,%d0
	clrb	%d1			|exact: no inex2
	bras	end_c2
c2_sstky:
	movel	%d1,%d0
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
end_c2:
	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
	movel	%d2,LOCAL_LO(%a0)	|store LOCAL_LO
	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
	andil	#0xe0000000,%d2		|clear all but G,R,S
	tstl	%d2			|test if original G,R,S are clear
	beqs	clear_grs
	orl	#0x20000000,%d0		|set sticky bit in d0
clear_grs:
	andil	#0xe0000000,%d0		|get rid of all but G,R,S
	movel	(%sp)+,%d2
	rts
|
| d1 >= 64 Force the exponent to be the denorm threshold with the
| correct sign.
|
case_3:
	movew	%d0,LOCAL_EX(%a0)
	tstw	LOCAL_SGN(%a0)
	bges	c3con
c3neg:
	orl	#0x80000000,LOCAL_EX(%a0)	|set the top bit of the
|						;longword at LOCAL_EX
c3con:
	cmpw	#64,%d1
	beqs	sixty_four
	cmpw	#65,%d1
	beqs	sixty_five
|
| Shift value is out of range.  Set d1 for inex2 flag and
| return a zero with the given threshold.
|
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	movel	#0x20000000,%d0
	st	%d1
	rts

| Shift of exactly 64: the top two bits of the ms mant become g and r.
sixty_four:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#2:#30},%d1		|d1 = bits that fall into sticky
	andil	#0xc0000000,%d0
	bras	c3com

| Shift of exactly 65: g is zero and the top bit of the ms mant becomes r.
sixty_five:
	movel	LOCAL_HI(%a0),%d0
	bfextu	%d0{#1:#31},%d1		|d1 = bits that fall into sticky
	andil	#0x80000000,%d0
	lsrl	#1,%d0			|shift high bit into R bit

| Sticky is the or of the remaining ms mant bits (d1), the ls mant and
| the original g,r,s saved in FP_SCR2.
c3com:
	tstl	%d1
	bnes	c3ssticky
	tstl	LOCAL_LO(%a0)
	bnes	c3ssticky
	tstb	FP_SCR2+LOCAL_GRS(%a6)
	bnes	c3ssticky
	clrb	%d1			|exact: no inex2
	bras	c3end

c3ssticky:
	bsetl	#rnd_stky_bit,%d0
	st	%d1			|flag inexact
c3end:
	clrl	LOCAL_HI(%a0)
	clrl	LOCAL_LO(%a0)
	rts

	|end