1 /* 2 * QEMU float support macros 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * GPL-v2-or-later 12 * 13 * Any future contributions to this file after December 1st 2014 will be 14 * taken to be licensed under the Softfloat-2a license unless specifically 15 * indicated otherwise. 16 */ 17 18 /* 19 =============================================================================== 20 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 21 Arithmetic Package, Release 2a. 22 23 Written by John R. Hauser. This work was made possible in part by the 24 International Computer Science Institute, located at Suite 600, 1947 Center 25 Street, Berkeley, California 94704. Funding was partially provided by the 26 National Science Foundation under grant MIP-9311980. The original version 27 of this code was written as part of a project to build a fixed-point vector 28 processor in collaboration with the University of California at Berkeley, 29 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 30 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31 arithmetic/SoftFloat.html'. 32 33 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 
38 39 Derivative works are acceptable, even for commercial purposes, so long as 40 (1) they include prominent notice that the work is derivative, and (2) they 41 include prominent notice akin to these four paragraphs for those parts of 42 this code that are retained. 43 44 =============================================================================== 45 */ 46 47 /* BSD licensing: 48 * Copyright (c) 2006, Fabrice Bellard 49 * All rights reserved. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions are met: 53 * 54 * 1. Redistributions of source code must retain the above copyright notice, 55 * this list of conditions and the following disclaimer. 56 * 57 * 2. Redistributions in binary form must reproduce the above copyright notice, 58 * this list of conditions and the following disclaimer in the documentation 59 * and/or other materials provided with the distribution. 60 * 61 * 3. Neither the name of the copyright holder nor the names of its contributors 62 * may be used to endorse or promote products derived from this software without 63 * specific prior written permission. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */

/*----------------------------------------------------------------------------
| This macro tests for minimum version of the GNU C compiler.
| (For example, SOFTFLOAT_GNUC_PREREQ(3, 4) is true for GCC 3.4 and later;
| it is always false when not compiling with GCC.)
*----------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
        ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
#else
# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
#endif


/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 32, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    uint32_t result;

    if (count == 0) {
        result = a;
    } else if (count < 32) {
        /* The bits shifted off are nonzero iff a << (32 - count) is. */
        result = (a >> count) | ((a << ((-count) & 31)) != 0);
    } else {
        /* Everything is shifted off; only the jam bit can survive. */
        result = (a != 0);
    }
    *zPtr = result;
}

/*----------------------------------------------------------------------------
| Shifts `a' right by the number of bits given in `count'.  If any nonzero
| bits are shifted off, they are ``jammed'' into the least significant bit of
| the result by setting the least significant bit to 1.  The value of `count'
| can be arbitrarily large; in particular, if `count' is greater than 64, the
| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
| The result is stored in the location pointed to by `zPtr'.
*----------------------------------------------------------------------------*/

static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
{
    uint64_t result;

    if (count == 0) {
        result = a;
    } else if (count < 64) {
        /* Jam any nonzero shifted-off bits into the least significant bit. */
        result = (a >> count) | ((a << ((-count) & 63)) != 0);
    } else {
        result = (a != 0);
    }
    *zPtr = result;
}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
| _plus_ the number of bits given in `count'.  The shifted result is at most
| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
| bits shifted off form a second 64-bit result as follows:  The _last_ bit
| shifted off is the most-significant bit of the extra result, and the other
| 63 bits of the extra result are all zero if and only if _all_but_the_last_
| bits shifted off were all zero.
This extra result is stored in the location 153 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 154 | (This routine makes more sense if `a0' and `a1' are considered to form a 155 | fixed-point value with binary point between `a0' and `a1'. This fixed-point 156 | value is shifted right by the number of bits given in `count', and the 157 | integer part of the result is returned at the location pointed to by 158 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 159 | described above, and is returned at the location pointed to by `z1Ptr'.) 160 *----------------------------------------------------------------------------*/ 161 162 static inline void 163 shift64ExtraRightJamming( 164 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) 165 { 166 uint64_t z0, z1; 167 int8_t negCount = ( - count ) & 63; 168 169 if ( count == 0 ) { 170 z1 = a1; 171 z0 = a0; 172 } 173 else if ( count < 64 ) { 174 z1 = ( a0<<negCount ) | ( a1 != 0 ); 175 z0 = a0>>count; 176 } 177 else { 178 if ( count == 64 ) { 179 z1 = a0 | ( a1 != 0 ); 180 } 181 else { 182 z1 = ( ( a0 | a1 ) != 0 ); 183 } 184 z0 = 0; 185 } 186 *z1Ptr = z1; 187 *z0Ptr = z0; 188 189 } 190 191 /*---------------------------------------------------------------------------- 192 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 193 | number of bits given in `count'. Any bits shifted off are lost. The value 194 | of `count' can be arbitrarily large; in particular, if `count' is greater 195 | than 128, the result will be 0. The result is broken into two 64-bit pieces 196 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' can be arbitrarily large; in particular, if `count' is greater
| than 128, the result will be 0.  The result is broken into two 64-bit pieces
| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
shift128Right(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t hi, lo;

    if (count == 0) {
        hi = a0;
        lo = a1;
    } else if (count < 64) {
        /* Bits leaving the bottom of a0 enter the top of the low word. */
        lo = (a0 << ((-count) & 63)) | (a1 >> count);
        hi = a0 >> count;
    } else {
        /* 64 <= count: all of a1 is gone; a0 supplies the low word (or
         * nothing at all once count reaches 128). */
        lo = (count < 128) ? (a0 >> (count & 63)) : 0;
        hi = 0;
    }
    *z1Ptr = lo;
    *z0Ptr = hi;
}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  If any nonzero bits are shifted off, they
| are ``jammed'' into the least significant bit of the result by setting the
| least significant bit to 1.  The value of `count' can be arbitrarily large;
| in particular, if `count' is greater than 128, the result will be either
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
| nonzero.  The result is broken into two 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' and `z1Ptr'.
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
| number of bits given in `count'.  If any nonzero bits are shifted off, they
| are ``jammed'' into the least significant bit of the result by setting the
| least significant bit to 1.  The value of `count' can be arbitrarily large;
| in particular, if `count' is greater than 128, the result will be either
| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
| nonzero.  The result is broken into two 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
shift128RightJamming(
    uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    int8_t negCount = (-count) & 63;
    uint64_t hi, lo;

    if (count == 0) {
        hi = a0;
        lo = a1;
    } else if (count < 64) {
        /* Sticky bit: any nonzero bits falling off the bottom of a1. */
        lo = (a0 << negCount) | (a1 >> count) | ((a1 << negCount) != 0);
        hi = a0 >> count;
    } else if (count == 64) {
        lo = a0 | (a1 != 0);
        hi = 0;
    } else if (count < 128) {
        /* Sticky bit: the lost low bits of a0 plus all of a1. */
        lo = (a0 >> (count & 63)) | (((a0 << negCount) | a1) != 0);
        hi = 0;
    } else {
        /* Whole value shifted out: only the jam bit can remain. */
        lo = ((a0 | a1) != 0);
        hi = 0;
    }
    *z1Ptr = lo;
    *z0Ptr = hi;
}

/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
| by 64 _plus_ the number of bits given in `count'.  The shifted result is
| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
| off form a third 64-bit result as follows:  The _last_ bit shifted off is
| the most-significant bit of the extra result, and the other 63 bits of the
| extra result are all zero if and only if _all_but_the_last_ bits shifted off
| were all zero.  This extra result is stored in the location pointed to by
| `z2Ptr'.  The value of `count' can be arbitrarily large.
|     (This routine makes more sense if `a0', `a1', and `a2' are considered
| to form a fixed-point value with binary point between `a1' and `a2'.  This
| fixed-point value is shifted right by the number of bits given in `count',
| and the integer part of the result is returned at the locations pointed to
| by `z0Ptr' and `z1Ptr'.
The fractional part of the result may be slightly
| corrupted as described above, and is returned at the location pointed to by
| `z2Ptr'.)
*----------------------------------------------------------------------------*/

static inline void
 shift128ExtraRightJamming(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8_t negCount = ( - count ) & 63;

    if ( count == 0 ) {
        /* No shift: pass all three words through unchanged. */
        z2 = a2;
        z1 = a1;
        z0 = a0;
    }
    else {
        if ( count < 64 ) {
            z2 = a1<<negCount;
            z1 = ( a0<<negCount ) | ( a1>>count );
            z0 = a0>>count;
        }
        else {
            if ( count == 64 ) {
                z2 = a1;
                z1 = a0;
            }
            else {
                /* More than 64 bits shifted: all of a1 lands in the sticky
                 * region, so fold it into a2 for the final jamming below. */
                a2 |= a1;
                if ( count < 128 ) {
                    z2 = a0<<negCount;
                    z1 = a0>>( count & 63 );
                }
                else {
                    /* count == 128 leaves a0 exactly in the extra word;
                     * beyond that only a "was it nonzero" bit survives. */
                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
                    z1 = 0;
                }
            }
            z0 = 0;
        }
        /* Jam: any nonzero bits shifted below the extra word set its LSB. */
        z2 |= ( a2 != 0 );
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
| number of bits given in `count'.  Any bits shifted off are lost.  The value
| of `count' must be less than 64.  The result is broken into two 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
 shortShift128Left(
     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
{

    *z1Ptr = a1<<count;
    /* count == 0 must be special-cased: with count == 0 the (-count) & 63
     * trick below would shift a1 right by 0 and wrongly OR it into z0. */
    *z0Ptr =
        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );

}

/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
| by the number of bits given in `count'.  Any bits shifted off are lost.
| The value of `count' must be less than 64.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void
 shortShift192Left(
     uint64_t a0,
     uint64_t a1,
     uint64_t a2,
     int count,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr
 )
{
    uint64_t z0, z1, z2;
    int8_t negCount;

    z2 = a2<<count;
    z1 = a1<<count;
    z0 = a0<<count;
    if ( 0 < count ) {
        /* Carry the high bits of each lower word into the word above. */
        negCount = ( ( - count ) & 63 );
        z1 |= a2>>negCount;
        z0 |= a1>>negCount;
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
| any carry out is lost.  The result is broken into two 64-bit pieces which
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
| any carry out is lost.  The result is broken into two 64-bit pieces which
| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
add128(
    uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint64_t low = a1 + b1;
    /* The low-word addition carried iff the sum wrapped below a1. */
    uint64_t carry = ( low < a1 );

    *z1Ptr = low;
    *z0Ptr = a0 + b0 + carry;
}

/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
| modulo 2^192, so any carry out is lost.  The result is broken into three
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
| `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void
add192(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    uint64_t b0,
    uint64_t b1,
    uint64_t b2,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
 )
{
    uint64_t r0, r1, r2;
    int8_t carryLow, carryMid;

    r2 = a2 + b2;
    carryLow = ( r2 < a2 );          /* carry out of the low word */
    r1 = a1 + b1;
    carryMid = ( r1 < a1 );          /* carry out of the middle word itself */
    r0 = a0 + b0;
    r1 += carryLow;
    /* Adding carryLow can wrap r1 to zero; that wrap carries into r0. */
    r0 += ( r1 < carryLow );
    r0 += carryMid;
    *z2Ptr = r2;
    *z1Ptr = r1;
    *z0Ptr = r0;
}
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
| `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void
sub128(
    uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    /* The low-word subtraction borrows iff b1 exceeds a1. */
    uint64_t borrow = ( a1 < b1 );

    *z1Ptr = a1 - b1;
    *z0Ptr = a0 - b0 - borrow;
}

/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void
sub192(
    uint64_t a0,
    uint64_t a1,
    uint64_t a2,
    uint64_t b0,
    uint64_t b1,
    uint64_t b2,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
 )
{
    uint64_t r0, r1, r2;
    int8_t borrowLow, borrowMid;

    r2 = a2 - b2;
    borrowLow = ( a2 < b2 );         /* borrow out of the low word */
    r1 = a1 - b1;
    borrowMid = ( a1 < b1 );         /* borrow out of the middle word itself */
    r0 = a0 - b0;
    /* Must test r1 against borrowLow BEFORE decrementing it: the borrow
     * propagates to the top word only when r1 underflows. */
    r0 -= ( r1 < borrowLow );
    r1 -= borrowLow;
    r0 -= borrowMid;
    *z2Ptr = r2;
    *z1Ptr = r1;
    *z0Ptr = r0;
}
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
{
    uint32_t aHi, aLo, bHi, bLo;
    uint64_t hi, lo, cross1, cross2;

    aLo = a;
    aHi = a>>32;
    bLo = b;
    bHi = b>>32;
    /* Four 32x32->64 partial products: lo + (cross1 + cross2) * 2^32
     * + hi * 2^64. */
    lo = ( (uint64_t) aLo ) * bLo;
    cross1 = ( (uint64_t) aLo ) * bHi;
    cross2 = ( (uint64_t) aHi ) * bLo;
    hi = ( (uint64_t) aHi ) * bHi;
    cross1 += cross2;
    /* A carry from summing the cross terms lands at bit 96 of the result. */
    hi += ( ( (uint64_t) ( cross1 < cross2 ) )<<32 ) + ( cross1>>32 );
    cross1 <<= 32;
    lo += cross1;
    hi += ( lo < cross1 );
    *z1Ptr = lo;
    *z0Ptr = hi;
}

/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
| `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void
mul128By64To192(
    uint64_t a0,
    uint64_t a1,
    uint64_t b,
    uint64_t *z0Ptr,
    uint64_t *z1Ptr,
    uint64_t *z2Ptr
 )
{
    uint64_t hi, mid, lo, midLow;

    /* a1 * b gives the low two words; a0 * b is added one word higher. */
    mul64To128( a1, b, &mid, &lo );
    mul64To128( a0, b, &hi, &midLow );
    add128( hi, midLow, 0, mid, &hi, &mid );
    *z2Ptr = lo;
    *z1Ptr = mid;
    *z0Ptr = hi;
}
*----------------------------------------------------------------------------*/

static inline void
 mul128To256(
     uint64_t a0,
     uint64_t a1,
     uint64_t b0,
     uint64_t b1,
     uint64_t *z0Ptr,
     uint64_t *z1Ptr,
     uint64_t *z2Ptr,
     uint64_t *z3Ptr
 )
{
    uint64_t z0, z1, z2, z3;
    uint64_t more1, more2;

    /* Accumulate the four 64x64->128 partial products, folding each into
     * the running 256-bit result with explicit carry propagation. */
    mul64To128( a1, b1, &z2, &z3 );
    mul64To128( a1, b0, &z1, &more2 );
    add128( z1, more2, 0, z2, &z1, &z2 );
    mul64To128( a0, b0, &z0, &more1 );
    add128( z0, more1, 0, z1, &z0, &z1 );
    mul64To128( a0, b1, &more1, &more2 );
    add128( more1, more2, 0, z2, &more1, &z2 );
    add128( z0, z1, 0, more1, &z0, &z1 );
    *z3Ptr = z3;
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;

}

/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/

static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    /* The exact quotient would overflow 64 bits: saturate. */
    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
    b0 = b>>32;
    /* Estimate the upper 32 quotient bits from the top halves of a and b. */
    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
    mul64To128( b, z, &term0, &term1 );
    sub128( a0, a1, term0, term1, &rem0, &rem1 );
    /* Step the estimate back down while the remainder is negative. */
    while ( ( (int64_t) rem0 ) < 0 ) {
        z -= LIT64( 0x100000000 );
        b1 = b<<32;
        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
    }
    rem0 = ( rem0<<32 ) | ( rem1>>32 );
    /* Estimate the lower 32 quotient bits from the updated remainder. */
    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
    return z;

}

/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
 * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
 *
 * Licensed under the GPLv2/LGPLv3
 */
static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
{
    uint64_t d0, d1, q0, q1, r1, r0, m;

    /* NOTE(review): the divisions below use d1 = d >> 32 as divisor, so d
     * must be at least 2^32 -- presumably callers pass a normalized
     * divisor (as estimateDiv128To64 requires b >= 2^63); confirm. */
    d0 = (uint32_t)d;
    d1 = d >> 32;

    /* First round: upper 32 quotient bits, with at most two corrections. */
    r1 = n1 % d1;
    q1 = n1 / d1;
    m = q1 * d0;
    r1 = (r1 << 32) | (n0 >> 32);
    if (r1 < m) {
        q1 -= 1;
        r1 += d;
        if (r1 >= d) {
            if (r1 < m) {
                q1 -= 1;
                r1 += d;
            }
        }
    }
    r1 -= m;

    /* Second round: lower 32 quotient bits, same correction scheme. */
    r0 = r1 % d1;
    q0 = r1 / d1;
    m = q0 * d0;
    r0 = (r0 << 32) | (uint32_t)n0;
    if (r0 < m) {
        q0 -= 1;
        r0 += d;
        if (r0 >= d) {
            if (r0 < m) {
                q0 -= 1;
                r0 += d;
            }
        }
    }
    r0 -= m;

    /* Return remainder in LSB */
    return (q1 << 32) | q0 | (r0 != 0);
}

/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/

static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
{
    /* Seed tables indexed by the top fraction bits of `a' (bits 30..27). */
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    int8_t index;
    uint32_t z;

    index = ( a>>27 ) & 15;
    if ( aExp & 1 ) {
        /* Odd exponent: initial estimate for 2^31*sqrt(a/2^31). */
        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
        z = ( ( a / z )<<14 ) + ( z<<15 );
        a >>= 1;
    }
    else {
        /* Even exponent: initial estimate for 2^31*sqrt(a/2^30). */
        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
        z = a / z + z;
        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
        if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
    }
    /* Final refinement: effectively averages z with a/z (a Newton-style
     * step), carried out in fixed point. */
    return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );

}

/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| `a'.  If `a' is zero, 32 is returned.
718 *----------------------------------------------------------------------------*/ 719 720 static inline int8_t countLeadingZeros32(uint32_t a) 721 { 722 #if SOFTFLOAT_GNUC_PREREQ(3, 4) 723 if (a) { 724 return __builtin_clz(a); 725 } else { 726 return 32; 727 } 728 #else 729 static const int8_t countLeadingZerosHigh[] = { 730 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 731 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 732 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 733 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 734 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 735 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 736 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 737 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 745 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 746 }; 747 int8_t shiftCount; 748 749 shiftCount = 0; 750 if ( a < 0x10000 ) { 751 shiftCount += 16; 752 a <<= 16; 753 } 754 if ( a < 0x1000000 ) { 755 shiftCount += 8; 756 a <<= 8; 757 } 758 shiftCount += countLeadingZerosHigh[ a>>24 ]; 759 return shiftCount; 760 #endif 761 } 762 763 /*---------------------------------------------------------------------------- 764 | Returns the number of leading 0 bits before the most-significant 1 bit of 765 | `a'. If `a' is zero, 64 is returned. 
766 *----------------------------------------------------------------------------*/ 767 768 static inline int8_t countLeadingZeros64(uint64_t a) 769 { 770 #if SOFTFLOAT_GNUC_PREREQ(3, 4) 771 if (a) { 772 return __builtin_clzll(a); 773 } else { 774 return 64; 775 } 776 #else 777 int8_t shiftCount; 778 779 shiftCount = 0; 780 if ( a < ( (uint64_t) 1 )<<32 ) { 781 shiftCount += 32; 782 } 783 else { 784 a >>= 32; 785 } 786 shiftCount += countLeadingZeros32( a ); 787 return shiftCount; 788 #endif 789 } 790 791 /*---------------------------------------------------------------------------- 792 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 793 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 794 | Otherwise, returns 0. 795 *----------------------------------------------------------------------------*/ 796 797 static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 798 { 799 800 return ( a0 == b0 ) && ( a1 == b1 ); 801 802 } 803 804 /*---------------------------------------------------------------------------- 805 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 806 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 807 | Otherwise, returns 0. 808 *----------------------------------------------------------------------------*/ 809 810 static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 811 { 812 813 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 814 815 } 816 817 /*---------------------------------------------------------------------------- 818 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 819 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 820 | returns 0. 
821 *----------------------------------------------------------------------------*/ 822 823 static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 824 { 825 826 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 827 828 } 829 830 /*---------------------------------------------------------------------------- 831 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 832 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 833 | Otherwise, returns 0. 834 *----------------------------------------------------------------------------*/ 835 836 static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) 837 { 838 839 return ( a0 != b0 ) || ( a1 != b1 ); 840 841 } 842