1 /* 2 * QEMU float support macros 3 * 4 * The code in this source file is derived from release 2a of the SoftFloat 5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and 6 * some later contributions) are provided under that license, as detailed below. 7 * It has subsequently been modified by contributors to the QEMU Project, 8 * so some portions are provided under: 9 * the SoftFloat-2a license 10 * the BSD license 11 * 12 * Any future contributions to this file after December 1st 2014 will be 13 * taken to be licensed under the Softfloat-2a license unless specifically 14 * indicated otherwise. 15 */ 16 17 /* 18 =============================================================================== 19 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 20 Arithmetic Package, Release 2a. 21 22 Written by John R. Hauser. This work was made possible in part by the 23 International Computer Science Institute, located at Suite 600, 1947 Center 24 Street, Berkeley, California 94704. Funding was partially provided by the 25 National Science Foundation under grant MIP-9311980. The original version 26 of this code was written as part of a project to build a fixed-point vector 27 processor in collaboration with the University of California at Berkeley, 28 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 29 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 30 arithmetic/SoftFloat.html'. 31 32 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 33 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 34 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 35 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 36 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 37 38 Derivative works are acceptable, even for commercial purposes, so long as 39 (1) they include prominent notice that the work is derivative, and (2) they 40 include prominent notice akin to these four paragraphs for those parts of 41 this code that are retained. 42 43 =============================================================================== 44 */ 45 46 /* BSD licensing: 47 * Copyright (c) 2006, Fabrice Bellard 48 * All rights reserved. 49 * 50 * Redistribution and use in source and binary forms, with or without 51 * modification, are permitted provided that the following conditions are met: 52 * 53 * 1. Redistributions of source code must retain the above copyright notice, 54 * this list of conditions and the following disclaimer. 55 * 56 * 2. Redistributions in binary form must reproduce the above copyright notice, 57 * this list of conditions and the following disclaimer in the documentation 58 * and/or other materials provided with the distribution. 59 * 60 * 3. Neither the name of the copyright holder nor the names of its contributors 61 * may be used to endorse or promote products derived from this software without 62 * specific prior written permission. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 65 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 68 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 69 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 70 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 71 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 72 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 73 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 74 * THE POSSIBILITY OF SUCH DAMAGE. 75 */ 76 77 #ifndef FPU_SOFTFLOAT_MACROS_H 78 #define FPU_SOFTFLOAT_MACROS_H 79 80 #include "fpu/softfloat-types.h" 81 #include "qemu/host-utils.h" 82 83 /** 84 * shl_double: double-word merging left shift 85 * @l: left or most-significant word 86 * @r: right or least-significant word 87 * @c: shift count 88 * 89 * Shift @l left by @c bits, shifting in bits from @r. 90 */ 91 static inline uint64_t shl_double(uint64_t l, uint64_t r, int c) 92 { 93 #if defined(__x86_64__) 94 asm("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c)); 95 return l; 96 #else 97 return c ? (l << c) | (r >> (64 - c)) : l; 98 #endif 99 } 100 101 /** 102 * shr_double: double-word merging right shift 103 * @l: left or most-significant word 104 * @r: right or least-significant word 105 * @c: shift count 106 * 107 * Shift @r right by @c bits, shifting in bits from @l. 108 */ 109 static inline uint64_t shr_double(uint64_t l, uint64_t r, int c) 110 { 111 #if defined(__x86_64__) 112 asm("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c)); 113 return r; 114 #else 115 return c ? (r >> c) | (l << (64 - c)) : r; 116 #endif 117 } 118 119 /*---------------------------------------------------------------------------- 120 | Shifts `a' right by the number of bits given in `count'. If any nonzero 121 | bits are shifted off, they are ``jammed'' into the least significant bit of 122 | the result by setting the least significant bit to 1. The value of `count' 123 | can be arbitrarily large; in particular, if `count' is greater than 32, the 124 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 125 | The result is stored in the location pointed to by `zPtr'. 126 *----------------------------------------------------------------------------*/ 127 128 static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) 129 { 130 uint32_t z; 131 132 if ( count == 0 ) { 133 z = a; 134 } 135 else if ( count < 32 ) { 136 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 137 } 138 else { 139 z = ( a != 0 ); 140 } 141 *zPtr = z; 142 143 } 144 145 /*---------------------------------------------------------------------------- 146 | Shifts `a' right by the number of bits given in `count'. If any nonzero 147 | bits are shifted off, they are ``jammed'' into the least significant bit of 148 | the result by setting the least significant bit to 1. The value of `count' 149 | can be arbitrarily large; in particular, if `count' is greater than 64, the 150 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. 151 | The result is stored in the location pointed to by `zPtr'. 152 *----------------------------------------------------------------------------*/ 153 154 static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) 155 { 156 uint64_t z; 157 158 if ( count == 0 ) { 159 z = a; 160 } 161 else if ( count < 64 ) { 162 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 163 } 164 else { 165 z = ( a != 0 ); 166 } 167 *zPtr = z; 168 169 } 170 171 /*---------------------------------------------------------------------------- 172 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 173 | _plus_ the number of bits given in `count'. The shifted result is at most 174 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 175 | bits shifted off form a second 64-bit result as follows: The _last_ bit 176 | shifted off is the most-significant bit of the extra result, and the other 177 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ 178 | bits shifted off were all zero. This extra result is stored in the location 179 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 180 | (This routine makes more sense if `a0' and `a1' are considered to form a 181 | fixed-point value with binary point between `a0' and `a1'. This fixed-point 182 | value is shifted right by the number of bits given in `count', and the 183 | integer part of the result is returned at the location pointed to by 184 | `z0Ptr'. The fractional part of the result may be slightly corrupted as 185 | described above, and is returned at the location pointed to by `z1Ptr'.) 186 *----------------------------------------------------------------------------*/ 187 188 static inline void 189 shift64ExtraRightJamming( 190 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) 191 { 192 uint64_t z0, z1; 193 int8_t negCount = ( - count ) & 63; 194 195 if ( count == 0 ) { 196 z1 = a1; 197 z0 = a0; 198 } 199 else if ( count < 64 ) { 200 z1 = ( a0<<negCount ) | ( a1 != 0 ); 201 z0 = a0>>count; 202 } 203 else { 204 if ( count == 64 ) { 205 z1 = a0 | ( a1 != 0 ); 206 } 207 else { 208 z1 = ( ( a0 | a1 ) != 0 ); 209 } 210 z0 = 0; 211 } 212 *z1Ptr = z1; 213 *z0Ptr = z0; 214 215 } 216 217 /*---------------------------------------------------------------------------- 218 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 219 | number of bits given in `count'. Any bits shifted off are lost. The value 220 | of `count' can be arbitrarily large; in particular, if `count' is greater 221 | than 128, the result will be 0. The result is broken into two 64-bit pieces 222 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 223 *----------------------------------------------------------------------------*/ 224 225 static inline void 226 shift128Right( 227 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) 228 { 229 uint64_t z0, z1; 230 int8_t negCount = ( - count ) & 63; 231 232 if ( count == 0 ) { 233 z1 = a1; 234 z0 = a0; 235 } 236 else if ( count < 64 ) { 237 z1 = ( a0<<negCount ) | ( a1>>count ); 238 z0 = a0>>count; 239 } 240 else { 241 z1 = (count < 128) ? (a0 >> (count & 63)) : 0; 242 z0 = 0; 243 } 244 *z1Ptr = z1; 245 *z0Ptr = z0; 246 247 } 248 249 /*---------------------------------------------------------------------------- 250 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 251 | number of bits given in `count'. If any nonzero bits are shifted off, they 252 | are ``jammed'' into the least significant bit of the result by setting the 253 | least significant bit to 1. The value of `count' can be arbitrarily large; 254 | in particular, if `count' is greater than 128, the result will be either 255 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 256 | nonzero. The result is broken into two 64-bit pieces which are stored at 257 | the locations pointed to by `z0Ptr' and `z1Ptr'. 258 *----------------------------------------------------------------------------*/ 259 260 static inline void 261 shift128RightJamming( 262 uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) 263 { 264 uint64_t z0, z1; 265 int8_t negCount = ( - count ) & 63; 266 267 if ( count == 0 ) { 268 z1 = a1; 269 z0 = a0; 270 } 271 else if ( count < 64 ) { 272 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 273 z0 = a0>>count; 274 } 275 else { 276 if ( count == 64 ) { 277 z1 = a0 | ( a1 != 0 ); 278 } 279 else if ( count < 128 ) { 280 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 281 } 282 else { 283 z1 = ( ( a0 | a1 ) != 0 ); 284 } 285 z0 = 0; 286 } 287 *z1Ptr = z1; 288 *z0Ptr = z0; 289 290 } 291 292 /*---------------------------------------------------------------------------- 293 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 294 | by 64 _plus_ the number of bits given in `count'. The shifted result is 295 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are 296 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 297 | off form a third 64-bit result as follows: The _last_ bit shifted off is 298 | the most-significant bit of the extra result, and the other 63 bits of the 299 | extra result are all zero if and only if _all_but_the_last_ bits shifted off 300 | were all zero. This extra result is stored in the location pointed to by 301 | `z2Ptr'. The value of `count' can be arbitrarily large. 302 | (This routine makes more sense if `a0', `a1', and `a2' are considered 303 | to form a fixed-point value with binary point between `a1' and `a2'. This 304 | fixed-point value is shifted right by the number of bits given in `count', 305 | and the integer part of the result is returned at the locations pointed to 306 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 307 | corrupted as described above, and is returned at the location pointed to by 308 | `z2Ptr'.) 309 *----------------------------------------------------------------------------*/ 310 311 static inline void 312 shift128ExtraRightJamming( 313 uint64_t a0, 314 uint64_t a1, 315 uint64_t a2, 316 int count, 317 uint64_t *z0Ptr, 318 uint64_t *z1Ptr, 319 uint64_t *z2Ptr 320 ) 321 { 322 uint64_t z0, z1, z2; 323 int8_t negCount = ( - count ) & 63; 324 325 if ( count == 0 ) { 326 z2 = a2; 327 z1 = a1; 328 z0 = a0; 329 } 330 else { 331 if ( count < 64 ) { 332 z2 = a1<<negCount; 333 z1 = ( a0<<negCount ) | ( a1>>count ); 334 z0 = a0>>count; 335 } 336 else { 337 if ( count == 64 ) { 338 z2 = a1; 339 z1 = a0; 340 } 341 else { 342 a2 |= a1; 343 if ( count < 128 ) { 344 z2 = a0<<negCount; 345 z1 = a0>>( count & 63 ); 346 } 347 else { 348 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 349 z1 = 0; 350 } 351 } 352 z0 = 0; 353 } 354 z2 |= ( a2 != 0 ); 355 } 356 *z2Ptr = z2; 357 *z1Ptr = z1; 358 *z0Ptr = z0; 359 360 } 361 362 /*---------------------------------------------------------------------------- 363 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 364 | number of bits given in `count'. Any bits shifted off are lost. The value 365 | of `count' must be less than 64. The result is broken into two 64-bit 366 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 367 *----------------------------------------------------------------------------*/ 368 369 static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count, 370 uint64_t *z0Ptr, uint64_t *z1Ptr) 371 { 372 *z1Ptr = a1 << count; 373 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); 374 } 375 376 /*---------------------------------------------------------------------------- 377 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 378 | number of bits given in `count'. Any bits shifted off are lost. The value 379 | of `count' may be greater than 64. The result is broken into two 64-bit 380 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 381 *----------------------------------------------------------------------------*/ 382 383 static inline void shift128Left(uint64_t a0, uint64_t a1, int count, 384 uint64_t *z0Ptr, uint64_t *z1Ptr) 385 { 386 if (count < 64) { 387 *z1Ptr = a1 << count; 388 *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63)); 389 } else { 390 *z1Ptr = 0; 391 *z0Ptr = a1 << (count - 64); 392 } 393 } 394 395 /*---------------------------------------------------------------------------- 396 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left 397 | by the number of bits given in `count'. Any bits shifted off are lost. 398 | The value of `count' must be less than 64. The result is broken into three 399 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 400 | `z1Ptr', and `z2Ptr'. 401 *----------------------------------------------------------------------------*/ 402 403 static inline void 404 shortShift192Left( 405 uint64_t a0, 406 uint64_t a1, 407 uint64_t a2, 408 int count, 409 uint64_t *z0Ptr, 410 uint64_t *z1Ptr, 411 uint64_t *z2Ptr 412 ) 413 { 414 uint64_t z0, z1, z2; 415 int8_t negCount; 416 417 z2 = a2<<count; 418 z1 = a1<<count; 419 z0 = a0<<count; 420 if ( 0 < count ) { 421 negCount = ( ( - count ) & 63 ); 422 z1 |= a2>>negCount; 423 z0 |= a1>>negCount; 424 } 425 *z2Ptr = z2; 426 *z1Ptr = z1; 427 *z0Ptr = z0; 428 429 } 430 431 /*---------------------------------------------------------------------------- 432 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 433 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 434 | any carry out is lost. The result is broken into two 64-bit pieces which 435 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 436 *----------------------------------------------------------------------------*/ 437 438 static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, 439 uint64_t *z0Ptr, uint64_t *z1Ptr) 440 { 441 bool c = 0; 442 *z1Ptr = uadd64_carry(a1, b1, &c); 443 *z0Ptr = uadd64_carry(a0, b0, &c); 444 } 445 446 /*---------------------------------------------------------------------------- 447 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 448 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 449 | modulo 2^192, so any carry out is lost. The result is broken into three 450 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 451 | `z1Ptr', and `z2Ptr'. 452 *----------------------------------------------------------------------------*/ 453 454 static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2, 455 uint64_t b0, uint64_t b1, uint64_t b2, 456 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) 457 { 458 bool c = 0; 459 *z2Ptr = uadd64_carry(a2, b2, &c); 460 *z1Ptr = uadd64_carry(a1, b1, &c); 461 *z0Ptr = uadd64_carry(a0, b0, &c); 462 } 463 464 /*---------------------------------------------------------------------------- 465 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 466 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 467 | 2^128, so any borrow out (carry out) is lost. The result is broken into two 468 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 469 | `z1Ptr'. 470 *----------------------------------------------------------------------------*/ 471 472 static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, 473 uint64_t *z0Ptr, uint64_t *z1Ptr) 474 { 475 bool c = 0; 476 *z1Ptr = usub64_borrow(a1, b1, &c); 477 *z0Ptr = usub64_borrow(a0, b0, &c); 478 } 479 480 /*---------------------------------------------------------------------------- 481 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 482 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 483 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 484 | result is broken into three 64-bit pieces which are stored at the locations 485 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 486 *----------------------------------------------------------------------------*/ 487 488 static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2, 489 uint64_t b0, uint64_t b1, uint64_t b2, 490 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) 491 { 492 bool c = 0; 493 *z2Ptr = usub64_borrow(a2, b2, &c); 494 *z1Ptr = usub64_borrow(a1, b1, &c); 495 *z0Ptr = usub64_borrow(a0, b0, &c); 496 } 497 498 /*---------------------------------------------------------------------------- 499 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 500 | into two 64-bit pieces which are stored at the locations pointed to by 501 | `z0Ptr' and `z1Ptr'. 502 *----------------------------------------------------------------------------*/ 503 504 static inline void 505 mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr) 506 { 507 mulu64(z1Ptr, z0Ptr, a, b); 508 } 509 510 /*---------------------------------------------------------------------------- 511 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 512 | `b' to obtain a 192-bit product. The product is broken into three 64-bit 513 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 514 | `z2Ptr'. 515 *----------------------------------------------------------------------------*/ 516 517 static inline void 518 mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b, 519 uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr) 520 { 521 uint64_t z0, z1, m1; 522 523 mul64To128(a1, b, &m1, z2Ptr); 524 mul64To128(a0, b, &z0, &z1); 525 add128(z0, z1, 0, m1, z0Ptr, z1Ptr); 526 } 527 528 /*---------------------------------------------------------------------------- 529 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 530 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 531 | product. The product is broken into four 64-bit pieces which are stored at 532 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 533 *----------------------------------------------------------------------------*/ 534 535 static inline void mul128To256(uint64_t a0, uint64_t a1, 536 uint64_t b0, uint64_t b1, 537 uint64_t *z0Ptr, uint64_t *z1Ptr, 538 uint64_t *z2Ptr, uint64_t *z3Ptr) 539 { 540 uint64_t z0, z1, z2; 541 uint64_t m0, m1, m2, n1, n2; 542 543 mul64To128(a1, b0, &m1, &m2); 544 mul64To128(a0, b1, &n1, &n2); 545 mul64To128(a1, b1, &z2, z3Ptr); 546 mul64To128(a0, b0, &z0, &z1); 547 548 add192( 0, m1, m2, 0, n1, n2, &m0, &m1, &m2); 549 add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr); 550 } 551 552 /*---------------------------------------------------------------------------- 553 | Returns an approximation to the 64-bit integer quotient obtained by dividing 554 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 555 | divisor `b' must be at least 2^63. If q is the exact quotient truncated 556 | toward zero, the approximation returned lies between q and q + 2 inclusive. 557 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 558 | unsigned integer is returned. 559 *----------------------------------------------------------------------------*/ 560 561 static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b) 562 { 563 uint64_t b0, b1; 564 uint64_t rem0, rem1, term0, term1; 565 uint64_t z; 566 567 if ( b <= a0 ) return UINT64_C(0xFFFFFFFFFFFFFFFF); 568 b0 = b>>32; 569 z = ( b0<<32 <= a0 ) ? UINT64_C(0xFFFFFFFF00000000) : ( a0 / b0 )<<32; 570 mul64To128( b, z, &term0, &term1 ); 571 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 572 while ( ( (int64_t) rem0 ) < 0 ) { 573 z -= UINT64_C(0x100000000); 574 b1 = b<<32; 575 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 576 } 577 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 578 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; 579 return z; 580 581 } 582 583 /*---------------------------------------------------------------------------- 584 | Returns an approximation to the square root of the 32-bit significand given 585 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 586 | `aExp' (the least significant bit) is 1, the integer returned approximates 587 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 588 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 589 | case, the approximation returned lies strictly within +/-2 of the exact 590 | value. 591 *----------------------------------------------------------------------------*/ 592 593 static inline uint32_t estimateSqrt32(int aExp, uint32_t a) 594 { 595 static const uint16_t sqrtOddAdjustments[] = { 596 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 597 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 598 }; 599 static const uint16_t sqrtEvenAdjustments[] = { 600 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 601 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 602 }; 603 int8_t index; 604 uint32_t z; 605 606 index = ( a>>27 ) & 15; 607 if ( aExp & 1 ) { 608 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; 609 z = ( ( a / z )<<14 ) + ( z<<15 ); 610 a >>= 1; 611 } 612 else { 613 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; 614 z = a / z + z; 615 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 616 if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); 617 } 618 return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); 619 620 } 621 622 /*---------------------------------------------------------------------------- 623 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 624 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. 625 | Otherwise, returns 0. 626 *----------------------------------------------------------------------------*/ 627 628 static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 629 { 630 return a0 == b0 && a1 == b1; 631 } 632 633 /*---------------------------------------------------------------------------- 634 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 635 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 636 | Otherwise, returns 0. 637 *----------------------------------------------------------------------------*/ 638 639 static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 640 { 641 return a0 < b0 || (a0 == b0 && a1 <= b1); 642 } 643 644 /*---------------------------------------------------------------------------- 645 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 646 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 647 | returns 0. 648 *----------------------------------------------------------------------------*/ 649 650 static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 651 { 652 return a0 < b0 || (a0 == b0 && a1 < b1); 653 } 654 655 /*---------------------------------------------------------------------------- 656 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 657 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. 658 | Otherwise, returns 0. 659 *----------------------------------------------------------------------------*/ 660 661 static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1) 662 { 663 return a0 != b0 || a1 != b1; 664 } 665 666 /* 667 * Similarly, comparisons of 192-bit values. 668 */ 669 670 static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2, 671 uint64_t b0, uint64_t b1, uint64_t b2) 672 { 673 return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0; 674 } 675 676 static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2, 677 uint64_t b0, uint64_t b1, uint64_t b2) 678 { 679 if (a0 != b0) { 680 return a0 < b0; 681 } 682 if (a1 != b1) { 683 return a1 < b1; 684 } 685 return a2 <= b2; 686 } 687 688 static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2, 689 uint64_t b0, uint64_t b1, uint64_t b2) 690 { 691 if (a0 != b0) { 692 return a0 < b0; 693 } 694 if (a1 != b1) { 695 return a1 < b1; 696 } 697 return a2 < b2; 698 } 699 700 #endif 701