1 /* 2 * Utility compute operations used by translated code. 3 * 4 * Copyright (c) 2007 Thiemo Seufer 5 * Copyright (c) 2007 Jocelyn Mayer 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 /* Portions of this work are licensed under the terms of the GNU GPL, 27 * version 2 or later. See the COPYING file in the top-level directory. 28 */ 29 30 #ifndef HOST_UTILS_H 31 #define HOST_UTILS_H 32 33 #include "qemu/compiler.h" 34 #include "qemu/bswap.h" 35 #include "qemu/int128.h" 36 37 #ifdef CONFIG_INT128 38 static inline void mulu64(uint64_t *plow, uint64_t *phigh, 39 uint64_t a, uint64_t b) 40 { 41 __uint128_t r = (__uint128_t)a * b; 42 *plow = r; 43 *phigh = r >> 64; 44 } 45 46 static inline void muls64(uint64_t *plow, uint64_t *phigh, 47 int64_t a, int64_t b) 48 { 49 __int128_t r = (__int128_t)a * b; 50 *plow = r; 51 *phigh = r >> 64; 52 } 53 54 /* compute with 96 bit intermediate result: (a*b)/c */ 55 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) 56 { 57 return (__int128_t)a * b / c; 58 } 59 60 static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh, 61 uint64_t divisor) 62 { 63 __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow; 64 __uint128_t result = dividend / divisor; 65 66 *plow = result; 67 *phigh = result >> 64; 68 return dividend % divisor; 69 } 70 71 static inline int64_t divs128(uint64_t *plow, int64_t *phigh, 72 int64_t divisor) 73 { 74 __int128_t dividend = ((__int128_t)*phigh << 64) | *plow; 75 __int128_t result = dividend / divisor; 76 77 *plow = result; 78 *phigh = result >> 64; 79 return dividend % divisor; 80 } 81 #else 82 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b); 83 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b); 84 uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor); 85 int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor); 86 87 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c) 88 { 89 union { 90 uint64_t ll; 91 struct { 92 #if HOST_BIG_ENDIAN 93 uint32_t high, low; 94 #else 95 uint32_t low, high; 96 #endif 97 } l; 98 } u, res; 99 uint64_t rl, rh; 100 101 u.ll = a; 102 rl = (uint64_t)u.l.low * (uint64_t)b; 103 rh = (uint64_t)u.l.high * (uint64_t)b; 104 rh += (rl >> 32); 105 res.l.high = rh / c; 106 res.l.low = (((rh % c) << 32) + (rl & 0xffffffff)) / c; 107 return res.ll; 108 } 109 #endif 110 111 /** 112 * clz32 - count leading zeros in a 32-bit value. 113 * @val: The value to search 114 * 115 * Returns 32 if the value is zero. Note that the GCC builtin is 116 * undefined if the value is zero. 117 */ 118 static inline int clz32(uint32_t val) 119 { 120 return val ? __builtin_clz(val) : 32; 121 } 122 123 /** 124 * clo32 - count leading ones in a 32-bit value. 125 * @val: The value to search 126 * 127 * Returns 32 if the value is -1. 128 */ 129 static inline int clo32(uint32_t val) 130 { 131 return clz32(~val); 132 } 133 134 /** 135 * clz64 - count leading zeros in a 64-bit value. 136 * @val: The value to search 137 * 138 * Returns 64 if the value is zero. Note that the GCC builtin is 139 * undefined if the value is zero. 140 */ 141 static inline int clz64(uint64_t val) 142 { 143 return val ? __builtin_clzll(val) : 64; 144 } 145 146 /** 147 * clo64 - count leading ones in a 64-bit value. 148 * @val: The value to search 149 * 150 * Returns 64 if the value is -1. 151 */ 152 static inline int clo64(uint64_t val) 153 { 154 return clz64(~val); 155 } 156 157 /** 158 * ctz32 - count trailing zeros in a 32-bit value. 159 * @val: The value to search 160 * 161 * Returns 32 if the value is zero. Note that the GCC builtin is 162 * undefined if the value is zero. 163 */ 164 static inline int ctz32(uint32_t val) 165 { 166 return val ? __builtin_ctz(val) : 32; 167 } 168 169 /** 170 * cto32 - count trailing ones in a 32-bit value. 171 * @val: The value to search 172 * 173 * Returns 32 if the value is -1. 174 */ 175 static inline int cto32(uint32_t val) 176 { 177 return ctz32(~val); 178 } 179 180 /** 181 * ctz64 - count trailing zeros in a 64-bit value. 182 * @val: The value to search 183 * 184 * Returns 64 if the value is zero. Note that the GCC builtin is 185 * undefined if the value is zero. 186 */ 187 static inline int ctz64(uint64_t val) 188 { 189 return val ? __builtin_ctzll(val) : 64; 190 } 191 192 /** 193 * cto64 - count trailing ones in a 64-bit value. 194 * @val: The value to search 195 * 196 * Returns 64 if the value is -1. 197 */ 198 static inline int cto64(uint64_t val) 199 { 200 return ctz64(~val); 201 } 202 203 /** 204 * clrsb32 - count leading redundant sign bits in a 32-bit value. 205 * @val: The value to search 206 * 207 * Returns the number of bits following the sign bit that are equal to it. 208 * No special cases; output range is [0-31]. 209 */ 210 static inline int clrsb32(uint32_t val) 211 { 212 #if __has_builtin(__builtin_clrsb) || !defined(__clang__) 213 return __builtin_clrsb(val); 214 #else 215 return clz32(val ^ ((int32_t)val >> 1)) - 1; 216 #endif 217 } 218 219 /** 220 * clrsb64 - count leading redundant sign bits in a 64-bit value. 221 * @val: The value to search 222 * 223 * Returns the number of bits following the sign bit that are equal to it. 224 * No special cases; output range is [0-63]. 225 */ 226 static inline int clrsb64(uint64_t val) 227 { 228 #if __has_builtin(__builtin_clrsbll) || !defined(__clang__) 229 return __builtin_clrsbll(val); 230 #else 231 return clz64(val ^ ((int64_t)val >> 1)) - 1; 232 #endif 233 } 234 235 /** 236 * ctpop8 - count the population of one bits in an 8-bit value. 237 * @val: The value to search 238 */ 239 static inline int ctpop8(uint8_t val) 240 { 241 return __builtin_popcount(val); 242 } 243 244 /** 245 * ctpop16 - count the population of one bits in a 16-bit value. 246 * @val: The value to search 247 */ 248 static inline int ctpop16(uint16_t val) 249 { 250 return __builtin_popcount(val); 251 } 252 253 /** 254 * ctpop32 - count the population of one bits in a 32-bit value. 255 * @val: The value to search 256 */ 257 static inline int ctpop32(uint32_t val) 258 { 259 return __builtin_popcount(val); 260 } 261 262 /** 263 * ctpop64 - count the population of one bits in a 64-bit value. 264 * @val: The value to search 265 */ 266 static inline int ctpop64(uint64_t val) 267 { 268 return __builtin_popcountll(val); 269 } 270 271 /** 272 * revbit8 - reverse the bits in an 8-bit value. 273 * @x: The value to modify. 274 */ 275 static inline uint8_t revbit8(uint8_t x) 276 { 277 #if __has_builtin(__builtin_bitreverse8) 278 return __builtin_bitreverse8(x); 279 #else 280 /* Assign the correct nibble position. */ 281 x = ((x & 0xf0) >> 4) 282 | ((x & 0x0f) << 4); 283 /* Assign the correct bit position. */ 284 x = ((x & 0x88) >> 3) 285 | ((x & 0x44) >> 1) 286 | ((x & 0x22) << 1) 287 | ((x & 0x11) << 3); 288 return x; 289 #endif 290 } 291 292 /** 293 * revbit16 - reverse the bits in a 16-bit value. 294 * @x: The value to modify. 295 */ 296 static inline uint16_t revbit16(uint16_t x) 297 { 298 #if __has_builtin(__builtin_bitreverse16) 299 return __builtin_bitreverse16(x); 300 #else 301 /* Assign the correct byte position. */ 302 x = bswap16(x); 303 /* Assign the correct nibble position. */ 304 x = ((x & 0xf0f0) >> 4) 305 | ((x & 0x0f0f) << 4); 306 /* Assign the correct bit position. */ 307 x = ((x & 0x8888) >> 3) 308 | ((x & 0x4444) >> 1) 309 | ((x & 0x2222) << 1) 310 | ((x & 0x1111) << 3); 311 return x; 312 #endif 313 } 314 315 /** 316 * revbit32 - reverse the bits in a 32-bit value. 317 * @x: The value to modify. 318 */ 319 static inline uint32_t revbit32(uint32_t x) 320 { 321 #if __has_builtin(__builtin_bitreverse32) 322 return __builtin_bitreverse32(x); 323 #else 324 /* Assign the correct byte position. */ 325 x = bswap32(x); 326 /* Assign the correct nibble position. */ 327 x = ((x & 0xf0f0f0f0u) >> 4) 328 | ((x & 0x0f0f0f0fu) << 4); 329 /* Assign the correct bit position. */ 330 x = ((x & 0x88888888u) >> 3) 331 | ((x & 0x44444444u) >> 1) 332 | ((x & 0x22222222u) << 1) 333 | ((x & 0x11111111u) << 3); 334 return x; 335 #endif 336 } 337 338 /** 339 * revbit64 - reverse the bits in a 64-bit value. 340 * @x: The value to modify. 341 */ 342 static inline uint64_t revbit64(uint64_t x) 343 { 344 #if __has_builtin(__builtin_bitreverse64) 345 return __builtin_bitreverse64(x); 346 #else 347 /* Assign the correct byte position. */ 348 x = bswap64(x); 349 /* Assign the correct nibble position. */ 350 x = ((x & 0xf0f0f0f0f0f0f0f0ull) >> 4) 351 | ((x & 0x0f0f0f0f0f0f0f0full) << 4); 352 /* Assign the correct bit position. */ 353 x = ((x & 0x8888888888888888ull) >> 3) 354 | ((x & 0x4444444444444444ull) >> 1) 355 | ((x & 0x2222222222222222ull) << 1) 356 | ((x & 0x1111111111111111ull) << 3); 357 return x; 358 #endif 359 } 360 361 /** 362 * Return the absolute value of a 64-bit integer as an unsigned 64-bit value 363 */ 364 static inline uint64_t uabs64(int64_t v) 365 { 366 return v < 0 ? -v : v; 367 } 368 369 /** 370 * sadd32_overflow - addition with overflow indication 371 * @x, @y: addends 372 * @ret: Output for sum 373 * 374 * Computes *@ret = @x + @y, and returns true if and only if that 375 * value has been truncated. 376 */ 377 static inline bool sadd32_overflow(int32_t x, int32_t y, int32_t *ret) 378 { 379 return __builtin_add_overflow(x, y, ret); 380 } 381 382 /** 383 * sadd64_overflow - addition with overflow indication 384 * @x, @y: addends 385 * @ret: Output for sum 386 * 387 * Computes *@ret = @x + @y, and returns true if and only if that 388 * value has been truncated. 389 */ 390 static inline bool sadd64_overflow(int64_t x, int64_t y, int64_t *ret) 391 { 392 return __builtin_add_overflow(x, y, ret); 393 } 394 395 /** 396 * uadd32_overflow - addition with overflow indication 397 * @x, @y: addends 398 * @ret: Output for sum 399 * 400 * Computes *@ret = @x + @y, and returns true if and only if that 401 * value has been truncated. 402 */ 403 static inline bool uadd32_overflow(uint32_t x, uint32_t y, uint32_t *ret) 404 { 405 return __builtin_add_overflow(x, y, ret); 406 } 407 408 /** 409 * uadd64_overflow - addition with overflow indication 410 * @x, @y: addends 411 * @ret: Output for sum 412 * 413 * Computes *@ret = @x + @y, and returns true if and only if that 414 * value has been truncated. 415 */ 416 static inline bool uadd64_overflow(uint64_t x, uint64_t y, uint64_t *ret) 417 { 418 return __builtin_add_overflow(x, y, ret); 419 } 420 421 /** 422 * ssub32_overflow - subtraction with overflow indication 423 * @x: Minuend 424 * @y: Subtrahend 425 * @ret: Output for difference 426 * 427 * Computes *@ret = @x - @y, and returns true if and only if that 428 * value has been truncated. 429 */ 430 static inline bool ssub32_overflow(int32_t x, int32_t y, int32_t *ret) 431 { 432 return __builtin_sub_overflow(x, y, ret); 433 } 434 435 /** 436 * ssub64_overflow - subtraction with overflow indication 437 * @x: Minuend 438 * @y: Subtrahend 439 * @ret: Output for sum 440 * 441 * Computes *@ret = @x - @y, and returns true if and only if that 442 * value has been truncated. 443 */ 444 static inline bool ssub64_overflow(int64_t x, int64_t y, int64_t *ret) 445 { 446 return __builtin_sub_overflow(x, y, ret); 447 } 448 449 /** 450 * usub32_overflow - subtraction with overflow indication 451 * @x: Minuend 452 * @y: Subtrahend 453 * @ret: Output for sum 454 * 455 * Computes *@ret = @x - @y, and returns true if and only if that 456 * value has been truncated. 457 */ 458 static inline bool usub32_overflow(uint32_t x, uint32_t y, uint32_t *ret) 459 { 460 return __builtin_sub_overflow(x, y, ret); 461 } 462 463 /** 464 * usub64_overflow - subtraction with overflow indication 465 * @x: Minuend 466 * @y: Subtrahend 467 * @ret: Output for sum 468 * 469 * Computes *@ret = @x - @y, and returns true if and only if that 470 * value has been truncated. 471 */ 472 static inline bool usub64_overflow(uint64_t x, uint64_t y, uint64_t *ret) 473 { 474 return __builtin_sub_overflow(x, y, ret); 475 } 476 477 /** 478 * smul32_overflow - multiplication with overflow indication 479 * @x, @y: Input multipliers 480 * @ret: Output for product 481 * 482 * Computes *@ret = @x * @y, and returns true if and only if that 483 * value has been truncated. 484 */ 485 static inline bool smul32_overflow(int32_t x, int32_t y, int32_t *ret) 486 { 487 return __builtin_mul_overflow(x, y, ret); 488 } 489 490 /** 491 * smul64_overflow - multiplication with overflow indication 492 * @x, @y: Input multipliers 493 * @ret: Output for product 494 * 495 * Computes *@ret = @x * @y, and returns true if and only if that 496 * value has been truncated. 497 */ 498 static inline bool smul64_overflow(int64_t x, int64_t y, int64_t *ret) 499 { 500 return __builtin_mul_overflow(x, y, ret); 501 } 502 503 /** 504 * umul32_overflow - multiplication with overflow indication 505 * @x, @y: Input multipliers 506 * @ret: Output for product 507 * 508 * Computes *@ret = @x * @y, and returns true if and only if that 509 * value has been truncated. 510 */ 511 static inline bool umul32_overflow(uint32_t x, uint32_t y, uint32_t *ret) 512 { 513 return __builtin_mul_overflow(x, y, ret); 514 } 515 516 /** 517 * umul64_overflow - multiplication with overflow indication 518 * @x, @y: Input multipliers 519 * @ret: Output for product 520 * 521 * Computes *@ret = @x * @y, and returns true if and only if that 522 * value has been truncated. 523 */ 524 static inline bool umul64_overflow(uint64_t x, uint64_t y, uint64_t *ret) 525 { 526 return __builtin_mul_overflow(x, y, ret); 527 } 528 529 /* 530 * Unsigned 128x64 multiplication. 531 * Returns true if the result got truncated to 128 bits. 532 * Otherwise, returns false and the multiplication result via plow and phigh. 533 */ 534 static inline bool mulu128(uint64_t *plow, uint64_t *phigh, uint64_t factor) 535 { 536 #if defined(CONFIG_INT128) 537 bool res; 538 __uint128_t r; 539 __uint128_t f = ((__uint128_t)*phigh << 64) | *plow; 540 res = __builtin_mul_overflow(f, factor, &r); 541 542 *plow = r; 543 *phigh = r >> 64; 544 545 return res; 546 #else 547 uint64_t dhi = *phigh; 548 uint64_t dlo = *plow; 549 uint64_t ahi; 550 uint64_t blo, bhi; 551 552 if (dhi == 0) { 553 mulu64(plow, phigh, dlo, factor); 554 return false; 555 } 556 557 mulu64(plow, &ahi, dlo, factor); 558 mulu64(&blo, &bhi, dhi, factor); 559 560 return uadd64_overflow(ahi, blo, phigh) || bhi != 0; 561 #endif 562 } 563 564 /** 565 * uadd64_carry - addition with carry-in and carry-out 566 * @x, @y: addends 567 * @pcarry: in-out carry value 568 * 569 * Computes @x + @y + *@pcarry, placing the carry-out back 570 * into *@pcarry and returning the 64-bit sum. 571 */ 572 static inline uint64_t uadd64_carry(uint64_t x, uint64_t y, bool *pcarry) 573 { 574 #if __has_builtin(__builtin_addcll) 575 unsigned long long c = *pcarry; 576 x = __builtin_addcll(x, y, c, &c); 577 *pcarry = c & 1; 578 return x; 579 #else 580 bool c = *pcarry; 581 /* This is clang's internal expansion of __builtin_addc. */ 582 c = uadd64_overflow(x, c, &x); 583 c |= uadd64_overflow(x, y, &x); 584 *pcarry = c; 585 return x; 586 #endif 587 } 588 589 /** 590 * usub64_borrow - subtraction with borrow-in and borrow-out 591 * @x, @y: addends 592 * @pborrow: in-out borrow value 593 * 594 * Computes @x - @y - *@pborrow, placing the borrow-out back 595 * into *@pborrow and returning the 64-bit sum. 596 */ 597 static inline uint64_t usub64_borrow(uint64_t x, uint64_t y, bool *pborrow) 598 { 599 #if __has_builtin(__builtin_subcll) 600 unsigned long long b = *pborrow; 601 x = __builtin_subcll(x, y, b, &b); 602 *pborrow = b & 1; 603 return x; 604 #else 605 bool b = *pborrow; 606 b = usub64_overflow(x, b, &x); 607 b |= usub64_overflow(x, y, &x); 608 *pborrow = b; 609 return x; 610 #endif 611 } 612 613 /* Host type specific sizes of these routines. */ 614 615 #if ULONG_MAX == UINT32_MAX 616 # define clzl clz32 617 # define ctzl ctz32 618 # define clol clo32 619 # define ctol cto32 620 # define ctpopl ctpop32 621 # define revbitl revbit32 622 #elif ULONG_MAX == UINT64_MAX 623 # define clzl clz64 624 # define ctzl ctz64 625 # define clol clo64 626 # define ctol cto64 627 # define ctpopl ctpop64 628 # define revbitl revbit64 629 #else 630 # error Unknown sizeof long 631 #endif 632 633 static inline bool is_power_of_2(uint64_t value) 634 { 635 if (!value) { 636 return false; 637 } 638 639 return !(value & (value - 1)); 640 } 641 642 /** 643 * Return @value rounded down to the nearest power of two or zero. 644 */ 645 static inline uint64_t pow2floor(uint64_t value) 646 { 647 if (!value) { 648 /* Avoid undefined shift by 64 */ 649 return 0; 650 } 651 return 0x8000000000000000ull >> clz64(value); 652 } 653 654 /* 655 * Return @value rounded up to the nearest power of two modulo 2^64. 656 * This is *zero* for @value > 2^63, so be careful. 657 */ 658 static inline uint64_t pow2ceil(uint64_t value) 659 { 660 int n = clz64(value - 1); 661 662 if (!n) { 663 /* 664 * @value - 1 has no leading zeroes, thus @value - 1 >= 2^63 665 * Therefore, either @value == 0 or @value > 2^63. 666 * If it's 0, return 1, else return 0. 667 */ 668 return !value; 669 } 670 return 0x8000000000000000ull >> (n - 1); 671 } 672 673 static inline uint32_t pow2roundup32(uint32_t x) 674 { 675 x |= (x >> 1); 676 x |= (x >> 2); 677 x |= (x >> 4); 678 x |= (x >> 8); 679 x |= (x >> 16); 680 return x + 1; 681 } 682 683 /** 684 * urshift - 128-bit Unsigned Right Shift. 685 * @plow: in/out - lower 64-bit integer. 686 * @phigh: in/out - higher 64-bit integer. 687 * @shift: in - bytes to shift, between 0 and 127. 688 * 689 * Result is zero-extended and stored in plow/phigh, which are 690 * input/output variables. Shift values outside the range will 691 * be mod to 128. In other words, the caller is responsible to 692 * verify/assert both the shift range and plow/phigh pointers. 693 */ 694 void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift); 695 696 /** 697 * ulshift - 128-bit Unsigned Left Shift. 698 * @plow: in/out - lower 64-bit integer. 699 * @phigh: in/out - higher 64-bit integer. 700 * @shift: in - bytes to shift, between 0 and 127. 701 * @overflow: out - true if any 1-bit is shifted out. 702 * 703 * Result is zero-extended and stored in plow/phigh, which are 704 * input/output variables. Shift values outside the range will 705 * be mod to 128. In other words, the caller is responsible to 706 * verify/assert both the shift range and plow/phigh pointers. 707 */ 708 void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow); 709 710 /* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd 711 * (https://gmplib.org/repo/gmp/file/tip/longlong.h) 712 * 713 * Licensed under the GPLv2/LGPLv3 714 */ 715 static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1, 716 uint64_t n0, uint64_t d) 717 { 718 #if defined(__x86_64__) 719 uint64_t q; 720 asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d)); 721 return q; 722 #elif defined(__s390x__) && !defined(__clang__) 723 /* Need to use a TImode type to get an even register pair for DLGR. */ 724 unsigned __int128 n = (unsigned __int128)n1 << 64 | n0; 725 asm("dlgr %0, %1" : "+r"(n) : "r"(d)); 726 *r = n >> 64; 727 return n; 728 #elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7) 729 /* From Power ISA 2.06, programming note for divdeu. */ 730 uint64_t q1, q2, Q, r1, r2, R; 731 asm("divdeu %0,%2,%4; divdu %1,%3,%4" 732 : "=&r"(q1), "=r"(q2) 733 : "r"(n1), "r"(n0), "r"(d)); 734 r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */ 735 r2 = n0 - (q2 * d); 736 Q = q1 + q2; 737 R = r1 + r2; 738 if (R >= d || R < r2) { /* overflow implies R > d */ 739 Q += 1; 740 R -= d; 741 } 742 *r = R; 743 return Q; 744 #else 745 uint64_t d0, d1, q0, q1, r1, r0, m; 746 747 d0 = (uint32_t)d; 748 d1 = d >> 32; 749 750 r1 = n1 % d1; 751 q1 = n1 / d1; 752 m = q1 * d0; 753 r1 = (r1 << 32) | (n0 >> 32); 754 if (r1 < m) { 755 q1 -= 1; 756 r1 += d; 757 if (r1 >= d) { 758 if (r1 < m) { 759 q1 -= 1; 760 r1 += d; 761 } 762 } 763 } 764 r1 -= m; 765 766 r0 = r1 % d1; 767 q0 = r1 / d1; 768 m = q0 * d0; 769 r0 = (r0 << 32) | (uint32_t)n0; 770 if (r0 < m) { 771 q0 -= 1; 772 r0 += d; 773 if (r0 >= d) { 774 if (r0 < m) { 775 q0 -= 1; 776 r0 += d; 777 } 778 } 779 } 780 r0 -= m; 781 782 *r = r0; 783 return (q1 << 32) | q0; 784 #endif 785 } 786 787 Int128 divu256(Int128 *plow, Int128 *phigh, Int128 divisor); 788 Int128 divs256(Int128 *plow, Int128 *phigh, Int128 divisor); 789 #endif 790