xref: /openbmc/qemu/include/fpu/softfloat-macros.h (revision a92cecba2791cd408d2bca04ce181dc2abaf9695)
/*
 * QEMU float support macros
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */
1688857acaSLaurent Vivier 
/*
===============================================================================
This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2a.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/SoftFloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these four paragraphs for those parts of
this code that are retained.

===============================================================================
*/
4588857acaSLaurent Vivier 
/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
7688857acaSLaurent Vivier 
77f91005e1SMarkus Armbruster #ifndef FPU_SOFTFLOAT_MACROS_H
78f91005e1SMarkus Armbruster #define FPU_SOFTFLOAT_MACROS_H
79f91005e1SMarkus Armbruster 
805937fb63SAlex Bennée #include "fpu/softfloat-types.h"
81cb3ad036SRichard Henderson #include "qemu/host-utils.h"
82ec150c7eSMarkus Armbruster 
/**
 * shl_double: double-word merging left shift
 * @l: left or most-significant word
 * @r: right or least-significant word
 * @c: shift count
 *
 * Returns @l shifted left by @c bits, with the vacated low bits filled
 * from the most-significant bits of @r.  The portable fallback is only
 * well defined for 0 <= @c <= 63.
 */
static inline uint64_t shl_double(uint64_t l, uint64_t r, int c)
{
#if defined(__x86_64__)
    /*
     * SHLD performs the merge in a single instruction.  The count is
     * passed either in %cl or as an immediate ("ci"); %b2 selects the
     * byte-sized form of that operand.  __asm__ (rather than asm) keeps
     * this buildable in strict ISO C modes as well as GNU C.
     */
    __asm__("shld %b2, %1, %0" : "+r"(l) : "r"(r), "ci"(c));
    return l;
#else
    /* c == 0 is special-cased because r >> 64 would be undefined. */
    return c ? (l << c) | (r >> (64 - c)) : l;
#endif
}
100463e45dcSRichard Henderson 
/**
 * shr_double: double-word merging right shift
 * @l: left or most-significant word
 * @r: right or least-significant word
 * @c: shift count
 *
 * Returns @r shifted right by @c bits, with the vacated high bits filled
 * from the least-significant bits of @l.  The portable fallback is only
 * well defined for 0 <= @c <= 63.
 */
static inline uint64_t shr_double(uint64_t l, uint64_t r, int c)
{
#if defined(__x86_64__)
    /*
     * SHRD performs the merge in a single instruction.  The count is
     * passed either in %cl or as an immediate ("ci"); %b2 selects the
     * byte-sized form of that operand.  __asm__ (rather than asm) keeps
     * this buildable in strict ISO C modes as well as GNU C.
     */
    __asm__("shrd %b2, %1, %0" : "+r"(r) : "r"(l), "ci"(c));
    return r;
#else
    /* c == 0 is special-cased because l << 64 would be undefined. */
    return c ? (r >> c) | (l << (64 - c)) : r;
#endif
}
118463e45dcSRichard Henderson 
/*----------------------------------------------------------------------------
| Shifts the 32-bit value `a' right by `count' bits and stores the result at
| `zPtr'.  The shift is "jamming": if any nonzero bit is shifted out, the
| least significant bit of the result is forced to 1, so inexactness stays
| visible to later rounding.  A `count' of 0 stores `a' unchanged.  A `count'
| of 32 or more stores 1 if `a' is nonzero and 0 otherwise.
*----------------------------------------------------------------------------*/

static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
{
    uint32_t z;

    if (count == 0) {
        z = a;
    } else if (count < 32) {
        /* For 0 < count < 32, (-count & 31) == 32 - count, so this keeps
         * exactly the bits that the right shift discards. */
        uint32_t lost = a << (-count & 31);

        z = (a >> count) | (lost != 0);
    } else {
        z = (a != 0);
    }
    *zPtr = z;
}
14488857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 64-bit value `a' right by `count' bits and stores the result at
| `zPtr'.  The shift is "jamming": if any nonzero bit is shifted out, the
| least significant bit of the result is forced to 1, so inexactness stays
| visible to later rounding.  A `count' of 0 stores `a' unchanged.  A `count'
| of 64 or more stores 1 if `a' is nonzero and 0 otherwise.
*----------------------------------------------------------------------------*/

static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
{
    uint64_t z;

    if (count == 0) {
        z = a;
    } else if (count < 64) {
        /* For 0 < count < 64, (-count & 63) == 64 - count, so this keeps
         * exactly the bits that the right shift discards. */
        uint64_t lost = a << (-count & 63);

        z = (a >> count) | (lost != 0);
    } else {
        z = (a != 0);
    }
    *zPtr = z;
}
17088857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Treats `a0' and `a1' as a fixed-point value with the binary point between
| them (`a0' is the integer part, `a1' the fraction) and shifts it right by
| `count' bits.  The integer part of the result is stored at `z0Ptr'.  The
| fraction is stored at `z1Ptr' in a slightly lossy form: the bits shifted out
| of `a0' land at the top of the extra word, and anything nonzero that falls
| below them is folded ("jammed") into its least significant bit.
|     count == 0      : both words are stored unchanged.
|     0 < count < 64  : z0 = a0 >> count; z1 holds the bits shifted out of
|                       a0, with bit 0 set if a1 is nonzero.
|     count == 64     : z0 = 0; z1 = a0, with bit 0 set if a1 is nonzero.
|     count > 64      : z0 = 0; z1 is 1 if a0 or a1 is nonzero, else 0.
*----------------------------------------------------------------------------*/

static inline void shift64ExtraRightJamming(uint64_t a0, uint64_t a1, int count,
                                            uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;

    if (count == 0) {
        z1 = a1;
        z0 = a0;
    } else if (count < 64) {
        /* (-count & 63) == 64 - count for this range. */
        z1 = (a0 << (-count & 63)) | (a1 != 0);
        z0 = a0 >> count;
    } else {
        z1 = (count == 64) ? (a0 | (a1 != 0)) : ((a0 | a1) != 0);
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;
}
21688857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) right by `count' bits.  Bits shifted out are discarded.  The
| high and low words of the result are stored at `z0Ptr' and `z1Ptr'.  A
| `count' of 128 or more produces zero.
*----------------------------------------------------------------------------*/

static inline void shift128Right(uint64_t a0, uint64_t a1, int count,
                                 uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;

    if (count == 0) {
        z1 = a1;
        z0 = a0;
    } else if (count < 64) {
        /* (-count & 63) == 64 - count: low bits of a0 move into z1. */
        z1 = (a0 << (-count & 63)) | (a1 >> count);
        z0 = a0 >> count;
    } else {
        /* Only a0 can still contribute, shifted by count - 64. */
        z1 = (count < 128) ? (a0 >> (count & 63)) : 0;
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;
}
24888857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) right by `count' bits, with jamming: if any nonzero bit is
| shifted out, the least significant bit of the result is set to 1.  The high
| and low words of the result are stored at `z0Ptr' and `z1Ptr'.  A `count'
| of 128 or more produces 1 if the input is nonzero and 0 otherwise.
*----------------------------------------------------------------------------*/

static inline void shift128RightJamming(uint64_t a0, uint64_t a1, int count,
                                        uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    uint64_t z0, z1;
    /* Complement of the shift within one 64-bit word: (64 - count) mod 64. */
    int negCount = -count & 63;

    if (count == 0) {
        z1 = a1;
        z0 = a0;
    } else if (count < 64) {
        /* Sticky bit comes from the part of a1 that falls off the end. */
        z1 = (a0 << negCount) | (a1 >> count) | ((a1 << negCount) != 0);
        z0 = a0 >> count;
    } else if (count == 64) {
        /* a0 moves down whole; all of a1 is shifted out. */
        z1 = a0 | (a1 != 0);
        z0 = 0;
    } else if (count < 128) {
        /* Lost bits are the low part of a0 plus all of a1. */
        z1 = (a0 >> (count & 63)) | (((a0 << negCount) | a1) != 0);
        z0 = 0;
    } else {
        z1 = ((a0 | a1) != 0);
        z0 = 0;
    }
    *z1Ptr = z1;
    *z0Ptr = z0;
}
29188857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Treats `a0', `a1' and `a2' as a fixed-point value with the binary point
| between `a1' and `a2' (so `a0':`a1' is a 128-bit integer part and `a2' the
| fraction) and shifts it right by `count' bits.  The integer part of the
| result is stored at `z0Ptr' (high word) and `z1Ptr' (low word).  The
| fraction is stored at `z2Ptr' in a slightly lossy form: the most recently
| shifted-out bits occupy the top of the extra word, and anything nonzero
| that falls below them is folded ("jammed") into its least significant bit.
| A `count' of 0 stores all three words unchanged.  For a `count' above 128
| the extra word is 1 if any input bit is nonzero and 0 otherwise.
*----------------------------------------------------------------------------*/

static inline void shift128ExtraRightJamming(uint64_t a0, uint64_t a1,
                                             uint64_t a2, int count,
                                             uint64_t *z0Ptr, uint64_t *z1Ptr,
                                             uint64_t *z2Ptr)
{
    uint64_t z0, z1, z2;

    if (count == 0) {
        z2 = a2;
        z1 = a1;
        z0 = a0;
    } else {
        /* Complement of the shift within one word: (64 - count) mod 64. */
        int negCount = -count & 63;

        if (count < 64) {
            z2 = a1 << negCount;
            z1 = (a0 << negCount) | (a1 >> count);
            z0 = a0 >> count;
        } else if (count == 64) {
            /* Every word moves down by exactly one position. */
            z2 = a1;
            z1 = a0;
            z0 = 0;
        } else {
            /* All of a1 now lies below the extra word: make it sticky. */
            a2 |= a1;
            if (count < 128) {
                z2 = a0 << negCount;
                z1 = a0 >> (count & 63);
            } else {
                z2 = (count == 128) ? a0 : (a0 != 0);
                z1 = 0;
            }
            z0 = 0;
        }
        /* Fold everything that dropped below the extra word into bit 0. */
        z2 |= (a2 != 0);
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;
}
36188857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) left by `count' bits.  Bits shifted out of the top are lost.
| `count' must be less than 64.  The high and low words of the result are
| stored at `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    *z1Ptr = a1 << count;
    if (count == 0) {
        /* Nothing crosses the word boundary. */
        *z0Ptr = a0;
    } else {
        /* (-count & 63) == 64 - count: the top bits of a1 enter a0. */
        *z0Ptr = (a0 << count) | (a1 >> (-count & 63));
    }
}
37588857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) left by `count' bits.  Bits shifted out of the top are lost.
| `count' may be 64 or more; a `count' of 128 or more produces zero.  The
| high and low words of the result are stored at `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
                                uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    if (count < 64) {
        *z1Ptr = a1 << count;
        /* (-count & 63) == 64 - count; count == 0 must not shift by 64. */
        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
    } else if (count < 128) {
        /* The low word moves entirely into the high word. */
        *z1Ptr = 0;
        *z0Ptr = a1 << (count - 64);
    } else {
        /* Every bit is shifted out; a 64-bit shift by >= 64 is undefined. */
        *z1Ptr = 0;
        *z0Ptr = 0;
    }
}
39488857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Shifts the 192-bit value formed by concatenating `a0' (most significant),
| `a1' and `a2' (least significant) left by `count' bits.  Bits shifted out
| of the top are lost.  `count' must be less than 64.  The three 64-bit words
| of the result are stored, most significant first, at `z0Ptr', `z1Ptr' and
| `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void shortShift192Left(uint64_t a0, uint64_t a1, uint64_t a2,
                                     int count, uint64_t *z0Ptr,
                                     uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    uint64_t z2 = a2 << count;
    uint64_t z1 = a1 << count;
    uint64_t z0 = a0 << count;

    if (count > 0) {
        /* (-count & 63) == 64 - count: carry the top bits of each word
         * into the next more significant word. */
        int negCount = -count & 63;

        z1 |= a2 >> negCount;
        z0 |= a1 >> negCount;
    }
    *z2Ptr = z2;
    *z1Ptr = z1;
    *z0Ptr = z0;
}
43088857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Adds the 128-bit value formed by concatenating `a0' (high word) and `a1'
| (low word) to the 128-bit value formed by concatenating `b0' and `b1'.
| The addition is modulo 2^128: the carry out of the high word is dropped.
| The high and low words of the sum are stored at `z0Ptr' and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
                          uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* Carry chained from the low word into the high word; uadd64_carry()
     * (qemu/host-utils.h) is expected to consume and update it. */
    bool c = 0;

    *z1Ptr = uadd64_carry(a1, b1, &c);
    *z0Ptr = uadd64_carry(a0, b0, &c);
}
44588857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Adds the 192-bit value formed by concatenating `a0' (most significant),
| `a1' and `a2' (least significant) to the 192-bit value formed by
| concatenating `b0', `b1' and `b2'.  The addition is modulo 2^192: the carry
| out of the most significant word is dropped.  The three words of the sum
| are stored, most significant first, at `z0Ptr', `z1Ptr' and `z2Ptr'.
*----------------------------------------------------------------------------*/

static inline void add192(uint64_t a0, uint64_t a1, uint64_t a2,
                          uint64_t b0, uint64_t b1, uint64_t b2,
                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    /* Carry chained upward from the least significant word; uadd64_carry()
     * (qemu/host-utils.h) is expected to consume and update it. */
    bool c = 0;

    *z2Ptr = uadd64_carry(a2, b2, &c);
    *z1Ptr = uadd64_carry(a1, b1, &c);
    *z0Ptr = uadd64_carry(a0, b0, &c);
}
46388857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Subtracts the 128-bit value formed by concatenating `b0' (high word) and
| `b1' (low word) from the 128-bit value formed by concatenating `a0' and
| `a1'.  The subtraction is modulo 2^128: the borrow out of the high word is
| dropped.  The high and low words of the difference are stored at `z0Ptr'
| and `z1Ptr'.
*----------------------------------------------------------------------------*/

static inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1,
                          uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /* Borrow chained from the low word into the high word; usub64_borrow()
     * (qemu/host-utils.h) is expected to consume and update it. */
    bool c = 0;

    *z1Ptr = usub64_borrow(a1, b1, &c);
    *z0Ptr = usub64_borrow(a0, b0, &c);
}
47988857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
| result is broken into three 64-bit pieces which are stored at the locations
| pointed to by `z0Ptr' (most significant word), `z1Ptr', and `z2Ptr' (least
| significant word).
*----------------------------------------------------------------------------*/

static inline void sub192(uint64_t a0, uint64_t a1, uint64_t a2,
                          uint64_t b0, uint64_t b1, uint64_t b2,
                          uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    /*
     * Words are processed from least significant (a2/b2) to most
     * significant (a0/b0) with the borrow threaded through each step.
     * NOTE(review): usub64_borrow() is defined outside this block; it is
     * presumed to subtract its borrow-in and update the flag.  The flag
     * left by the final step is never read.
     */
    bool borrow = false;

    *z2Ptr = usub64_borrow(a2, b2, &borrow);
    *z1Ptr = usub64_borrow(a1, b1, &borrow);
    *z0Ptr = usub64_borrow(a0, b0, &borrow);
}
49788857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
| into two 64-bit pieces which are stored at the locations pointed to by
| `z0Ptr' (most significant word) and `z1Ptr' (least significant word).
*----------------------------------------------------------------------------*/

static inline void
mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
{
    /*
     * NOTE(review): mulu64() is defined outside this block; from the
     * argument order used here it is presumed to take the low-word
     * destination first and the high-word destination second.
     */
    mulu64(z1Ptr, z0Ptr, a, b);
}
50988857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
| pieces which are stored at the locations pointed to by `z0Ptr' (most
| significant word), `z1Ptr', and `z2Ptr' (least significant word).
*----------------------------------------------------------------------------*/

static inline void
mul128By64To192(uint64_t a0, uint64_t a1, uint64_t b,
                uint64_t *z0Ptr, uint64_t *z1Ptr, uint64_t *z2Ptr)
{
    uint64_t hi0, hi1, lowCarry;

    /* a1 * b: the low word is final; the high word feeds the middle word. */
    mul64To128(a1, b, &lowCarry, z2Ptr);
    /* a0 * b forms the upper 128 bits before that high word is folded in. */
    mul64To128(a0, b, &hi0, &hi1);
    /* (hi0:hi1) + (0:lowCarry) yields the top two result words. */
    add128(hi0, hi1, 0, lowCarry, z0Ptr, z1Ptr);
}
52788857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by the
| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
| product.  The product is broken into four 64-bit pieces which are stored at
| the locations pointed to by `z0Ptr' (most significant word), `z1Ptr',
| `z2Ptr', and `z3Ptr' (least significant word).
*----------------------------------------------------------------------------*/

static inline void mul128To256(uint64_t a0, uint64_t a1,
                               uint64_t b0, uint64_t b1,
                               uint64_t *z0Ptr, uint64_t *z1Ptr,
                               uint64_t *z2Ptr, uint64_t *z3Ptr)
{
    uint64_t z0, z1, z2;
    uint64_t m0, m1, m2, n1, n2;

    /* The two cross products, each a (high:low) pair. */
    mul64To128(a1, b0, &m1, &m2);
    mul64To128(a0, b1, &n1, &n2);
    /* Low product: its low word is the final least significant word. */
    mul64To128(a1, b1, &z2, z3Ptr);
    /* High product supplies the two most significant words. */
    mul64To128(a0, b0, &z0, &z1);

    /*
     * Sum the cross products as 192-bit values so that the carry out of
     * their 128-bit sum is captured in m0.  The inputs are passed by value,
     * so reusing m1/m2 as outputs is safe.
     */
    add192( 0, m1, m2,  0, n1, n2, &m0, &m1, &m2);
    /* Fold that sum into the upper three words of the product. */
    add192(m0, m1, m2, z0, z1, z2, z0Ptr, z1Ptr, z2Ptr);
}
55188857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns an approximation to the 64-bit integer quotient obtained by dividing
| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
| toward zero, the approximation returned lies between q and q + 2 inclusive.
| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
| unsigned integer is returned.
*----------------------------------------------------------------------------*/

static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
{
    uint64_t b0, b1;
    uint64_t rem0, rem1, term0, term1;
    uint64_t z;

    /* The quotient would not fit in 64 bits: saturate. */
    if (b <= a0) {
        return UINT64_C(0xFFFFFFFFFFFFFFFF);
    }

    /* (b0:b1) is b * 2^32 as a 128-bit value; b0 is also the top half of b. */
    b0 = b >> 32;
    b1 = b << 32;

    /* First guess for the upper 32 quotient bits, using only b0. */
    z = (b0 << 32 <= a0) ? UINT64_C(0xFFFFFFFF00000000) : (a0 / b0) << 32;
    mul64To128(b, z, &term0, &term1);
    sub128(a0, a1, term0, term1, &rem0, &rem1);

    /*
     * A negative remainder means the guess was too large: lower it by 2^32
     * and add b * 2^32 back until the remainder is non-negative.
     */
    while ((int64_t)rem0 < 0) {
        z -= UINT64_C(0x100000000);
        add128(rem0, rem1, b0, b1, &rem0, &rem1);
    }

    /* Lower 32 quotient bits from the middle 64 bits of the remainder. */
    rem0 = (rem0 << 32) | (rem1 >> 32);
    z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
    return z;
}
58288857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
| `aExp' (the least significant bit) is 1, the integer returned approximates
| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
| case, the approximation returned lies strictly within +/-2 of the exact
| value.
*----------------------------------------------------------------------------*/

static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
{
    static const uint16_t sqrtOddAdjustments[] = {
        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
    };
    static const uint16_t sqrtEvenAdjustments[] = {
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
    unsigned int index;
    uint32_t z;

    /* Bits 27..30 of `a' select one of the 16 table adjustments. */
    index = (a >> 27) & 15;
    if (aExp & 1) {
        z = 0x4000 + (a >> 17) - sqrtOddAdjustments[index];
        z = ((a / z) << 14) + (z << 15);
        a >>= 1;
    } else {
        z = 0x8000 + (a >> 17) - sqrtEvenAdjustments[index];
        z = a / z + z;
        /* Clamp so that the shift below cannot overflow 32 bits. */
        z = (0x20000 <= z) ? 0xFFFF8000 : (z << 15);
        if (z <= a) {
            /*
             * Halve `a' while keeping its top bit.  This is what an
             * arithmetic right shift of (int32_t)a produces, written
             * without relying on implementation-defined signed shifts.
             */
            return (a >> 1) | (a & UINT32_C(0x80000000));
        }
    }
    /* Final refinement step, using a 64-bit numerator to keep precision. */
    return ((uint32_t)((((uint64_t)a) << 31) / z)) + (z >> 1);
}
62188857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns true if the 128-bit value formed by concatenating `a0' and `a1'
| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns false.
*----------------------------------------------------------------------------*/

static inline bool eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 == b0 && a1 == b1;
}
63288857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns true if the 128-bit value formed by concatenating `a0' and `a1' is
| less than or equal to the 128-bit value formed by concatenating `b0' and
| `b1'.  Otherwise, returns false.  The comparison is unsigned, with `a0' and
| `b0' as the most significant words.
*----------------------------------------------------------------------------*/

static inline bool le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 < b0 || (a0 == b0 && a1 <= b1);
}
64388857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns true if the 128-bit value formed by concatenating `a0' and `a1' is
| less than the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns false.  The comparison is unsigned, with `a0' and `b0'
| as the most significant words.
*----------------------------------------------------------------------------*/

static inline bool lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 < b0 || (a0 == b0 && a1 < b1);
}
65488857acaSLaurent Vivier 
/*----------------------------------------------------------------------------
| Returns true if the 128-bit value formed by concatenating `a0' and `a1'
| is not equal to the 128-bit value formed by concatenating `b0' and `b1'.
| Otherwise, returns false.
*----------------------------------------------------------------------------*/

static inline bool ne128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return a0 != b0 || a1 != b1;
}
665f91005e1SMarkus Armbruster 
/*
 * Comparisons of 192-bit values, analogous to the 128-bit comparisons.
 * Each value is the concatenation of three 64-bit words, with the first
 * word (`a0' or `b0') the most significant.
 */
669*feaf2e9cSRichard Henderson 
/*
 * Returns true if the 192-bit value a0:a1:a2 equals b0:b1:b2.
 * The three word-wise XORs are OR-ed together, so the result is zero
 * only when every pair of words matches.
 */
static inline bool eq192(uint64_t a0, uint64_t a1, uint64_t a2,
                         uint64_t b0, uint64_t b1, uint64_t b2)
{
    return ((a0 ^ b0) | (a1 ^ b1) | (a2 ^ b2)) == 0;
}
675*feaf2e9cSRichard Henderson 
/*
 * Returns true if the 192-bit value a0:a1:a2 is less than or equal to
 * b0:b1:b2 (unsigned).  Words are compared from most significant (a0/b0)
 * to least significant (a2/b2); the first differing pair decides.
 */
static inline bool le192(uint64_t a0, uint64_t a1, uint64_t a2,
                         uint64_t b0, uint64_t b1, uint64_t b2)
{
    if (a0 != b0) {
        return a0 < b0;
    }
    if (a1 != b1) {
        return a1 < b1;
    }
    return a2 <= b2;
}
687*feaf2e9cSRichard Henderson 
/*
 * Returns true if the 192-bit value a0:a1:a2 is strictly less than
 * b0:b1:b2 (unsigned).  Words are compared from most significant (a0/b0)
 * to least significant (a2/b2); the first differing pair decides.
 */
static inline bool lt192(uint64_t a0, uint64_t a1, uint64_t a2,
                         uint64_t b0, uint64_t b1, uint64_t b2)
{
    if (a0 != b0) {
        return a0 < b0;
    }
    if (a1 != b1) {
        return a1 < b1;
    }
    return a2 < b2;
}
699*feaf2e9cSRichard Henderson 
700f91005e1SMarkus Armbruster #endif
701