xref: /openbmc/linux/arch/arm/nwfpe/softfloat-macros (revision c1241c4c)
11da177e4SLinus Torvalds
/*
===============================================================================

This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
Arithmetic Package, Release 2.

Written by John R. Hauser.  This work was made possible in part by the
International Computer Science Institute, located at Suite 600, 1947 Center
Street, Berkeley, California 94704.  Funding was partially provided by the
National Science Foundation under grant MIP-9311980.  The original version
of this code was written as part of a project to build a fixed-point vector
processor in collaboration with the University of California at Berkeley,
overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
arithmetic/softfloat.html'.

THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.

Derivative works are acceptable, even for commercial purposes, so long as
(1) they include prominent notice that the work is derivative, and (2) they
include prominent notice akin to these three paragraphs for those parts of
this code that are retained.

===============================================================================
*/
311da177e4SLinus Torvalds
321da177e4SLinus Torvalds/*
331da177e4SLinus Torvalds-------------------------------------------------------------------------------
341da177e4SLinus TorvaldsShifts `a' right by the number of bits given in `count'.  If any nonzero
351da177e4SLinus Torvaldsbits are shifted off, they are ``jammed'' into the least significant bit of
361da177e4SLinus Torvaldsthe result by setting the least significant bit to 1.  The value of `count'
371da177e4SLinus Torvaldscan be arbitrarily large; in particular, if `count' is greater than 32, the
381da177e4SLinus Torvaldsresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
391da177e4SLinus TorvaldsThe result is stored in the location pointed to by `zPtr'.
401da177e4SLinus Torvalds-------------------------------------------------------------------------------
411da177e4SLinus Torvalds*/
421da177e4SLinus TorvaldsINLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
431da177e4SLinus Torvalds{
441da177e4SLinus Torvalds    bits32 z;
451da177e4SLinus Torvalds    if ( count == 0 ) {
461da177e4SLinus Torvalds        z = a;
471da177e4SLinus Torvalds    }
481da177e4SLinus Torvalds    else if ( count < 32 ) {
491da177e4SLinus Torvalds        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
501da177e4SLinus Torvalds    }
511da177e4SLinus Torvalds    else {
521da177e4SLinus Torvalds        z = ( a != 0 );
531da177e4SLinus Torvalds    }
541da177e4SLinus Torvalds    *zPtr = z;
551da177e4SLinus Torvalds}
561da177e4SLinus Torvalds
571da177e4SLinus Torvalds/*
581da177e4SLinus Torvalds-------------------------------------------------------------------------------
591da177e4SLinus TorvaldsShifts `a' right by the number of bits given in `count'.  If any nonzero
601da177e4SLinus Torvaldsbits are shifted off, they are ``jammed'' into the least significant bit of
611da177e4SLinus Torvaldsthe result by setting the least significant bit to 1.  The value of `count'
621da177e4SLinus Torvaldscan be arbitrarily large; in particular, if `count' is greater than 64, the
631da177e4SLinus Torvaldsresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
641da177e4SLinus TorvaldsThe result is stored in the location pointed to by `zPtr'.
651da177e4SLinus Torvalds-------------------------------------------------------------------------------
661da177e4SLinus Torvalds*/
671da177e4SLinus TorvaldsINLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
681da177e4SLinus Torvalds{
691da177e4SLinus Torvalds    bits64 z;
701da177e4SLinus Torvalds
711da177e4SLinus Torvalds __asm__("@shift64RightJamming -- start");
721da177e4SLinus Torvalds    if ( count == 0 ) {
731da177e4SLinus Torvalds        z = a;
741da177e4SLinus Torvalds    }
751da177e4SLinus Torvalds    else if ( count < 64 ) {
761da177e4SLinus Torvalds        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
771da177e4SLinus Torvalds    }
781da177e4SLinus Torvalds    else {
791da177e4SLinus Torvalds        z = ( a != 0 );
801da177e4SLinus Torvalds    }
811da177e4SLinus Torvalds __asm__("@shift64RightJamming -- end");
821da177e4SLinus Torvalds    *zPtr = z;
831da177e4SLinus Torvalds}
841da177e4SLinus Torvalds
851da177e4SLinus Torvalds/*
861da177e4SLinus Torvalds-------------------------------------------------------------------------------
871da177e4SLinus TorvaldsShifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
881da177e4SLinus Torvalds_plus_ the number of bits given in `count'.  The shifted result is at most
891da177e4SLinus Torvalds64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
901da177e4SLinus Torvaldsbits shifted off form a second 64-bit result as follows:  The _last_ bit
911da177e4SLinus Torvaldsshifted off is the most-significant bit of the extra result, and the other
921da177e4SLinus Torvalds63 bits of the extra result are all zero if and only if _all_but_the_last_
931da177e4SLinus Torvaldsbits shifted off were all zero.  This extra result is stored in the location
941da177e4SLinus Torvaldspointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
951da177e4SLinus Torvalds    (This routine makes more sense if `a0' and `a1' are considered to form a
961da177e4SLinus Torvaldsfixed-point value with binary point between `a0' and `a1'.  This fixed-point
971da177e4SLinus Torvaldsvalue is shifted right by the number of bits given in `count', and the
981da177e4SLinus Torvaldsinteger part of the result is returned at the location pointed to by
991da177e4SLinus Torvalds`z0Ptr'.  The fractional part of the result may be slightly corrupted as
1001da177e4SLinus Torvaldsdescribed above, and is returned at the location pointed to by `z1Ptr'.)
1011da177e4SLinus Torvalds-------------------------------------------------------------------------------
1021da177e4SLinus Torvalds*/
1031da177e4SLinus TorvaldsINLINE void
1041da177e4SLinus Torvalds shift64ExtraRightJamming(
1051da177e4SLinus Torvalds     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1061da177e4SLinus Torvalds{
1071da177e4SLinus Torvalds    bits64 z0, z1;
1081da177e4SLinus Torvalds    int8 negCount = ( - count ) & 63;
1091da177e4SLinus Torvalds
1101da177e4SLinus Torvalds    if ( count == 0 ) {
1111da177e4SLinus Torvalds        z1 = a1;
1121da177e4SLinus Torvalds        z0 = a0;
1131da177e4SLinus Torvalds    }
1141da177e4SLinus Torvalds    else if ( count < 64 ) {
1151da177e4SLinus Torvalds        z1 = ( a0<<negCount ) | ( a1 != 0 );
1161da177e4SLinus Torvalds        z0 = a0>>count;
1171da177e4SLinus Torvalds    }
1181da177e4SLinus Torvalds    else {
1191da177e4SLinus Torvalds        if ( count == 64 ) {
1201da177e4SLinus Torvalds            z1 = a0 | ( a1 != 0 );
1211da177e4SLinus Torvalds        }
1221da177e4SLinus Torvalds        else {
1231da177e4SLinus Torvalds            z1 = ( ( a0 | a1 ) != 0 );
1241da177e4SLinus Torvalds        }
1251da177e4SLinus Torvalds        z0 = 0;
1261da177e4SLinus Torvalds    }
1271da177e4SLinus Torvalds    *z1Ptr = z1;
1281da177e4SLinus Torvalds    *z0Ptr = z0;
1291da177e4SLinus Torvalds
1301da177e4SLinus Torvalds}
1311da177e4SLinus Torvalds
1321da177e4SLinus Torvalds/*
1331da177e4SLinus Torvalds-------------------------------------------------------------------------------
1341da177e4SLinus TorvaldsShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1351da177e4SLinus Torvaldsnumber of bits given in `count'.  Any bits shifted off are lost.  The value
1361da177e4SLinus Torvaldsof `count' can be arbitrarily large; in particular, if `count' is greater
1371da177e4SLinus Torvaldsthan 128, the result will be 0.  The result is broken into two 64-bit pieces
1381da177e4SLinus Torvaldswhich are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
1391da177e4SLinus Torvalds-------------------------------------------------------------------------------
1401da177e4SLinus Torvalds*/
1411da177e4SLinus TorvaldsINLINE void
1421da177e4SLinus Torvalds shift128Right(
1431da177e4SLinus Torvalds     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1441da177e4SLinus Torvalds{
1451da177e4SLinus Torvalds    bits64 z0, z1;
1461da177e4SLinus Torvalds    int8 negCount = ( - count ) & 63;
1471da177e4SLinus Torvalds
1481da177e4SLinus Torvalds    if ( count == 0 ) {
1491da177e4SLinus Torvalds        z1 = a1;
1501da177e4SLinus Torvalds        z0 = a0;
1511da177e4SLinus Torvalds    }
1521da177e4SLinus Torvalds    else if ( count < 64 ) {
1531da177e4SLinus Torvalds        z1 = ( a0<<negCount ) | ( a1>>count );
1541da177e4SLinus Torvalds        z0 = a0>>count;
1551da177e4SLinus Torvalds    }
1561da177e4SLinus Torvalds    else {
1571da177e4SLinus Torvalds        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
1581da177e4SLinus Torvalds        z0 = 0;
1591da177e4SLinus Torvalds    }
1601da177e4SLinus Torvalds    *z1Ptr = z1;
1611da177e4SLinus Torvalds    *z0Ptr = z0;
1621da177e4SLinus Torvalds
1631da177e4SLinus Torvalds}
1641da177e4SLinus Torvalds
1651da177e4SLinus Torvalds/*
1661da177e4SLinus Torvalds-------------------------------------------------------------------------------
1671da177e4SLinus TorvaldsShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
1681da177e4SLinus Torvaldsnumber of bits given in `count'.  If any nonzero bits are shifted off, they
1691da177e4SLinus Torvaldsare ``jammed'' into the least significant bit of the result by setting the
1701da177e4SLinus Torvaldsleast significant bit to 1.  The value of `count' can be arbitrarily large;
1711da177e4SLinus Torvaldsin particular, if `count' is greater than 128, the result will be either 0
1721da177e4SLinus Torvaldsor 1, depending on whether the concatenation of `a0' and `a1' is zero or
1731da177e4SLinus Torvaldsnonzero.  The result is broken into two 64-bit pieces which are stored at
1741da177e4SLinus Torvaldsthe locations pointed to by `z0Ptr' and `z1Ptr'.
1751da177e4SLinus Torvalds-------------------------------------------------------------------------------
1761da177e4SLinus Torvalds*/
1771da177e4SLinus TorvaldsINLINE void
1781da177e4SLinus Torvalds shift128RightJamming(
1791da177e4SLinus Torvalds     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
1801da177e4SLinus Torvalds{
1811da177e4SLinus Torvalds    bits64 z0, z1;
1821da177e4SLinus Torvalds    int8 negCount = ( - count ) & 63;
1831da177e4SLinus Torvalds
1841da177e4SLinus Torvalds    if ( count == 0 ) {
1851da177e4SLinus Torvalds        z1 = a1;
1861da177e4SLinus Torvalds        z0 = a0;
1871da177e4SLinus Torvalds    }
1881da177e4SLinus Torvalds    else if ( count < 64 ) {
1891da177e4SLinus Torvalds        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
1901da177e4SLinus Torvalds        z0 = a0>>count;
1911da177e4SLinus Torvalds    }
1921da177e4SLinus Torvalds    else {
1931da177e4SLinus Torvalds        if ( count == 64 ) {
1941da177e4SLinus Torvalds            z1 = a0 | ( a1 != 0 );
1951da177e4SLinus Torvalds        }
1961da177e4SLinus Torvalds        else if ( count < 128 ) {
1971da177e4SLinus Torvalds            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
1981da177e4SLinus Torvalds        }
1991da177e4SLinus Torvalds        else {
2001da177e4SLinus Torvalds            z1 = ( ( a0 | a1 ) != 0 );
2011da177e4SLinus Torvalds        }
2021da177e4SLinus Torvalds        z0 = 0;
2031da177e4SLinus Torvalds    }
2041da177e4SLinus Torvalds    *z1Ptr = z1;
2051da177e4SLinus Torvalds    *z0Ptr = z0;
2061da177e4SLinus Torvalds
2071da177e4SLinus Torvalds}
2081da177e4SLinus Torvalds
2091da177e4SLinus Torvalds/*
2101da177e4SLinus Torvalds-------------------------------------------------------------------------------
2111da177e4SLinus TorvaldsShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
2121da177e4SLinus Torvaldsby 64 _plus_ the number of bits given in `count'.  The shifted result is
2131da177e4SLinus Torvaldsat most 128 nonzero bits; these are broken into two 64-bit pieces which are
2141da177e4SLinus Torvaldsstored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
2151da177e4SLinus Torvaldsoff form a third 64-bit result as follows:  The _last_ bit shifted off is
2161da177e4SLinus Torvaldsthe most-significant bit of the extra result, and the other 63 bits of the
2171da177e4SLinus Torvaldsextra result are all zero if and only if _all_but_the_last_ bits shifted off
2181da177e4SLinus Torvaldswere all zero.  This extra result is stored in the location pointed to by
2191da177e4SLinus Torvalds`z2Ptr'.  The value of `count' can be arbitrarily large.
2201da177e4SLinus Torvalds    (This routine makes more sense if `a0', `a1', and `a2' are considered
2211da177e4SLinus Torvaldsto form a fixed-point value with binary point between `a1' and `a2'.  This
2221da177e4SLinus Torvaldsfixed-point value is shifted right by the number of bits given in `count',
2231da177e4SLinus Torvaldsand the integer part of the result is returned at the locations pointed to
2241da177e4SLinus Torvaldsby `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
2251da177e4SLinus Torvaldscorrupted as described above, and is returned at the location pointed to by
2261da177e4SLinus Torvalds`z2Ptr'.)
2271da177e4SLinus Torvalds-------------------------------------------------------------------------------
2281da177e4SLinus Torvalds*/
2291da177e4SLinus TorvaldsINLINE void
2301da177e4SLinus Torvalds shift128ExtraRightJamming(
2311da177e4SLinus Torvalds     bits64 a0,
2321da177e4SLinus Torvalds     bits64 a1,
2331da177e4SLinus Torvalds     bits64 a2,
2341da177e4SLinus Torvalds     int16 count,
2351da177e4SLinus Torvalds     bits64 *z0Ptr,
2361da177e4SLinus Torvalds     bits64 *z1Ptr,
2371da177e4SLinus Torvalds     bits64 *z2Ptr
2381da177e4SLinus Torvalds )
2391da177e4SLinus Torvalds{
2401da177e4SLinus Torvalds    bits64 z0, z1, z2;
2411da177e4SLinus Torvalds    int8 negCount = ( - count ) & 63;
2421da177e4SLinus Torvalds
2431da177e4SLinus Torvalds    if ( count == 0 ) {
2441da177e4SLinus Torvalds        z2 = a2;
2451da177e4SLinus Torvalds        z1 = a1;
2461da177e4SLinus Torvalds        z0 = a0;
2471da177e4SLinus Torvalds    }
2481da177e4SLinus Torvalds    else {
2491da177e4SLinus Torvalds        if ( count < 64 ) {
2501da177e4SLinus Torvalds            z2 = a1<<negCount;
2511da177e4SLinus Torvalds            z1 = ( a0<<negCount ) | ( a1>>count );
2521da177e4SLinus Torvalds            z0 = a0>>count;
2531da177e4SLinus Torvalds        }
2541da177e4SLinus Torvalds        else {
2551da177e4SLinus Torvalds            if ( count == 64 ) {
2561da177e4SLinus Torvalds                z2 = a1;
2571da177e4SLinus Torvalds                z1 = a0;
2581da177e4SLinus Torvalds            }
2591da177e4SLinus Torvalds            else {
2601da177e4SLinus Torvalds                a2 |= a1;
2611da177e4SLinus Torvalds                if ( count < 128 ) {
2621da177e4SLinus Torvalds                    z2 = a0<<negCount;
2631da177e4SLinus Torvalds                    z1 = a0>>( count & 63 );
2641da177e4SLinus Torvalds                }
2651da177e4SLinus Torvalds                else {
2661da177e4SLinus Torvalds                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
2671da177e4SLinus Torvalds                    z1 = 0;
2681da177e4SLinus Torvalds                }
2691da177e4SLinus Torvalds            }
2701da177e4SLinus Torvalds            z0 = 0;
2711da177e4SLinus Torvalds        }
2721da177e4SLinus Torvalds        z2 |= ( a2 != 0 );
2731da177e4SLinus Torvalds    }
2741da177e4SLinus Torvalds    *z2Ptr = z2;
2751da177e4SLinus Torvalds    *z1Ptr = z1;
2761da177e4SLinus Torvalds    *z0Ptr = z0;
2771da177e4SLinus Torvalds
2781da177e4SLinus Torvalds}
2791da177e4SLinus Torvalds
2801da177e4SLinus Torvalds/*
2811da177e4SLinus Torvalds-------------------------------------------------------------------------------
2821da177e4SLinus TorvaldsShifts the 128-bit value formed by concatenating `a0' and `a1' left by the
2831da177e4SLinus Torvaldsnumber of bits given in `count'.  Any bits shifted off are lost.  The value
2841da177e4SLinus Torvaldsof `count' must be less than 64.  The result is broken into two 64-bit
2851da177e4SLinus Torvaldspieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
2861da177e4SLinus Torvalds-------------------------------------------------------------------------------
2871da177e4SLinus Torvalds*/
2881da177e4SLinus TorvaldsINLINE void
2891da177e4SLinus Torvalds shortShift128Left(
2901da177e4SLinus Torvalds     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
2911da177e4SLinus Torvalds{
2921da177e4SLinus Torvalds
2931da177e4SLinus Torvalds    *z1Ptr = a1<<count;
2941da177e4SLinus Torvalds    *z0Ptr =
2951da177e4SLinus Torvalds        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
2961da177e4SLinus Torvalds
2971da177e4SLinus Torvalds}
2981da177e4SLinus Torvalds
2991da177e4SLinus Torvalds/*
3001da177e4SLinus Torvalds-------------------------------------------------------------------------------
3011da177e4SLinus TorvaldsShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
3021da177e4SLinus Torvaldsby the number of bits given in `count'.  Any bits shifted off are lost.
3031da177e4SLinus TorvaldsThe value of `count' must be less than 64.  The result is broken into three
3041da177e4SLinus Torvalds64-bit pieces which are stored at the locations pointed to by `z0Ptr',
3051da177e4SLinus Torvalds`z1Ptr', and `z2Ptr'.
3061da177e4SLinus Torvalds-------------------------------------------------------------------------------
3071da177e4SLinus Torvalds*/
3081da177e4SLinus TorvaldsINLINE void
3091da177e4SLinus Torvalds shortShift192Left(
3101da177e4SLinus Torvalds     bits64 a0,
3111da177e4SLinus Torvalds     bits64 a1,
3121da177e4SLinus Torvalds     bits64 a2,
3131da177e4SLinus Torvalds     int16 count,
3141da177e4SLinus Torvalds     bits64 *z0Ptr,
3151da177e4SLinus Torvalds     bits64 *z1Ptr,
3161da177e4SLinus Torvalds     bits64 *z2Ptr
3171da177e4SLinus Torvalds )
3181da177e4SLinus Torvalds{
3191da177e4SLinus Torvalds    bits64 z0, z1, z2;
3201da177e4SLinus Torvalds    int8 negCount;
3211da177e4SLinus Torvalds
3221da177e4SLinus Torvalds    z2 = a2<<count;
3231da177e4SLinus Torvalds    z1 = a1<<count;
3241da177e4SLinus Torvalds    z0 = a0<<count;
3251da177e4SLinus Torvalds    if ( 0 < count ) {
3261da177e4SLinus Torvalds        negCount = ( ( - count ) & 63 );
3271da177e4SLinus Torvalds        z1 |= a2>>negCount;
3281da177e4SLinus Torvalds        z0 |= a1>>negCount;
3291da177e4SLinus Torvalds    }
3301da177e4SLinus Torvalds    *z2Ptr = z2;
3311da177e4SLinus Torvalds    *z1Ptr = z1;
3321da177e4SLinus Torvalds    *z0Ptr = z0;
3331da177e4SLinus Torvalds
3341da177e4SLinus Torvalds}
3351da177e4SLinus Torvalds
3361da177e4SLinus Torvalds/*
3371da177e4SLinus Torvalds-------------------------------------------------------------------------------
3381da177e4SLinus TorvaldsAdds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
3391da177e4SLinus Torvaldsvalue formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
3401da177e4SLinus Torvaldsany carry out is lost.  The result is broken into two 64-bit pieces which
3411da177e4SLinus Torvaldsare stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
3421da177e4SLinus Torvalds-------------------------------------------------------------------------------
3431da177e4SLinus Torvalds*/
3441da177e4SLinus TorvaldsINLINE void
3451da177e4SLinus Torvalds add128(
3461da177e4SLinus Torvalds     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
3471da177e4SLinus Torvalds{
3481da177e4SLinus Torvalds    bits64 z1;
3491da177e4SLinus Torvalds
3501da177e4SLinus Torvalds    z1 = a1 + b1;
3511da177e4SLinus Torvalds    *z1Ptr = z1;
3521da177e4SLinus Torvalds    *z0Ptr = a0 + b0 + ( z1 < a1 );
3531da177e4SLinus Torvalds
3541da177e4SLinus Torvalds}
3551da177e4SLinus Torvalds
3561da177e4SLinus Torvalds/*
3571da177e4SLinus Torvalds-------------------------------------------------------------------------------
3581da177e4SLinus TorvaldsAdds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
3591da177e4SLinus Torvalds192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
3601da177e4SLinus Torvaldsmodulo 2^192, so any carry out is lost.  The result is broken into three
3611da177e4SLinus Torvalds64-bit pieces which are stored at the locations pointed to by `z0Ptr',
3621da177e4SLinus Torvalds`z1Ptr', and `z2Ptr'.
3631da177e4SLinus Torvalds-------------------------------------------------------------------------------
3641da177e4SLinus Torvalds*/
3651da177e4SLinus TorvaldsINLINE void
3661da177e4SLinus Torvalds add192(
3671da177e4SLinus Torvalds     bits64 a0,
3681da177e4SLinus Torvalds     bits64 a1,
3691da177e4SLinus Torvalds     bits64 a2,
3701da177e4SLinus Torvalds     bits64 b0,
3711da177e4SLinus Torvalds     bits64 b1,
3721da177e4SLinus Torvalds     bits64 b2,
3731da177e4SLinus Torvalds     bits64 *z0Ptr,
3741da177e4SLinus Torvalds     bits64 *z1Ptr,
3751da177e4SLinus Torvalds     bits64 *z2Ptr
3761da177e4SLinus Torvalds )
3771da177e4SLinus Torvalds{
3781da177e4SLinus Torvalds    bits64 z0, z1, z2;
3791da177e4SLinus Torvalds    int8 carry0, carry1;
3801da177e4SLinus Torvalds
3811da177e4SLinus Torvalds    z2 = a2 + b2;
3821da177e4SLinus Torvalds    carry1 = ( z2 < a2 );
3831da177e4SLinus Torvalds    z1 = a1 + b1;
3841da177e4SLinus Torvalds    carry0 = ( z1 < a1 );
3851da177e4SLinus Torvalds    z0 = a0 + b0;
3861da177e4SLinus Torvalds    z1 += carry1;
3871da177e4SLinus Torvalds    z0 += ( z1 < carry1 );
3881da177e4SLinus Torvalds    z0 += carry0;
3891da177e4SLinus Torvalds    *z2Ptr = z2;
3901da177e4SLinus Torvalds    *z1Ptr = z1;
3911da177e4SLinus Torvalds    *z0Ptr = z0;
3921da177e4SLinus Torvalds
3931da177e4SLinus Torvalds}
3941da177e4SLinus Torvalds
3951da177e4SLinus Torvalds/*
3961da177e4SLinus Torvalds-------------------------------------------------------------------------------
3971da177e4SLinus TorvaldsSubtracts the 128-bit value formed by concatenating `b0' and `b1' from the
3981da177e4SLinus Torvalds128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
3991da177e4SLinus Torvalds2^128, so any borrow out (carry out) is lost.  The result is broken into two
4001da177e4SLinus Torvalds64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
4011da177e4SLinus Torvalds`z1Ptr'.
4021da177e4SLinus Torvalds-------------------------------------------------------------------------------
4031da177e4SLinus Torvalds*/
4041da177e4SLinus TorvaldsINLINE void
4051da177e4SLinus Torvalds sub128(
4061da177e4SLinus Torvalds     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
4071da177e4SLinus Torvalds{
4081da177e4SLinus Torvalds
4091da177e4SLinus Torvalds    *z1Ptr = a1 - b1;
4101da177e4SLinus Torvalds    *z0Ptr = a0 - b0 - ( a1 < b1 );
4111da177e4SLinus Torvalds
4121da177e4SLinus Torvalds}
4131da177e4SLinus Torvalds
4141da177e4SLinus Torvalds/*
4151da177e4SLinus Torvalds-------------------------------------------------------------------------------
4161da177e4SLinus TorvaldsSubtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
4171da177e4SLinus Torvaldsfrom the 192-bit value formed by concatenating `a0', `a1', and `a2'.
4181da177e4SLinus TorvaldsSubtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
4191da177e4SLinus Torvaldsresult is broken into three 64-bit pieces which are stored at the locations
4201da177e4SLinus Torvaldspointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
4211da177e4SLinus Torvalds-------------------------------------------------------------------------------
4221da177e4SLinus Torvalds*/
4231da177e4SLinus TorvaldsINLINE void
4241da177e4SLinus Torvalds sub192(
4251da177e4SLinus Torvalds     bits64 a0,
4261da177e4SLinus Torvalds     bits64 a1,
4271da177e4SLinus Torvalds     bits64 a2,
4281da177e4SLinus Torvalds     bits64 b0,
4291da177e4SLinus Torvalds     bits64 b1,
4301da177e4SLinus Torvalds     bits64 b2,
4311da177e4SLinus Torvalds     bits64 *z0Ptr,
4321da177e4SLinus Torvalds     bits64 *z1Ptr,
4331da177e4SLinus Torvalds     bits64 *z2Ptr
4341da177e4SLinus Torvalds )
4351da177e4SLinus Torvalds{
4361da177e4SLinus Torvalds    bits64 z0, z1, z2;
4371da177e4SLinus Torvalds    int8 borrow0, borrow1;
4381da177e4SLinus Torvalds
4391da177e4SLinus Torvalds    z2 = a2 - b2;
4401da177e4SLinus Torvalds    borrow1 = ( a2 < b2 );
4411da177e4SLinus Torvalds    z1 = a1 - b1;
4421da177e4SLinus Torvalds    borrow0 = ( a1 < b1 );
4431da177e4SLinus Torvalds    z0 = a0 - b0;
4441da177e4SLinus Torvalds    z0 -= ( z1 < borrow1 );
4451da177e4SLinus Torvalds    z1 -= borrow1;
4461da177e4SLinus Torvalds    z0 -= borrow0;
4471da177e4SLinus Torvalds    *z2Ptr = z2;
4481da177e4SLinus Torvalds    *z1Ptr = z1;
4491da177e4SLinus Torvalds    *z0Ptr = z0;
4501da177e4SLinus Torvalds
4511da177e4SLinus Torvalds}
4521da177e4SLinus Torvalds
4531da177e4SLinus Torvalds/*
4541da177e4SLinus Torvalds-------------------------------------------------------------------------------
4551da177e4SLinus TorvaldsMultiplies `a' by `b' to obtain a 128-bit product.  The product is broken
4561da177e4SLinus Torvaldsinto two 64-bit pieces which are stored at the locations pointed to by
4571da177e4SLinus Torvalds`z0Ptr' and `z1Ptr'.
4581da177e4SLinus Torvalds-------------------------------------------------------------------------------
4591da177e4SLinus Torvalds*/
4601da177e4SLinus TorvaldsINLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
4611da177e4SLinus Torvalds{
4621da177e4SLinus Torvalds    bits32 aHigh, aLow, bHigh, bLow;
4631da177e4SLinus Torvalds    bits64 z0, zMiddleA, zMiddleB, z1;
4641da177e4SLinus Torvalds
4651da177e4SLinus Torvalds    aLow = a;
4661da177e4SLinus Torvalds    aHigh = a>>32;
4671da177e4SLinus Torvalds    bLow = b;
4681da177e4SLinus Torvalds    bHigh = b>>32;
4691da177e4SLinus Torvalds    z1 = ( (bits64) aLow ) * bLow;
4701da177e4SLinus Torvalds    zMiddleA = ( (bits64) aLow ) * bHigh;
4711da177e4SLinus Torvalds    zMiddleB = ( (bits64) aHigh ) * bLow;
4721da177e4SLinus Torvalds    z0 = ( (bits64) aHigh ) * bHigh;
4731da177e4SLinus Torvalds    zMiddleA += zMiddleB;
4741da177e4SLinus Torvalds    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
4751da177e4SLinus Torvalds    zMiddleA <<= 32;
4761da177e4SLinus Torvalds    z1 += zMiddleA;
4771da177e4SLinus Torvalds    z0 += ( z1 < zMiddleA );
4781da177e4SLinus Torvalds    *z1Ptr = z1;
4791da177e4SLinus Torvalds    *z0Ptr = z0;
4801da177e4SLinus Torvalds
4811da177e4SLinus Torvalds}
4821da177e4SLinus Torvalds
4831da177e4SLinus Torvalds/*
4841da177e4SLinus Torvalds-------------------------------------------------------------------------------
4851da177e4SLinus TorvaldsMultiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
4861da177e4SLinus Torvaldsobtain a 192-bit product.  The product is broken into three 64-bit pieces
4871da177e4SLinus Torvaldswhich are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
4881da177e4SLinus Torvalds`z2Ptr'.
4891da177e4SLinus Torvalds-------------------------------------------------------------------------------
4901da177e4SLinus Torvalds*/
4911da177e4SLinus TorvaldsINLINE void
4921da177e4SLinus Torvalds mul128By64To192(
4931da177e4SLinus Torvalds     bits64 a0,
4941da177e4SLinus Torvalds     bits64 a1,
4951da177e4SLinus Torvalds     bits64 b,
4961da177e4SLinus Torvalds     bits64 *z0Ptr,
4971da177e4SLinus Torvalds     bits64 *z1Ptr,
4981da177e4SLinus Torvalds     bits64 *z2Ptr
4991da177e4SLinus Torvalds )
5001da177e4SLinus Torvalds{
5011da177e4SLinus Torvalds    bits64 z0, z1, z2, more1;
5021da177e4SLinus Torvalds
5031da177e4SLinus Torvalds    mul64To128( a1, b, &z1, &z2 );
5041da177e4SLinus Torvalds    mul64To128( a0, b, &z0, &more1 );
5051da177e4SLinus Torvalds    add128( z0, more1, 0, z1, &z0, &z1 );
5061da177e4SLinus Torvalds    *z2Ptr = z2;
5071da177e4SLinus Torvalds    *z1Ptr = z1;
5081da177e4SLinus Torvalds    *z0Ptr = z0;
5091da177e4SLinus Torvalds
5101da177e4SLinus Torvalds}
5111da177e4SLinus Torvalds
5121da177e4SLinus Torvalds/*
5131da177e4SLinus Torvalds-------------------------------------------------------------------------------
5141da177e4SLinus TorvaldsMultiplies the 128-bit value formed by concatenating `a0' and `a1' to the
5151da177e4SLinus Torvalds128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
5161da177e4SLinus Torvaldsproduct.  The product is broken into four 64-bit pieces which are stored at
5171da177e4SLinus Torvaldsthe locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
5181da177e4SLinus Torvalds-------------------------------------------------------------------------------
5191da177e4SLinus Torvalds*/
5201da177e4SLinus TorvaldsINLINE void
5211da177e4SLinus Torvalds mul128To256(
5221da177e4SLinus Torvalds     bits64 a0,
5231da177e4SLinus Torvalds     bits64 a1,
5241da177e4SLinus Torvalds     bits64 b0,
5251da177e4SLinus Torvalds     bits64 b1,
5261da177e4SLinus Torvalds     bits64 *z0Ptr,
5271da177e4SLinus Torvalds     bits64 *z1Ptr,
5281da177e4SLinus Torvalds     bits64 *z2Ptr,
5291da177e4SLinus Torvalds     bits64 *z3Ptr
5301da177e4SLinus Torvalds )
5311da177e4SLinus Torvalds{
5321da177e4SLinus Torvalds    bits64 z0, z1, z2, z3;
5331da177e4SLinus Torvalds    bits64 more1, more2;
5341da177e4SLinus Torvalds
5351da177e4SLinus Torvalds    mul64To128( a1, b1, &z2, &z3 );
5361da177e4SLinus Torvalds    mul64To128( a1, b0, &z1, &more2 );
5371da177e4SLinus Torvalds    add128( z1, more2, 0, z2, &z1, &z2 );
5381da177e4SLinus Torvalds    mul64To128( a0, b0, &z0, &more1 );
5391da177e4SLinus Torvalds    add128( z0, more1, 0, z1, &z0, &z1 );
5401da177e4SLinus Torvalds    mul64To128( a0, b1, &more1, &more2 );
5411da177e4SLinus Torvalds    add128( more1, more2, 0, z2, &more1, &z2 );
5421da177e4SLinus Torvalds    add128( z0, z1, 0, more1, &z0, &z1 );
5431da177e4SLinus Torvalds    *z3Ptr = z3;
5441da177e4SLinus Torvalds    *z2Ptr = z2;
5451da177e4SLinus Torvalds    *z1Ptr = z1;
5461da177e4SLinus Torvalds    *z0Ptr = z0;
5471da177e4SLinus Torvalds
5481da177e4SLinus Torvalds}
5491da177e4SLinus Torvalds
5501da177e4SLinus Torvalds/*
5511da177e4SLinus Torvalds-------------------------------------------------------------------------------
5521da177e4SLinus TorvaldsReturns an approximation to the 64-bit integer quotient obtained by dividing
5531da177e4SLinus Torvalds`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
5541da177e4SLinus Torvaldsdivisor `b' must be at least 2^63.  If q is the exact quotient truncated
5551da177e4SLinus Torvaldstoward zero, the approximation returned lies between q and q + 2 inclusive.
5561da177e4SLinus TorvaldsIf the exact quotient q is larger than 64 bits, the maximum positive 64-bit
5571da177e4SLinus Torvaldsunsigned integer is returned.
5581da177e4SLinus Torvalds-------------------------------------------------------------------------------
5591da177e4SLinus Torvalds*/
5601da177e4SLinus Torvaldsstatic bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
5611da177e4SLinus Torvalds{
5621da177e4SLinus Torvalds    bits64 b0, b1;
5631da177e4SLinus Torvalds    bits64 rem0, rem1, term0, term1;
5641da177e4SLinus Torvalds    bits64 z;
5651da177e4SLinus Torvalds    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
566c1241c4cSNicolas Pitre    b0 = b>>32;  /* hence b0 is 32 bits wide now */
567c1241c4cSNicolas Pitre    if ( b0<<32 <= a0 ) {
568c1241c4cSNicolas Pitre        z = LIT64( 0xFFFFFFFF00000000 );
569c1241c4cSNicolas Pitre    }  else {
570c1241c4cSNicolas Pitre        z = a0;
571c1241c4cSNicolas Pitre        do_div( z, b0 );
572c1241c4cSNicolas Pitre        z <<= 32;
573c1241c4cSNicolas Pitre    }
5741da177e4SLinus Torvalds    mul64To128( b, z, &term0, &term1 );
5751da177e4SLinus Torvalds    sub128( a0, a1, term0, term1, &rem0, &rem1 );
5761da177e4SLinus Torvalds    while ( ( (sbits64) rem0 ) < 0 ) {
5771da177e4SLinus Torvalds        z -= LIT64( 0x100000000 );
5781da177e4SLinus Torvalds        b1 = b<<32;
5791da177e4SLinus Torvalds        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
5801da177e4SLinus Torvalds    }
5811da177e4SLinus Torvalds    rem0 = ( rem0<<32 ) | ( rem1>>32 );
582c1241c4cSNicolas Pitre    if ( b0<<32 <= rem0 ) {
583c1241c4cSNicolas Pitre        z |= 0xFFFFFFFF;
584c1241c4cSNicolas Pitre    } else {
585c1241c4cSNicolas Pitre        do_div( rem0, b0 );
586c1241c4cSNicolas Pitre        z |= rem0;
587c1241c4cSNicolas Pitre    }
5881da177e4SLinus Torvalds    return z;
5891da177e4SLinus Torvalds
5901da177e4SLinus Torvalds}
5911da177e4SLinus Torvalds
5921da177e4SLinus Torvalds/*
5931da177e4SLinus Torvalds-------------------------------------------------------------------------------
5941da177e4SLinus TorvaldsReturns an approximation to the square root of the 32-bit significand given
5951da177e4SLinus Torvaldsby `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
5961da177e4SLinus Torvalds`aExp' (the least significant bit) is 1, the integer returned approximates
5971da177e4SLinus Torvalds2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
5981da177e4SLinus Torvaldsis 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
5991da177e4SLinus Torvaldscase, the approximation returned lies strictly within +/-2 of the exact
6001da177e4SLinus Torvaldsvalue.
6011da177e4SLinus Torvalds-------------------------------------------------------------------------------
6021da177e4SLinus Torvalds*/
6031da177e4SLinus Torvaldsstatic bits32 estimateSqrt32( int16 aExp, bits32 a )
6041da177e4SLinus Torvalds{
6051da177e4SLinus Torvalds    static const bits16 sqrtOddAdjustments[] = {
6061da177e4SLinus Torvalds        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
6071da177e4SLinus Torvalds        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
6081da177e4SLinus Torvalds    };
6091da177e4SLinus Torvalds    static const bits16 sqrtEvenAdjustments[] = {
6101da177e4SLinus Torvalds        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
6111da177e4SLinus Torvalds        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
6121da177e4SLinus Torvalds    };
6131da177e4SLinus Torvalds    int8 index;
6141da177e4SLinus Torvalds    bits32 z;
615c1241c4cSNicolas Pitre    bits64 A;
6161da177e4SLinus Torvalds
6171da177e4SLinus Torvalds    index = ( a>>27 ) & 15;
6181da177e4SLinus Torvalds    if ( aExp & 1 ) {
6191da177e4SLinus Torvalds        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
6201da177e4SLinus Torvalds        z = ( ( a / z )<<14 ) + ( z<<15 );
6211da177e4SLinus Torvalds        a >>= 1;
6221da177e4SLinus Torvalds    }
6231da177e4SLinus Torvalds    else {
6241da177e4SLinus Torvalds        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
6251da177e4SLinus Torvalds        z = a / z + z;
6261da177e4SLinus Torvalds        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
6271da177e4SLinus Torvalds        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
6281da177e4SLinus Torvalds    }
629c1241c4cSNicolas Pitre    A = ( (bits64) a )<<31;
630c1241c4cSNicolas Pitre    do_div( A, z );
631c1241c4cSNicolas Pitre    return ( (bits32) A ) + ( z>>1 );
6321da177e4SLinus Torvalds
6331da177e4SLinus Torvalds}
6341da177e4SLinus Torvalds
6351da177e4SLinus Torvalds/*
6361da177e4SLinus Torvalds-------------------------------------------------------------------------------
6371da177e4SLinus TorvaldsReturns the number of leading 0 bits before the most-significant 1 bit
6381da177e4SLinus Torvaldsof `a'.  If `a' is zero, 32 is returned.
6391da177e4SLinus Torvalds-------------------------------------------------------------------------------
6401da177e4SLinus Torvalds*/
6411da177e4SLinus Torvaldsstatic int8 countLeadingZeros32( bits32 a )
6421da177e4SLinus Torvalds{
6431da177e4SLinus Torvalds    static const int8 countLeadingZerosHigh[] = {
6441da177e4SLinus Torvalds        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
6451da177e4SLinus Torvalds        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
6461da177e4SLinus Torvalds        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6471da177e4SLinus Torvalds        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
6481da177e4SLinus Torvalds        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6491da177e4SLinus Torvalds        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6501da177e4SLinus Torvalds        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6511da177e4SLinus Torvalds        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6521da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6531da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6541da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6551da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6561da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6571da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6581da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6591da177e4SLinus Torvalds        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
6601da177e4SLinus Torvalds    };
6611da177e4SLinus Torvalds    int8 shiftCount;
6621da177e4SLinus Torvalds
6631da177e4SLinus Torvalds    shiftCount = 0;
6641da177e4SLinus Torvalds    if ( a < 0x10000 ) {
6651da177e4SLinus Torvalds        shiftCount += 16;
6661da177e4SLinus Torvalds        a <<= 16;
6671da177e4SLinus Torvalds    }
6681da177e4SLinus Torvalds    if ( a < 0x1000000 ) {
6691da177e4SLinus Torvalds        shiftCount += 8;
6701da177e4SLinus Torvalds        a <<= 8;
6711da177e4SLinus Torvalds    }
6721da177e4SLinus Torvalds    shiftCount += countLeadingZerosHigh[ a>>24 ];
6731da177e4SLinus Torvalds    return shiftCount;
6741da177e4SLinus Torvalds
6751da177e4SLinus Torvalds}
6761da177e4SLinus Torvalds
6771da177e4SLinus Torvalds/*
6781da177e4SLinus Torvalds-------------------------------------------------------------------------------
6791da177e4SLinus TorvaldsReturns the number of leading 0 bits before the most-significant 1 bit
6801da177e4SLinus Torvaldsof `a'.  If `a' is zero, 64 is returned.
6811da177e4SLinus Torvalds-------------------------------------------------------------------------------
6821da177e4SLinus Torvalds*/
6831da177e4SLinus Torvaldsstatic int8 countLeadingZeros64( bits64 a )
6841da177e4SLinus Torvalds{
6851da177e4SLinus Torvalds    int8 shiftCount;
6861da177e4SLinus Torvalds
6871da177e4SLinus Torvalds    shiftCount = 0;
6881da177e4SLinus Torvalds    if ( a < ( (bits64) 1 )<<32 ) {
6891da177e4SLinus Torvalds        shiftCount += 32;
6901da177e4SLinus Torvalds    }
6911da177e4SLinus Torvalds    else {
6921da177e4SLinus Torvalds        a >>= 32;
6931da177e4SLinus Torvalds    }
6941da177e4SLinus Torvalds    shiftCount += countLeadingZeros32( a );
6951da177e4SLinus Torvalds    return shiftCount;
6961da177e4SLinus Torvalds
6971da177e4SLinus Torvalds}
6981da177e4SLinus Torvalds
6991da177e4SLinus Torvalds/*
7001da177e4SLinus Torvalds-------------------------------------------------------------------------------
7011da177e4SLinus TorvaldsReturns 1 if the 128-bit value formed by concatenating `a0' and `a1'
7021da177e4SLinus Torvaldsis equal to the 128-bit value formed by concatenating `b0' and `b1'.
7031da177e4SLinus TorvaldsOtherwise, returns 0.
7041da177e4SLinus Torvalds-------------------------------------------------------------------------------
7051da177e4SLinus Torvalds*/
7061da177e4SLinus TorvaldsINLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
7071da177e4SLinus Torvalds{
7081da177e4SLinus Torvalds
7091da177e4SLinus Torvalds    return ( a0 == b0 ) && ( a1 == b1 );
7101da177e4SLinus Torvalds
7111da177e4SLinus Torvalds}
7121da177e4SLinus Torvalds
7131da177e4SLinus Torvalds/*
7141da177e4SLinus Torvalds-------------------------------------------------------------------------------
7151da177e4SLinus TorvaldsReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
7161da177e4SLinus Torvaldsthan or equal to the 128-bit value formed by concatenating `b0' and `b1'.
7171da177e4SLinus TorvaldsOtherwise, returns 0.
7181da177e4SLinus Torvalds-------------------------------------------------------------------------------
7191da177e4SLinus Torvalds*/
7201da177e4SLinus TorvaldsINLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
7211da177e4SLinus Torvalds{
7221da177e4SLinus Torvalds
7231da177e4SLinus Torvalds    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
7241da177e4SLinus Torvalds
7251da177e4SLinus Torvalds}
7261da177e4SLinus Torvalds
7271da177e4SLinus Torvalds/*
7281da177e4SLinus Torvalds-------------------------------------------------------------------------------
7291da177e4SLinus TorvaldsReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
7301da177e4SLinus Torvaldsthan the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
7311da177e4SLinus Torvaldsreturns 0.
7321da177e4SLinus Torvalds-------------------------------------------------------------------------------
7331da177e4SLinus Torvalds*/
7341da177e4SLinus TorvaldsINLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
7351da177e4SLinus Torvalds{
7361da177e4SLinus Torvalds
7371da177e4SLinus Torvalds    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
7381da177e4SLinus Torvalds
7391da177e4SLinus Torvalds}
7401da177e4SLinus Torvalds
7411da177e4SLinus Torvalds/*
7421da177e4SLinus Torvalds-------------------------------------------------------------------------------
7431da177e4SLinus TorvaldsReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
7441da177e4SLinus Torvaldsnot equal to the 128-bit value formed by concatenating `b0' and `b1'.
7451da177e4SLinus TorvaldsOtherwise, returns 0.
7461da177e4SLinus Torvalds-------------------------------------------------------------------------------
7471da177e4SLinus Torvalds*/
7481da177e4SLinus TorvaldsINLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
7491da177e4SLinus Torvalds{
7501da177e4SLinus Torvalds
7511da177e4SLinus Torvalds    return ( a0 != b0 ) || ( a1 != b1 );
7521da177e4SLinus Torvalds
7531da177e4SLinus Torvalds}
7541da177e4SLinus Torvalds
755