xref: /openbmc/linux/include/math-emu/op-2.h (revision 75bf465f0bc33e9b776a46d6a1b9b990f5fb7c37)
/* Software floating-point emulation.
   Basic two-word fraction declaration and manipulation.
   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
		  Jakub Jelinek (jj@ultra.linux.cz),
		  David S. Miller (davem@redhat.com) and
		  Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file COPYING.LIB.  If
   not, write to the Free Software Foundation, Inc.,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

251da177e4SLinus Torvalds #ifndef __MATH_EMU_OP_2_H__
261da177e4SLinus Torvalds #define __MATH_EMU_OP_2_H__
271da177e4SLinus Torvalds 
/*
 * A two-word fraction named X lives in two _FP_W_TYPE variables:
 * X_f0 is the low word and X_f1 is the high word.
 */

/* Declare both words of fraction X, initialised to zero.  */
#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0 = 0, X##_f1 = 0
/* Copy both words of fraction S into fraction D.  */
#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
/* Assign X from I, where I expands to a "high, low" pair such as
   _FP_ZEROFRAC_2.  The extra macro level lets I expand into two
   arguments before __FP_FRAC_SET_2 is invoked.  */
#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
/* The high word of X (an lvalue).  */
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
/* The low word of X (an lvalue).  */
#define _FP_FRAC_LOW_2(X)	(X##_f0)
/* Word number w of X; w must be the literal token 0 or 1 because it is
   pasted onto the variable name.  */
#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
/*
 * Shift the two-word fraction X left by N bits, in place.  Bits shifted
 * out of the high word are discarded.  The whole expression is cast to
 * void, so the macro yields no value.
 *
 * N < _FP_W_TYPE_SIZE: the top bits of X_f0 move into X_f1.  When N is
 * a compile-time constant 1 the shift is done with additions, and the
 * top bit of X_f0 (tested as the sign of its _FP_WS_TYPE cast) is
 * carried into X_f1.
 * Otherwise: X_f1 receives X_f0 shifted by the excess over one word and
 * X_f0 becomes 0.
 *
 * NOTE(review): for N == 0 the general path shifts X_f0 right by
 * _FP_W_TYPE_SIZE; callers presumably never pass 0 -- confirm.
 */
#define _FP_FRAC_SLL_2(X, N) (						       \
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
	  ? ({								       \
		if (__builtin_constant_p(N) && (N) == 1) {		       \
			X##_f1 = X##_f1 + X##_f1 +			       \
				(((_FP_WS_TYPE) (X##_f0)) < 0);		       \
			X##_f0 += X##_f0;				       \
		} else {						       \
			X##_f1 = X##_f1 << (N) | X##_f0 >>		       \
						(_FP_W_TYPE_SIZE - (N));       \
			X##_f0 <<= (N);					       \
		}							       \
		0;							       \
	    })								       \
	  : ({								       \
	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		       \
	      X##_f0 = 0;						       \
	  })))


/*
 * Logical right shift of the two-word fraction X by N bits, in place.
 * Bits shifted out of the low word are discarded.  The expression is
 * cast to void, so the macro yields no value.
 *
 * N < _FP_W_TYPE_SIZE: the low bits of X_f1 move into the top of X_f0.
 * Otherwise: X_f0 receives X_f1 shifted by the excess over one word and
 * X_f1 becomes 0.
 *
 * NOTE(review): for N == 0 the first path shifts X_f1 left by
 * _FP_W_TYPE_SIZE; callers presumably never pass 0 -- confirm.
 */
#define _FP_FRAC_SRL_2(X, N) (						       \
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
	  ? ({								       \
	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));      \
	      X##_f1 >>= (N);						       \
	    })								       \
	  : ({								       \
	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		       \
	      X##_f1 = 0;						       \
	    })))


/*
 * Right shift with sticky-lsb: shift the two-word fraction X right by
 * N bits, in place, and OR a 1 into bit 0 of the result if any of the
 * bits shifted out was set.  The expression is cast to void.
 *
 * N < _FP_W_TYPE_SIZE: the lost bits are the low N bits of X_f0; for a
 * compile-time constant N == 1 that is simply X_f0 & 1.
 * Otherwise: the lost bits are all of X_f0 plus, when N is larger than
 * one word, the low (N - _FP_W_TYPE_SIZE) bits of X_f1; X_f1 becomes 0.
 *
 * The sz argument is not used by this implementation.
 */
#define _FP_FRAC_SRS_2(X, N, sz) (					       \
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
	  ? ({								       \
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)      \
			| (__builtin_constant_p(N) && (N) == 1		       \
			   ? X##_f0 & 1					       \
			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));       \
		X##_f1 >>= (N);						       \
	    })								       \
	  : ({								       \
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		       \
			| ((((N) == _FP_W_TYPE_SIZE			       \
			     ? 0					       \
			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))          \
			    | X##_f0) != 0));				       \
	      X##_f1 = 0;						       \
	    })))

/*
 * Two-word arithmetic on named fractions.  Each wrapper splits the
 * fraction names into their high (_f1) and low (_f0) words and hands
 * them to the matching __FP_FRAC_*_2 primitive, high word first.
 */

/* X += I, where I is a single-word value.  */
#define _FP_FRAC_ADDI_2(X,I)	\
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)

/* R = X + Y.  */
#define _FP_FRAC_ADD_2(R,X,Y)	\
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* R = X - Y.  */
#define _FP_FRAC_SUB_2(R,X,Y)	\
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* X -= Y.  */
#define _FP_FRAC_DEC_2(X,Y)	\
  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)

/*
 * Store in R the number of leading zero bits of the two-word fraction
 * X.  If the high word is non-zero, __FP_CLZ is applied to it alone;
 * otherwise __FP_CLZ is applied to the low word and _FP_W_TYPE_SIZE is
 * added for the all-zero high word.
 *
 * NOTE(review): when both words are zero the result depends on what
 * __FP_CLZ yields for 0, which is defined outside this file.
 */
#define _FP_FRAC_CLZ_2(R,X)	\
  do {				\
    if (X##_f1)			\
      __FP_CLZ(R,X##_f1);	\
    else 			\
    {				\
      __FP_CLZ(R,X##_f0);	\
      R += _FP_W_TYPE_SIZE;	\
    }				\
  } while(0)

/* Predicates */

/* True if the top bit of the high word is set, i.e. the high word is
   negative when viewed as _FP_WS_TYPE.  */
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)(X##_f1) < 0)
/* True if both words of X are zero.  */
#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
/* Non-zero if any _FP_OVERFLOW_<fs> bit is set in the high part of X,
   as selected by _FP_FRAC_HIGH_<fs>.  */
#define _FP_FRAC_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
/* Clear the _FP_OVERFLOW_<fs> bits in the high part of X.  */
#define _FP_FRAC_CLEAR_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
/* True if X and Y are equal word for word.  */
#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
/* True if X > Y: high words are compared first, low words break a tie.  */
#define _FP_FRAC_GT_2(X, Y)	\
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
/* True if X >= Y, compared the same way.  */
#define _FP_FRAC_GE_2(X, Y)	\
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))

/* Constant fractions written as "high word, low word" pairs, in the
   argument order expected by __FP_FRAC_SET_2 (via _FP_FRAC_SET_2).  */
#define _FP_ZEROFRAC_2		0, 0
#define _FP_MINFRAC_2		0, 1
#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)

/*
 * Internals
 */

/* Store I1 in the high word and I0 in the low word of fraction X.  */
#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = (I0), X##_f1 = (I1))

/*
 * Store in R the number of leading zero bits of the two-word value
 * whose high word is xh and low word is xl.  Same logic as
 * _FP_FRAC_CLZ_2, but taking the words directly instead of a fraction
 * name: __FP_CLZ on xh if it is non-zero, else __FP_CLZ on xl plus
 * _FP_W_TYPE_SIZE.
 */
#define __FP_CLZ_2(R, xh, xl)	\
  do {				\
    if (xh)			\
      __FP_CLZ(R,xh);		\
    else 			\
    {				\
      __FP_CLZ(R,xl);		\
      R += _FP_W_TYPE_SIZE;	\
    }				\
  } while(0)

/*
 * Two-word add/subtract primitives.  Arguments are individual words,
 * high word first.
 *
 * The first branch holds plain C versions that detect carry/borrow by
 * comparing the low-word result with an operand; it is compiled out by
 * "#if 0".  The active branch maps the primitives onto add_ssaaaa and
 * sub_ddmmss, which are not defined in this file (presumably they come
 * from longlong.h -- confirm).
 */
#if 0

#ifndef __FP_FRAC_ADDI_2
#define __FP_FRAC_ADDI_2(xh, xl, i)	\
  (xh += ((xl += i) < i))
#endif
#ifndef __FP_FRAC_ADD_2
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
  (rh = xh + yh + ((rl = xl + yl) < xl))
#endif
#ifndef __FP_FRAC_SUB_2
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
  (rh = xh - yh - ((rl = xl - yl) > xl))
#endif
#ifndef __FP_FRAC_DEC_2
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
  do {					\
    UWtype _t = xl;			\
    xh -= yh + ((xl -= yl) > _t);	\
  } while (0)
#endif

#else

/* (xh,xl) += i, expressed as an add of the two-word value (0,i).  */
#undef __FP_FRAC_ADDI_2
#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
/* (rh,rl) = (xh,xl) + (yh,yl).  */
#undef __FP_FRAC_ADD_2
#define __FP_FRAC_ADD_2			add_ssaaaa
/* (rh,rl) = (xh,xl) - (yh,yl).  */
#undef __FP_FRAC_SUB_2
#define __FP_FRAC_SUB_2			sub_ddmmss
/* (xh,xl) -= (yh,yl).  */
#undef __FP_FRAC_DEC_2
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	sub_ddmmss(xh, xl, xh, xl, yh, yl)

#endif

/*
 * Unpack the raw bits of a native fp value.  Do not classify or
 * normalize the data.
 *
 * val is copied into the flt member of a local union _FP_UNION_<fs>,
 * and the bits.frac0, bits.frac1, bits.exp and bits.sign fields are
 * read back into X_f0, X_f1, X_e and X_s respectively.  The caller
 * must have declared those four variables.
 */

#define _FP_UNPACK_RAW_2(fs, X, val)			\
  do {							\
    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
							\
    X##_f0 = _flo.bits.frac0;				\
    X##_f1 = _flo.bits.frac1;				\
    X##_e  = _flo.bits.exp;				\
    X##_s  = _flo.bits.sign;				\
  } while (0)

/*
 * Pointer variant of _FP_UNPACK_RAW_2: val is a pointer, which is cast
 * to union _FP_UNION_<fs> * and read in place (no copy is made).  The
 * bits.frac0, bits.frac1, bits.exp and bits.sign fields are stored in
 * X_f0, X_f1, X_e and X_s.
 */
#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
      (union _FP_UNION_##fs *)(val);			\
							\
    X##_f0 = _flo->bits.frac0;				\
    X##_f1 = _flo->bits.frac1;				\
    X##_e  = _flo->bits.exp;				\
    X##_s  = _flo->bits.sign;				\
  } while (0)


/*
 * Repack the raw bits of a native fp value.
 *
 * X_f0, X_f1, X_e and X_s are stored into the bits.frac0, bits.frac1,
 * bits.exp and bits.sign fields of a local union _FP_UNION_<fs>, and
 * the union's flt member is then assigned to val.
 */

#define _FP_PACK_RAW_2(fs, val, X)			\
  do {							\
    union _FP_UNION_##fs _flo;				\
							\
    _flo.bits.frac0 = X##_f0;				\
    _flo.bits.frac1 = X##_f1;				\
    _flo.bits.exp   = X##_e;				\
    _flo.bits.sign  = X##_s;				\
							\
    (val) = _flo.flt;					\
  } while (0)

/*
 * Pointer variant of _FP_PACK_RAW_2: val is a pointer, which is cast
 * to union _FP_UNION_<fs> *; X_f0, X_f1, X_e and X_s are written
 * straight into the bits.frac0, bits.frac1, bits.exp and bits.sign
 * fields of the pointed-to object.
 */
#define _FP_PACK_RAW_2_P(fs, val, X)			\
  do {							\
    union _FP_UNION_##fs *_flo =			\
      (union _FP_UNION_##fs *)(val);			\
							\
    _flo->bits.frac0 = X##_f0;				\
    _flo->bits.frac1 = X##_f1;				\
    _flo->bits.exp   = X##_e;				\
    _flo->bits.sign  = X##_s;				\
  } while (0)


/*
 * Multiplication algorithms:
 */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   doit(hi, lo, a, b) is that primitive; it is invoked with the two
   result words first (high, then low).  The four partial products
   X_f0*Y_f0, X_f0*Y_f1, X_f1*Y_f0 and X_f1*Y_f1 are formed; the low
   and high products land directly in words 0-1 and 2-3 of the
   four-word temporary _z, and the two cross products are added into
   words 1-3.  _z is then shifted right, with sticky bit, by
   wfracbits-1 and its two low words are stored in R.  */

#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
  do {									\
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
									\
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);	\
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);	\
									\
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,		\
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1));				\
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,		\
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1));				\
									\
    /* Normalize since we know where the msb of the multiplicands	\
       were (bit B), we know that the msb of the product is		\
       at either 2B or 2B-1.  */					\
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
  } while (0)

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four. This one is for machines
   where multiplication is much more expensive than subtraction.

   The three products are X_f0*Y_f0, X_f1*Y_f1 and
   (X_f0+X_f1)*(Y_f0+Y_f1).  _c1 and _c2 record the carries out of the
   two one-word sums so that the truncated third product can be
   corrected; the low and high products are then subtracted from the
   middle term, and the high product is added into words 2-3.  The
   four-word result _z is shifted right, with sticky bit, by
   wfracbits-1 and its two low words are stored in R.  */

#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
  do {									\
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
    _FP_W_TYPE _d;							\
    int _c1, _c2;							\
									\
    _b_f0 = X##_f0 + X##_f1;						\
    _c1 = _b_f0 < X##_f0;						\
    _b_f1 = Y##_f0 + Y##_f1;						\
    _c2 = _b_f1 < Y##_f0;						\
    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);			\
    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);	\
    doit(_c_f1, _c_f0, X##_f1, Y##_f1);					\
									\
    _b_f0 &= -_c2;							\
    _b_f1 &= -_c1;							\
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,		\
		    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));	\
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		     _b_f0);						\
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		     _b_f1);						\
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1),				\
		    0, _d, _FP_FRAC_WORD_4(_z,0));			\
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);		\
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),	\
		    _c_f1, _c_f0,					\
		    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));	\
									\
    /* Normalize since we know where the msb of the multiplicands	\
       were (bit B), we know that the msb of the product is		\
       at either 2B or 2B-1.  */					\
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
  } while (0)

/* Extended multiplication using mpn_mul_n.  X and Y are copied, low
   word first, into two-element arrays, multiplied into the four-word
   temporary _z (accessed as the array _z_f), shifted right with sticky
   bit by wfracbits-1, and the two low words are stored in R.
   NOTE(review): relies on _FP_FRAC_DECL_4 declaring an array named
   _z_f -- that macro is defined outside this file.  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
  do {									\
    _FP_FRAC_DECL_4(_z);						\
    _FP_W_TYPE _x[2], _y[2];						\
    _x[0] = X##_f0; _x[1] = X##_f1;					\
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
									\
    mpn_mul_n(_z_f, _x, _y, 2);						\
									\
    /* Normalize since we know where the msb of the multiplicands	\
       were (bit B), we know that the msb of the product is		\
       at either 2B or 2B-1.  */					\
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
    R##_f0 = _z_f[0];							\
    R##_f1 = _z_f[1];							\
  } while (0)

/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only; any other wfracbits calls abort().
   Caller guarantees that X and Y have bit (wfracbits - 1) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero,  RESETFE should optionally reset it back.

   Each operand is split into five pieces (24, 24, 24, 24 bits and the
   remaining bits above bit 96), each converted to double and scaled by
   a power of two from _const[].  The partial products are summed per
   column, the columns are separated again with the add/subtract-constant
   steps, and the result words are assembled from the low 24 bits of the
   raw representation of each column sum.  _y240 collects a sticky bit
   from the four lowest columns.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)	\
  do {										\
    static const double _const[] = {						\
      /* 2^-24 */ 5.9604644775390625e-08,					\
      /* 2^-48 */ 3.5527136788005009e-15,					\
      /* 2^-72 */ 2.1175823681357508e-22,					\
      /* 2^-96 */ 1.2621774483536189e-29,					\
      /* 2^28 */ 2.68435456e+08,						\
      /* 2^4 */ 1.600000e+01,							\
      /* 2^-20 */ 9.5367431640625e-07,						\
      /* 2^-44 */ 5.6843418860808015e-14,					\
      /* 2^-68 */ 3.3881317890172014e-21,					\
      /* 2^-92 */ 2.0194839173657902e-28,					\
      /* 2^-116 */ 1.2037062152420224e-35};					\
    double _a240, _b240, _c240, _d240, _e240, _f240, 				\
	   _g240, _h240, _i240, _j240, _k240;					\
    union { double d; UDItype i; } _l240, _m240, _n240, _o240,			\
				   _p240, _q240, _r240, _s240;			\
    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;			\
										\
    if (wfracbits < 106 || wfracbits > 120)					\
      abort();									\
										\
    setfetz;									\
										\
    _e240 = (double)(long)(X##_f0 & 0xffffff);					\
    _j240 = (double)(long)(Y##_f0 & 0xffffff);					\
    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);				\
    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);				\
    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));	\
    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));	\
    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);				\
    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);				\
    _a240 = (double)(long)(X##_f1 >> 32);					\
    _f240 = (double)(long)(Y##_f1 >> 32);					\
    _e240 *= _const[3];								\
    _j240 *= _const[3];								\
    _d240 *= _const[2];								\
    _i240 *= _const[2];								\
    _c240 *= _const[1];								\
    _h240 *= _const[1];								\
    _b240 *= _const[0];								\
    _g240 *= _const[0];								\
    _s240.d =							      _e240*_j240;\
    _r240.d =						_d240*_j240 + _e240*_i240;\
    _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240;\
    _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;		\
    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;				\
    _l240.d = _a240*_g240 + _b240*_f240;					\
    _k240 =   _a240*_f240;							\
    _r240.d += _s240.d;								\
    _q240.d += _r240.d;								\
    _p240.d += _q240.d;								\
    _o240.d += _p240.d;								\
    _n240.d += _o240.d;								\
    _m240.d += _n240.d;								\
    _l240.d += _m240.d;								\
    _k240 += _l240.d;								\
    _s240.d -= ((_const[10]+_s240.d)-_const[10]);				\
    _r240.d -= ((_const[9]+_r240.d)-_const[9]);					\
    _q240.d -= ((_const[8]+_q240.d)-_const[8]);					\
    _p240.d -= ((_const[7]+_p240.d)-_const[7]);					\
    _o240.d += _const[7];							\
    _n240.d += _const[6];							\
    _m240.d += _const[5];							\
    _l240.d += _const[4];							\
    if (_s240.d != 0.0) _y240 = 1;						\
    if (_r240.d != 0.0) _y240 = 1;						\
    if (_q240.d != 0.0) _y240 = 1;						\
    if (_p240.d != 0.0) _y240 = 1;						\
    _t240 = (DItype)_k240;							\
    _u240 = _l240.i;								\
    _v240 = _m240.i;								\
    _w240 = _n240.i;								\
    _x240 = _o240.i;								\
    R##_f1 = (_t240 << (128 - (wfracbits - 1)))					\
	     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));			\
    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))			\
    	     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))			\
    	     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))			\
    	     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))			\
    	     | _y240;								\
    resetfe;									\
  } while (0)

/*
 * Division algorithms:
 */

/* Divide fraction X by fraction Y into R using the udiv_qrnnd
   (2W / 1W) and umul_ppmm (1W * 1W => 2W) primitives.

   The numerator is widened to three words (_n_f2:_n_f1:_n_f0): if
   X > Y it is X shifted right by one bit, otherwise it is X itself and
   R_e is decremented.  Y is shifted left by _FP_WFRACXBITS_<fs> -- and
   is therefore modified -- so that its top bit is set.  The quotient is
   produced one word at a time: estimate with udiv_qrnnd against Y_f1,
   then correct the estimate downwards at most twice using the product
   with Y_f0.  A non-zero final remainder sets _FP_WORK_STICKY in
   R_f0.  */

#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
  do {									\
    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;		\
    if (_FP_FRAC_GT_2(X, Y))						\
      {									\
	_n_f2 = X##_f1 >> 1;						\
	_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
	_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
      }									\
    else								\
      {									\
	R##_e--;							\
	_n_f2 = X##_f1;							\
	_n_f1 = X##_f0;							\
	_n_f0 = 0;							\
      }									\
									\
    /* Normalize, i.e. make the most significant bit of the 		\
       denominator set. */						\
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);				\
									\
    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);				\
    _r_f0 = _n_f0;							\
    if (_FP_FRAC_GT_2(_m, _r))						\
      {									\
	R##_f1--;							\
	_FP_FRAC_ADD_2(_r, Y, _r);					\
	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
	  {								\
	    R##_f1--;							\
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
	  }								\
      }									\
    _FP_FRAC_DEC_2(_r, _m);						\
									\
    if (_r_f1 == Y##_f1)						\
      {									\
	/* This is a special case, not an optimization			\
	   (_r/Y##_f1 would not fit into UWtype).			\
	   As _r is guaranteed to be < Y,  R##_f0 can be either		\
	   (UWtype)-1 or (UWtype)-2.  But as we know what kind		\
	   of bits it is (sticky, guard, round),  we don't care.	\
	   We also don't care what the remainder is,  because the	\
	   guard bit will be set anyway.  -jj */			\
	R##_f0 = -1;							\
      }									\
    else								\
      {									\
	udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
	umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);			\
	_r_f0 = 0;							\
	if (_FP_FRAC_GT_2(_m, _r))					\
	  {								\
	    R##_f0--;							\
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
	    if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
	      {								\
		R##_f0--;						\
		_FP_FRAC_ADD_2(_r, Y, _r);				\
	      }								\
	  }								\
	if (!_FP_FRAC_EQ_2(_r, _m))					\
	  R##_f0 |= _FP_WORK_STICKY;					\
      }									\
  } while (0)


/* Divide fraction X by fraction Y into R using mpn_divrem.

   Y is copied, low word first, into _y.  X is placed in the middle two
   words of the four-word numerator _x (words 0 and 3 are zero), shifted
   left by _FP_WFRACBITS_<fs>-1 - _FP_W_TYPE_SIZE bits when X > Y (in
   which case R_e is incremented) and by one bit more otherwise.  The
   two low quotient words from _z become R; bit 0 of R_f0 is also set
   when _x[0] | _x[1] is non-zero after the call.
   NOTE(review): that sticky test presumably relies on mpn_divrem
   leaving the remainder in the low words of _x -- confirm against the
   GMP documentation.  */
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
  do {									\
    _FP_W_TYPE _x[4], _y[2], _z[4];					\
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
    _x[0] = _x[3] = 0;							\
    if (_FP_FRAC_GT_2(X, Y))						\
      {									\
	R##_e++;							\
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |	\
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
			    (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE)));	\
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);	\
      }									\
    else								\
      {									\
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |	\
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
			    (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));	\
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);	\
      }									\
									\
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
    R##_f1 = _z[1];							\
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
  } while (0)


/*
 * Square root algorithms:
 * We have just one right now; maybe a Newton approximation
 * should be added for those machines where division is fast.
 */
5231da177e4SLinus Torvalds 
/*
 * Bit-at-a-time square root step for two-word fractions.
 *
 * R, S, T and X are name prefixes whose _f0 (low) and _f1 (high) words
 * are used directly: R accumulates result bits, S and T are working
 * values, and X is consumed (it is reduced and shifted left one bit per
 * iteration).  q is the current trial bit; it must be a modifiable
 * lvalue and is overwritten.
 *
 * The first loop runs q down to zero and adds bits to R##_f1.  The
 * second loop restarts q at the top bit of a word and adds bits to
 * R##_f0 until q equals _FP_WORK_ROUND.  Finally, if anything is left
 * in X, _FP_WORK_STICKY is set in R##_f0, together with _FP_WORK_ROUND
 * when S compares strictly below X.
 */
#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
  do {							\
    /* High word: only the _f1 halves take part.  */	\
    for (; (q) != 0; (q) >>= 1)				\
      {							\
	T##_f1 = S##_f1 + (q);				\
	if (X##_f1 >= T##_f1)				\
	  {						\
	    S##_f1 = T##_f1 + (q);			\
	    X##_f1 -= T##_f1;				\
	    R##_f1 += (q);				\
	  }						\
	_FP_FRAC_SLL_2(X, 1);				\
      }							\
    /* Low word: full two-word compare and subtract.  */	\
    for ((q) = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);	\
	 (q) != _FP_WORK_ROUND;				\
	 (q) >>= 1)					\
      {							\
	T##_f0 = S##_f0 + (q);				\
	T##_f1 = S##_f1;				\
	if (X##_f1 > T##_f1				\
	    || (X##_f1 == T##_f1 && X##_f0 >= T##_f0))	\
	  {						\
	    S##_f0 = T##_f0 + (q);			\
	    /* Wrapped low word carries into S##_f1.  */	\
	    if (S##_f0 < T##_f0)			\
	      {						\
		S##_f1++;				\
	      }						\
	    _FP_FRAC_DEC_2(X, T);			\
	    R##_f0 += (q);				\
	  }						\
	_FP_FRAC_SLL_2(X, 1);				\
      }							\
    if ((X##_f0 | X##_f1) != 0)				\
      {							\
	if (X##_f1 > S##_f1				\
	    || (X##_f1 == S##_f1 && X##_f0 > S##_f0))	\
	  {						\
	    R##_f0 |= _FP_WORK_ROUND;			\
	  }						\
	R##_f0 |= _FP_WORK_STICKY;			\
      }							\
  } while (0)
5621da177e4SLinus Torvalds 
5631da177e4SLinus Torvalds 
/*
 * Assembly/disassembly for converting to/from integral types.
 * No shifting or overflow handling is done here.
 */
5681da177e4SLinus Torvalds 
/*
 * Pack the fraction words of X into the integer lvalue r, which is
 * rsize bits wide.  When rsize exceeds one word, X##_f1 is stored,
 * shifted up by a word, and X##_f0 is added below it; otherwise only
 * X##_f0 is stored.
 *
 * The macro is a void expression built from a conditional over two
 * statement expressions, and it evaluates r more than once.
 * NOTE(review): presumably the conditional-expression form is used so
 * that the word-sized shift in the unused arm draws no shift-count
 * warning when r is a single word -- confirm.
 */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
	(void) (((rsize) > _FP_W_TYPE_SIZE)	\
		? ({				\
		     (r) = X##_f1;		\
		     (r) <<= _FP_W_TYPE_SIZE;	\
		     (r) += X##_f0;		\
		    })				\
		: ({ (r) = X##_f0; }))
5771da177e4SLinus Torvalds 
/*
 * Split the integer value r (rsize bits wide) into the fraction words
 * of X: X##_f0 is assigned r, and X##_f1 is assigned the bits of r
 * above the first word, or 0 when rsize fits within one word.
 *
 * r and rsize are parenthesized so that expression arguments such as
 * `hi | lo' or `cond ? 32 : 64' keep their meaning inside the
 * expansion, matching _FP_FRAC_ASSEMBLE_2.  r is evaluated twice, so it
 * must not have side effects.
 */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
  do {									\
    X##_f0 = (r);							\
    X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE				\
	      ? 0							\
	      : (r) >> _FP_W_TYPE_SIZE);				\
  } while (0)
5831da177e4SLinus Torvalds 
/*
 * Convert FP values between word sizes.
 */
5871da177e4SLinus Torvalds 
/*
 * Narrow the two-word fraction S (format sfs) into the one-word
 * fraction D (format dfs).
 *
 * S is shifted right in place by the difference between the two
 * formats' _FP_WFRACBITS values, after which its low word is copied to
 * D##_f.  A source whose class is FP_CLS_NAN goes through
 * _FP_FRAC_SRL_2; every other class goes through _FP_FRAC_SRS_2.
 * NOTE(review): presumably SRL is a plain right shift and SRS a
 * sticky one -- confirm against their definitions.
 *
 * S is modified by this macro.
 */
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
  do {									\
    if (S##_c == FP_CLS_NAN)						\
      {									\
	_FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));	\
      }									\
    else								\
      {									\
	_FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
		       _FP_WFRACBITS_##sfs);				\
      }									\
    D##_f = S##_f0;							\
  } while (0)
5971da177e4SLinus Torvalds 
/*
 * Widen the one-word fraction S (format sfs) into the two-word
 * fraction D (format dfs): the high word of D is cleared, the low word
 * takes S##_f, and D is then shifted left by the difference between
 * the two formats' _FP_WFRACBITS values.  S is left untouched.
 */
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
  do {									\
    D##_f1 = 0;								\
    D##_f0 = S##_f;							\
    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
  } while (0)
6041da177e4SLinus Torvalds 
6051da177e4SLinus Torvalds #endif
606