xref: /openbmc/linux/arch/mips/math-emu/dp_mul.c (revision ad8fb553)
11da177e4SLinus Torvalds /* IEEE754 floating point arithmetic
21da177e4SLinus Torvalds  * double precision: common utilities
31da177e4SLinus Torvalds  */
41da177e4SLinus Torvalds /*
51da177e4SLinus Torvalds  * MIPS floating point support
61da177e4SLinus Torvalds  * Copyright (C) 1994-2000 Algorithmics Ltd.
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  * ########################################################################
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  *  This program is free software; you can distribute it and/or modify it
111da177e4SLinus Torvalds  *  under the terms of the GNU General Public License (Version 2) as
121da177e4SLinus Torvalds  *  published by the Free Software Foundation.
131da177e4SLinus Torvalds  *
141da177e4SLinus Torvalds  *  This program is distributed in the hope it will be useful, but WITHOUT
151da177e4SLinus Torvalds  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
161da177e4SLinus Torvalds  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
171da177e4SLinus Torvalds  *  for more details.
181da177e4SLinus Torvalds  *
191da177e4SLinus Torvalds  *  You should have received a copy of the GNU General Public License along
201da177e4SLinus Torvalds  *  with this program; if not, write to the Free Software Foundation, Inc.,
211da177e4SLinus Torvalds  *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
221da177e4SLinus Torvalds  *
231da177e4SLinus Torvalds  * ########################################################################
241da177e4SLinus Torvalds  */
251da177e4SLinus Torvalds 
261da177e4SLinus Torvalds 
271da177e4SLinus Torvalds #include "ieee754dp.h"
281da177e4SLinus Torvalds 
292209bcb1SRalf Baechle union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y)
301da177e4SLinus Torvalds {
311da177e4SLinus Torvalds 	COMPXDP;
321da177e4SLinus Torvalds 	COMPYDP;
331da177e4SLinus Torvalds 
341da177e4SLinus Torvalds 	EXPLODEXDP;
351da177e4SLinus Torvalds 	EXPLODEYDP;
361da177e4SLinus Torvalds 
379e8bad1fSRalf Baechle 	ieee754_clearcx();
381da177e4SLinus Torvalds 
391da177e4SLinus Torvalds 	FLUSHXDP;
401da177e4SLinus Torvalds 	FLUSHYDP;
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds 	switch (CLPAIR(xc, yc)) {
431da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
441da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
451da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
461da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
471da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
481da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
491da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
501da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
511da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
521da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
531da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
549e8bad1fSRalf Baechle 		ieee754_setcx(IEEE754_INVALID_OPERATION);
551da177e4SLinus Torvalds 		return ieee754dp_nanxcpt(ieee754dp_indef(), "mul", x, y);
561da177e4SLinus Torvalds 
571da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
581da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
591da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
601da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
611da177e4SLinus Torvalds 		return y;
621da177e4SLinus Torvalds 
631da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
641da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
651da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
661da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
671da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
681da177e4SLinus Torvalds 		return x;
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds 
711da177e4SLinus Torvalds 		/* Infinity handling */
721da177e4SLinus Torvalds 
731da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
741da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
759e8bad1fSRalf Baechle 		ieee754_setcx(IEEE754_INVALID_OPERATION);
761da177e4SLinus Torvalds 		return ieee754dp_xcpt(ieee754dp_indef(), "mul", x, y);
771da177e4SLinus Torvalds 
781da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
791da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
801da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
811da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
821da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
831da177e4SLinus Torvalds 		return ieee754dp_inf(xs ^ ys);
841da177e4SLinus Torvalds 
851da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
861da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
871da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
881da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
891da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
901da177e4SLinus Torvalds 		return ieee754dp_zero(xs ^ ys);
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
941da177e4SLinus Torvalds 		DPDNORMX;
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
971da177e4SLinus Torvalds 		DPDNORMY;
981da177e4SLinus Torvalds 		break;
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
1011da177e4SLinus Torvalds 		DPDNORMX;
1021da177e4SLinus Torvalds 		break;
1031da177e4SLinus Torvalds 
1041da177e4SLinus Torvalds 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
1051da177e4SLinus Torvalds 		break;
1061da177e4SLinus Torvalds 	}
10725985edcSLucas De Marchi 	/* rm = xm * ym, re = xe+ye basically */
1081da177e4SLinus Torvalds 	assert(xm & DP_HIDDEN_BIT);
1091da177e4SLinus Torvalds 	assert(ym & DP_HIDDEN_BIT);
1101da177e4SLinus Torvalds 	{
1111da177e4SLinus Torvalds 		int re = xe + ye;
1121da177e4SLinus Torvalds 		int rs = xs ^ ys;
1131da177e4SLinus Torvalds 		u64 rm;
1141da177e4SLinus Torvalds 
1151da177e4SLinus Torvalds 		/* shunt to top of word */
116ad8fb553SRalf Baechle 		xm <<= 64 - (DP_FBITS + 1);
117ad8fb553SRalf Baechle 		ym <<= 64 - (DP_FBITS + 1);
1181da177e4SLinus Torvalds 
1191da177e4SLinus Torvalds 		/* multiply 32bits xm,ym to give high 32bits rm with stickness
1201da177e4SLinus Torvalds 		 */
1211da177e4SLinus Torvalds 
1221da177e4SLinus Torvalds 		/* 32 * 32 => 64 */
1231da177e4SLinus Torvalds #define DPXMULT(x, y)	((u64)(x) * (u64)y)
1241da177e4SLinus Torvalds 
1251da177e4SLinus Torvalds 		{
1261da177e4SLinus Torvalds 			unsigned lxm = xm;
1271da177e4SLinus Torvalds 			unsigned hxm = xm >> 32;
1281da177e4SLinus Torvalds 			unsigned lym = ym;
1291da177e4SLinus Torvalds 			unsigned hym = ym >> 32;
1301da177e4SLinus Torvalds 			u64 lrm;
1311da177e4SLinus Torvalds 			u64 hrm;
1321da177e4SLinus Torvalds 
1331da177e4SLinus Torvalds 			lrm = DPXMULT(lxm, lym);
1341da177e4SLinus Torvalds 			hrm = DPXMULT(hxm, hym);
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds 			{
1371da177e4SLinus Torvalds 				u64 t = DPXMULT(lxm, hym);
1381da177e4SLinus Torvalds 				{
1391da177e4SLinus Torvalds 					u64 at =
1401da177e4SLinus Torvalds 					    lrm + (t << 32);
1411da177e4SLinus Torvalds 					hrm += at < lrm;
1421da177e4SLinus Torvalds 					lrm = at;
1431da177e4SLinus Torvalds 				}
1441da177e4SLinus Torvalds 				hrm = hrm + (t >> 32);
1451da177e4SLinus Torvalds 			}
1461da177e4SLinus Torvalds 
1471da177e4SLinus Torvalds 			{
1481da177e4SLinus Torvalds 				u64 t = DPXMULT(hxm, lym);
1491da177e4SLinus Torvalds 				{
1501da177e4SLinus Torvalds 					u64 at =
1511da177e4SLinus Torvalds 					    lrm + (t << 32);
1521da177e4SLinus Torvalds 					hrm += at < lrm;
1531da177e4SLinus Torvalds 					lrm = at;
1541da177e4SLinus Torvalds 				}
1551da177e4SLinus Torvalds 				hrm = hrm + (t >> 32);
1561da177e4SLinus Torvalds 			}
1571da177e4SLinus Torvalds 			rm = hrm | (lrm != 0);
1581da177e4SLinus Torvalds 		}
1591da177e4SLinus Torvalds 
1601da177e4SLinus Torvalds 		/*
1611da177e4SLinus Torvalds 		 * sticky shift down to normal rounding precision
1621da177e4SLinus Torvalds 		 */
1631da177e4SLinus Torvalds 		if ((s64) rm < 0) {
1641da177e4SLinus Torvalds 			rm =
165ad8fb553SRalf Baechle 			    (rm >> (64 - (DP_FBITS + 1 + 3))) |
166ad8fb553SRalf Baechle 			    ((rm << (DP_FBITS + 1 + 3)) != 0);
1671da177e4SLinus Torvalds 			re++;
1681da177e4SLinus Torvalds 		} else {
1691da177e4SLinus Torvalds 			rm =
170ad8fb553SRalf Baechle 			    (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
171ad8fb553SRalf Baechle 			    ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
1721da177e4SLinus Torvalds 		}
1731da177e4SLinus Torvalds 		assert(rm & (DP_HIDDEN_BIT << 3));
1741da177e4SLinus Torvalds 		DPNORMRET2(rs, re, rm, "mul", x, y);
1751da177e4SLinus Torvalds 	}
1761da177e4SLinus Torvalds }
177