11da177e4SLinus Torvalds /* IEEE754 floating point arithmetic 21da177e4SLinus Torvalds * double precision: common utilities 31da177e4SLinus Torvalds */ 41da177e4SLinus Torvalds /* 51da177e4SLinus Torvalds * MIPS floating point support 61da177e4SLinus Torvalds * Copyright (C) 1994-2000 Algorithmics Ltd. 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * ######################################################################## 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * This program is free software; you can distribute it and/or modify it 111da177e4SLinus Torvalds * under the terms of the GNU General Public License (Version 2) as 121da177e4SLinus Torvalds * published by the Free Software Foundation. 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * This program is distributed in the hope it will be useful, but WITHOUT 151da177e4SLinus Torvalds * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 161da177e4SLinus Torvalds * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 171da177e4SLinus Torvalds * for more details. 181da177e4SLinus Torvalds * 191da177e4SLinus Torvalds * You should have received a copy of the GNU General Public License along 201da177e4SLinus Torvalds * with this program; if not, write to the Free Software Foundation, Inc., 211da177e4SLinus Torvalds * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 221da177e4SLinus Torvalds * 231da177e4SLinus Torvalds * ######################################################################## 241da177e4SLinus Torvalds */ 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds 271da177e4SLinus Torvalds #include "ieee754dp.h" 281da177e4SLinus Torvalds 292209bcb1SRalf Baechle union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) 301da177e4SLinus Torvalds { 311da177e4SLinus Torvalds COMPXDP; 321da177e4SLinus Torvalds COMPYDP; 331da177e4SLinus Torvalds 341da177e4SLinus Torvalds EXPLODEXDP; 351da177e4SLinus Torvalds EXPLODEYDP; 361da177e4SLinus Torvalds 379e8bad1fSRalf Baechle ieee754_clearcx(); 381da177e4SLinus Torvalds 391da177e4SLinus Torvalds FLUSHXDP; 401da177e4SLinus Torvalds FLUSHYDP; 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds switch (CLPAIR(xc, yc)) { 431da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 441da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 451da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 461da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 471da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 481da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 491da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 501da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 511da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 521da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 531da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 549e8bad1fSRalf Baechle ieee754_setcx(IEEE754_INVALID_OPERATION); 551da177e4SLinus Torvalds return ieee754dp_nanxcpt(ieee754dp_indef(), "mul", x, y); 561da177e4SLinus Torvalds 571da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 581da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 591da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 601da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 611da177e4SLinus Torvalds return y; 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 641da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 651da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 661da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 671da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 681da177e4SLinus Torvalds return x; 691da177e4SLinus Torvalds 701da177e4SLinus Torvalds 711da177e4SLinus Torvalds /* Infinity handling */ 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 741da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 759e8bad1fSRalf Baechle ieee754_setcx(IEEE754_INVALID_OPERATION); 761da177e4SLinus Torvalds return ieee754dp_xcpt(ieee754dp_indef(), "mul", x, y); 771da177e4SLinus Torvalds 781da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 791da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 801da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 811da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 821da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 831da177e4SLinus Torvalds return ieee754dp_inf(xs ^ ys); 841da177e4SLinus Torvalds 851da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 861da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 871da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 881da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 891da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 901da177e4SLinus Torvalds return ieee754dp_zero(xs ^ ys); 911da177e4SLinus Torvalds 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 941da177e4SLinus Torvalds DPDNORMX; 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 971da177e4SLinus Torvalds DPDNORMY; 981da177e4SLinus Torvalds break; 991da177e4SLinus Torvalds 1001da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 1011da177e4SLinus Torvalds DPDNORMX; 1021da177e4SLinus Torvalds break; 1031da177e4SLinus Torvalds 1041da177e4SLinus Torvalds case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 1051da177e4SLinus Torvalds break; 1061da177e4SLinus Torvalds } 10725985edcSLucas De Marchi /* rm = xm * ym, re = xe+ye basically */ 1081da177e4SLinus Torvalds assert(xm & DP_HIDDEN_BIT); 1091da177e4SLinus Torvalds assert(ym & DP_HIDDEN_BIT); 1101da177e4SLinus Torvalds { 1111da177e4SLinus Torvalds int re = xe + ye; 1121da177e4SLinus Torvalds int rs = xs ^ ys; 1131da177e4SLinus Torvalds u64 rm; 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds /* shunt to top of word */ 116ad8fb553SRalf Baechle xm <<= 64 - (DP_FBITS + 1); 117ad8fb553SRalf Baechle ym <<= 64 - (DP_FBITS + 1); 1181da177e4SLinus Torvalds 1191da177e4SLinus Torvalds /* multiply 32bits xm,ym to give high 32bits rm with stickness 1201da177e4SLinus Torvalds */ 1211da177e4SLinus Torvalds 1221da177e4SLinus Torvalds /* 32 * 32 => 64 */ 1231da177e4SLinus Torvalds #define DPXMULT(x, y) ((u64)(x) * (u64)y) 1241da177e4SLinus Torvalds 1251da177e4SLinus Torvalds { 1261da177e4SLinus Torvalds unsigned lxm = xm; 1271da177e4SLinus Torvalds unsigned hxm = xm >> 32; 1281da177e4SLinus Torvalds unsigned lym = ym; 1291da177e4SLinus Torvalds unsigned hym = ym >> 32; 1301da177e4SLinus Torvalds u64 lrm; 1311da177e4SLinus Torvalds u64 hrm; 1321da177e4SLinus Torvalds 1331da177e4SLinus Torvalds lrm = DPXMULT(lxm, lym); 1341da177e4SLinus Torvalds hrm = DPXMULT(hxm, hym); 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds { 1371da177e4SLinus Torvalds u64 t = DPXMULT(lxm, hym); 1381da177e4SLinus Torvalds { 1391da177e4SLinus Torvalds u64 at = 1401da177e4SLinus Torvalds lrm + (t << 32); 1411da177e4SLinus Torvalds hrm += at < lrm; 1421da177e4SLinus Torvalds lrm = at; 1431da177e4SLinus Torvalds } 1441da177e4SLinus Torvalds hrm = hrm + (t >> 32); 1451da177e4SLinus Torvalds } 1461da177e4SLinus Torvalds 1471da177e4SLinus Torvalds { 1481da177e4SLinus Torvalds u64 t = DPXMULT(hxm, lym); 1491da177e4SLinus Torvalds { 1501da177e4SLinus Torvalds u64 at = 1511da177e4SLinus Torvalds lrm + (t << 32); 1521da177e4SLinus Torvalds hrm += at < lrm; 1531da177e4SLinus Torvalds lrm = at; 1541da177e4SLinus Torvalds } 1551da177e4SLinus Torvalds hrm = hrm + (t >> 32); 1561da177e4SLinus Torvalds } 1571da177e4SLinus Torvalds rm = hrm | (lrm != 0); 1581da177e4SLinus Torvalds } 1591da177e4SLinus Torvalds 1601da177e4SLinus Torvalds /* 1611da177e4SLinus Torvalds * sticky shift down to normal rounding precision 1621da177e4SLinus Torvalds */ 1631da177e4SLinus Torvalds if ((s64) rm < 0) { 1641da177e4SLinus Torvalds rm = 165ad8fb553SRalf Baechle (rm >> (64 - (DP_FBITS + 1 + 3))) | 166ad8fb553SRalf Baechle ((rm << (DP_FBITS + 1 + 3)) != 0); 1671da177e4SLinus Torvalds re++; 1681da177e4SLinus Torvalds } else { 1691da177e4SLinus Torvalds rm = 170ad8fb553SRalf Baechle (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | 171ad8fb553SRalf Baechle ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); 1721da177e4SLinus Torvalds } 1731da177e4SLinus Torvalds assert(rm & (DP_HIDDEN_BIT << 3)); 1741da177e4SLinus Torvalds DPNORMRET2(rs, re, rm, "mul", x, y); 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds } 177