1 /* 2 * IEEE754 floating point arithmetic 3 * double precision: MADDF.f (Fused Multiply Add) 4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15 #include "ieee754dp.h" 16 17 18 /* 128 bits shift right logical with rounding. */ 19 void srl128(u64 *hptr, u64 *lptr, int count) 20 { 21 u64 low; 22 23 if (count >= 128) { 24 *lptr = *hptr != 0 || *lptr != 0; 25 *hptr = 0; 26 } else if (count >= 64) { 27 if (count == 64) { 28 *lptr = *hptr | (*lptr != 0); 29 } else { 30 low = *lptr; 31 *lptr = *hptr >> (count - 64); 32 *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 33 } 34 *hptr = 0; 35 } else { 36 low = *lptr; 37 *lptr = low >> count | *hptr << (64 - count); 38 *lptr |= (low << (64 - count)) != 0; 39 *hptr = *hptr >> count; 40 } 41 } 42 43 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 44 union ieee754dp y, enum maddf_flags flags) 45 { 46 int re; 47 int rs; 48 unsigned lxm; 49 unsigned hxm; 50 unsigned lym; 51 unsigned hym; 52 u64 lrm; 53 u64 hrm; 54 u64 lzm; 55 u64 hzm; 56 u64 t; 57 u64 at; 58 int s; 59 60 COMPXDP; 61 COMPYDP; 62 COMPZDP; 63 64 EXPLODEXDP; 65 EXPLODEYDP; 66 EXPLODEZDP; 67 68 FLUSHXDP; 69 FLUSHYDP; 70 FLUSHZDP; 71 72 ieee754_clearcx(); 73 74 /* 75 * Handle the cases when at least one of x, y or z is a NaN. 76 * Order of precedence is sNaN, qNaN and z, x, y. 77 */ 78 if (zc == IEEE754_CLASS_SNAN) 79 return ieee754dp_nanxcpt(z); 80 if (xc == IEEE754_CLASS_SNAN) 81 return ieee754dp_nanxcpt(x); 82 if (yc == IEEE754_CLASS_SNAN) 83 return ieee754dp_nanxcpt(y); 84 if (zc == IEEE754_CLASS_QNAN) 85 return z; 86 if (xc == IEEE754_CLASS_QNAN) 87 return x; 88 if (yc == IEEE754_CLASS_QNAN) 89 return y; 90 91 if (zc == IEEE754_CLASS_DNORM) 92 DPDNORMZ; 93 /* ZERO z cases are handled separately below */ 94 95 switch (CLPAIR(xc, yc)) { 96 97 /* 98 * Infinity handling 99 */ 100 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 101 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 102 ieee754_setcx(IEEE754_INVALID_OPERATION); 103 return ieee754dp_indef(); 104 105 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 106 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 107 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 108 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 109 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 110 if ((zc == IEEE754_CLASS_INF) && 111 ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || 112 ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { 113 /* 114 * Cases of addition of infinities with opposite signs 115 * or subtraction of infinities with same signs. 116 */ 117 ieee754_setcx(IEEE754_INVALID_OPERATION); 118 return ieee754dp_indef(); 119 } 120 /* 121 * z is here either not an infinity, or an infinity having the 122 * same sign as product (x*y) (in case of MADDF.D instruction) 123 * or product -(x*y) (in MSUBF.D case). The result must be an 124 * infinity, and its sign is determined only by the value of 125 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. 126 */ 127 if (flags & MADDF_NEGATE_PRODUCT) 128 return ieee754dp_inf(1 ^ (xs ^ ys)); 129 else 130 return ieee754dp_inf(xs ^ ys); 131 132 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 133 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 134 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 135 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 136 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 137 if (zc == IEEE754_CLASS_INF) 138 return ieee754dp_inf(zs); 139 if (zc == IEEE754_CLASS_ZERO) { 140 /* Handle cases +0 + (-0) and similar ones. */ 141 if ((!(flags & MADDF_NEGATE_PRODUCT) 142 && (zs == (xs ^ ys))) || 143 ((flags & MADDF_NEGATE_PRODUCT) 144 && (zs != (xs ^ ys)))) 145 /* 146 * Cases of addition of zeros of equal signs 147 * or subtraction of zeroes of opposite signs. 148 * The sign of the resulting zero is in any 149 * such case determined only by the sign of z. 150 */ 151 return z; 152 153 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 154 } 155 /* x*y is here 0, and z is not 0, so just return z */ 156 return z; 157 158 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 159 DPDNORMX; 160 161 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 162 if (zc == IEEE754_CLASS_INF) 163 return ieee754dp_inf(zs); 164 DPDNORMY; 165 break; 166 167 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 168 if (zc == IEEE754_CLASS_INF) 169 return ieee754dp_inf(zs); 170 DPDNORMX; 171 break; 172 173 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 174 if (zc == IEEE754_CLASS_INF) 175 return ieee754dp_inf(zs); 176 /* fall through to real computations */ 177 } 178 179 /* Finally get to do some computation */ 180 181 /* 182 * Do the multiplication bit first 183 * 184 * rm = xm * ym, re = xe + ye basically 185 * 186 * At this point xm and ym should have been normalized. 187 */ 188 assert(xm & DP_HIDDEN_BIT); 189 assert(ym & DP_HIDDEN_BIT); 190 191 re = xe + ye; 192 rs = xs ^ ys; 193 if (flags & MADDF_NEGATE_PRODUCT) 194 rs ^= 1; 195 196 /* shunt to top of word */ 197 xm <<= 64 - (DP_FBITS + 1); 198 ym <<= 64 - (DP_FBITS + 1); 199 200 /* 201 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 202 */ 203 204 /* 32 * 32 => 64 */ 205 #define DPXMULT(x, y) ((u64)(x) * (u64)y) 206 207 lxm = xm; 208 hxm = xm >> 32; 209 lym = ym; 210 hym = ym >> 32; 211 212 lrm = DPXMULT(lxm, lym); 213 hrm = DPXMULT(hxm, hym); 214 215 t = DPXMULT(lxm, hym); 216 217 at = lrm + (t << 32); 218 hrm += at < lrm; 219 lrm = at; 220 221 hrm = hrm + (t >> 32); 222 223 t = DPXMULT(hxm, lym); 224 225 at = lrm + (t << 32); 226 hrm += at < lrm; 227 lrm = at; 228 229 hrm = hrm + (t >> 32); 230 231 /* Put explicit bit at bit 126 if necessary */ 232 if ((int64_t)hrm < 0) { 233 lrm = (hrm << 63) | (lrm >> 1); 234 hrm = hrm >> 1; 235 re++; 236 } 237 238 assert(hrm & (1 << 62)); 239 240 if (zc == IEEE754_CLASS_ZERO) { 241 /* 242 * Move explicit bit from bit 126 to bit 55 since the 243 * ieee754dp_format code expects the mantissa to be 244 * 56 bits wide (53 + 3 rounding bits). 245 */ 246 srl128(&hrm, &lrm, (126 - 55)); 247 return ieee754dp_format(rs, re, lrm); 248 } 249 250 /* Move explicit bit from bit 52 to bit 126 */ 251 lzm = 0; 252 hzm = zm << 10; 253 assert(hzm & (1 << 62)); 254 255 /* Make the exponents the same */ 256 if (ze > re) { 257 /* 258 * Have to shift y fraction right to align. 259 */ 260 s = ze - re; 261 srl128(&hrm, &lrm, s); 262 re += s; 263 } else if (re > ze) { 264 /* 265 * Have to shift x fraction right to align. 266 */ 267 s = re - ze; 268 srl128(&hzm, &lzm, s); 269 ze += s; 270 } 271 assert(ze == re); 272 assert(ze <= DP_EMAX); 273 274 /* Do the addition */ 275 if (zs == rs) { 276 /* 277 * Generate 128 bit result by adding two 127 bit numbers 278 * leaving result in hzm:lzm, zs and ze. 279 */ 280 hzm = hzm + hrm + (lzm > (lzm + lrm)); 281 lzm = lzm + lrm; 282 if ((int64_t)hzm < 0) { /* carry out */ 283 srl128(&hzm, &lzm, 1); 284 ze++; 285 } 286 } else { 287 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 288 hzm = hzm - hrm - (lzm < lrm); 289 lzm = lzm - lrm; 290 } else { 291 hzm = hrm - hzm - (lrm < lzm); 292 lzm = lrm - lzm; 293 zs = rs; 294 } 295 if (lzm == 0 && hzm == 0) 296 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 297 298 /* 299 * Put explicit bit at bit 126 if necessary. 300 */ 301 if (hzm == 0) { 302 /* left shift by 63 or 64 bits */ 303 if ((int64_t)lzm < 0) { 304 /* MSB of lzm is the explicit bit */ 305 hzm = lzm >> 1; 306 lzm = lzm << 63; 307 ze -= 63; 308 } else { 309 hzm = lzm; 310 lzm = 0; 311 ze -= 64; 312 } 313 } 314 315 t = 0; 316 while ((hzm >> (62 - t)) == 0) 317 t++; 318 319 assert(t <= 62); 320 if (t) { 321 hzm = hzm << t | lzm >> (64 - t); 322 lzm = lzm << t; 323 ze -= t; 324 } 325 } 326 327 /* 328 * Move explicit bit from bit 126 to bit 55 since the 329 * ieee754dp_format code expects the mantissa to be 330 * 56 bits wide (53 + 3 rounding bits). 331 */ 332 srl128(&hzm, &lzm, (126 - 55)); 333 334 return ieee754dp_format(zs, ze, lzm); 335 } 336 337 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 338 union ieee754dp y) 339 { 340 return _dp_maddf(z, x, y, 0); 341 } 342 343 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 344 union ieee754dp y) 345 { 346 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 347 } 348