1 /* 2 * IEEE754 floating point arithmetic 3 * double precision: MADDF.f (Fused Multiply Add) 4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15 #include "ieee754dp.h" 16 17 18 /* 128 bits shift right logical with rounding. */ 19 static void srl128(u64 *hptr, u64 *lptr, int count) 20 { 21 u64 low; 22 23 if (count >= 128) { 24 *lptr = *hptr != 0 || *lptr != 0; 25 *hptr = 0; 26 } else if (count >= 64) { 27 if (count == 64) { 28 *lptr = *hptr | (*lptr != 0); 29 } else { 30 low = *lptr; 31 *lptr = *hptr >> (count - 64); 32 *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 33 } 34 *hptr = 0; 35 } else { 36 low = *lptr; 37 *lptr = low >> count | *hptr << (64 - count); 38 *lptr |= (low << (64 - count)) != 0; 39 *hptr = *hptr >> count; 40 } 41 } 42 43 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 44 union ieee754dp y, enum maddf_flags flags) 45 { 46 int re; 47 int rs; 48 unsigned int lxm; 49 unsigned int hxm; 50 unsigned int lym; 51 unsigned int hym; 52 u64 lrm; 53 u64 hrm; 54 u64 lzm; 55 u64 hzm; 56 u64 t; 57 u64 at; 58 int s; 59 60 COMPXDP; 61 COMPYDP; 62 COMPZDP; 63 64 EXPLODEXDP; 65 EXPLODEYDP; 66 EXPLODEZDP; 67 68 FLUSHXDP; 69 FLUSHYDP; 70 FLUSHZDP; 71 72 ieee754_clearcx(); 73 74 /* 75 * Handle the cases when at least one of x, y or z is a NaN. 76 * Order of precedence is sNaN, qNaN and z, x, y. 77 */ 78 if (zc == IEEE754_CLASS_SNAN) 79 return ieee754dp_nanxcpt(z); 80 if (xc == IEEE754_CLASS_SNAN) 81 return ieee754dp_nanxcpt(x); 82 if (yc == IEEE754_CLASS_SNAN) 83 return ieee754dp_nanxcpt(y); 84 if (zc == IEEE754_CLASS_QNAN) 85 return z; 86 if (xc == IEEE754_CLASS_QNAN) 87 return x; 88 if (yc == IEEE754_CLASS_QNAN) 89 return y; 90 91 if (zc == IEEE754_CLASS_DNORM) 92 DPDNORMZ; 93 /* ZERO z cases are handled separately below */ 94 95 switch (CLPAIR(xc, yc)) { 96 97 /* 98 * Infinity handling 99 */ 100 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 101 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 102 ieee754_setcx(IEEE754_INVALID_OPERATION); 103 return ieee754dp_indef(); 104 105 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 106 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 107 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 108 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 109 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 110 if ((zc == IEEE754_CLASS_INF) && 111 ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || 112 ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { 113 /* 114 * Cases of addition of infinities with opposite signs 115 * or subtraction of infinities with same signs. 116 */ 117 ieee754_setcx(IEEE754_INVALID_OPERATION); 118 return ieee754dp_indef(); 119 } 120 /* 121 * z is here either not an infinity, or an infinity having the 122 * same sign as product (x*y) (in case of MADDF.D instruction) 123 * or product -(x*y) (in MSUBF.D case). The result must be an 124 * infinity, and its sign is determined only by the value of 125 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. 126 */ 127 if (flags & MADDF_NEGATE_PRODUCT) 128 return ieee754dp_inf(1 ^ (xs ^ ys)); 129 else 130 return ieee754dp_inf(xs ^ ys); 131 132 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 133 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 134 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 135 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 136 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 137 if (zc == IEEE754_CLASS_INF) 138 return ieee754dp_inf(zs); 139 if (zc == IEEE754_CLASS_ZERO) { 140 /* Handle cases +0 + (-0) and similar ones. */ 141 if ((!(flags & MADDF_NEGATE_PRODUCT) 142 && (zs == (xs ^ ys))) || 143 ((flags & MADDF_NEGATE_PRODUCT) 144 && (zs != (xs ^ ys)))) 145 /* 146 * Cases of addition of zeros of equal signs 147 * or subtraction of zeroes of opposite signs. 148 * The sign of the resulting zero is in any 149 * such case determined only by the sign of z. 150 */ 151 return z; 152 153 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 154 } 155 /* x*y is here 0, and z is not 0, so just return z */ 156 return z; 157 158 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 159 DPDNORMX; 160 /* fall through */ 161 162 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 163 if (zc == IEEE754_CLASS_INF) 164 return ieee754dp_inf(zs); 165 DPDNORMY; 166 break; 167 168 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 169 if (zc == IEEE754_CLASS_INF) 170 return ieee754dp_inf(zs); 171 DPDNORMX; 172 break; 173 174 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 175 if (zc == IEEE754_CLASS_INF) 176 return ieee754dp_inf(zs); 177 /* continue to real computations */ 178 } 179 180 /* Finally get to do some computation */ 181 182 /* 183 * Do the multiplication bit first 184 * 185 * rm = xm * ym, re = xe + ye basically 186 * 187 * At this point xm and ym should have been normalized. 188 */ 189 assert(xm & DP_HIDDEN_BIT); 190 assert(ym & DP_HIDDEN_BIT); 191 192 re = xe + ye; 193 rs = xs ^ ys; 194 if (flags & MADDF_NEGATE_PRODUCT) 195 rs ^= 1; 196 197 /* shunt to top of word */ 198 xm <<= 64 - (DP_FBITS + 1); 199 ym <<= 64 - (DP_FBITS + 1); 200 201 /* 202 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 203 */ 204 205 lxm = xm; 206 hxm = xm >> 32; 207 lym = ym; 208 hym = ym >> 32; 209 210 lrm = DPXMULT(lxm, lym); 211 hrm = DPXMULT(hxm, hym); 212 213 t = DPXMULT(lxm, hym); 214 215 at = lrm + (t << 32); 216 hrm += at < lrm; 217 lrm = at; 218 219 hrm = hrm + (t >> 32); 220 221 t = DPXMULT(hxm, lym); 222 223 at = lrm + (t << 32); 224 hrm += at < lrm; 225 lrm = at; 226 227 hrm = hrm + (t >> 32); 228 229 /* Put explicit bit at bit 126 if necessary */ 230 if ((int64_t)hrm < 0) { 231 lrm = (hrm << 63) | (lrm >> 1); 232 hrm = hrm >> 1; 233 re++; 234 } 235 236 assert(hrm & (1 << 62)); 237 238 if (zc == IEEE754_CLASS_ZERO) { 239 /* 240 * Move explicit bit from bit 126 to bit 55 since the 241 * ieee754dp_format code expects the mantissa to be 242 * 56 bits wide (53 + 3 rounding bits). 243 */ 244 srl128(&hrm, &lrm, (126 - 55)); 245 return ieee754dp_format(rs, re, lrm); 246 } 247 248 /* Move explicit bit from bit 52 to bit 126 */ 249 lzm = 0; 250 hzm = zm << 10; 251 assert(hzm & (1 << 62)); 252 253 /* Make the exponents the same */ 254 if (ze > re) { 255 /* 256 * Have to shift y fraction right to align. 257 */ 258 s = ze - re; 259 srl128(&hrm, &lrm, s); 260 re += s; 261 } else if (re > ze) { 262 /* 263 * Have to shift x fraction right to align. 264 */ 265 s = re - ze; 266 srl128(&hzm, &lzm, s); 267 ze += s; 268 } 269 assert(ze == re); 270 assert(ze <= DP_EMAX); 271 272 /* Do the addition */ 273 if (zs == rs) { 274 /* 275 * Generate 128 bit result by adding two 127 bit numbers 276 * leaving result in hzm:lzm, zs and ze. 277 */ 278 hzm = hzm + hrm + (lzm > (lzm + lrm)); 279 lzm = lzm + lrm; 280 if ((int64_t)hzm < 0) { /* carry out */ 281 srl128(&hzm, &lzm, 1); 282 ze++; 283 } 284 } else { 285 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 286 hzm = hzm - hrm - (lzm < lrm); 287 lzm = lzm - lrm; 288 } else { 289 hzm = hrm - hzm - (lrm < lzm); 290 lzm = lrm - lzm; 291 zs = rs; 292 } 293 if (lzm == 0 && hzm == 0) 294 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 295 296 /* 297 * Put explicit bit at bit 126 if necessary. 298 */ 299 if (hzm == 0) { 300 /* left shift by 63 or 64 bits */ 301 if ((int64_t)lzm < 0) { 302 /* MSB of lzm is the explicit bit */ 303 hzm = lzm >> 1; 304 lzm = lzm << 63; 305 ze -= 63; 306 } else { 307 hzm = lzm; 308 lzm = 0; 309 ze -= 64; 310 } 311 } 312 313 t = 0; 314 while ((hzm >> (62 - t)) == 0) 315 t++; 316 317 assert(t <= 62); 318 if (t) { 319 hzm = hzm << t | lzm >> (64 - t); 320 lzm = lzm << t; 321 ze -= t; 322 } 323 } 324 325 /* 326 * Move explicit bit from bit 126 to bit 55 since the 327 * ieee754dp_format code expects the mantissa to be 328 * 56 bits wide (53 + 3 rounding bits). 329 */ 330 srl128(&hzm, &lzm, (126 - 55)); 331 332 return ieee754dp_format(zs, ze, lzm); 333 } 334 335 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 336 union ieee754dp y) 337 { 338 return _dp_maddf(z, x, y, 0); 339 } 340 341 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 342 union ieee754dp y) 343 { 344 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 345 } 346