1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * IEEE754 floating point arithmetic 4 * double precision: MADDF.f (Fused Multiply Add) 5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 6 * 7 * MIPS floating point support 8 * Copyright (C) 2015 Imagination Technologies, Ltd. 9 * Author: Markos Chandras <markos.chandras@imgtec.com> 10 */ 11 12 #include "ieee754dp.h" 13 14 15 /* 128 bits shift right logical with rounding. */ 16 static void srl128(u64 *hptr, u64 *lptr, int count) 17 { 18 u64 low; 19 20 if (count >= 128) { 21 *lptr = *hptr != 0 || *lptr != 0; 22 *hptr = 0; 23 } else if (count >= 64) { 24 if (count == 64) { 25 *lptr = *hptr | (*lptr != 0); 26 } else { 27 low = *lptr; 28 *lptr = *hptr >> (count - 64); 29 *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 30 } 31 *hptr = 0; 32 } else { 33 low = *lptr; 34 *lptr = low >> count | *hptr << (64 - count); 35 *lptr |= (low << (64 - count)) != 0; 36 *hptr = *hptr >> count; 37 } 38 } 39 40 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 41 union ieee754dp y, enum maddf_flags flags) 42 { 43 int re; 44 int rs; 45 unsigned int lxm; 46 unsigned int hxm; 47 unsigned int lym; 48 unsigned int hym; 49 u64 lrm; 50 u64 hrm; 51 u64 lzm; 52 u64 hzm; 53 u64 t; 54 u64 at; 55 int s; 56 57 COMPXDP; 58 COMPYDP; 59 COMPZDP; 60 61 EXPLODEXDP; 62 EXPLODEYDP; 63 EXPLODEZDP; 64 65 FLUSHXDP; 66 FLUSHYDP; 67 FLUSHZDP; 68 69 ieee754_clearcx(); 70 71 rs = xs ^ ys; 72 if (flags & MADDF_NEGATE_PRODUCT) 73 rs ^= 1; 74 if (flags & MADDF_NEGATE_ADDITION) 75 zs ^= 1; 76 77 /* 78 * Handle the cases when at least one of x, y or z is a NaN. 79 * Order of precedence is sNaN, qNaN and z, x, y. 80 */ 81 if (zc == IEEE754_CLASS_SNAN) 82 return ieee754dp_nanxcpt(z); 83 if (xc == IEEE754_CLASS_SNAN) 84 return ieee754dp_nanxcpt(x); 85 if (yc == IEEE754_CLASS_SNAN) 86 return ieee754dp_nanxcpt(y); 87 if (zc == IEEE754_CLASS_QNAN) 88 return z; 89 if (xc == IEEE754_CLASS_QNAN) 90 return x; 91 if (yc == IEEE754_CLASS_QNAN) 92 return y; 93 94 if (zc == IEEE754_CLASS_DNORM) 95 DPDNORMZ; 96 /* ZERO z cases are handled separately below */ 97 98 switch (CLPAIR(xc, yc)) { 99 100 /* 101 * Infinity handling 102 */ 103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 104 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 105 ieee754_setcx(IEEE754_INVALID_OPERATION); 106 return ieee754dp_indef(); 107 108 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 109 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 110 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 111 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 112 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 113 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 114 /* 115 * Cases of addition of infinities with opposite signs 116 * or subtraction of infinities with same signs. 117 */ 118 ieee754_setcx(IEEE754_INVALID_OPERATION); 119 return ieee754dp_indef(); 120 } 121 /* 122 * z is here either not an infinity, or an infinity having the 123 * same sign as product (x*y). The result must be an infinity, 124 * and its sign is determined only by the sign of product (x*y). 125 */ 126 return ieee754dp_inf(rs); 127 128 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 129 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 130 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 131 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 132 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 133 if (zc == IEEE754_CLASS_INF) 134 return ieee754dp_inf(zs); 135 if (zc == IEEE754_CLASS_ZERO) { 136 /* Handle cases +0 + (-0) and similar ones. */ 137 if (zs == rs) 138 /* 139 * Cases of addition of zeros of equal signs 140 * or subtraction of zeroes of opposite signs. 141 * The sign of the resulting zero is in any 142 * such case determined only by the sign of z. 143 */ 144 return z; 145 146 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 147 } 148 /* x*y is here 0, and z is not 0, so just return z */ 149 return z; 150 151 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 152 DPDNORMX; 153 /* fall through */ 154 155 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 156 if (zc == IEEE754_CLASS_INF) 157 return ieee754dp_inf(zs); 158 DPDNORMY; 159 break; 160 161 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 162 if (zc == IEEE754_CLASS_INF) 163 return ieee754dp_inf(zs); 164 DPDNORMX; 165 break; 166 167 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 168 if (zc == IEEE754_CLASS_INF) 169 return ieee754dp_inf(zs); 170 /* continue to real computations */ 171 } 172 173 /* Finally get to do some computation */ 174 175 /* 176 * Do the multiplication bit first 177 * 178 * rm = xm * ym, re = xe + ye basically 179 * 180 * At this point xm and ym should have been normalized. 181 */ 182 assert(xm & DP_HIDDEN_BIT); 183 assert(ym & DP_HIDDEN_BIT); 184 185 re = xe + ye; 186 187 /* shunt to top of word */ 188 xm <<= 64 - (DP_FBITS + 1); 189 ym <<= 64 - (DP_FBITS + 1); 190 191 /* 192 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 193 */ 194 195 lxm = xm; 196 hxm = xm >> 32; 197 lym = ym; 198 hym = ym >> 32; 199 200 lrm = DPXMULT(lxm, lym); 201 hrm = DPXMULT(hxm, hym); 202 203 t = DPXMULT(lxm, hym); 204 205 at = lrm + (t << 32); 206 hrm += at < lrm; 207 lrm = at; 208 209 hrm = hrm + (t >> 32); 210 211 t = DPXMULT(hxm, lym); 212 213 at = lrm + (t << 32); 214 hrm += at < lrm; 215 lrm = at; 216 217 hrm = hrm + (t >> 32); 218 219 /* Put explicit bit at bit 126 if necessary */ 220 if ((int64_t)hrm < 0) { 221 lrm = (hrm << 63) | (lrm >> 1); 222 hrm = hrm >> 1; 223 re++; 224 } 225 226 assert(hrm & (1 << 62)); 227 228 if (zc == IEEE754_CLASS_ZERO) { 229 /* 230 * Move explicit bit from bit 126 to bit 55 since the 231 * ieee754dp_format code expects the mantissa to be 232 * 56 bits wide (53 + 3 rounding bits). 233 */ 234 srl128(&hrm, &lrm, (126 - 55)); 235 return ieee754dp_format(rs, re, lrm); 236 } 237 238 /* Move explicit bit from bit 52 to bit 126 */ 239 lzm = 0; 240 hzm = zm << 10; 241 assert(hzm & (1 << 62)); 242 243 /* Make the exponents the same */ 244 if (ze > re) { 245 /* 246 * Have to shift y fraction right to align. 247 */ 248 s = ze - re; 249 srl128(&hrm, &lrm, s); 250 re += s; 251 } else if (re > ze) { 252 /* 253 * Have to shift x fraction right to align. 254 */ 255 s = re - ze; 256 srl128(&hzm, &lzm, s); 257 ze += s; 258 } 259 assert(ze == re); 260 assert(ze <= DP_EMAX); 261 262 /* Do the addition */ 263 if (zs == rs) { 264 /* 265 * Generate 128 bit result by adding two 127 bit numbers 266 * leaving result in hzm:lzm, zs and ze. 267 */ 268 hzm = hzm + hrm + (lzm > (lzm + lrm)); 269 lzm = lzm + lrm; 270 if ((int64_t)hzm < 0) { /* carry out */ 271 srl128(&hzm, &lzm, 1); 272 ze++; 273 } 274 } else { 275 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 276 hzm = hzm - hrm - (lzm < lrm); 277 lzm = lzm - lrm; 278 } else { 279 hzm = hrm - hzm - (lrm < lzm); 280 lzm = lrm - lzm; 281 zs = rs; 282 } 283 if (lzm == 0 && hzm == 0) 284 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 285 286 /* 287 * Put explicit bit at bit 126 if necessary. 288 */ 289 if (hzm == 0) { 290 /* left shift by 63 or 64 bits */ 291 if ((int64_t)lzm < 0) { 292 /* MSB of lzm is the explicit bit */ 293 hzm = lzm >> 1; 294 lzm = lzm << 63; 295 ze -= 63; 296 } else { 297 hzm = lzm; 298 lzm = 0; 299 ze -= 64; 300 } 301 } 302 303 t = 0; 304 while ((hzm >> (62 - t)) == 0) 305 t++; 306 307 assert(t <= 62); 308 if (t) { 309 hzm = hzm << t | lzm >> (64 - t); 310 lzm = lzm << t; 311 ze -= t; 312 } 313 } 314 315 /* 316 * Move explicit bit from bit 126 to bit 55 since the 317 * ieee754dp_format code expects the mantissa to be 318 * 56 bits wide (53 + 3 rounding bits). 319 */ 320 srl128(&hzm, &lzm, (126 - 55)); 321 322 return ieee754dp_format(zs, ze, lzm); 323 } 324 325 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 326 union ieee754dp y) 327 { 328 return _dp_maddf(z, x, y, 0); 329 } 330 331 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 332 union ieee754dp y) 333 { 334 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 335 } 336 337 union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x, 338 union ieee754dp y) 339 { 340 return _dp_maddf(z, x, y, 0); 341 } 342 343 union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x, 344 union ieee754dp y) 345 { 346 return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 347 } 348 349 union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x, 350 union ieee754dp y) 351 { 352 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 353 } 354 355 union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x, 356 union ieee754dp y) 357 { 358 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 359 } 360