1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * IEEE754 floating point arithmetic 4 * double precision: MADDF.f (Fused Multiply Add) 5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 6 * 7 * MIPS floating point support 8 * Copyright (C) 2015 Imagination Technologies, Ltd. 9 * Author: Markos Chandras <markos.chandras@imgtec.com> 10 */ 11 12 #include "ieee754dp.h" 13 14 15 /* 128 bits shift right logical with rounding. */ 16 static void srl128(u64 *hptr, u64 *lptr, int count) 17 { 18 u64 low; 19 20 if (count >= 128) { 21 *lptr = *hptr != 0 || *lptr != 0; 22 *hptr = 0; 23 } else if (count >= 64) { 24 if (count == 64) { 25 *lptr = *hptr | (*lptr != 0); 26 } else { 27 low = *lptr; 28 *lptr = *hptr >> (count - 64); 29 *lptr |= (*hptr << (128 - count)) != 0 || low != 0; 30 } 31 *hptr = 0; 32 } else { 33 low = *lptr; 34 *lptr = low >> count | *hptr << (64 - count); 35 *lptr |= (low << (64 - count)) != 0; 36 *hptr = *hptr >> count; 37 } 38 } 39 40 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, 41 union ieee754dp y, enum maddf_flags flags) 42 { 43 int re; 44 int rs; 45 unsigned int lxm; 46 unsigned int hxm; 47 unsigned int lym; 48 unsigned int hym; 49 u64 lrm; 50 u64 hrm; 51 u64 lzm; 52 u64 hzm; 53 u64 t; 54 u64 at; 55 int s; 56 57 COMPXDP; 58 COMPYDP; 59 COMPZDP; 60 61 EXPLODEXDP; 62 EXPLODEYDP; 63 EXPLODEZDP; 64 65 FLUSHXDP; 66 FLUSHYDP; 67 FLUSHZDP; 68 69 ieee754_clearcx(); 70 71 /* 72 * Handle the cases when at least one of x, y or z is a NaN. 73 * Order of precedence is sNaN, qNaN and z, x, y. 74 */ 75 if (zc == IEEE754_CLASS_SNAN) 76 return ieee754dp_nanxcpt(z); 77 if (xc == IEEE754_CLASS_SNAN) 78 return ieee754dp_nanxcpt(x); 79 if (yc == IEEE754_CLASS_SNAN) 80 return ieee754dp_nanxcpt(y); 81 if (zc == IEEE754_CLASS_QNAN) 82 return z; 83 if (xc == IEEE754_CLASS_QNAN) 84 return x; 85 if (yc == IEEE754_CLASS_QNAN) 86 return y; 87 88 if (zc == IEEE754_CLASS_DNORM) 89 DPDNORMZ; 90 /* ZERO z cases are handled separately below */ 91 92 switch (CLPAIR(xc, yc)) { 93 94 /* 95 * Infinity handling 96 */ 97 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 98 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 99 ieee754_setcx(IEEE754_INVALID_OPERATION); 100 return ieee754dp_indef(); 101 102 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 103 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 104 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 105 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 106 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 107 if ((zc == IEEE754_CLASS_INF) && 108 ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || 109 ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { 110 /* 111 * Cases of addition of infinities with opposite signs 112 * or subtraction of infinities with same signs. 113 */ 114 ieee754_setcx(IEEE754_INVALID_OPERATION); 115 return ieee754dp_indef(); 116 } 117 /* 118 * z is here either not an infinity, or an infinity having the 119 * same sign as product (x*y) (in case of MADDF.D instruction) 120 * or product -(x*y) (in MSUBF.D case). The result must be an 121 * infinity, and its sign is determined only by the value of 122 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. 123 */ 124 if (flags & MADDF_NEGATE_PRODUCT) 125 return ieee754dp_inf(1 ^ (xs ^ ys)); 126 else 127 return ieee754dp_inf(xs ^ ys); 128 129 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 130 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 131 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 132 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 133 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 134 if (zc == IEEE754_CLASS_INF) 135 return ieee754dp_inf(zs); 136 if (zc == IEEE754_CLASS_ZERO) { 137 /* Handle cases +0 + (-0) and similar ones. */ 138 if ((!(flags & MADDF_NEGATE_PRODUCT) 139 && (zs == (xs ^ ys))) || 140 ((flags & MADDF_NEGATE_PRODUCT) 141 && (zs != (xs ^ ys)))) 142 /* 143 * Cases of addition of zeros of equal signs 144 * or subtraction of zeroes of opposite signs. 145 * The sign of the resulting zero is in any 146 * such case determined only by the sign of z. 147 */ 148 return z; 149 150 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 151 } 152 /* x*y is here 0, and z is not 0, so just return z */ 153 return z; 154 155 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 156 DPDNORMX; 157 /* fall through */ 158 159 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 160 if (zc == IEEE754_CLASS_INF) 161 return ieee754dp_inf(zs); 162 DPDNORMY; 163 break; 164 165 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 166 if (zc == IEEE754_CLASS_INF) 167 return ieee754dp_inf(zs); 168 DPDNORMX; 169 break; 170 171 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 172 if (zc == IEEE754_CLASS_INF) 173 return ieee754dp_inf(zs); 174 /* continue to real computations */ 175 } 176 177 /* Finally get to do some computation */ 178 179 /* 180 * Do the multiplication bit first 181 * 182 * rm = xm * ym, re = xe + ye basically 183 * 184 * At this point xm and ym should have been normalized. 185 */ 186 assert(xm & DP_HIDDEN_BIT); 187 assert(ym & DP_HIDDEN_BIT); 188 189 re = xe + ye; 190 rs = xs ^ ys; 191 if (flags & MADDF_NEGATE_PRODUCT) 192 rs ^= 1; 193 194 /* shunt to top of word */ 195 xm <<= 64 - (DP_FBITS + 1); 196 ym <<= 64 - (DP_FBITS + 1); 197 198 /* 199 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. 200 */ 201 202 lxm = xm; 203 hxm = xm >> 32; 204 lym = ym; 205 hym = ym >> 32; 206 207 lrm = DPXMULT(lxm, lym); 208 hrm = DPXMULT(hxm, hym); 209 210 t = DPXMULT(lxm, hym); 211 212 at = lrm + (t << 32); 213 hrm += at < lrm; 214 lrm = at; 215 216 hrm = hrm + (t >> 32); 217 218 t = DPXMULT(hxm, lym); 219 220 at = lrm + (t << 32); 221 hrm += at < lrm; 222 lrm = at; 223 224 hrm = hrm + (t >> 32); 225 226 /* Put explicit bit at bit 126 if necessary */ 227 if ((int64_t)hrm < 0) { 228 lrm = (hrm << 63) | (lrm >> 1); 229 hrm = hrm >> 1; 230 re++; 231 } 232 233 assert(hrm & (1 << 62)); 234 235 if (zc == IEEE754_CLASS_ZERO) { 236 /* 237 * Move explicit bit from bit 126 to bit 55 since the 238 * ieee754dp_format code expects the mantissa to be 239 * 56 bits wide (53 + 3 rounding bits). 240 */ 241 srl128(&hrm, &lrm, (126 - 55)); 242 return ieee754dp_format(rs, re, lrm); 243 } 244 245 /* Move explicit bit from bit 52 to bit 126 */ 246 lzm = 0; 247 hzm = zm << 10; 248 assert(hzm & (1 << 62)); 249 250 /* Make the exponents the same */ 251 if (ze > re) { 252 /* 253 * Have to shift y fraction right to align. 254 */ 255 s = ze - re; 256 srl128(&hrm, &lrm, s); 257 re += s; 258 } else if (re > ze) { 259 /* 260 * Have to shift x fraction right to align. 261 */ 262 s = re - ze; 263 srl128(&hzm, &lzm, s); 264 ze += s; 265 } 266 assert(ze == re); 267 assert(ze <= DP_EMAX); 268 269 /* Do the addition */ 270 if (zs == rs) { 271 /* 272 * Generate 128 bit result by adding two 127 bit numbers 273 * leaving result in hzm:lzm, zs and ze. 274 */ 275 hzm = hzm + hrm + (lzm > (lzm + lrm)); 276 lzm = lzm + lrm; 277 if ((int64_t)hzm < 0) { /* carry out */ 278 srl128(&hzm, &lzm, 1); 279 ze++; 280 } 281 } else { 282 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { 283 hzm = hzm - hrm - (lzm < lrm); 284 lzm = lzm - lrm; 285 } else { 286 hzm = hrm - hzm - (lrm < lzm); 287 lzm = lrm - lzm; 288 zs = rs; 289 } 290 if (lzm == 0 && hzm == 0) 291 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); 292 293 /* 294 * Put explicit bit at bit 126 if necessary. 295 */ 296 if (hzm == 0) { 297 /* left shift by 63 or 64 bits */ 298 if ((int64_t)lzm < 0) { 299 /* MSB of lzm is the explicit bit */ 300 hzm = lzm >> 1; 301 lzm = lzm << 63; 302 ze -= 63; 303 } else { 304 hzm = lzm; 305 lzm = 0; 306 ze -= 64; 307 } 308 } 309 310 t = 0; 311 while ((hzm >> (62 - t)) == 0) 312 t++; 313 314 assert(t <= 62); 315 if (t) { 316 hzm = hzm << t | lzm >> (64 - t); 317 lzm = lzm << t; 318 ze -= t; 319 } 320 } 321 322 /* 323 * Move explicit bit from bit 126 to bit 55 since the 324 * ieee754dp_format code expects the mantissa to be 325 * 56 bits wide (53 + 3 rounding bits). 326 */ 327 srl128(&hzm, &lzm, (126 - 55)); 328 329 return ieee754dp_format(zs, ze, lzm); 330 } 331 332 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, 333 union ieee754dp y) 334 { 335 return _dp_maddf(z, x, y, 0); 336 } 337 338 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, 339 union ieee754dp y) 340 { 341 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 342 } 343