1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * IEEE754 floating point arithmetic 4 * single precision: MADDF.f (Fused Multiply Add) 5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 6 * 7 * MIPS floating point support 8 * Copyright (C) 2015 Imagination Technologies, Ltd. 9 * Author: Markos Chandras <markos.chandras@imgtec.com> 10 */ 11 12 #include "ieee754sp.h" 13 14 15 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, 16 union ieee754sp y, enum maddf_flags flags) 17 { 18 int re; 19 int rs; 20 unsigned int rm; 21 u64 rm64; 22 u64 zm64; 23 int s; 24 25 COMPXSP; 26 COMPYSP; 27 COMPZSP; 28 29 EXPLODEXSP; 30 EXPLODEYSP; 31 EXPLODEZSP; 32 33 FLUSHXSP; 34 FLUSHYSP; 35 FLUSHZSP; 36 37 ieee754_clearcx(); 38 39 rs = xs ^ ys; 40 if (flags & MADDF_NEGATE_PRODUCT) 41 rs ^= 1; 42 if (flags & MADDF_NEGATE_ADDITION) 43 zs ^= 1; 44 45 /* 46 * Handle the cases when at least one of x, y or z is a NaN. 47 * Order of precedence is sNaN, qNaN and z, x, y. 48 */ 49 if (zc == IEEE754_CLASS_SNAN) 50 return ieee754sp_nanxcpt(z); 51 if (xc == IEEE754_CLASS_SNAN) 52 return ieee754sp_nanxcpt(x); 53 if (yc == IEEE754_CLASS_SNAN) 54 return ieee754sp_nanxcpt(y); 55 if (zc == IEEE754_CLASS_QNAN) 56 return z; 57 if (xc == IEEE754_CLASS_QNAN) 58 return x; 59 if (yc == IEEE754_CLASS_QNAN) 60 return y; 61 62 if (zc == IEEE754_CLASS_DNORM) 63 SPDNORMZ; 64 /* ZERO z cases are handled separately below */ 65 66 switch (CLPAIR(xc, yc)) { 67 68 69 /* 70 * Infinity handling 71 */ 72 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 73 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 74 ieee754_setcx(IEEE754_INVALID_OPERATION); 75 return ieee754sp_indef(); 76 77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 80 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 81 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 82 if ((zc == IEEE754_CLASS_INF) && (zs != rs)) { 83 /* 84 * Cases of addition of infinities with opposite signs 85 * or subtraction of infinities with same signs. 86 */ 87 ieee754_setcx(IEEE754_INVALID_OPERATION); 88 return ieee754sp_indef(); 89 } 90 /* 91 * z is here either not an infinity, or an infinity having the 92 * same sign as product (x*y). The result must be an infinity, 93 * and its sign is determined only by the sign of product (x*y). 94 */ 95 return ieee754sp_inf(rs); 96 97 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 98 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 99 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 100 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 101 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 102 if (zc == IEEE754_CLASS_INF) 103 return ieee754sp_inf(zs); 104 if (zc == IEEE754_CLASS_ZERO) { 105 /* Handle cases +0 + (-0) and similar ones. */ 106 if (zs == rs) 107 /* 108 * Cases of addition of zeros of equal signs 109 * or subtraction of zeroes of opposite signs. 110 * The sign of the resulting zero is in any 111 * such case determined only by the sign of z. 112 */ 113 return z; 114 115 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 116 } 117 /* x*y is here 0, and z is not 0, so just return z */ 118 return z; 119 120 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 121 SPDNORMX; 122 /* fall through */ 123 124 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 125 if (zc == IEEE754_CLASS_INF) 126 return ieee754sp_inf(zs); 127 SPDNORMY; 128 break; 129 130 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 131 if (zc == IEEE754_CLASS_INF) 132 return ieee754sp_inf(zs); 133 SPDNORMX; 134 break; 135 136 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 137 if (zc == IEEE754_CLASS_INF) 138 return ieee754sp_inf(zs); 139 /* continue to real computations */ 140 } 141 142 /* Finally get to do some computation */ 143 144 /* 145 * Do the multiplication bit first 146 * 147 * rm = xm * ym, re = xe + ye basically 148 * 149 * At this point xm and ym should have been normalized. 150 */ 151 152 /* rm = xm * ym, re = xe+ye basically */ 153 assert(xm & SP_HIDDEN_BIT); 154 assert(ym & SP_HIDDEN_BIT); 155 156 re = xe + ye; 157 158 /* Multiple 24 bit xm and ym to give 48 bit results */ 159 rm64 = (uint64_t)xm * ym; 160 161 /* Shunt to top of word */ 162 rm64 = rm64 << 16; 163 164 /* Put explicit bit at bit 62 if necessary */ 165 if ((int64_t) rm64 < 0) { 166 rm64 = rm64 >> 1; 167 re++; 168 } 169 170 assert(rm64 & (1 << 62)); 171 172 if (zc == IEEE754_CLASS_ZERO) { 173 /* 174 * Move explicit bit from bit 62 to bit 26 since the 175 * ieee754sp_format code expects the mantissa to be 176 * 27 bits wide (24 + 3 rounding bits). 177 */ 178 rm = XSPSRS64(rm64, (62 - 26)); 179 return ieee754sp_format(rs, re, rm); 180 } 181 182 /* Move explicit bit from bit 23 to bit 62 */ 183 zm64 = (uint64_t)zm << (62 - 23); 184 assert(zm64 & (1 << 62)); 185 186 /* Make the exponents the same */ 187 if (ze > re) { 188 /* 189 * Have to shift r fraction right to align. 190 */ 191 s = ze - re; 192 rm64 = XSPSRS64(rm64, s); 193 re += s; 194 } else if (re > ze) { 195 /* 196 * Have to shift z fraction right to align. 197 */ 198 s = re - ze; 199 zm64 = XSPSRS64(zm64, s); 200 ze += s; 201 } 202 assert(ze == re); 203 assert(ze <= SP_EMAX); 204 205 /* Do the addition */ 206 if (zs == rs) { 207 /* 208 * Generate 64 bit result by adding two 63 bit numbers 209 * leaving result in zm64, zs and ze. 210 */ 211 zm64 = zm64 + rm64; 212 if ((int64_t)zm64 < 0) { /* carry out */ 213 zm64 = XSPSRS1(zm64); 214 ze++; 215 } 216 } else { 217 if (zm64 >= rm64) { 218 zm64 = zm64 - rm64; 219 } else { 220 zm64 = rm64 - zm64; 221 zs = rs; 222 } 223 if (zm64 == 0) 224 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 225 226 /* 227 * Put explicit bit at bit 62 if necessary. 228 */ 229 while ((zm64 >> 62) == 0) { 230 zm64 <<= 1; 231 ze--; 232 } 233 } 234 235 /* 236 * Move explicit bit from bit 62 to bit 26 since the 237 * ieee754sp_format code expects the mantissa to be 238 * 27 bits wide (24 + 3 rounding bits). 239 */ 240 zm = XSPSRS64(zm64, (62 - 26)); 241 242 return ieee754sp_format(zs, ze, zm); 243 } 244 245 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 246 union ieee754sp y) 247 { 248 return _sp_maddf(z, x, y, 0); 249 } 250 251 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 252 union ieee754sp y) 253 { 254 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 255 } 256 257 union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x, 258 union ieee754sp y) 259 { 260 return _sp_maddf(z, x, y, 0); 261 } 262 263 union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x, 264 union ieee754sp y) 265 { 266 return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION); 267 } 268 269 union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x, 270 union ieee754sp y) 271 { 272 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION); 273 } 274 275 union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x, 276 union ieee754sp y) 277 { 278 return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); 279 } 280