1 /* 2 * IEEE754 floating point arithmetic 3 * single precision: MADDF.f (Fused Multiply Add) 4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft]) 5 * 6 * MIPS floating point support 7 * Copyright (C) 2015 Imagination Technologies, Ltd. 8 * Author: Markos Chandras <markos.chandras@imgtec.com> 9 * 10 * This program is free software; you can distribute it and/or modify it 11 * under the terms of the GNU General Public License as published by the 12 * Free Software Foundation; version 2 of the License. 13 */ 14 15 #include "ieee754sp.h" 16 17 enum maddf_flags { 18 maddf_negate_product = 1 << 0, 19 }; 20 21 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, 22 union ieee754sp y, enum maddf_flags flags) 23 { 24 int re; 25 int rs; 26 unsigned rm; 27 unsigned short lxm; 28 unsigned short hxm; 29 unsigned short lym; 30 unsigned short hym; 31 unsigned lrm; 32 unsigned hrm; 33 unsigned t; 34 unsigned at; 35 int s; 36 37 COMPXSP; 38 COMPYSP; 39 COMPZSP; 40 41 EXPLODEXSP; 42 EXPLODEYSP; 43 EXPLODEZSP; 44 45 FLUSHXSP; 46 FLUSHYSP; 47 FLUSHZSP; 48 49 ieee754_clearcx(); 50 51 switch (zc) { 52 case IEEE754_CLASS_SNAN: 53 ieee754_setcx(IEEE754_INVALID_OPERATION); 54 return ieee754sp_nanxcpt(z); 55 case IEEE754_CLASS_DNORM: 56 SPDNORMZ; 57 /* QNAN and ZERO cases are handled separately below */ 58 } 59 60 switch (CLPAIR(xc, yc)) { 61 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): 62 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): 63 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): 64 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): 65 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): 66 return ieee754sp_nanxcpt(y); 67 68 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): 69 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): 70 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): 71 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): 72 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): 73 case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): 74 return ieee754sp_nanxcpt(x); 75 76 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): 77 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): 78 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): 79 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): 80 return y; 81 82 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): 83 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): 84 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): 85 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): 86 case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): 87 return x; 88 89 /* 90 * Infinity handling 91 */ 92 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): 93 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): 94 if (zc == IEEE754_CLASS_QNAN) 95 return z; 96 ieee754_setcx(IEEE754_INVALID_OPERATION); 97 return ieee754sp_indef(); 98 99 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): 100 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): 101 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): 102 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): 103 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): 104 if (zc == IEEE754_CLASS_QNAN) 105 return z; 106 return ieee754sp_inf(xs ^ ys); 107 108 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): 109 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): 110 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): 111 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): 112 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): 113 if (zc == IEEE754_CLASS_INF) 114 return ieee754sp_inf(zs); 115 /* Multiplication is 0 so just return z */ 116 return z; 117 118 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): 119 SPDNORMX; 120 121 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): 122 if (zc == IEEE754_CLASS_QNAN) 123 return z; 124 else if (zc == IEEE754_CLASS_INF) 125 return ieee754sp_inf(zs); 126 SPDNORMY; 127 break; 128 129 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): 130 if (zc == IEEE754_CLASS_QNAN) 131 return z; 132 else if (zc == IEEE754_CLASS_INF) 133 return ieee754sp_inf(zs); 134 SPDNORMX; 135 break; 136 137 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): 138 if (zc == IEEE754_CLASS_QNAN) 139 return z; 140 else if (zc == IEEE754_CLASS_INF) 141 return ieee754sp_inf(zs); 142 /* fall through to real computations */ 143 } 144 145 /* Finally get to do some computation */ 146 147 /* 148 * Do the multiplication bit first 149 * 150 * rm = xm * ym, re = xe + ye basically 151 * 152 * At this point xm and ym should have been normalized. 153 */ 154 155 /* rm = xm * ym, re = xe+ye basically */ 156 assert(xm & SP_HIDDEN_BIT); 157 assert(ym & SP_HIDDEN_BIT); 158 159 re = xe + ye; 160 rs = xs ^ ys; 161 if (flags & maddf_negate_product) 162 rs ^= 1; 163 164 /* shunt to top of word */ 165 xm <<= 32 - (SP_FBITS + 1); 166 ym <<= 32 - (SP_FBITS + 1); 167 168 /* 169 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. 170 */ 171 lxm = xm & 0xffff; 172 hxm = xm >> 16; 173 lym = ym & 0xffff; 174 hym = ym >> 16; 175 176 lrm = lxm * lym; /* 16 * 16 => 32 */ 177 hrm = hxm * hym; /* 16 * 16 => 32 */ 178 179 t = lxm * hym; /* 16 * 16 => 32 */ 180 at = lrm + (t << 16); 181 hrm += at < lrm; 182 lrm = at; 183 hrm = hrm + (t >> 16); 184 185 t = hxm * lym; /* 16 * 16 => 32 */ 186 at = lrm + (t << 16); 187 hrm += at < lrm; 188 lrm = at; 189 hrm = hrm + (t >> 16); 190 191 rm = hrm | (lrm != 0); 192 193 /* 194 * Sticky shift down to normal rounding precision. 195 */ 196 if ((int) rm < 0) { 197 rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | 198 ((rm << (SP_FBITS + 1 + 3)) != 0); 199 re++; 200 } else { 201 rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | 202 ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); 203 } 204 assert(rm & (SP_HIDDEN_BIT << 3)); 205 206 if (zc == IEEE754_CLASS_ZERO) 207 return ieee754sp_format(rs, re, rm); 208 209 /* And now the addition */ 210 211 assert(zm & SP_HIDDEN_BIT); 212 213 /* 214 * Provide guard,round and stick bit space. 215 */ 216 zm <<= 3; 217 218 if (ze > re) { 219 /* 220 * Have to shift r fraction right to align. 221 */ 222 s = ze - re; 223 rm = XSPSRS(rm, s); 224 re += s; 225 } else if (re > ze) { 226 /* 227 * Have to shift z fraction right to align. 228 */ 229 s = re - ze; 230 zm = XSPSRS(zm, s); 231 ze += s; 232 } 233 assert(ze == re); 234 assert(ze <= SP_EMAX); 235 236 if (zs == rs) { 237 /* 238 * Generate 28 bit result of adding two 27 bit numbers 239 * leaving result in zm, zs and ze. 240 */ 241 zm = zm + rm; 242 243 if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ 244 zm = XSPSRS1(zm); 245 ze++; 246 } 247 } else { 248 if (zm >= rm) { 249 zm = zm - rm; 250 } else { 251 zm = rm - zm; 252 zs = rs; 253 } 254 if (zm == 0) 255 return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); 256 257 /* 258 * Normalize in extended single precision 259 */ 260 while ((zm >> (SP_MBITS + 3)) == 0) { 261 zm <<= 1; 262 ze--; 263 } 264 265 } 266 return ieee754sp_format(zs, ze, zm); 267 } 268 269 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, 270 union ieee754sp y) 271 { 272 return _sp_maddf(z, x, y, 0); 273 } 274 275 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, 276 union ieee754sp y) 277 { 278 return _sp_maddf(z, x, y, maddf_negate_product); 279 } 280