xref: /openbmc/qemu/target/hexagon/fma_emu.c (revision 42fe7499)
1 /*
2  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "qemu/osdep.h"
19 #include "qemu/int128.h"
20 #include "fpu/softfloat.h"
21 #include "macros.h"
22 #include "fma_emu.h"
23 
24 #define DF_INF_EXP     0x7ff
25 #define DF_BIAS        1023
26 #define DF_MANTBITS    52
27 #define DF_NAN         0xffffffffffffffffULL
28 #define DF_INF         0x7ff0000000000000ULL
29 #define DF_MINUS_INF   0xfff0000000000000ULL
30 #define DF_MAXF        0x7fefffffffffffffULL
31 #define DF_MINUS_MAXF  0xffefffffffffffffULL
32 
33 #define SF_INF_EXP     0xff
34 #define SF_BIAS        127
35 #define SF_MANTBITS    23
36 #define SF_INF         0x7f800000
37 #define SF_MINUS_INF   0xff800000
38 #define SF_MAXF        0x7f7fffff
39 #define SF_MINUS_MAXF  0xff7fffff
40 
41 #define HF_INF_EXP 0x1f
42 #define HF_BIAS 15
43 
44 #define WAY_BIG_EXP 4096
45 
46 typedef union {
47     double f;
48     uint64_t i;
49     struct {
50         uint64_t mant:52;
51         uint64_t exp:11;
52         uint64_t sign:1;
53     };
54 } Double;
55 
56 typedef union {
57     float f;
58     uint32_t i;
59     struct {
60         uint32_t mant:23;
61         uint32_t exp:8;
62         uint32_t sign:1;
63     };
64 } Float;
65 
66 static uint64_t float64_getmant(float64 f64)
67 {
68     Double a = { .i = f64 };
69     if (float64_is_normal(f64)) {
70         return a.mant | 1ULL << 52;
71     }
72     if (float64_is_zero(f64)) {
73         return 0;
74     }
75     if (float64_is_denormal(f64)) {
76         return a.mant;
77     }
78     return ~0ULL;
79 }
80 
81 int32_t float64_getexp(float64 f64)
82 {
83     Double a = { .i = f64 };
84     if (float64_is_normal(f64)) {
85         return a.exp;
86     }
87     if (float64_is_denormal(f64)) {
88         return a.exp + 1;
89     }
90     return -1;
91 }
92 
93 static uint64_t float32_getmant(float32 f32)
94 {
95     Float a = { .i = f32 };
96     if (float32_is_normal(f32)) {
97         return a.mant | 1ULL << 23;
98     }
99     if (float32_is_zero(f32)) {
100         return 0;
101     }
102     if (float32_is_denormal(f32)) {
103         return a.mant;
104     }
105     return ~0ULL;
106 }
107 
108 int32_t float32_getexp(float32 f32)
109 {
110     Float a = { .i = f32 };
111     if (float32_is_normal(f32)) {
112         return a.exp;
113     }
114     if (float32_is_denormal(f32)) {
115         return a.exp + 1;
116     }
117     return -1;
118 }
119 
120 static uint32_t int128_getw0(Int128 x)
121 {
122     return int128_getlo(x);
123 }
124 
125 static uint32_t int128_getw1(Int128 x)
126 {
127     return int128_getlo(x) >> 32;
128 }
129 
130 static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
131 {
132     Int128 a, b;
133     uint64_t pp0, pp1a, pp1b, pp1s, pp2;
134 
135     a = int128_make64(ai);
136     b = int128_make64(bi);
137     pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
138     pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
139     pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
140     pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
141 
142     pp1s = pp1a + pp1b;
143     if ((pp1s < pp1a) || (pp1s < pp1b)) {
144         pp2 += (1ULL << 32);
145     }
146     uint64_t ret_low = pp0 + (pp1s << 32);
147     if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
148         pp2 += 1;
149     }
150 
151     return int128_make128(ret_low, pp2 + (pp1s >> 32));
152 }
153 
154 static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
155 {
156     Int128 ret = int128_sub(a, b);
157     if (borrow != 0) {
158         ret = int128_sub(ret, int128_one());
159     }
160     return ret;
161 }
162 
163 typedef struct {
164     Int128 mant;
165     int32_t exp;
166     uint8_t sign;
167     uint8_t guard;
168     uint8_t round;
169     uint8_t sticky;
170 } Accum;
171 
172 static void accum_init(Accum *p)
173 {
174     p->mant = int128_zero();
175     p->exp = 0;
176     p->sign = 0;
177     p->guard = 0;
178     p->round = 0;
179     p->sticky = 0;
180 }
181 
182 static Accum accum_norm_left(Accum a)
183 {
184     a.exp--;
185     a.mant = int128_lshift(a.mant, 1);
186     a.mant = int128_or(a.mant, int128_make64(a.guard));
187     a.guard = a.round;
188     a.round = a.sticky;
189     return a;
190 }
191 
192 /* This function is marked inline for performance reasons */
193 static inline Accum accum_norm_right(Accum a, int amt)
194 {
195     if (amt > 130) {
196         a.sticky |=
197             a.round | a.guard | int128_nz(a.mant);
198         a.guard = a.round = 0;
199         a.mant = int128_zero();
200         a.exp += amt;
201         return a;
202 
203     }
204     while (amt >= 64) {
205         a.sticky |= a.round | a.guard | (int128_getlo(a.mant) != 0);
206         a.guard = (int128_getlo(a.mant) >> 63) & 1;
207         a.round = (int128_getlo(a.mant) >> 62) & 1;
208         a.mant = int128_make64(int128_gethi(a.mant));
209         a.exp += 64;
210         amt -= 64;
211     }
212     while (amt > 0) {
213         a.exp++;
214         a.sticky |= a.round;
215         a.round = a.guard;
216         a.guard = int128_getlo(a.mant) & 1;
217         a.mant = int128_rshift(a.mant, 1);
218         amt--;
219     }
220     return a;
221 }
222 
223 /*
224  * On the add/sub, we need to be able to shift out lots of bits, but need a
225  * sticky bit for what was shifted out, I think.
226  */
227 static Accum accum_add(Accum a, Accum b);
228 
229 static Accum accum_sub(Accum a, Accum b, int negate)
230 {
231     Accum ret;
232     accum_init(&ret);
233     int borrow;
234 
235     if (a.sign != b.sign) {
236         b.sign = !b.sign;
237         return accum_add(a, b);
238     }
239     if (b.exp > a.exp) {
240         /* small - big == - (big - small) */
241         return accum_sub(b, a, !negate);
242     }
243     if ((b.exp == a.exp) && (int128_gt(b.mant, a.mant))) {
244         /* small - big == - (big - small) */
245         return accum_sub(b, a, !negate);
246     }
247 
248     while (a.exp > b.exp) {
249         /* Try to normalize exponents: shrink a exponent and grow mantissa */
250         if (int128_gethi(a.mant) & (1ULL << 62)) {
251             /* Can't grow a any more */
252             break;
253         } else {
254             a = accum_norm_left(a);
255         }
256     }
257 
258     while (a.exp > b.exp) {
259         /* Try to normalize exponents: grow b exponent and shrink mantissa */
260         /* Keep around shifted out bits... we might need those later */
261         b = accum_norm_right(b, a.exp - b.exp);
262     }
263 
264     if ((int128_gt(b.mant, a.mant))) {
265         return accum_sub(b, a, !negate);
266     }
267 
268     /* OK, now things should be normalized! */
269     ret.sign = a.sign;
270     ret.exp = a.exp;
271     assert(!int128_gt(b.mant, a.mant));
272     borrow = (b.round << 2) | (b.guard << 1) | b.sticky;
273     ret.mant = int128_sub_borrow(a.mant, b.mant, (borrow != 0));
274     borrow = 0 - borrow;
275     ret.guard = (borrow >> 2) & 1;
276     ret.round = (borrow >> 1) & 1;
277     ret.sticky = (borrow >> 0) & 1;
278     if (negate) {
279         ret.sign = !ret.sign;
280     }
281     return ret;
282 }
283 
284 static Accum accum_add(Accum a, Accum b)
285 {
286     Accum ret;
287     accum_init(&ret);
288     if (a.sign != b.sign) {
289         b.sign = !b.sign;
290         return accum_sub(a, b, 0);
291     }
292     if (b.exp > a.exp) {
293         /* small + big ==  (big + small) */
294         return accum_add(b, a);
295     }
296     if ((b.exp == a.exp) && int128_gt(b.mant, a.mant)) {
297         /* small + big ==  (big + small) */
298         return accum_add(b, a);
299     }
300 
301     while (a.exp > b.exp) {
302         /* Try to normalize exponents: shrink a exponent and grow mantissa */
303         if (int128_gethi(a.mant) & (1ULL << 62)) {
304             /* Can't grow a any more */
305             break;
306         } else {
307             a = accum_norm_left(a);
308         }
309     }
310 
311     while (a.exp > b.exp) {
312         /* Try to normalize exponents: grow b exponent and shrink mantissa */
313         /* Keep around shifted out bits... we might need those later */
314         b = accum_norm_right(b, a.exp - b.exp);
315     }
316 
317     /* OK, now things should be normalized! */
318     if (int128_gt(b.mant, a.mant)) {
319         return accum_add(b, a);
320     };
321     ret.sign = a.sign;
322     ret.exp = a.exp;
323     assert(!int128_gt(b.mant, a.mant));
324     ret.mant = int128_add(a.mant, b.mant);
325     ret.guard = b.guard;
326     ret.round = b.round;
327     ret.sticky = b.sticky;
328     return ret;
329 }
330 
331 /* Return an infinity with requested sign */
332 static float64 infinite_float64(uint8_t sign)
333 {
334     if (sign) {
335         return make_float64(DF_MINUS_INF);
336     } else {
337         return make_float64(DF_INF);
338     }
339 }
340 
341 /* Return a maximum finite value with requested sign */
342 static float64 maxfinite_float64(uint8_t sign)
343 {
344     if (sign) {
345         return make_float64(DF_MINUS_MAXF);
346     } else {
347         return make_float64(DF_MAXF);
348     }
349 }
350 
351 /* Return a zero value with requested sign */
352 static float64 zero_float64(uint8_t sign)
353 {
354     if (sign) {
355         return make_float64(0x8000000000000000);
356     } else {
357         return float64_zero;
358     }
359 }
360 
361 /* Return an infinity with the requested sign */
362 float32 infinite_float32(uint8_t sign)
363 {
364     if (sign) {
365         return make_float32(SF_MINUS_INF);
366     } else {
367         return make_float32(SF_INF);
368     }
369 }
370 
371 /* Return a maximum finite value with the requested sign */
372 static float32 maxfinite_float32(uint8_t sign)
373 {
374     if (sign) {
375         return make_float32(SF_MINUS_MAXF);
376     } else {
377         return make_float32(SF_MAXF);
378     }
379 }
380 
381 /* Return a zero value with requested sign */
382 static float32 zero_float32(uint8_t sign)
383 {
384     if (sign) {
385         return make_float32(0x80000000);
386     } else {
387         return float32_zero;
388     }
389 }
390 
391 #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
392 static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
393 { \
394     if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
395         && ((a.guard | a.round | a.sticky) == 0)) { \
396         /* result zero */ \
397         switch (fp_status->float_rounding_mode) { \
398         case float_round_down: \
399             return zero_##SUFFIX(1); \
400         default: \
401             return zero_##SUFFIX(0); \
402         } \
403     } \
404     /* Normalize right */ \
405     /* We want MANTBITS bits of mantissa plus the leading one. */ \
406     /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
407     /* So we need to normalize right while the high word is non-zero and \
408     * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
409     while ((int128_gethi(a.mant) != 0) || \
410            ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
411         a = accum_norm_right(a, 1); \
412     } \
413     /* \
414      * OK, now normalize left \
415      * We want to normalize left until we have a leading one in bit 24 \
416      * Theoretically, we only need to shift a maximum of one to the left if we \
417      * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
418      * should be 0  \
419      */ \
420     while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
421         a = accum_norm_left(a); \
422     } \
423     /* \
424      * OK, now we might need to denormalize because of potential underflow. \
425      * We need to do this before rounding, and rounding might make us normal \
426      * again \
427      */ \
428     while (a.exp <= 0) { \
429         a = accum_norm_right(a, 1 - a.exp); \
430         /* \
431          * Do we have underflow? \
432          * That's when we get an inexact answer because we ran out of bits \
433          * in a denormal. \
434          */ \
435         if (a.guard || a.round || a.sticky) { \
436             float_raise(float_flag_underflow, fp_status); \
437         } \
438     } \
439     /* OK, we're relatively canonical... now we need to round */ \
440     if (a.guard || a.round || a.sticky) { \
441         float_raise(float_flag_inexact, fp_status); \
442         switch (fp_status->float_rounding_mode) { \
443         case float_round_to_zero: \
444             /* Chop and we're done */ \
445             break; \
446         case float_round_up: \
447             if (a.sign == 0) { \
448                 a.mant = int128_add(a.mant, int128_one()); \
449             } \
450             break; \
451         case float_round_down: \
452             if (a.sign != 0) { \
453                 a.mant = int128_add(a.mant, int128_one()); \
454             } \
455             break; \
456         default: \
457             if (a.round || a.sticky) { \
458                 /* round up if guard is 1, down if guard is zero */ \
459                 a.mant = int128_add(a.mant, int128_make64(a.guard)); \
460             } else if (a.guard) { \
461                 /* exactly .5, round up if odd */ \
462                 a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
463             } \
464             break; \
465         } \
466     } \
467     /* \
468      * OK, now we might have carried all the way up. \
469      * So we might need to shr once \
470      * at least we know that the lsb should be zero if we rounded and \
471      * got a carry out... \
472      */ \
473     if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
474         a = accum_norm_right(a, 1); \
475     } \
476     /* Overflow? */ \
477     if (a.exp >= INF_EXP) { \
478         /* Yep, inf result */ \
479         float_raise(float_flag_overflow, fp_status); \
480         float_raise(float_flag_inexact, fp_status); \
481         switch (fp_status->float_rounding_mode) { \
482         case float_round_to_zero: \
483             return maxfinite_##SUFFIX(a.sign); \
484         case float_round_up: \
485             if (a.sign == 0) { \
486                 return infinite_##SUFFIX(a.sign); \
487             } else { \
488                 return maxfinite_##SUFFIX(a.sign); \
489             } \
490         case float_round_down: \
491             if (a.sign != 0) { \
492                 return infinite_##SUFFIX(a.sign); \
493             } else { \
494                 return maxfinite_##SUFFIX(a.sign); \
495             } \
496         default: \
497             return infinite_##SUFFIX(a.sign); \
498         } \
499     } \
500     /* Underflow? */ \
501     if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
502         /* Leading one means: No, we're normal. So, we should be done... */ \
503         INTERNAL_TYPE ret; \
504         ret.i = 0; \
505         ret.sign = a.sign; \
506         ret.exp = a.exp; \
507         ret.mant = int128_getlo(a.mant); \
508         return ret.i; \
509     } \
510     assert(a.exp == 1); \
511     INTERNAL_TYPE ret; \
512     ret.i = 0; \
513     ret.sign = a.sign; \
514     ret.exp = 0; \
515     ret.mant = int128_getlo(a.mant); \
516     return ret.i; \
517 }
518 
519 GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
520 GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
521 
522 static bool is_inf_prod(float64 a, float64 b)
523 {
524     return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
525             (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
526             (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
527 }
528 
529 static float64 special_fma(float64 a, float64 b, float64 c,
530                            float_status *fp_status)
531 {
532     float64 ret = make_float64(0);
533 
534     /*
535      * If A multiplied by B is an exact infinity and C is also an infinity
536      * but with the opposite sign, FMA returns NaN and raises invalid.
537      */
538     uint8_t a_sign = float64_is_neg(a);
539     uint8_t b_sign = float64_is_neg(b);
540     uint8_t c_sign = float64_is_neg(c);
541     if (is_inf_prod(a, b) && float64_is_infinity(c)) {
542         if ((a_sign ^ b_sign) != c_sign) {
543             ret = make_float64(DF_NAN);
544             float_raise(float_flag_invalid, fp_status);
545             return ret;
546         }
547     }
548     if ((float64_is_infinity(a) && float64_is_zero(b)) ||
549         (float64_is_zero(a) && float64_is_infinity(b))) {
550         ret = make_float64(DF_NAN);
551         float_raise(float_flag_invalid, fp_status);
552         return ret;
553     }
554     /*
555      * If none of the above checks are true and C is a NaN,
556      * a NaN shall be returned
557      * If A or B are NaN, a NAN shall be returned.
558      */
559     if (float64_is_any_nan(a) ||
560         float64_is_any_nan(b) ||
561         float64_is_any_nan(c)) {
562         if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
563             float_raise(float_flag_invalid, fp_status);
564         }
565         if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
566             float_raise(float_flag_invalid, fp_status);
567         }
568         if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
569             float_raise(float_flag_invalid, fp_status);
570         }
571         ret = make_float64(DF_NAN);
572         return ret;
573     }
574     /*
575      * We have checked for adding opposite-signed infinities.
576      * Other infinities return infinity with the correct sign
577      */
578     if (float64_is_infinity(c)) {
579         ret = infinite_float64(c_sign);
580         return ret;
581     }
582     if (float64_is_infinity(a) || float64_is_infinity(b)) {
583         ret = infinite_float64(a_sign ^ b_sign);
584         return ret;
585     }
586     g_assert_not_reached();
587 }
588 
589 static float32 special_fmaf(float32 a, float32 b, float32 c,
590                             float_status *fp_status)
591 {
592     float64 aa, bb, cc;
593     aa = float32_to_float64(a, fp_status);
594     bb = float32_to_float64(b, fp_status);
595     cc = float32_to_float64(c, fp_status);
596     return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
597 }
598 
599 float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
600                        float_status *fp_status)
601 {
602     Accum prod;
603     Accum acc;
604     Accum result;
605     accum_init(&prod);
606     accum_init(&acc);
607     accum_init(&result);
608 
609     uint8_t a_sign = float32_is_neg(a);
610     uint8_t b_sign = float32_is_neg(b);
611     uint8_t c_sign = float32_is_neg(c);
612     if (float32_is_infinity(a) ||
613         float32_is_infinity(b) ||
614         float32_is_infinity(c)) {
615         return special_fmaf(a, b, c, fp_status);
616     }
617     if (float32_is_any_nan(a) ||
618         float32_is_any_nan(b) ||
619         float32_is_any_nan(c)) {
620         return special_fmaf(a, b, c, fp_status);
621     }
622     if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
623         float32 tmp = float32_mul(a, b, fp_status);
624         tmp = float32_add(tmp, c, fp_status);
625         return tmp;
626     }
627 
628     /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
629     prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
630 
631     /*
632      * Note: extracting the mantissa into an int is multiplying by
633      * 2**23, so adjust here
634      */
635     prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
636     prod.sign = a_sign ^ b_sign;
637     if (float32_is_zero(a) || float32_is_zero(b)) {
638         prod.exp = -2 * WAY_BIG_EXP;
639     }
640     if ((scale > 0) && float32_is_denormal(c)) {
641         acc.mant = int128_mul_6464(0, 0);
642         acc.exp = -WAY_BIG_EXP;
643         acc.sign = c_sign;
644         acc.sticky = 1;
645         result = accum_add(prod, acc);
646     } else if (!float32_is_zero(c)) {
647         acc.mant = int128_mul_6464(float32_getmant(c), 1);
648         acc.exp = float32_getexp(c);
649         acc.sign = c_sign;
650         result = accum_add(prod, acc);
651     } else {
652         result = prod;
653     }
654     result.exp += scale;
655     return accum_round_float32(result, fp_status);
656 }
657 
658 float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
659 {
660     if (float32_is_zero(a) || float32_is_zero(b)) {
661         return float32_mul(a, b, fp_status);
662     }
663     return internal_fmafx(a, b, float32_zero, 0, fp_status);
664 }
665 
666 float64 internal_mpyhh(float64 a, float64 b,
667                       unsigned long long int accumulated,
668                       float_status *fp_status)
669 {
670     Accum x;
671     unsigned long long int prod;
672     unsigned int sticky;
673     uint8_t a_sign, b_sign;
674 
675     sticky = accumulated & 1;
676     accumulated >>= 1;
677     accum_init(&x);
678     if (float64_is_zero(a) ||
679         float64_is_any_nan(a) ||
680         float64_is_infinity(a)) {
681         return float64_mul(a, b, fp_status);
682     }
683     if (float64_is_zero(b) ||
684         float64_is_any_nan(b) ||
685         float64_is_infinity(b)) {
686         return float64_mul(a, b, fp_status);
687     }
688     x.mant = int128_mul_6464(accumulated, 1);
689     x.sticky = sticky;
690     prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
691     x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
692     x.exp = float64_getexp(a) + float64_getexp(b) - DF_BIAS - 20;
693     if (!float64_is_normal(a) || !float64_is_normal(b)) {
694         /* crush to inexact zero */
695         x.sticky = 1;
696         x.exp = -4096;
697     }
698     a_sign = float64_is_neg(a);
699     b_sign = float64_is_neg(b);
700     x.sign = a_sign ^ b_sign;
701     return accum_round_float64(x, fp_status);
702 }
703