xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision ba26f1477735a5ad7dd40a3227ac2a54cf82014d)
/*
 * QEMU float support
 *
 * The code in this source file is derived from release 2a of the SoftFloat
 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
 * some later contributions) are provided under that license, as detailed below.
 * It has subsequently been modified by contributors to the QEMU Project,
 * so some portions are provided under:
 *  the SoftFloat-2a license
 *  the BSD license
 *  GPL-v2-or-later
 *
 * Any future contributions to this file after December 1st 2014 will be
 * taken to be licensed under the Softfloat-2a license unless specifically
 * indicated otherwise.
 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
/*
 * Select which of the NaN operands A and B to propagate for a
 * two-operand operation, per the target's 2-NaN propagation rule.
 * At least one of A and B must be a NaN.  Raises InvalidOperation
 * if either input is an SNaN; the NaN chosen for return is silenced.
 * In default-NaN mode the default NaN is written into A and returned.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    /* Raise InvalidOperation first, even in default-NaN mode. */
    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer an SNaN, operand A first; else fall back to prop_ab. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        /* Prefer operand A over B. */
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        /* Prefer an SNaN, operand B first; else fall back to prop_ba. */
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        /* Prefer operand B over A. */
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        /* Both operands are the same kind of NaN: compare significands. */
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the positive-signed operand. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}
116
/*
 * Select the NaN result for a fused multiply-add A * B + C, applying the
 * target's Inf*0+NaN rule and its 3-NaN propagation rule.  AB_MASK is
 * the class mask of A and B; ABC_MASK additionally includes C.  At least
 * one of the three inputs must be a NaN.  The chosen NaN is silenced if
 * it was an SNaN; in default-NaN mode (or where the infzeronan rule says
 * so) the default NaN is written into A and returned instead.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    /* Some targets suppress InvalidOperation for (0 * inf) + NaN. */
    if (infzero &&
        !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        /* Propagate the addend NaN (C is the only NaN in this case). */
        ret = c;
    } else {
        /*
         * The 3-NaN rule packs an operand preference order as a
         * sequence of R_3NAN_1ST_LENGTH-bit fields; peel off fields
         * until one names a suitable NaN operand.
         */
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}
190
/*
 * Canonicalize the FloatParts structure.  Determine the class,
 * unbias the exponent, and normalize the fraction.
 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent, zero fraction: a true zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Input denormals are squashed to zero when FTZ is enabled. */
            float_raise(float_flag_input_denormal_flushed, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /*
             * Denormal: normalize the fraction and fold the shift into
             * the exponent.  The !fmt->m68k_denormal term adjusts for
             * the m68k format's different denormal exponent encoding
             * (mirrored in uncanon_normal).
             */
            int shift = frac_normalize(p);
            p->cls = float_class_denormal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /*
         * Normal number.  ARM AltHP has no inf/NaN encodings, so for it
         * even the maximum exponent denotes a normal.
         */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        /* Maximum exponent, zero fraction: infinity. */
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent, non-zero fraction: signaling or quiet NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
224
/*
 * Round and uncanonicalize a floating-point number by parts. There
 * are FRAC_SHIFT bits that may require rounding at the bottom of the
 * fraction; these bits will be removed. The exponent will be biased
 * by EXP_BIAS and must lie in the range [0, EXP_MAX-1].
 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;    /* lsb kept in the result */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1); /* round bit */
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Select the rounding increment for the current rounding mode.
     * When N > 64 and frac_lsb == 0, round_mask covers all of frac_lo,
     * so the least significant kept bit is frac_hi bit 0 and the
     * even/odd tests must look there instead of in frac_lo.
     * overflow_norm selects saturation to the maximum normal (rather
     * than infinity) on overflow.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Re-bias the exponent into the format's encoding. */
    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Normal-range result: round, renormalizing on carry-out. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Some targets re-bias overflowed results into range. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Saturate to the maximum finite number. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                /* Round to infinity. */
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Re-bias the underflowed exponent into range and round as normal. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero &&
               s->ftz_detection == float_ftz_before_rounding) {
        /* Flush-to-zero detected before rounding: result is a pure zero. */
        flags |= float_flag_output_denormal_flushed;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal (or borderline-denormal) result. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /*
             * Tininess after rounding: the result is tiny unless the
             * rounding increment would carry out of the fraction.
             */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Denormalize, jamming shifted-out bits into the sticky bit. */
        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /*
         * If rounding carried up into the implicit bit, the result has
         * become the smallest normal (modulo m68k denormal encoding).
         */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny) {
            if (s->flush_to_zero) {
                /* Flush-to-zero detected after rounding. */
                assert(s->ftz_detection == float_ftz_after_rounding);
                flags |= float_flag_output_denormal_flushed;
                p->cls = float_class_zero;
                exp = 0;
                frac_clear(p);
            } else if (flags & float_flag_inexact) {
                /* Tiny and inexact: signal underflow. */
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac_eqz(p)) {
                /* Rounded all the way down to zero. */
                p->cls = float_class_zero;
            }
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
403
404static void partsN(uncanon)(FloatPartsN *p, float_status *s,
405                            const FloatFmt *fmt)
406{
407    if (likely(is_anynorm(p->cls))) {
408        parts_uncanon_normal(p, s, fmt);
409    } else {
410        switch (p->cls) {
411        case float_class_zero:
412            p->exp = 0;
413            frac_clear(p);
414            return;
415        case float_class_inf:
416            g_assert(!fmt->arm_althp);
417            p->exp = fmt->exp_max;
418            frac_clear(p);
419            return;
420        case float_class_qnan:
421        case float_class_snan:
422            g_assert(!fmt->arm_althp);
423            p->exp = fmt->exp_max;
424            frac_shr(p, fmt->frac_shift);
425            return;
426        default:
427            break;
428        }
429        g_assert_not_reached();
430    }
431}
432
/*
 * Returns the result of adding or subtracting the values of the
 * floating-point values `a' and `b'. The operation is performed
 * according to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic.
 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Fold the subtraction into B's effective sign: a - b == a + (-b). */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    /*
     * For addition and subtraction, we will consume an
     * input denormal unless the other input is a NaN.
     */
    if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
        float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(cmask_is_only_normals(ab_mask))) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* Exact zero difference: sign depends on rounding direction. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(cmask_is_only_normals(ab_mask))) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            /* Zero + zero of the same effective sign. */
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Inf + anything (same signs) is Inf. */
            a->cls = float_class_inf;
            return a;
        }
    }

    /* One operand is zero and the other finite non-zero. */
    if (b->cls == float_class_zero) {
        g_assert(is_anynorm(a->cls));
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(is_anynorm(b->cls));
 return_b:
    /* Return B with the effective (possibly negated) sign applied. */
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
522
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b'. The operation is performed according to the IEC/IEEE Standard
 * for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(cmask_is_only_normals(ab_mask))) {
        FloatPartsW tmp;

        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }

        /* Full-width product, then narrow with a sticky bit. */
        frac_mulw(&tmp, a, b);
        frac_truncjam(a, &tmp);

        /*
         * The product of two canonical fractions may need one bit of
         * renormalization: if the implicit bit came out clear, double
         * the fraction and reduce the exponent to compensate.
         */
        a->exp += b->exp + 1;
        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
            frac_add(a, a, a);
            a->exp -= 1;
        }

        a->sign = sign;
        return a;
    }

    /* Inf * Zero == NaN */
    if (unlikely(ab_mask == float_cmask_infzero)) {
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
        parts_default_nan(a, s);
        return a;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Multiply by 0 or Inf */
    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (ab_mask & float_cmask_inf) {
        a->cls = float_class_inf;
        a->sign = sign;
        return a;
    }

    g_assert(ab_mask & float_cmask_zero);
    a->cls = float_class_zero;
    a->sign = sign;
    return a;
}
581
/*
 * Returns the result of multiplying the floating-point values `a' and
 * `b' then adding 'c', with no intermediate rounding step after the
 * multiplication. The operation is performed according to the
 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
 * The flags argument allows the caller to select negation of the
 * addend, the intermediate product, or the final result. (The
 * difference between this and having the caller do a separate
 * negation is that negating externally will flip the sign bit on NaNs.)
 *
 * SCALE is applied as a final exponent adjustment (scalbn) to a
 * finite, non-zero result.
 *
 * Requires A and C extracted into a double-sized structure to provide the
 * extra space for the widening multiply.
 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Handle the non-normal product cases: Inf*0, Inf*X, 0*X. */
    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 with no NaN inputs yields the default NaN. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf is invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* The product is zero; the result is determined by C. */
        g_assert(ab_mask & float_cmask_zero);
        if (is_anynorm(c->cls)) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                /* Target quirk: ignore the product's sign for 0 + 0. */
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    /* Finite product plus infinite addend: the result is that infinity. */
    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Renormalize the product by one bit. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* Exact cancellation to zero. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }

    /*
     * All result types except for "return the default NaN
     * because this is an Invalid Operation" go through here;
     * this matches the set of cases where we consumed a
     * denormal input.
     */
    if (abc_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
    return a;

 return_sub_zero:
    /* Exact zero from cancellation: sign depends on rounding direction. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
719
/*
 * Returns the result of dividing the floating-point value `a' by the
 * corresponding value `b'. The operation is performed according to
 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
 */
static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
                                float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    bool sign = a->sign ^ b->sign;

    if (likely(cmask_is_only_normals(ab_mask))) {
        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }
        a->sign = sign;
        /*
         * frac_div divides the fractions in place and returns the
         * exponent adjustment needed to renormalize the quotient.
         */
        a->exp -= b->exp + frac_div(a, b);
        return a;
    }

    /* 0/0 or Inf/Inf => NaN */
    if (unlikely(ab_mask == float_cmask_zero)) {
        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
        goto d_nan;
    }
    if (unlikely(ab_mask == float_cmask_inf)) {
        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
        goto d_nan;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* A denormal input is consumed, except in the X / 0 case below. */
    if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
        float_raise(float_flag_input_denormal_used, s);
    }

    a->sign = sign;

    /* Inf / X */
    if (a->cls == float_class_inf) {
        return a;
    }

    /* 0 / X */
    if (a->cls == float_class_zero) {
        return a;
    }

    /* X / Inf */
    if (b->cls == float_class_inf) {
        a->cls = float_class_zero;
        return a;
    }

    /* X / 0 => Inf */
    g_assert(b->cls == float_class_zero);
    float_raise(float_flag_divbyzero, s);
    a->cls = float_class_inf;
    return a;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
787
/*
 * Floating point remainder, per IEC/IEEE, or modulus.
 * MOD_QUOT, if non-null, receives quotient bits from frac_modrem
 * (and is zeroed for all the special-class cases below).
 */
static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
                                   uint64_t *mod_quot, float_status *s)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(cmask_is_only_normals(ab_mask))) {
        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }
        frac_modrem(a, b, mod_quot);
        return a;
    }

    /* Special-class result: report a zero quotient. */
    if (mod_quot) {
        *mod_quot = 0;
    }

    /* All the NaN cases */
    if (unlikely(ab_mask & float_cmask_anynan)) {
        return parts_pick_nan(a, b, s);
    }

    /* Inf % N; N % 0 */
    if (a->cls == float_class_inf || b->cls == float_class_zero) {
        float_raise(float_flag_invalid, s);
        parts_default_nan(a, s);
        return a;
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    /* N % Inf; 0 % N -- the result is A unchanged. */
    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
    return a;
}
828
829/*
830 * Square Root
831 *
832 * The base algorithm is lifted from
833 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
834 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
835 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
836 * and is thus MIT licenced.
837 */
838static void partsN(sqrt)(FloatPartsN *a, float_status *status,
839                         const FloatFmt *fmt)
840{
841    const uint32_t three32 = 3u << 30;
842    const uint64_t three64 = 3ull << 62;
843    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
844    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
845    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
846    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
847    uint64_t discard;
848    bool exp_odd;
849    size_t index;
850
851    if (unlikely(a->cls != float_class_normal)) {
852        switch (a->cls) {
853        case float_class_denormal:
854            if (!a->sign) {
855                /* -ve denormal will be InvalidOperation */
856                float_raise(float_flag_input_denormal_used, status);
857            }
858            break;
859        case float_class_snan:
860        case float_class_qnan:
861            parts_return_nan(a, status);
862            return;
863        case float_class_zero:
864            return;
865        case float_class_inf:
866            if (unlikely(a->sign)) {
867                goto d_nan;
868            }
869            return;
870        default:
871            g_assert_not_reached();
872        }
873    }
874
875    if (unlikely(a->sign)) {
876        goto d_nan;
877    }
878
879    /*
880     * Argument reduction.
881     * x = 4^e frac; with integer e, and frac in [1, 4)
882     * m = frac fixed point at bit 62, since we're in base 4.
883     * If base-2 exponent is odd, exchange that for multiply by 2,
884     * which results in no shift.
885     */
886    exp_odd = a->exp & 1;
887    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
888    if (!exp_odd) {
889        frac_shr(a, 1);
890    }
891
892    /*
893     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
894     *
895     * Initial estimate:
896     * 7-bit lookup table (1-bit exponent and 6-bit significand).
897     *
898     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
899     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
900     * a table lookup is faster and needs one less iteration.
901     * The 7-bit table gives |e| < 0x1.fdp-9.
902     *
903     * A Newton-Raphson iteration for r is
904     *   s = m*r
905     *   d = s*r
906     *   u = 3 - d
907     *   r = r*u/2
908     *
909     * Fixed point representations:
910     *   m, s, d, u, three are all 2.30; r is 0.32
911     */
912    m64 = a->frac_hi;
913    m32 = m64 >> 32;
914
915    r32 = rsqrt_tab[index] << 16;
916    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
917
918    s32 = ((uint64_t)m32 * r32) >> 32;
919    d32 = ((uint64_t)s32 * r32) >> 32;
920    u32 = three32 - d32;
921
922    if (N == 64) {
923        /* float64 or smaller */
924
925        r32 = ((uint64_t)r32 * u32) >> 31;
926        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
927
928        s32 = ((uint64_t)m32 * r32) >> 32;
929        d32 = ((uint64_t)s32 * r32) >> 32;
930        u32 = three32 - d32;
931
932        if (fmt->frac_size <= 23) {
933            /* float32 or smaller */
934
935            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
936            s32 = (s32 - 1) >> 6;               /* 9.23 */
937            /* s < sqrt(m) < s + 0x1.08p-23 */
938
939            /* compute nearest rounded result to 2.23 bits */
940            uint32_t d0 = (m32 << 16) - s32 * s32;
941            uint32_t d1 = s32 - d0;
942            uint32_t d2 = d1 + s32 + 1;
943            s32 += d1 >> 31;
944            a->frac_hi = (uint64_t)s32 << (64 - 25);
945
946            /* increment or decrement for inexact */
947            if (d2 != 0) {
948                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
949            }
950            goto done;
951        }
952
953        /* float64 */
954
955        r64 = (uint64_t)r32 * u32 * 2;
956        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
957        mul64To128(m64, r64, &s64, &discard);
958        mul64To128(s64, r64, &d64, &discard);
959        u64 = three64 - d64;
960
961        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
962        s64 = (s64 - 2) >> 9;                  /* 12.52 */
963
964        /* Compute nearest rounded result */
965        uint64_t d0 = (m64 << 42) - s64 * s64;
966        uint64_t d1 = s64 - d0;
967        uint64_t d2 = d1 + s64 + 1;
968        s64 += d1 >> 63;
969        a->frac_hi = s64 << (64 - 54);
970
971        /* increment or decrement for inexact */
972        if (d2 != 0) {
973            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
974        }
975        goto done;
976    }
977
978    r64 = (uint64_t)r32 * u32 * 2;
979    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
980
981    mul64To128(m64, r64, &s64, &discard);
982    mul64To128(s64, r64, &d64, &discard);
983    u64 = three64 - d64;
984    mul64To128(u64, r64, &r64, &discard);
985    r64 <<= 1;
986    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
987
988    mul64To128(m64, r64, &s64, &discard);
989    mul64To128(s64, r64, &d64, &discard);
990    u64 = three64 - d64;
991    mul64To128(u64, r64, &rh, &rl);
992    add128(rh, rl, rh, rl, &rh, &rl);
993    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
994
995    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
996    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
997    sub128(three64, 0, dh, dl, &uh, &ul);
998    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
999    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
1000
1001    sub128(sh, sl, 0, 4, &sh, &sl);
1002    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
1003    /* s < sqrt(m) < s + 1ulp */
1004
1005    /* Compute nearest rounded result */
1006    mul64To128(sl, sl, &d0h, &d0l);
1007    d0h += 2 * sh * sl;
1008    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
1009    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
1010    add128(sh, sl, 0, 1, &d2h, &d2l);
1011    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
1012    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
1013    shift128Left(sh, sl, 128 - 114, &sh, &sl);
1014
1015    /* increment or decrement for inexact */
1016    if (d2h | d2l) {
1017        if ((int64_t)(d1h ^ d2h) < 0) {
1018            sub128(sh, sl, 0, 1, &sh, &sl);
1019        } else {
1020            add128(sh, sl, 0, 1, &sh, &sl);
1021        }
1022    }
1023    a->frac_lo = sl;
1024    a->frac_hi = sh;
1025
1026 done:
1027    /* Convert back from base 4 to base 2. */
1028    a->exp >>= 1;
1029    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
1030        frac_add(a, a, a);
1031    } else {
1032        a->exp += 1;
1033    }
1034    return;
1035
1036 d_nan:
1037    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
1038    parts_default_nan(a, status);
1039}
1040
1041/*
1042 * Rounds the floating-point value `a' to an integer, and returns the
1043 * result as a floating-point value. The operation is performed
1044 * according to the IEC/IEEE Standard for Binary Floating-Point
1045 * Arithmetic.
1046 *
1047 * parts_round_to_int_normal is an internal helper function for
1048 * normal numbers only, returning true for inexact but not directly
1049 * raising float_flag_inexact.
1050 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale adjustment so a->exp cannot overflow an int. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional */
        /*
         * |a| < 1, so the rounded result is either 0 or 1;
         * "one" selects which, according to the rounding mode.
         */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            /* Values in [0.5, 1) round away from zero to 1. */
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        /* A nonzero fraction was discarded: always inexact here. */
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    /* Masks for the rounding bit and all fraction bits below the lsb. */
    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Choose the increment to add at the rounding-bit position. */
    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the fraction: renormalize, bump exponent. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}
1175
1176static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1177                                 int scale, float_status *s,
1178                                 const FloatFmt *fmt)
1179{
1180    switch (a->cls) {
1181    case float_class_qnan:
1182    case float_class_snan:
1183        parts_return_nan(a, s);
1184        break;
1185    case float_class_zero:
1186    case float_class_inf:
1187        break;
1188    case float_class_normal:
1189    case float_class_denormal:
1190        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1191            float_raise(float_flag_inexact, s);
1192        }
1193        break;
1194    default:
1195        g_assert_not_reached();
1196    }
1197}
1198
1199/*
1200 * Returns the result of converting the floating-point value `a' to
1201 * the two's complement integer format. The conversion is performed
1202 * according to the IEC/IEEE Standard for Binary Floating-Point
1203 * Arithmetic---which means in particular that the conversion is
1204 * rounded according to the current rounding mode. If `a' is a NaN,
1205 * the largest positive integer is returned. Otherwise, if the
1206 * conversion overflows, the largest integer with the same sign as `a'
1207 * is returned.
1208 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        /* Round to integral in place; inexact if any bits discarded. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude exceeds 64 bits; force saturation below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* -(uint64_t)min computes |min| without signed overflow. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                /* Saturation replaces inexact with invalid. */
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1265
1266/*
1267 *  Returns the result of converting the floating-point value `a' to
1268 *  the unsigned integer format. The conversion is performed according
1269 *  to the IEC/IEEE Standard for Binary Floating-Point
1270 *  Arithmetic---which means in particular that the conversion is
1271 *  rounded according to the current rounding mode. If `a' is a NaN,
1272 *  the largest unsigned integer is returned. Otherwise, if the
1273 *  conversion overflows, the largest unsigned integer is returned. If
 *  `a' is negative, the result is rounded and zero is returned;
1275 *  values that do not round to zero will raise the inexact exception
1276 *  flag.
1277 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            /* Rounding may have flushed a small value to zero. */
            if (p->cls == float_class_zero) {
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Negative nonzero values convert to 0 with invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Magnitude exceeds 64 bits: saturate to max. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1334
1335/*
1336 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
1338 * modulo 2**(bitsm1 + 1).
1339 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        /* Infinity has no representable low bits; flag invalid below. */
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                r = 0;
            }
            overflow = true;
        }

        /* Two's complement negation of the magnitude. */
        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* On overflow, invalid replaces any inexact flag. */
    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}
1413
1414/*
1415 * Integer to float conversions
1416 *
1417 * Returns the result of converting the two's complement integer `a'
1418 * to the floating-point format. The conversion is performed according
1419 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1420 */
1421static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1422                                  int scale, float_status *s)
1423{
1424    uint64_t f = a;
1425    int shift;
1426
1427    memset(p, 0, sizeof(*p));
1428
1429    if (a == 0) {
1430        p->cls = float_class_zero;
1431        return;
1432    }
1433
1434    p->cls = float_class_normal;
1435    if (a < 0) {
1436        f = -f;
1437        p->sign = true;
1438    }
1439    shift = clz64(f);
1440    scale = MIN(MAX(scale, -0x10000), 0x10000);
1441
1442    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1443    p->frac_hi = f << shift;
1444}
1445
1446/*
1447 * Unsigned Integer to float conversions
1448 *
1449 * Returns the result of converting the unsigned integer `a' to the
1450 * floating-point format. The conversion is performed according to the
1451 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1452 */
1453static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1454                                  int scale, float_status *status)
1455{
1456    memset(p, 0, sizeof(*p));
1457
1458    if (a == 0) {
1459        p->cls = float_class_zero;
1460    } else {
1461        int shift = clz64(a);
1462        scale = MIN(MAX(scale, -0x10000), 0x10000);
1463        p->cls = float_class_normal;
1464        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1465        p->frac_hi = a << shift;
1466    }
1467}
1468
1469/*
1470 * Float min/max.
1471 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            if (ab_mask & float_cmask_denormal) {
                float_raise(float_flag_input_denormal_used, s);
            }
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Otherwise use the ordinary NaN propagation rules. */
        return parts_pick_nan(a, b, s);
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        /*
         * Substitute extreme exponents for inf/zero so that the
         * generic magnitude comparison below orders them correctly.
         */
        switch (a->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    /* "min" is "max" with the comparison inverted. */
    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}
1576
1577/*
1578 * Floating point compare
1579 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(cmask_is_only_normals(ab_mask))) {
        FloatRelation cmp;

        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }

        if (a->sign != b->sign) {
            /* Differing signs: the result follows a's sign alone. */
            goto a_sign;
        }
        /* Same sign: compare magnitudes via exponent, then fraction. */
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        if (a->sign) {
            /* Both negative: the magnitude comparison inverts. */
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * SNaN always raises invalid; QNaN raises invalid only for
         * signaling (non-quiet) comparisons.
         */
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* Both zero: equal regardless of sign (+0 == -0). */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    /* Result determined by a's sign: negative means a < b. */
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    /* Result determined by b's sign, inverted: b negative means a > b. */
    return b->sign ? float_relation_greater : float_relation_less;
}
1646
1647/*
1648 * Multiply A by 2 raised to the power N.
1649 */
1650static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1651{
1652    switch (a->cls) {
1653    case float_class_snan:
1654    case float_class_qnan:
1655        parts_return_nan(a, s);
1656        break;
1657    case float_class_zero:
1658    case float_class_inf:
1659        break;
1660    case float_class_denormal:
1661        float_raise(float_flag_input_denormal_used, s);
1662        /* fall through */
1663    case float_class_normal:
1664        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1665        break;
1666    default:
1667        g_assert_not_reached();
1668    }
1669}
1670
1671/*
1672 * Return log2(A)
1673 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_denormal:
            if (!a->sign) {
                /* -ve denormal will be InvalidOperation */
                float_raise(float_flag_input_denormal_used, s);
            }
            break;
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }
    /* log2 of any negative value is an invalid operation. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    /*
     * Digit-by-digit computation of log2 of the fraction:
     * r accumulates result bits msb-first, t is the weight of the
     * next result bit; a0:a1 hold the evolving fraction.
     */
    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the fraction, using only as much precision as remains. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction is exactly 1.0: no further result bits. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                /* Implicit bit set after squaring: emit a 1 bit. */
                r |= t;
            }
        } else {
            /* Otherwise double the fraction; this result bit is 0. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Start from the integer part of the result: the input exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part of the result as a float in f. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /* Combine integer and fraction parts, honoring the exponent's sign. */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}
1801