xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision 09951f5a27a7f8633118c1808cf17e66b30c3c62)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
/*
 * Select which of the two NaN operands A and B to propagate for a
 * two-operand operation, according to the target's float_2nan_prop_rule.
 * Raises invalid/invalid_snan if either input is a signaling NaN.
 * The chosen NaN is silenced if it is an SNaN; in default-NaN mode the
 * default NaN is written into A and A is returned instead.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer an SNaN, checking A before B; else as for prop_ab. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        /* Prefer whichever of A, B is a NaN, in that order. */
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        /* Prefer an SNaN, checking B before A; else as for prop_ba. */
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        /* Prefer whichever of B, A is a NaN, in that order. */
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
        } else {
            ret = b;
            break;
        }
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: cmp is 1 exactly when A is +ve, B -ve. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}
116
/*
 * Select the NaN result for a fused multiply-add on operands A, B, C.
 * ab_mask and abc_mask are the OR of float_cmask() over (A,B) and
 * (A,B,C) respectively.  Raises invalid/invalid_snan for SNaN inputs,
 * and invalid/invalid_imz for (Inf * 0) + NaN unless suppressed by
 * float_infzeronan_suppress_invalid.  The returned NaN is chosen per
 * float_infzeronan_rule / float_3nan_prop_rule and silenced if it is
 * an SNaN; in default-NaN mode the default NaN is written into A.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero &&
        !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        /* For Inf * 0 + NaN, the input NaN returned is always C. */
        ret = c;
    } else {
        /* Walk the operand preference order packed into the rule value. */
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}
190
191/*
192 * Canonicalize the FloatParts structure.  Determine the class,
193 * unbias the exponent, and normalize the fraction.
194 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    /*
     * It's target-dependent how to handle the case of exponent 0
     * and Integer bit set. Intel calls these "pseudodenormals",
     * and treats them as if the integer bit was 0, and never
     * produces them on output. This is the default behaviour for QEMU.
     * For m68k, the integer bit is considered validly part of the
     * input value when the exponent is 0, and may be 0 or 1,
     * giving extra range. They may also be generated as outputs.
     * (The m68k manual actually calls these values part of the
     * normalized number range, not the denormalized number range,
     * but that distinction is not important for us, because
     * m68k doesn't care about the input_denormal_used status flag.)
     * floatx80_pseudo_denormal_valid selects the m68k behaviour,
     * which changes both how we canonicalize such a value and
     * how we uncanonicalize results.
     */
    bool has_pseudo_denormals = fmt->has_explicit_bit &&
        (status->floatx80_behaviour & floatx80_pseudo_denormal_valid);

    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent and zero fraction: a true zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Target flushes input denormals to zero, with a flag. */
            float_raise(float_flag_input_denormal_flushed, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /*
             * Denormal: normalize the fraction and compensate in the
             * exponent; the pseudo-denormal case differs by one.
             */
            int shift = frac_normalize(p);
            p->cls = float_class_denormal;
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !has_pseudo_denormals;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal number (ARM AltHP uses exp_max for normals too). */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        /* Maximum exponent with zero fraction: infinity. */
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent with non-zero fraction: NaN. */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
243
244/*
245 * Round and uncanonicalize a floating-point number by parts. There
246 * are FRAC_SHIFT bits that may require rounding at the bottom of the
247 * fraction; these bits will be removed. The exponent will be biased
248 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
249 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    const uint64_t frac_lsb = round_mask + 1;
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Select the rounding increment for this rounding mode.  When
     * N > 64 and frac_lsb == 0 the result's lsb lives in frac_hi,
     * so the even/odd test must look there rather than in frac_lo.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Result lies in the normal range (before overflow checks). */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction: renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Target wants the exponent rebiased rather than Inf. */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* This rounding mode overflows to the maximum normal. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                /* Otherwise overflow to infinity. */
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Target wants tiny results rebiased back into normal range. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero &&
               s->ftz_detection == float_ftz_before_rounding) {
        /* Flush-to-zero detected on the pre-rounding value. */
        flags |= float_flag_output_denormal_flushed;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Result is denormal (possibly rounding up to zero or normal). */
        bool is_tiny = s->tininess_before_rounding || exp < 0;
        bool has_pseudo_denormals = fmt->has_explicit_bit &&
            (s->floatx80_behaviour & floatx80_pseudo_denormal_valid);

        if (!is_tiny) {
            /* Tininess after rounding: tiny iff the add does not carry out. */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift into denormal position, jamming lost bits (sticky). */
        frac_shrjam(p, !has_pseudo_denormals - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /*
         * Biased exponent becomes 1 if rounding set the implicit bit
         * (i.e. the result rounded up to normal), except on targets
         * with valid pseudo-denormals where it stays 0.
         */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !has_pseudo_denormals;
        frac_shr(p, frac_shift);

        if (is_tiny) {
            if (s->flush_to_zero) {
                assert(s->ftz_detection == float_ftz_after_rounding);
                flags |= float_flag_output_denormal_flushed;
                p->cls = float_class_zero;
                exp = 0;
                frac_clear(p);
            } else if (flags & float_flag_inexact) {
                flags |= float_flag_underflow;
            }
            if (exp == 0 && frac_eqz(p)) {
                p->cls = float_class_zero;
            }
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
424
425static void partsN(uncanon)(FloatPartsN *p, float_status *s,
426                            const FloatFmt *fmt)
427{
428    if (likely(is_anynorm(p->cls))) {
429        parts_uncanon_normal(p, s, fmt);
430    } else {
431        switch (p->cls) {
432        case float_class_zero:
433            p->exp = 0;
434            frac_clear(p);
435            return;
436        case float_class_inf:
437            g_assert(!fmt->arm_althp);
438            p->exp = fmt->exp_max;
439            frac_clear(p);
440            return;
441        case float_class_qnan:
442        case float_class_snan:
443            g_assert(!fmt->arm_althp);
444            p->exp = fmt->exp_max;
445            frac_shr(p, fmt->frac_shift);
446            return;
447        default:
448            break;
449        }
450        g_assert_not_reached();
451    }
452}
453
454/*
455 * Returns the result of adding or subtracting the values of the
456 * floating-point values `a' and `b'. The operation is performed
457 * according to the IEC/IEEE Standard for Binary Floating-Point
458 * Arithmetic.
459 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Effective sign of B after folding in the subtract flag. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    /*
     * For addition and subtraction, we will consume an
     * input denormal unless the other input is a NaN.
     */
    if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
        float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(cmask_is_only_normals(ab_mask))) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(cmask_is_only_normals(ab_mask))) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            /* Like-signed zeros: the result keeps A's sign. */
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of A, B is zero and the other is (de)normal. */
    if (b->cls == float_class_zero) {
        g_assert(is_anynorm(a->cls));
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(is_anynorm(b->cls));
 return_b:
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
543
544/*
545 * Returns the result of multiplying the floating-point values `a' and
546 * `b'. The operation is performed according to the IEC/IEEE Standard
547 * for Binary Floating-Point Arithmetic.
548 */
549static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
550                                float_status *s)
551{
552    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
553    bool sign = a->sign ^ b->sign;
554
555    if (likely(cmask_is_only_normals(ab_mask))) {
556        FloatPartsW tmp;
557
558        if (ab_mask & float_cmask_denormal) {
559            float_raise(float_flag_input_denormal_used, s);
560        }
561
562        frac_mulw(&tmp, a, b);
563        frac_truncjam(a, &tmp);
564
565        a->exp += b->exp + 1;
566        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
567            frac_add(a, a, a);
568            a->exp -= 1;
569        }
570
571        a->sign = sign;
572        return a;
573    }
574
575    /* Inf * Zero == NaN */
576    if (unlikely(ab_mask == float_cmask_infzero)) {
577        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
578        parts_default_nan(a, s);
579        return a;
580    }
581
582    if (unlikely(ab_mask & float_cmask_anynan)) {
583        return parts_pick_nan(a, b, s);
584    }
585
586    /* Multiply by 0 or Inf */
587    if (ab_mask & float_cmask_denormal) {
588        float_raise(float_flag_input_denormal_used, s);
589    }
590
591    if (ab_mask & float_cmask_inf) {
592        a->cls = float_class_inf;
593        a->sign = sign;
594        return a;
595    }
596
597    g_assert(ab_mask & float_cmask_zero);
598    a->cls = float_class_zero;
599    a->sign = sign;
600    return a;
601}
602
603/*
604 * Returns the result of multiplying the floating-point values `a' and
605 * `b' then adding 'c', with no intermediate rounding step after the
606 * multiplication. The operation is performed according to the
607 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
608 * The flags argument allows the caller to select negation of the
609 * addend, the intermediate product, or the final result. (The
610 * difference between this and having the caller do a separate
611 * negation is that negating externally will flip the sign bit on NaNs.)
612 *
613 * Requires A and C extracted into a double-sized structure to provide the
614 * extra space for the widening multiply.
615 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Special cases where A * B is not a finite non-zero product. */
    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 is invalid and produces the default NaN. */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf is invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* A * B == 0: the sum reduces to C (or a signed zero). */
        g_assert(ab_mask & float_cmask_zero);
        if (is_anynorm(c->cls)) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    if (unlikely(c->cls == float_class_inf)) {
        /* Finite product plus infinity: the result is C's infinity. */
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Renormalize: shift the wide product left one bit. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }

    /*
     * All result types except for "return the default NaN
     * because this is an Invalid Operation" go through here;
     * this matches the set of cases where we consumed a
     * denormal input.
     */
    if (abc_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }
    return a;

 return_sub_zero:
    /* An exact zero difference is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
740
741/*
742 * Returns the result of dividing the floating-point value `a' by the
743 * corresponding value `b'. The operation is performed according to
744 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
745 */
746static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
747                                float_status *s)
748{
749    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
750    bool sign = a->sign ^ b->sign;
751
752    if (likely(cmask_is_only_normals(ab_mask))) {
753        if (ab_mask & float_cmask_denormal) {
754            float_raise(float_flag_input_denormal_used, s);
755        }
756        a->sign = sign;
757        a->exp -= b->exp + frac_div(a, b);
758        return a;
759    }
760
761    /* 0/0 or Inf/Inf => NaN */
762    if (unlikely(ab_mask == float_cmask_zero)) {
763        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
764        goto d_nan;
765    }
766    if (unlikely(ab_mask == float_cmask_inf)) {
767        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
768        goto d_nan;
769    }
770
771    /* All the NaN cases */
772    if (unlikely(ab_mask & float_cmask_anynan)) {
773        return parts_pick_nan(a, b, s);
774    }
775
776    if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
777        float_raise(float_flag_input_denormal_used, s);
778    }
779
780    a->sign = sign;
781
782    /* Inf / X */
783    if (a->cls == float_class_inf) {
784        return a;
785    }
786
787    /* 0 / X */
788    if (a->cls == float_class_zero) {
789        return a;
790    }
791
792    /* X / Inf */
793    if (b->cls == float_class_inf) {
794        a->cls = float_class_zero;
795        return a;
796    }
797
798    /* X / 0 => Inf */
799    g_assert(b->cls == float_class_zero);
800    float_raise(float_flag_divbyzero, s);
801    a->cls = float_class_inf;
802    return a;
803
804 d_nan:
805    parts_default_nan(a, s);
806    return a;
807}
808
809/*
810 * Floating point remainder, per IEC/IEEE, or modulus.
811 */
812static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
813                                   uint64_t *mod_quot, float_status *s)
814{
815    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
816
817    if (likely(cmask_is_only_normals(ab_mask))) {
818        if (ab_mask & float_cmask_denormal) {
819            float_raise(float_flag_input_denormal_used, s);
820        }
821        frac_modrem(a, b, mod_quot);
822        return a;
823    }
824
825    if (mod_quot) {
826        *mod_quot = 0;
827    }
828
829    /* All the NaN cases */
830    if (unlikely(ab_mask & float_cmask_anynan)) {
831        return parts_pick_nan(a, b, s);
832    }
833
834    /* Inf % N; N % 0 */
835    if (a->cls == float_class_inf || b->cls == float_class_zero) {
836        float_raise(float_flag_invalid, s);
837        parts_default_nan(a, s);
838        return a;
839    }
840
841    if (ab_mask & float_cmask_denormal) {
842        float_raise(float_flag_input_denormal_used, s);
843    }
844
845    /* N % Inf; 0 % N */
846    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
847    return a;
848}
849
850/*
851 * Square Root
852 *
853 * The base algorithm is lifted from
854 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
855 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
856 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
857 * and is thus MIT licenced.
858 */
859static void partsN(sqrt)(FloatPartsN *a, float_status *status,
860                         const FloatFmt *fmt)
861{
862    const uint32_t three32 = 3u << 30;
863    const uint64_t three64 = 3ull << 62;
864    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
865    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
866    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
867    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
868    uint64_t discard;
869    bool exp_odd;
870    size_t index;
871
872    if (unlikely(a->cls != float_class_normal)) {
873        switch (a->cls) {
874        case float_class_denormal:
875            if (!a->sign) {
876                /* -ve denormal will be InvalidOperation */
877                float_raise(float_flag_input_denormal_used, status);
878            }
879            break;
880        case float_class_snan:
881        case float_class_qnan:
882            parts_return_nan(a, status);
883            return;
884        case float_class_zero:
885            return;
886        case float_class_inf:
887            if (unlikely(a->sign)) {
888                goto d_nan;
889            }
890            return;
891        default:
892            g_assert_not_reached();
893        }
894    }
895
896    if (unlikely(a->sign)) {
897        goto d_nan;
898    }
899
900    /*
901     * Argument reduction.
902     * x = 4^e frac; with integer e, and frac in [1, 4)
903     * m = frac fixed point at bit 62, since we're in base 4.
904     * If base-2 exponent is odd, exchange that for multiply by 2,
905     * which results in no shift.
906     */
907    exp_odd = a->exp & 1;
908    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
909    if (!exp_odd) {
910        frac_shr(a, 1);
911    }
912
913    /*
914     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
915     *
916     * Initial estimate:
917     * 7-bit lookup table (1-bit exponent and 6-bit significand).
918     *
919     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
920     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
921     * a table lookup is faster and needs one less iteration.
922     * The 7-bit table gives |e| < 0x1.fdp-9.
923     *
924     * A Newton-Raphson iteration for r is
925     *   s = m*r
926     *   d = s*r
927     *   u = 3 - d
928     *   r = r*u/2
929     *
930     * Fixed point representations:
931     *   m, s, d, u, three are all 2.30; r is 0.32
932     */
933    m64 = a->frac_hi;
934    m32 = m64 >> 32;
935
936    r32 = rsqrt_tab[index] << 16;
937    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
938
939    s32 = ((uint64_t)m32 * r32) >> 32;
940    d32 = ((uint64_t)s32 * r32) >> 32;
941    u32 = three32 - d32;
942
943    if (N == 64) {
944        /* float64 or smaller */
945
946        r32 = ((uint64_t)r32 * u32) >> 31;
947        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
948
949        s32 = ((uint64_t)m32 * r32) >> 32;
950        d32 = ((uint64_t)s32 * r32) >> 32;
951        u32 = three32 - d32;
952
953        if (fmt->frac_size <= 23) {
954            /* float32 or smaller */
955
956            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
957            s32 = (s32 - 1) >> 6;               /* 9.23 */
958            /* s < sqrt(m) < s + 0x1.08p-23 */
959
960            /* compute nearest rounded result to 2.23 bits */
961            uint32_t d0 = (m32 << 16) - s32 * s32;
962            uint32_t d1 = s32 - d0;
963            uint32_t d2 = d1 + s32 + 1;
964            s32 += d1 >> 31;
965            a->frac_hi = (uint64_t)s32 << (64 - 25);
966
967            /* increment or decrement for inexact */
968            if (d2 != 0) {
969                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
970            }
971            goto done;
972        }
973
974        /* float64 */
975
976        r64 = (uint64_t)r32 * u32 * 2;
977        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
978        mul64To128(m64, r64, &s64, &discard);
979        mul64To128(s64, r64, &d64, &discard);
980        u64 = three64 - d64;
981
982        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
983        s64 = (s64 - 2) >> 9;                  /* 12.52 */
984
985        /* Compute nearest rounded result */
986        uint64_t d0 = (m64 << 42) - s64 * s64;
987        uint64_t d1 = s64 - d0;
988        uint64_t d2 = d1 + s64 + 1;
989        s64 += d1 >> 63;
990        a->frac_hi = s64 << (64 - 54);
991
992        /* increment or decrement for inexact */
993        if (d2 != 0) {
994            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
995        }
996        goto done;
997    }
998
999    r64 = (uint64_t)r32 * u32 * 2;
1000    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
1001
1002    mul64To128(m64, r64, &s64, &discard);
1003    mul64To128(s64, r64, &d64, &discard);
1004    u64 = three64 - d64;
1005    mul64To128(u64, r64, &r64, &discard);
1006    r64 <<= 1;
1007    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
1008
1009    mul64To128(m64, r64, &s64, &discard);
1010    mul64To128(s64, r64, &d64, &discard);
1011    u64 = three64 - d64;
1012    mul64To128(u64, r64, &rh, &rl);
1013    add128(rh, rl, rh, rl, &rh, &rl);
1014    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
1015
1016    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
1017    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
1018    sub128(three64, 0, dh, dl, &uh, &ul);
1019    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
1020    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
1021
1022    sub128(sh, sl, 0, 4, &sh, &sl);
1023    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
1024    /* s < sqrt(m) < s + 1ulp */
1025
1026    /* Compute nearest rounded result */
1027    mul64To128(sl, sl, &d0h, &d0l);
1028    d0h += 2 * sh * sl;
1029    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
1030    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
1031    add128(sh, sl, 0, 1, &d2h, &d2l);
1032    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
1033    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
1034    shift128Left(sh, sl, 128 - 114, &sh, &sl);
1035
1036    /* increment or decrement for inexact */
1037    if (d2h | d2l) {
1038        if ((int64_t)(d1h ^ d2h) < 0) {
1039            sub128(sh, sl, 0, 1, &sh, &sl);
1040        } else {
1041            add128(sh, sl, 0, 1, &sh, &sl);
1042        }
1043    }
1044    a->frac_lo = sl;
1045    a->frac_hi = sh;
1046
1047 done:
1048    /* Convert back from base 4 to base 2. */
1049    a->exp >>= 1;
1050    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
1051        frac_add(a, a, a);
1052    } else {
1053        a->exp += 1;
1054    }
1055    return;
1056
1057 d_nan:
1058    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
1059    parts_default_nan(a, status);
1060}
1061
1062/*
1063 * Rounds the floating-point value `a' to an integer, and returns the
1064 * result as a floating-point value. The operation is performed
1065 * according to the IEC/IEEE Standard for Binary Floating-Point
1066 * Arithmetic.
1067 *
1068 * parts_round_to_int_normal is an internal helper function for
1069 * normal numbers only, returning true for inexact but not directly
1070 * raising float_flag_inexact.
1071 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp scale so the exponent adjustment cannot overflow int. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /* All fractional: the result is either zero or +/-1. */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            /* Magnitudes in [0.5, 1) round away to 1; smaller to 0. */
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            /* Zero is even, so any nonzero fraction rounds to 1. */
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        /* The discarded fraction was nonzero, so the result is inexact. */
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Choose the increment to apply below the integral lsb. */
    switch (rmode) {
    case float_round_nearest_even:
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the fraction: renormalize. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            a->exp++;
        } else {
            frac_add(a, a, a);
        }
    }
    return true;
}
1196
1197static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1198                                 int scale, float_status *s,
1199                                 const FloatFmt *fmt)
1200{
1201    switch (a->cls) {
1202    case float_class_qnan:
1203    case float_class_snan:
1204        parts_return_nan(a, s);
1205        break;
1206    case float_class_zero:
1207    case float_class_inf:
1208        break;
1209    case float_class_normal:
1210    case float_class_denormal:
1211        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1212            float_raise(float_flag_inexact, s);
1213        }
1214        break;
1215    default:
1216        g_assert_not_reached();
1217    }
1218}
1219
1220/*
1221 * Returns the result of converting the floating-point value `a' to
1222 * the two's complement integer format. The conversion is performed
1223 * according to the IEC/IEEE Standard for Binary Floating-Point
1224 * Arithmetic---which means in particular that the conversion is
1225 * rounded according to the current rounding mode. If `a' is a NaN,
1226 * the largest positive integer is returned. Otherwise, if the
1227 * conversion overflows, the largest integer with the same sign as `a'
1228 * is returned.
1229 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        /* NaN converts to the largest positive integer. */
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude does not fit in 64 bits: force overflow below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /* Note -(uint64_t)min is well-defined even for INT64_MIN. */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                /* Invalid replaces any inexact raised by rounding. */
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1286
1287/*
1288 *  Returns the result of converting the floating-point value `a' to
1289 *  the unsigned integer format. The conversion is performed according
1290 *  to the IEC/IEEE Standard for Binary Floating-Point
1291 *  Arithmetic---which means in particular that the conversion is
1292 *  rounded according to the current rounding mode. If `a' is a NaN,
1293 *  the largest unsigned integer is returned. Otherwise, if the
1294 *  conversion overflows, the largest unsigned integer is returned. If
1295 *  the 'a' is negative, the result is rounded and zero is returned;
1296 *  values that do not round to zero will raise the inexact exception
1297 *  flag.
1298 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        /* NaN converts to the largest unsigned integer. */
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /* Values that rounded to zero raise only inexact. */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Negative values that did not round to zero are invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Magnitude does not fit in 64 bits. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1355
1356/*
1357 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
1359 * modulo 2**(bitsm1 + 1).
1360 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
    case float_class_denormal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                r = 0;
            }
            overflow = true;
        }

        /* Two's complement negation yields the modular result. */
        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    if (overflow) {
        /* Invalid replaces any inexact raised by rounding. */
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}
1434
1435/*
1436 * Integer to float conversions
1437 *
1438 * Returns the result of converting the two's complement integer `a'
1439 * to the floating-point format. The conversion is performed according
1440 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1441 */
1442static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1443                                  int scale, float_status *s)
1444{
1445    uint64_t f = a;
1446    int shift;
1447
1448    memset(p, 0, sizeof(*p));
1449
1450    if (a == 0) {
1451        p->cls = float_class_zero;
1452        return;
1453    }
1454
1455    p->cls = float_class_normal;
1456    if (a < 0) {
1457        f = -f;
1458        p->sign = true;
1459    }
1460    shift = clz64(f);
1461    scale = MIN(MAX(scale, -0x10000), 0x10000);
1462
1463    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1464    p->frac_hi = f << shift;
1465}
1466
1467/*
1468 * Unsigned Integer to float conversions
1469 *
1470 * Returns the result of converting the unsigned integer `a' to the
1471 * floating-point format. The conversion is performed according to the
1472 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1473 */
1474static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1475                                  int scale, float_status *status)
1476{
1477    memset(p, 0, sizeof(*p));
1478
1479    if (a == 0) {
1480        p->cls = float_class_zero;
1481    } else {
1482        int shift = clz64(a);
1483        scale = MIN(MAX(scale, -0x10000), 0x10000);
1484        p->cls = float_class_normal;
1485        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1486        p->frac_hi = a << shift;
1487    }
1488}
1489
1490/*
1491 * Float min/max.
1492 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            if (ab_mask & float_cmask_denormal) {
                float_raise(float_flag_input_denormal_used, s);
            }
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Both NaN, or SNaN without isnumber: normal NaN propagation. */
        return parts_pick_nan(a, b, s);
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(!cmask_is_only_normals(ab_mask))) {
        /*
         * Substitute extreme exponents for infinity and zero so that
         * the magnitude comparison below works uniformly.
         */
        switch (a->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
        case float_class_denormal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    /* For min, select the smaller operand by negating the comparison. */
    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}
1597
1598/*
1599 * Floating point compare
1600 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(cmask_is_only_normals(ab_mask))) {
        /* Fast path: both operands finite and nonzero. */
        FloatRelation cmp;

        if (ab_mask & float_cmask_denormal) {
            float_raise(float_flag_input_denormal_used, s);
        }

        /* Differing signs order by sign alone (zeros handled below). */
        if (a->sign != b->sign) {
            goto a_sign;
        }
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        /* For negative operands the magnitude ordering is inverted. */
        if (a->sign) {
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /* Quiet compares raise invalid only for signaling NaNs. */
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_denormal) {
        float_raise(float_flag_input_denormal_used, s);
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    /* At least one operand is infinity here. */
    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    return b->sign ? float_relation_greater : float_relation_less;
}
1667
1668/*
1669 * Multiply A by 2 raised to the power N.
1670 */
1671static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1672{
1673    switch (a->cls) {
1674    case float_class_snan:
1675    case float_class_qnan:
1676        parts_return_nan(a, s);
1677        break;
1678    case float_class_zero:
1679    case float_class_inf:
1680        break;
1681    case float_class_denormal:
1682        float_raise(float_flag_input_denormal_used, s);
1683        /* fall through */
1684    case float_class_normal:
1685        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1686        break;
1687    default:
1688        g_assert_not_reached();
1689    }
1690}
1691
1692/*
1693 * Return log2(A)
1694 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_denormal:
            if (!a->sign) {
                /* -ve denormal will be InvalidOperation */
                float_raise(float_flag_input_denormal_used, s);
            }
            break;
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            /* log2(+inf) = +inf */
            return;
        default:
            g_assert_not_reached();
        }
    }
    /* log2 of a negative number is an invalid operation. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    a_exp = a->exp;
    f_exp = -1;

    /*
     * Compute fraction bits of log2 one at a time by repeated
     * squaring: r accumulates result bits at position t.
     */
    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the running fraction, using the cheapest form. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Only the high 32 bits are nonzero: narrow 32x32 multiply. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction is exactly 1.0: all remaining digits are zero. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Square stayed below 2.0: this digit is 0, renormalize. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* The integer part of the result is log2 of the input exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Normalize the fraction part of the result. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /* Combine integer and fraction parts, matching their signs. */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}
1822