xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision d732b5a4ac3e8222e9527654f067bb766fdaecb6)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
/*
 * Select which of the two operands A and B to propagate as the NaN
 * result of a two-operand operation, per the target's configured
 * 2-NaN propagation rule.  At least one of A, B must be a NaN.
 * Raises invalid+invalid_snan if either input is a signalling NaN;
 * the returned NaN is silenced if it was an SNaN.
 */
static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s)
{
    bool have_snan = false;
    FloatPartsN *ret;
    int cmp;

    if (is_snan(a->cls) || is_snan(b->cls)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        have_snan = true;
    }

    if (s->default_nan_mode) {
        parts_default_nan(a, s);
        return a;
    }

    switch (s->float_2nan_prop_rule) {
    case float_2nan_prop_s_ab:
        /* Prefer an SNaN over a QNaN, then A over B. */
        if (have_snan) {
            ret = is_snan(a->cls) ? a : b;
            break;
        }
        /* fall through */
    case float_2nan_prop_ab:
        /* Prefer A over B. */
        ret = is_nan(a->cls) ? a : b;
        break;
    case float_2nan_prop_s_ba:
        /* Prefer an SNaN over a QNaN, then B over A. */
        if (have_snan) {
            ret = is_snan(b->cls) ? b : a;
            break;
        }
        /* fall through */
    case float_2nan_prop_ba:
        /* Prefer B over A. */
        ret = is_nan(b->cls) ? b : a;
        break;
    case float_2nan_prop_x87:
        /*
         * This implements x87 NaN propagation rules:
         * SNaN + QNaN => return the QNaN
         * two SNaNs => return the one with the larger significand, silenced
         * two QNaNs => return the one with the larger significand
         * SNaN and a non-NaN => return the SNaN, silenced
         * QNaN and a non-NaN => return the QNaN
         *
         * If we get down to comparing significands and they are the same,
         * return the NaN with the positive sign bit (if any).
         */
        if (is_snan(a->cls)) {
            if (!is_snan(b->cls)) {
                ret = is_qnan(b->cls) ? b : a;
                break;
            }
            /* Both SNaN: fall through to the significand comparison. */
        } else if (is_qnan(a->cls)) {
            if (is_snan(b->cls) || !is_qnan(b->cls)) {
                ret = a;
                break;
            }
            /* Both QNaN: fall through to the significand comparison. */
        } else {
            /* A is not a NaN, so B must be; return it. */
            ret = b;
            break;
        }
        cmp = frac_cmp(a, b);
        if (cmp == 0) {
            /* Equal significands: prefer the operand with positive sign. */
            cmp = a->sign < b->sign;
        }
        ret = cmp > 0 ? a : b;
        break;
    default:
        g_assert_not_reached();
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;
}
116
/*
 * Select which NaN to propagate for the fused multiply-add a*b + c,
 * per the target's 3-NaN propagation rule.  AB_MASK is the combined
 * class mask of A and B; ABC_MASK additionally includes C.  At least
 * one operand must be a NaN.  Raises invalid_snan for SNaN inputs
 * and (unless suppressed) invalid_imz for (Inf * 0) + NaN; the
 * returned NaN is silenced if it was an SNaN.
 */
static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
                                            FloatPartsN *c, float_status *s,
                                            int ab_mask, int abc_mask)
{
    bool infzero = (ab_mask == float_cmask_infzero);
    bool have_snan = (abc_mask & float_cmask_snan);
    FloatPartsN *ret;

    if (unlikely(have_snan)) {
        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
    }

    if (infzero &&
        !(s->float_infzeronan_rule & float_infzeronan_suppress_invalid)) {
        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
    }

    if (s->default_nan_mode) {
        /*
         * We guarantee not to require the target to tell us how to
         * pick a NaN if we're always returning the default NaN.
         * But if we're not in default-NaN mode then the target must
         * specify.
         */
        goto default_nan;
    } else if (infzero) {
        /*
         * Inf * 0 + NaN -- some implementations return the
         * default NaN here, and some return the input NaN.
         */
        switch (s->float_infzeronan_rule & ~float_infzeronan_suppress_invalid) {
        case float_infzeronan_dnan_never:
            break;
        case float_infzeronan_dnan_always:
            goto default_nan;
        case float_infzeronan_dnan_if_qnan:
            if (is_qnan(c->cls)) {
                goto default_nan;
            }
            break;
        default:
            g_assert_not_reached();
        }
        /* In the infzero case the NaN can only be C. */
        ret = c;
    } else {
        /*
         * The rule packs an operand preference order as successive
         * R_3NAN_1ST_LENGTH-bit indices into val[]; walk the sequence
         * until a (S)NaN operand is found.
         */
        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
        Float3NaNPropRule rule = s->float_3nan_prop_rule;

        assert(rule != float_3nan_prop_none);
        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
            /* We have at least one SNaN input and should prefer it */
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_snan(ret->cls));
        } else {
            do {
                ret = val[rule & R_3NAN_1ST_MASK];
                rule >>= R_3NAN_1ST_LENGTH;
            } while (!is_nan(ret->cls));
        }
    }

    if (is_snan(ret->cls)) {
        parts_silence_nan(ret, s);
    }
    return ret;

 default_nan:
    parts_default_nan(a, s);
    return a;
}
190
191/*
192 * Canonicalize the FloatParts structure.  Determine the class,
193 * unbias the exponent, and normalize the fraction.
194 */
static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
                                 const FloatFmt *fmt)
{
    if (unlikely(p->exp == 0)) {
        if (likely(frac_eqz(p))) {
            /* Zero exponent, zero fraction: true zero. */
            p->cls = float_class_zero;
        } else if (status->flush_inputs_to_zero) {
            /* Denormal input flushed to zero, flagging it. */
            float_raise(float_flag_input_denormal_flushed, status);
            p->cls = float_class_zero;
            frac_clear(p);
        } else {
            /* Denormal: normalize, folding the shift into the exponent. */
            int shift = frac_normalize(p);
            p->cls = float_class_normal;
            /*
             * NOTE(review): the !m68k_denormal term adds one for
             * non-m68k formats, i.e. the m68k denormal encoding is
             * biased one lower -- confirm against the FloatFmt users.
             */
            p->exp = fmt->frac_shift - fmt->exp_bias
                   - shift + !fmt->m68k_denormal;
        }
    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
        /* Normal number; ARM AltHP has no Inf/NaN encodings at exp_max. */
        p->cls = float_class_normal;
        p->exp -= fmt->exp_bias;
        frac_shl(p, fmt->frac_shift);
        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
    } else if (likely(frac_eqz(p))) {
        /* Maximum exponent, zero fraction: infinity. */
        p->cls = float_class_inf;
    } else {
        /* Maximum exponent, non-zero fraction: NaN (quiet or signalling). */
        frac_shl(p, fmt->frac_shift);
        p->cls = (parts_is_snan_frac(p->frac_hi, status)
                  ? float_class_snan : float_class_qnan);
    }
}
224
225/*
226 * Round and uncanonicalize a floating-point number by parts. There
227 * are FRAC_SHIFT bits that may require rounding at the bottom of the
228 * fraction; these bits will be removed. The exponent will be biased
229 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
230 */
static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
                                   const FloatFmt *fmt)
{
    const int exp_max = fmt->exp_max;
    const int frac_shift = fmt->frac_shift;
    const uint64_t round_mask = fmt->round_mask;
    /* Lowest retained fraction bit; 0 when round_mask fills frac_lo. */
    const uint64_t frac_lsb = round_mask + 1;
    /* Top bit of round_mask: the "half ulp" used for ties. */
    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
    const uint64_t roundeven_mask = round_mask | frac_lsb;
    uint64_t inc;
    bool overflow_norm = false;
    int exp, flags = 0;

    /*
     * Select the rounding increment INC and whether overflow saturates
     * to the maximum normal (overflow_norm) rather than infinity.
     */
    switch (s->float_rounding_mode) {
    case float_round_nearest_even_max:
        overflow_norm = true;
        /* fall through */
    case float_round_nearest_even:
        if (N > 64 && frac_lsb == 0) {
            /* Rounding point at the frac_lo boundary: ulp is frac_hi bit 0. */
            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        } else {
            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                   ? frac_lsbm1 : 0);
        }
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        overflow_norm = true;
        inc = 0;
        break;
    case float_round_up:
        inc = p->sign ? 0 : round_mask;
        overflow_norm = p->sign;
        break;
    case float_round_down:
        inc = p->sign ? round_mask : 0;
        overflow_norm = !p->sign;
        break;
    case float_round_to_odd:
        overflow_norm = true;
        /* fall through */
    case float_round_to_odd_inf:
        if (N > 64 && frac_lsb == 0) {
            inc = p->frac_hi & 1 ? 0 : round_mask;
        } else {
            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
        }
        break;
    default:
        g_assert_not_reached();
    }

    exp = p->exp + fmt->exp_bias;
    if (likely(exp > 0)) {
        /* Result is (at least before rounding) a normal number. */
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                /* Rounding carried out of the fraction; renormalize. */
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }

        if (fmt->arm_althp) {
            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
            if (unlikely(exp > exp_max)) {
                /* Overflow.  Return the maximum normal.  */
                flags = float_flag_invalid;
                exp = exp_max;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            }
        } else if (unlikely(exp >= exp_max)) {
            flags |= float_flag_overflow;
            if (s->rebias_overflow) {
                /* Target re-biases instead of saturating (e.g. scaled ops). */
                exp -= fmt->exp_re_bias;
            } else if (overflow_norm) {
                /* Round mode saturates to the largest finite value. */
                flags |= float_flag_inexact;
                exp = exp_max - 1;
                frac_allones(p);
                p->frac_lo &= ~round_mask;
            } else {
                /* Otherwise overflow to infinity. */
                flags |= float_flag_inexact;
                p->cls = float_class_inf;
                exp = exp_max;
                frac_clear(p);
            }
        }
        frac_shr(p, frac_shift);
    } else if (unlikely(s->rebias_underflow)) {
        /* Target re-biases underflowed results instead of denormalizing. */
        flags |= float_flag_underflow;
        exp += fmt->exp_re_bias;
        if (p->frac_lo & round_mask) {
            flags |= float_flag_inexact;
            if (frac_addi(p, p, inc)) {
                frac_shr(p, 1);
                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
                exp++;
            }
            p->frac_lo &= ~round_mask;
        }
        frac_shr(p, frac_shift);
    } else if (s->flush_to_zero) {
        /* Denormal output flushed to zero, flagging it. */
        flags |= float_flag_output_denormal_flushed;
        p->cls = float_class_zero;
        exp = 0;
        frac_clear(p);
    } else {
        /* Denormal result. */
        bool is_tiny = s->tininess_before_rounding || exp < 0;

        if (!is_tiny) {
            /*
             * Tininess after rounding: tiny unless rounding at full
             * precision would carry out of the fraction.
             */
            FloatPartsN discard;
            is_tiny = !frac_addi(&discard, p, inc);
        }

        /* Shift into denormal position, jamming lost bits as sticky. */
        frac_shrjam(p, !fmt->m68k_denormal - exp);

        if (p->frac_lo & round_mask) {
            /* Need to recompute round-to-even/round-to-odd. */
            switch (s->float_rounding_mode) {
            case float_round_nearest_even:
                if (N > 64 && frac_lsb == 0) {
                    inc = ((p->frac_hi & 1) ||
                           (p->frac_lo & round_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                } else {
                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
                           ? frac_lsbm1 : 0);
                }
                break;
            case float_round_to_odd:
            case float_round_to_odd_inf:
                if (N > 64 && frac_lsb == 0) {
                    inc = p->frac_hi & 1 ? 0 : round_mask;
                } else {
                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
                }
                break;
            default:
                break;
            }
            flags |= float_flag_inexact;
            frac_addi(p, p, inc);
            p->frac_lo &= ~round_mask;
        }

        /*
         * If rounding restored the implicit bit, the value became the
         * smallest normal (biased exp 1); m68k denormals keep exp 0.
         */
        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
        frac_shr(p, frac_shift);

        if (is_tiny && (flags & float_flag_inexact)) {
            flags |= float_flag_underflow;
        }
        if (exp == 0 && frac_eqz(p)) {
            /* The denormal rounded all the way to zero. */
            p->cls = float_class_zero;
        }
    }
    p->exp = exp;
    float_raise(flags, s);
}
394
395static void partsN(uncanon)(FloatPartsN *p, float_status *s,
396                            const FloatFmt *fmt)
397{
398    if (likely(p->cls == float_class_normal)) {
399        parts_uncanon_normal(p, s, fmt);
400    } else {
401        switch (p->cls) {
402        case float_class_zero:
403            p->exp = 0;
404            frac_clear(p);
405            return;
406        case float_class_inf:
407            g_assert(!fmt->arm_althp);
408            p->exp = fmt->exp_max;
409            frac_clear(p);
410            return;
411        case float_class_qnan:
412        case float_class_snan:
413            g_assert(!fmt->arm_althp);
414            p->exp = fmt->exp_max;
415            frac_shr(p, fmt->frac_shift);
416            return;
417        default:
418            break;
419        }
420        g_assert_not_reached();
421    }
422}
423
424/*
425 * Returns the result of adding or subtracting the values of the
426 * floating-point values `a' and `b'. The operation is performed
427 * according to the IEC/IEEE Standard for Binary Floating-Point
428 * Arithmetic.
429 */
static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, bool subtract)
{
    /* Fold the subtract operation into B's effective sign. */
    bool b_sign = b->sign ^ subtract;
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (a->sign != b_sign) {
        /* Subtraction */
        if (likely(ab_mask == float_cmask_normal)) {
            if (parts_sub_normal(a, b)) {
                return a;
            }
            /* Subtract was exact, fall through to set sign. */
            ab_mask = float_cmask_zero;
        }

        if (ab_mask == float_cmask_zero) {
            /* An exact zero difference is -0 only when rounding down. */
            a->sign = s->float_rounding_mode == float_round_down;
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (a->cls != float_class_inf) {
                /* N - Inf */
                goto return_b;
            }
            if (b->cls != float_class_inf) {
                /* Inf - N */
                return a;
            }
            /* Inf - Inf */
            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
            parts_default_nan(a, s);
            return a;
        }
    } else {
        /* Addition */
        if (likely(ab_mask == float_cmask_normal)) {
            parts_add_normal(a, b);
            return a;
        }

        if (ab_mask == float_cmask_zero) {
            /* Same-signed zeros: keep A. */
            return a;
        }

        if (unlikely(ab_mask & float_cmask_anynan)) {
            goto p_nan;
        }

        if (ab_mask & float_cmask_inf) {
            /* Same-signed infinity, or Inf + N. */
            a->cls = float_class_inf;
            return a;
        }
    }

    /* Here exactly one of A, B is zero and the other is normal. */
    if (b->cls == float_class_zero) {
        g_assert(a->cls == float_class_normal);
        return a;
    }

    g_assert(a->cls == float_class_zero);
    g_assert(b->cls == float_class_normal);
 return_b:
    /* Return B, with the subtraction folded into its sign. */
    b->sign = b_sign;
    return b;

 p_nan:
    return parts_pick_nan(a, b, s);
}
504
505/*
506 * Returns the result of multiplying the floating-point values `a' and
507 * `b'. The operation is performed according to the IEC/IEEE Standard
508 * for Binary Floating-Point Arithmetic.
509 */
510static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
511                                float_status *s)
512{
513    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
514    bool sign = a->sign ^ b->sign;
515
516    if (likely(ab_mask == float_cmask_normal)) {
517        FloatPartsW tmp;
518
519        frac_mulw(&tmp, a, b);
520        frac_truncjam(a, &tmp);
521
522        a->exp += b->exp + 1;
523        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
524            frac_add(a, a, a);
525            a->exp -= 1;
526        }
527
528        a->sign = sign;
529        return a;
530    }
531
532    /* Inf * Zero == NaN */
533    if (unlikely(ab_mask == float_cmask_infzero)) {
534        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
535        parts_default_nan(a, s);
536        return a;
537    }
538
539    if (unlikely(ab_mask & float_cmask_anynan)) {
540        return parts_pick_nan(a, b, s);
541    }
542
543    /* Multiply by 0 or Inf */
544    if (ab_mask & float_cmask_inf) {
545        a->cls = float_class_inf;
546        a->sign = sign;
547        return a;
548    }
549
550    g_assert(ab_mask & float_cmask_zero);
551    a->cls = float_class_zero;
552    a->sign = sign;
553    return a;
554}
555
556/*
557 * Returns the result of multiplying the floating-point values `a' and
558 * `b' then adding 'c', with no intermediate rounding step after the
559 * multiplication. The operation is performed according to the
560 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
561 * The flags argument allows the caller to select negation of the
562 * addend, the intermediate product, or the final result. (The
563 * difference between this and having the caller do a separate
564 * negation is that negating externally will flip the sign bit on NaNs.)
565 *
566 * Requires A and C extracted into a double-sized structure to provide the
567 * extra space for the widening multiply.
568 */
static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
                                          FloatPartsN *c, int scale,
                                          int flags, float_status *s)
{
    int ab_mask, abc_mask;
    FloatPartsW p_widen, c_widen;

    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    abc_mask = float_cmask(c->cls) | ab_mask;

    /*
     * It is implementation-defined whether the cases of (0,inf,qnan)
     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
     * they return if they do), so we have to hand this information
     * off to the target-specific pick-a-NaN routine.
     */
    if (unlikely(abc_mask & float_cmask_anynan)) {
        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
    }

    if (flags & float_muladd_negate_c) {
        c->sign ^= 1;
    }

    /* Compute the sign of the product into A. */
    a->sign ^= b->sign;
    if (flags & float_muladd_negate_product) {
        a->sign ^= 1;
    }

    /* Dispose of all non-normal A*B combinations first. */
    if (unlikely(ab_mask != float_cmask_normal)) {
        if (unlikely(ab_mask == float_cmask_infzero)) {
            /* Inf * 0 is always invalid here (no NaN operand present). */
            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
            goto d_nan;
        }

        if (ab_mask & float_cmask_inf) {
            if (c->cls == float_class_inf && a->sign != c->sign) {
                /* Inf - Inf is invalid. */
                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
                goto d_nan;
            }
            goto return_inf;
        }

        /* The product is zero; the result is C (or a signed zero). */
        g_assert(ab_mask & float_cmask_zero);
        if (c->cls == float_class_normal) {
            *a = *c;
            goto return_normal;
        }
        if (c->cls == float_class_zero) {
            if (flags & float_muladd_suppress_add_product_zero) {
                /* Target wants 0*x + 0 to take C's sign directly. */
                a->sign = c->sign;
            } else if (a->sign != c->sign) {
                /* Zeros of opposite sign: rounding-mode-dependent zero. */
                goto return_sub_zero;
            }
            goto return_zero;
        }
        g_assert(c->cls == float_class_inf);
    }

    /* Normal product plus infinite addend: result is C's infinity. */
    if (unlikely(c->cls == float_class_inf)) {
        a->sign = c->sign;
        goto return_inf;
    }

    /* Perform the multiplication step. */
    p_widen.sign = a->sign;
    p_widen.exp = a->exp + b->exp + 1;
    frac_mulw(&p_widen, a, b);
    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        /* Product in [1, 2): renormalize by doubling. */
        frac_add(&p_widen, &p_widen, &p_widen);
        p_widen.exp -= 1;
    }

    /* Perform the addition step. */
    if (c->cls != float_class_zero) {
        /* Zero-extend C to less significant bits. */
        frac_widen(&c_widen, c);
        c_widen.exp = c->exp;

        if (a->sign == c->sign) {
            parts_add_normal(&p_widen, &c_widen);
        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
            /* Exact cancellation to zero. */
            goto return_sub_zero;
        }
    }

    /* Narrow with sticky bit, for proper rounding later. */
    frac_truncjam(a, &p_widen);
    a->sign = p_widen.sign;
    a->exp = p_widen.exp;

 return_normal:
    a->exp += scale;
 finish_sign:
    if (flags & float_muladd_negate_result) {
        a->sign ^= 1;
    }
    return a;

 return_sub_zero:
    /* An exact zero difference is -0 only when rounding down. */
    a->sign = s->float_rounding_mode == float_round_down;
 return_zero:
    a->cls = float_class_zero;
    goto finish_sign;

 return_inf:
    a->cls = float_class_inf;
    goto finish_sign;

 d_nan:
    parts_default_nan(a, s);
    return a;
}
683
684/*
685 * Returns the result of dividing the floating-point value `a' by the
686 * corresponding value `b'. The operation is performed according to
687 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
688 */
689static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
690                                float_status *s)
691{
692    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
693    bool sign = a->sign ^ b->sign;
694
695    if (likely(ab_mask == float_cmask_normal)) {
696        a->sign = sign;
697        a->exp -= b->exp + frac_div(a, b);
698        return a;
699    }
700
701    /* 0/0 or Inf/Inf => NaN */
702    if (unlikely(ab_mask == float_cmask_zero)) {
703        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
704        goto d_nan;
705    }
706    if (unlikely(ab_mask == float_cmask_inf)) {
707        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
708        goto d_nan;
709    }
710
711    /* All the NaN cases */
712    if (unlikely(ab_mask & float_cmask_anynan)) {
713        return parts_pick_nan(a, b, s);
714    }
715
716    a->sign = sign;
717
718    /* Inf / X */
719    if (a->cls == float_class_inf) {
720        return a;
721    }
722
723    /* 0 / X */
724    if (a->cls == float_class_zero) {
725        return a;
726    }
727
728    /* X / Inf */
729    if (b->cls == float_class_inf) {
730        a->cls = float_class_zero;
731        return a;
732    }
733
734    /* X / 0 => Inf */
735    g_assert(b->cls == float_class_zero);
736    float_raise(float_flag_divbyzero, s);
737    a->cls = float_class_inf;
738    return a;
739
740 d_nan:
741    parts_default_nan(a, s);
742    return a;
743}
744
745/*
746 * Floating point remainder, per IEC/IEEE, or modulus.
747 */
748static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
749                                   uint64_t *mod_quot, float_status *s)
750{
751    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
752
753    if (likely(ab_mask == float_cmask_normal)) {
754        frac_modrem(a, b, mod_quot);
755        return a;
756    }
757
758    if (mod_quot) {
759        *mod_quot = 0;
760    }
761
762    /* All the NaN cases */
763    if (unlikely(ab_mask & float_cmask_anynan)) {
764        return parts_pick_nan(a, b, s);
765    }
766
767    /* Inf % N; N % 0 */
768    if (a->cls == float_class_inf || b->cls == float_class_zero) {
769        float_raise(float_flag_invalid, s);
770        parts_default_nan(a, s);
771        return a;
772    }
773
774    /* N % Inf; 0 % N */
775    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
776    return a;
777}
778
779/*
780 * Square Root
781 *
782 * The base algorithm is lifted from
783 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
784 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
785 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
786 * and is thus MIT licenced.
787 */
static void partsN(sqrt)(FloatPartsN *a, float_status *status,
                         const FloatFmt *fmt)
{
    const uint32_t three32 = 3u << 30;
    const uint64_t three64 = 3ull << 62;
    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
    uint64_t discard;
    bool exp_odd;
    size_t index;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            /* sqrt(NaN) propagates the NaN. */
            parts_return_nan(a, status);
            return;
        case float_class_zero:
            /* sqrt(+/-0) == +/-0. */
            return;
        case float_class_inf:
            /* sqrt(+Inf) == +Inf; sqrt(-Inf) is invalid. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            g_assert_not_reached();
        }
    }

    /* sqrt of a negative normal number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /*
     * Argument reduction.
     * x = 4^e frac; with integer e, and frac in [1, 4)
     * m = frac fixed point at bit 62, since we're in base 4.
     * If base-2 exponent is odd, exchange that for multiply by 2,
     * which results in no shift.
     */
    exp_odd = a->exp & 1;
    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
    if (!exp_odd) {
        frac_shr(a, 1);
    }

    /*
     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
     *
     * Initial estimate:
     * 7-bit lookup table (1-bit exponent and 6-bit significand).
     *
     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
     * a table lookup is faster and needs one less iteration.
     * The 7-bit table gives |e| < 0x1.fdp-9.
     *
     * A Newton-Raphson iteration for r is
     *   s = m*r
     *   d = s*r
     *   u = 3 - d
     *   r = r*u/2
     *
     * Fixed point representations:
     *   m, s, d, u, three are all 2.30; r is 0.32
     */
    m64 = a->frac_hi;
    m32 = m64 >> 32;

    r32 = rsqrt_tab[index] << 16;
    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */

    /* First Newton-Raphson step in 32-bit arithmetic. */
    s32 = ((uint64_t)m32 * r32) >> 32;
    d32 = ((uint64_t)s32 * r32) >> 32;
    u32 = three32 - d32;

    if (N == 64) {
        /* float64 or smaller */

        r32 = ((uint64_t)r32 * u32) >> 31;
        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */

        s32 = ((uint64_t)m32 * r32) >> 32;
        d32 = ((uint64_t)s32 * r32) >> 32;
        u32 = three32 - d32;

        if (fmt->frac_size <= 23) {
            /* float32 or smaller */

            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
            s32 = (s32 - 1) >> 6;               /* 9.23 */
            /* s < sqrt(m) < s + 0x1.08p-23 */

            /* compute nearest rounded result to 2.23 bits */
            uint32_t d0 = (m32 << 16) - s32 * s32;
            uint32_t d1 = s32 - d0;
            uint32_t d2 = d1 + s32 + 1;
            s32 += d1 >> 31;
            a->frac_hi = (uint64_t)s32 << (64 - 25);

            /* increment or decrement for inexact */
            if (d2 != 0) {
                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
            }
            goto done;
        }

        /* float64 */

        r64 = (uint64_t)r32 * u32 * 2;
        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
        mul64To128(m64, r64, &s64, &discard);
        mul64To128(s64, r64, &d64, &discard);
        u64 = three64 - d64;

        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
        s64 = (s64 - 2) >> 9;                  /* 12.52 */

        /* Compute nearest rounded result */
        uint64_t d0 = (m64 << 42) - s64 * s64;
        uint64_t d1 = s64 - d0;
        uint64_t d2 = d1 + s64 + 1;
        s64 += d1 >> 63;
        a->frac_hi = s64 << (64 - 54);

        /* increment or decrement for inexact */
        if (d2 != 0) {
            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
        }
        goto done;
    }

    /* Wider than 64 bits: continue iterating toward 128-bit precision. */
    r64 = (uint64_t)r32 * u32 * 2;
    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &r64, &discard);
    r64 <<= 1;
    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */

    mul64To128(m64, r64, &s64, &discard);
    mul64To128(s64, r64, &d64, &discard);
    u64 = three64 - d64;
    mul64To128(u64, r64, &rh, &rl);
    add128(rh, rl, rh, rl, &rh, &rl);
    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */

    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
    sub128(three64, 0, dh, dl, &uh, &ul);
    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */

    sub128(sh, sl, 0, 4, &sh, &sl);
    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
    /* s < sqrt(m) < s + 1ulp */

    /* Compute nearest rounded result */
    mul64To128(sl, sl, &d0h, &d0l);
    d0h += 2 * sh * sl;
    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
    add128(sh, sl, 0, 1, &d2h, &d2l);
    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
    shift128Left(sh, sl, 128 - 114, &sh, &sl);

    /* increment or decrement for inexact */
    if (d2h | d2l) {
        if ((int64_t)(d1h ^ d2h) < 0) {
            sub128(sh, sl, 0, 1, &sh, &sl);
        } else {
            add128(sh, sl, 0, 1, &sh, &sl);
        }
    }
    a->frac_lo = sl;
    a->frac_hi = sh;

 done:
    /* Convert back from base 4 to base 2. */
    a->exp >>= 1;
    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
        frac_add(a, a, a);
    } else {
        a->exp += 1;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
    parts_default_nan(a, status);
}
984
985/*
986 * Rounds the floating-point value `a' to an integer, and returns the
987 * result as a floating-point value. The operation is performed
988 * according to the IEC/IEEE Standard for Binary Floating-Point
989 * Arithmetic.
990 *
991 * parts_round_to_int_normal is an internal helper function for
992 * normal numbers only, returning true for inexact but not directly
993 * raising float_flag_inexact.
994 */
static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
                                        int scale, int frac_size)
{
    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
    int shift_adj;

    /* Clamp the scale so that the exponent adjustment cannot overflow. */
    scale = MIN(MAX(scale, -0x10000), 0x10000);
    a->exp += scale;

    if (a->exp < 0) {
        bool one;

        /*
         * All fractional: the magnitude is below 1, so the result is
         * either 0 or 1 depending solely on the rounding mode.
         */
        switch (rmode) {
        case float_round_nearest_even:
            one = false;
            if (a->exp == -1) {
                FloatPartsN tmp;
                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
                frac_add(&tmp, a, a);
                /* Anything remaining means frac > 0.5. */
                one = !frac_eqz(&tmp);
            }
            break;
        case float_round_ties_away:
            /* exp == -1 means the value is in [0.5, 1), which rounds to 1. */
            one = a->exp == -1;
            break;
        case float_round_to_zero:
            one = false;
            break;
        case float_round_up:
            one = !a->sign;
            break;
        case float_round_down:
            one = a->sign;
            break;
        case float_round_to_odd:
            /* Any non-zero fraction rounds to the odd integer 1. */
            one = true;
            break;
        default:
            g_assert_not_reached();
        }

        frac_clear(a);
        a->exp = 0;
        if (one) {
            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
        } else {
            a->cls = float_class_zero;
        }
        /* The discarded fraction was non-zero, so the result is inexact. */
        return true;
    }

    if (a->exp >= frac_size) {
        /* All integral */
        return false;
    }

    if (N > 64 && a->exp < N - 64) {
        /*
         * Rounding is not in the low word -- shift lsb to bit 2,
         * which leaves room for sticky and rounding bit.
         */
        shift_adj = (N - 1) - (a->exp + 2);
        frac_shrjam(a, shift_adj);
        frac_lsb = 1 << 2;
    } else {
        shift_adj = 0;
        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
    }

    /* Masks for the rounding bit and the fraction bits to be discarded. */
    frac_lsbm1 = frac_lsb >> 1;
    rnd_mask = frac_lsb - 1;
    rnd_even_mask = rnd_mask | frac_lsb;

    if (!(a->frac_lo & rnd_mask)) {
        /* Fractional bits already clear, undo the shift above. */
        frac_shl(a, shift_adj);
        return false;
    }

    /* Select the increment that implements the rounding mode. */
    switch (rmode) {
    case float_round_nearest_even:
        /* On an exact tie (rounding bit set, lsb and lower bits clear),
           do not increment, which keeps the even result. */
        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
        break;
    case float_round_ties_away:
        inc = frac_lsbm1;
        break;
    case float_round_to_zero:
        inc = 0;
        break;
    case float_round_up:
        inc = a->sign ? 0 : rnd_mask;
        break;
    case float_round_down:
        inc = a->sign ? rnd_mask : 0;
        break;
    case float_round_to_odd:
        /* Increment only if the integral lsb is currently even. */
        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
        break;
    default:
        g_assert_not_reached();
    }

    if (shift_adj == 0) {
        if (frac_addi(a, a, inc)) {
            /* Carry out of the fraction: renormalize and bump the exponent. */
            frac_shr(a, 1);
            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
            a->exp++;
        }
        a->frac_lo &= ~rnd_mask;
    } else {
        frac_addi(a, a, inc);
        a->frac_lo &= ~rnd_mask;
        /* Be careful shifting back, not to overflow */
        frac_shl(a, shift_adj - 1);
        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
            /* The increment carried into a new integral bit. */
            a->exp++;
        } else {
            /* No carry: complete the remaining one-bit left shift. */
            frac_add(a, a, a);
        }
    }
    return true;
}
1119
1120static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1121                                 int scale, float_status *s,
1122                                 const FloatFmt *fmt)
1123{
1124    switch (a->cls) {
1125    case float_class_qnan:
1126    case float_class_snan:
1127        parts_return_nan(a, s);
1128        break;
1129    case float_class_zero:
1130    case float_class_inf:
1131        break;
1132    case float_class_normal:
1133        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1134            float_raise(float_flag_inexact, s);
1135        }
1136        break;
1137    default:
1138        g_assert_not_reached();
1139    }
1140}
1141
1142/*
1143 * Returns the result of converting the floating-point value `a' to
1144 * the two's complement integer format. The conversion is performed
1145 * according to the IEC/IEEE Standard for Binary Floating-Point
1146 * Arithmetic---which means in particular that the conversion is
1147 * rounded according to the current rounding mode. If `a' is a NaN,
1148 * the largest positive integer is returned. Otherwise, if the
1149 * conversion overflows, the largest integer with the same sign as `a'
1150 * is returned.
1151 */
static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
                                     int scale, int64_t min, int64_t max,
                                     float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        /* NaN converts to the largest positive integer. */
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        /* Infinity saturates to min or max according to its sign. */
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? min : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
        } else {
            /* Magnitude exceeds 64 bits: force the overflow path below. */
            r = UINT64_MAX;
        }
        if (p->sign) {
            /*
             * -(uint64_t)min computes |min| without signed overflow,
             * even when min == INT64_MIN.
             */
            if (r <= -(uint64_t)min) {
                r = -r;
            } else {
                /* Overflow replaces inexact with invalid. */
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = min;
            }
        } else if (r > max) {
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1207
1208/*
1209 *  Returns the result of converting the floating-point value `a' to
1210 *  the unsigned integer format. The conversion is performed according
1211 *  to the IEC/IEEE Standard for Binary Floating-Point
1212 *  Arithmetic---which means in particular that the conversion is
1213 *  rounded according to the current rounding mode. If `a' is a NaN,
1214 *  the largest unsigned integer is returned. Otherwise, if the
1215 *  conversion overflows, the largest unsigned integer is returned. If
 *  'a' is negative, the result is rounded and zero is returned;
1217 *  values that do not round to zero will raise the inexact exception
1218 *  flag.
1219 */
static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
                                      int scale, uint64_t max, float_status *s)
{
    int flags = 0;
    uint64_t r;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        /* NaN converts to the largest unsigned integer. */
        flags |= float_flag_invalid;
        r = max;
        break;

    case float_class_inf:
        flags = float_flag_invalid | float_flag_invalid_cvti;
        r = p->sign ? 0 : max;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
            flags = float_flag_inexact;
            if (p->cls == float_class_zero) {
                /* A small value rounded to zero: inexact but in range. */
                r = 0;
                break;
            }
        }

        if (p->sign) {
            /* Negative values that did not round to zero are invalid. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = 0;
        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
            /* Magnitude exceeds 64 bits. */
            flags = float_flag_invalid | float_flag_invalid_cvti;
            r = max;
        } else {
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (r > max) {
                /* Overflow replaces inexact with invalid. */
                flags = float_flag_invalid | float_flag_invalid_cvti;
                r = max;
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    float_raise(flags, s);
    return r;
}
1275
1276/*
1277 * Like partsN(float_to_sint), except do not saturate the result.
 * Instead, return the rounded unbounded precision two's complement result,
1279 * modulo 2**(bitsm1 + 1).
1280 */
static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
                                            FloatRoundMode rmode,
                                            int bitsm1, float_status *s)
{
    int flags = 0;
    uint64_t r;
    bool overflow = false;

    switch (p->cls) {
    case float_class_snan:
        flags |= float_flag_invalid_snan;
        /* fall through */
    case float_class_qnan:
        flags |= float_flag_invalid;
        r = 0;
        break;

    case float_class_inf:
        overflow = true;
        r = 0;
        break;

    case float_class_zero:
        return 0;

    case float_class_normal:
        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
            flags = float_flag_inexact;
        }

        if (p->exp <= DECOMPOSED_BINARY_POINT) {
            /*
             * Because we rounded to integral, and exp < 64,
             * we know frac_low is zero.
             */
            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
            if (p->exp < bitsm1) {
                /* Result in range. */
            } else if (p->exp == bitsm1) {
                /* The only in-range value is INT_MIN. */
                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
            } else {
                overflow = true;
            }
        } else {
            /* Overflow, but there might still be bits to return. */
            int shl = p->exp - DECOMPOSED_BINARY_POINT;
            if (shl < N) {
                /* Align the low-order integral bits into frac_hi. */
                frac_shl(p, shl);
                r = p->frac_hi;
            } else {
                /* All significant bits are above the modulus. */
                r = 0;
            }
            overflow = true;
        }

        /* Two's complement negation of the (possibly truncated) magnitude. */
        if (p->sign) {
            r = -r;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Overflow raises invalid and suppresses inexact. */
    if (overflow) {
        flags = float_flag_invalid | float_flag_invalid_cvti;
    }
    float_raise(flags, s);
    return r;
}
1353
1354/*
1355 * Integer to float conversions
1356 *
1357 * Returns the result of converting the two's complement integer `a'
1358 * to the floating-point format. The conversion is performed according
1359 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1360 */
1361static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1362                                  int scale, float_status *s)
1363{
1364    uint64_t f = a;
1365    int shift;
1366
1367    memset(p, 0, sizeof(*p));
1368
1369    if (a == 0) {
1370        p->cls = float_class_zero;
1371        return;
1372    }
1373
1374    p->cls = float_class_normal;
1375    if (a < 0) {
1376        f = -f;
1377        p->sign = true;
1378    }
1379    shift = clz64(f);
1380    scale = MIN(MAX(scale, -0x10000), 0x10000);
1381
1382    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1383    p->frac_hi = f << shift;
1384}
1385
1386/*
1387 * Unsigned Integer to float conversions
1388 *
1389 * Returns the result of converting the unsigned integer `a' to the
1390 * floating-point format. The conversion is performed according to the
1391 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1392 */
1393static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1394                                  int scale, float_status *status)
1395{
1396    memset(p, 0, sizeof(*p));
1397
1398    if (a == 0) {
1399        p->cls = float_class_zero;
1400    } else {
1401        int shift = clz64(a);
1402        scale = MIN(MAX(scale, -0x10000), 0x10000);
1403        p->cls = float_class_normal;
1404        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1405        p->frac_hi = a << shift;
1406    }
1407}
1408
1409/*
1410 * Float min/max.
1411 */
static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
                                   float_status *s, int flags)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
    int a_exp, b_exp, cmp;

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * For minNum/maxNum (IEEE 754-2008)
         * or minimumNumber/maximumNumber (IEEE 754-2019),
         * if one operand is a QNaN, and the other
         * operand is numerical, then return numerical argument.
         */
        if ((flags & (minmax_isnum | minmax_isnumber))
            && !(ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_qnan)) {
            return is_nan(a->cls) ? b : a;
        }

        /*
         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
         * are removed and replaced with minimum, minimumNumber, maximum
         * and maximumNumber.
         * minimumNumber/maximumNumber behavior for SNaN is changed to:
         *   If both operands are NaNs, a QNaN is returned.
         *   If either operand is a SNaN,
         *   an invalid operation exception is signaled,
         *   but unless both operands are NaNs,
         *   the SNaN is otherwise ignored and not converted to a QNaN.
         */
        if ((flags & minmax_isnumber)
            && (ab_mask & float_cmask_snan)
            && (ab_mask & ~float_cmask_anynan)) {
            float_raise(float_flag_invalid, s);
            return is_nan(a->cls) ? b : a;
        }

        /* Otherwise propagate a NaN per the 2-NaN propagation rule. */
        return parts_pick_nan(a, b, s);
    }

    a_exp = a->exp;
    b_exp = b->exp;

    if (unlikely(ab_mask != float_cmask_normal)) {
        /*
         * Substitute extreme exponents for zero and infinity so that
         * the magnitude comparison below orders them correctly.
         */
        switch (a->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            a_exp = INT16_MAX;
            break;
        case float_class_zero:
            a_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
        switch (b->cls) {
        case float_class_normal:
            break;
        case float_class_inf:
            b_exp = INT16_MAX;
            break;
        case float_class_zero:
            b_exp = INT16_MIN;
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Compare magnitudes. */
    cmp = a_exp - b_exp;
    if (cmp == 0) {
        cmp = frac_cmp(a, b);
    }

    /*
     * Take the sign into account.
     * For ismag, only do this if the magnitudes are equal.
     */
    if (!(flags & minmax_ismag) || cmp == 0) {
        if (a->sign != b->sign) {
            /* For differing signs, the negative operand is less. */
            cmp = a->sign ? -1 : 1;
        } else if (a->sign) {
            /* For two negative operands, invert the magnitude comparison. */
            cmp = -cmp;
        }
    }

    /* For min, invert the comparison so the smaller operand wins. */
    if (flags & minmax_ismin) {
        cmp = -cmp;
    }
    return cmp < 0 ? b : a;
}
1507
1508/*
1509 * Floating point compare
1510 */
static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
                                     float_status *s, bool is_quiet)
{
    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);

    if (likely(ab_mask == float_cmask_normal)) {
        /* Fast path: both operands are finite non-zero numbers. */
        FloatRelation cmp;

        if (a->sign != b->sign) {
            /* Differing signs: the result follows the sign of 'a'. */
            goto a_sign;
        }
        if (a->exp == b->exp) {
            cmp = frac_cmp(a, b);
        } else if (a->exp < b->exp) {
            cmp = float_relation_less;
        } else {
            cmp = float_relation_greater;
        }
        /* For negative operands, invert the magnitude comparison. */
        if (a->sign) {
            cmp = -cmp;
        }
        return cmp;
    }

    if (unlikely(ab_mask & float_cmask_anynan)) {
        /*
         * Any NaN is unordered.  SNaN always raises invalid;
         * QNaN raises invalid only for a signaling compare.
         */
        if (ab_mask & float_cmask_snan) {
            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
        } else if (!is_quiet) {
            float_raise(float_flag_invalid, s);
        }
        return float_relation_unordered;
    }

    if (ab_mask & float_cmask_zero) {
        if (ab_mask == float_cmask_zero) {
            /* +0 and -0 compare equal. */
            return float_relation_equal;
        } else if (a->cls == float_class_zero) {
            goto b_sign;
        } else {
            goto a_sign;
        }
    }

    if (ab_mask == float_cmask_inf) {
        if (a->sign == b->sign) {
            /* Infinities of the same sign compare equal. */
            return float_relation_equal;
        }
    } else if (b->cls == float_class_inf) {
        goto b_sign;
    } else {
        g_assert(a->cls == float_class_inf);
    }

 a_sign:
    /* 'a' determines the result: negative 'a' is less. */
    return a->sign ? float_relation_less : float_relation_greater;
 b_sign:
    /* 'b' determines the result: negative 'b' makes 'a' greater. */
    return b->sign ? float_relation_greater : float_relation_less;
}
1569
1570/*
1571 * Multiply A by 2 raised to the power N.
1572 */
1573static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1574{
1575    switch (a->cls) {
1576    case float_class_snan:
1577    case float_class_qnan:
1578        parts_return_nan(a, s);
1579        break;
1580    case float_class_zero:
1581    case float_class_inf:
1582        break;
1583    case float_class_normal:
1584        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1585        break;
1586    default:
1587        g_assert_not_reached();
1588    }
1589}
1590
1591/*
1592 * Return log2(A)
1593 */
static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
{
    uint64_t a0, a1, r, t, ign;
    FloatPartsN f;
    int i, n, a_exp, f_exp;

    if (unlikely(a->cls != float_class_normal)) {
        switch (a->cls) {
        case float_class_snan:
        case float_class_qnan:
            parts_return_nan(a, s);
            return;
        case float_class_zero:
            float_raise(float_flag_divbyzero, s);
            /* log2(0) = -inf */
            a->cls = float_class_inf;
            a->sign = 1;
            return;
        case float_class_inf:
            /* log2(-inf) is invalid; log2(+inf) = +inf. */
            if (unlikely(a->sign)) {
                goto d_nan;
            }
            return;
        default:
            break;
        }
        g_assert_not_reached();
    }
    /* log2 of a negative number is invalid. */
    if (unlikely(a->sign)) {
        goto d_nan;
    }

    /* TODO: This algorithm loses bits too quickly for float128. */
    g_assert(N == 64);

    /*
     * Digit-by-digit computation of the fractional part of log2:
     * repeatedly square the fraction; each time the square crosses
     * the binary point a result bit (from 't') is set in 'r'.
     */
    a_exp = a->exp;
    f_exp = -1;

    r = 0;
    t = DECOMPOSED_IMPLICIT_BIT;
    a0 = a->frac_hi;
    a1 = 0;

    n = fmt->frac_size + 2;
    if (unlikely(a_exp == -1)) {
        /*
         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
         * When the value is very close to 1.0, there are lots of 1's in
         * the msb parts of the fraction.  At the end, when we subtract
         * this value from -1.0, we can see a catastrophic loss of precision,
         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
         * bits of y in the final result.  To minimize this, compute as many
         * digits as we can.
         * ??? This case needs another algorithm to avoid this.
         */
        n = fmt->frac_size * 2 + 2;
        /* Don't compute a value overlapping the sticky bit */
        n = MIN(n, 62);
    }

    for (i = 0; i < n; i++) {
        /* Square the current fraction, using the cheapest width needed. */
        if (a1) {
            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
        } else if (a0 & 0xffffffffull) {
            mul64To128(a0, a0, &a0, &a1);
        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
            /* Only high-order bits set: a 32-bit multiply suffices. */
            a0 >>= 32;
            a0 *= a0;
        } else {
            /* Fraction is exactly 1.0: the remaining digits are zero. */
            goto exact;
        }

        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
            if (unlikely(a_exp == 0 && r == 0)) {
                /*
                 * When a_exp == 0, we're computing the log2 of a value
                 * [1.0,2.0).  When the value is very close to 1.0, there
                 * are lots of 0's in the msb parts of the fraction.
                 * We need to compute more digits to produce a correct
                 * result -- restart at the top of the fraction.
                 * ??? This is likely to lose precision quickly, as for
                 * float128; we may need another method.
                 */
                f_exp -= i;
                t = r = DECOMPOSED_IMPLICIT_BIT;
                i = 0;
            } else {
                r |= t;
            }
        } else {
            /* Square stayed below 2.0: renormalize by doubling. */
            add128(a0, a1, a0, a1, &a0, &a1);
        }
        t >>= 1;
    }

    /* Set sticky for inexact. */
    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);

 exact:
    /* Start from the integer part of the result: log2 of the exponent. */
    parts_sint_to_float(a, a_exp, 0, s);
    if (r == 0) {
        return;
    }

    /* Build the fractional part from the digits accumulated in 'r'. */
    memset(&f, 0, sizeof(f));
    f.cls = float_class_normal;
    f.frac_hi = r;
    f.exp = f_exp - frac_normalize(&f);

    /* Combine integer and fraction parts with the correct signs. */
    if (a_exp < 0) {
        parts_sub_normal(a, &f);
    } else if (a_exp > 0) {
        parts_add_normal(a, &f);
    } else {
        *a = f;
    }
    return;

 d_nan:
    float_raise(float_flag_invalid, s);
    parts_default_nan(a, s);
}
1716