xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision ed1e71dac96a3bf2236ece81916d4fc1ccbce029)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    bool have_snan = false;
43    FloatPartsN *ret;
44    int cmp;
45
46    if (is_snan(a->cls) || is_snan(b->cls)) {
47        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
48        have_snan = true;
49    }
50
51    if (s->default_nan_mode) {
52        parts_default_nan(a, s);
53        return a;
54    }
55
56    switch (s->float_2nan_prop_rule) {
57    case float_2nan_prop_s_ab:
58        if (have_snan) {
59            ret = is_snan(a->cls) ? a : b;
60            break;
61        }
62        /* fall through */
63    case float_2nan_prop_ab:
64        ret = is_nan(a->cls) ? a : b;
65        break;
66    case float_2nan_prop_s_ba:
67        if (have_snan) {
68            ret = is_snan(b->cls) ? b : a;
69            break;
70        }
71        /* fall through */
72    case float_2nan_prop_ba:
73        ret = is_nan(b->cls) ? b : a;
74        break;
75    case float_2nan_prop_x87:
76        /*
77         * This implements x87 NaN propagation rules:
78         * SNaN + QNaN => return the QNaN
79         * two SNaNs => return the one with the larger significand, silenced
80         * two QNaNs => return the one with the larger significand
81         * SNaN and a non-NaN => return the SNaN, silenced
82         * QNaN and a non-NaN => return the QNaN
83         *
84         * If we get down to comparing significands and they are the same,
85         * return the NaN with the positive sign bit (if any).
86         */
87        if (is_snan(a->cls)) {
88            if (!is_snan(b->cls)) {
89                ret = is_qnan(b->cls) ? b : a;
90                break;
91            }
92        } else if (is_qnan(a->cls)) {
93            if (is_snan(b->cls) || !is_qnan(b->cls)) {
94                ret = a;
95                break;
96            }
97        } else {
98            ret = b;
99            break;
100        }
101        cmp = frac_cmp(a, b);
102        if (cmp == 0) {
103            cmp = a->sign < b->sign;
104        }
105        ret = cmp > 0 ? a : b;
106        break;
107    default:
108        g_assert_not_reached();
109    }
110
111    if (is_snan(ret->cls)) {
112        parts_silence_nan(ret, s);
113    }
114    return ret;
115}
116
117static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
118                                            FloatPartsN *c, float_status *s,
119                                            int ab_mask, int abc_mask)
120{
121    bool infzero = (ab_mask == float_cmask_infzero);
122    bool have_snan = (abc_mask & float_cmask_snan);
123    FloatPartsN *ret;
124
125    if (unlikely(have_snan)) {
126        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
127    }
128
129    if (infzero) {
130        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
131        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
132    }
133
134    if (s->default_nan_mode) {
135        /*
136         * We guarantee not to require the target to tell us how to
137         * pick a NaN if we're always returning the default NaN.
138         * But if we're not in default-NaN mode then the target must
139         * specify.
140         */
141        goto default_nan;
142    } else if (infzero) {
143        /*
144         * Inf * 0 + NaN -- some implementations return the
145         * default NaN here, and some return the input NaN.
146         */
147        switch (s->float_infzeronan_rule) {
148        case float_infzeronan_dnan_never:
149            break;
150        case float_infzeronan_dnan_always:
151            goto default_nan;
152        case float_infzeronan_dnan_if_qnan:
153            if (is_qnan(c->cls)) {
154                goto default_nan;
155            }
156            break;
157        default:
158            g_assert_not_reached();
159        }
160        ret = c;
161    } else {
162        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
163        Float3NaNPropRule rule = s->float_3nan_prop_rule;
164
165        assert(rule != float_3nan_prop_none);
166        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
167            /* We have at least one SNaN input and should prefer it */
168            do {
169                ret = val[rule & R_3NAN_1ST_MASK];
170                rule >>= R_3NAN_1ST_LENGTH;
171            } while (!is_snan(ret->cls));
172        } else {
173            do {
174                ret = val[rule & R_3NAN_1ST_MASK];
175                rule >>= R_3NAN_1ST_LENGTH;
176            } while (!is_nan(ret->cls));
177        }
178    }
179
180    if (is_snan(ret->cls)) {
181        parts_silence_nan(ret, s);
182    }
183    return ret;
184
185 default_nan:
186    parts_default_nan(a, s);
187    return a;
188}
189
190/*
191 * Canonicalize the FloatParts structure.  Determine the class,
192 * unbias the exponent, and normalize the fraction.
193 */
194static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
195                                 const FloatFmt *fmt)
196{
197    if (unlikely(p->exp == 0)) {
198        if (likely(frac_eqz(p))) {
199            p->cls = float_class_zero;
200        } else if (status->flush_inputs_to_zero) {
201            float_raise(float_flag_input_denormal, status);
202            p->cls = float_class_zero;
203            frac_clear(p);
204        } else {
205            int shift = frac_normalize(p);
206            p->cls = float_class_normal;
207            p->exp = fmt->frac_shift - fmt->exp_bias
208                   - shift + !fmt->m68k_denormal;
209        }
210    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
211        p->cls = float_class_normal;
212        p->exp -= fmt->exp_bias;
213        frac_shl(p, fmt->frac_shift);
214        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
215    } else if (likely(frac_eqz(p))) {
216        p->cls = float_class_inf;
217    } else {
218        frac_shl(p, fmt->frac_shift);
219        p->cls = (parts_is_snan_frac(p->frac_hi, status)
220                  ? float_class_snan : float_class_qnan);
221    }
222}
223
224/*
225 * Round and uncanonicalize a floating-point number by parts. There
226 * are FRAC_SHIFT bits that may require rounding at the bottom of the
227 * fraction; these bits will be removed. The exponent will be biased
228 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
229 */
230static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
231                                   const FloatFmt *fmt)
232{
233    const int exp_max = fmt->exp_max;
234    const int frac_shift = fmt->frac_shift;
235    const uint64_t round_mask = fmt->round_mask;
236    const uint64_t frac_lsb = round_mask + 1;
237    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
238    const uint64_t roundeven_mask = round_mask | frac_lsb;
239    uint64_t inc;
240    bool overflow_norm = false;
241    int exp, flags = 0;
242
243    switch (s->float_rounding_mode) {
244    case float_round_nearest_even:
245        if (N > 64 && frac_lsb == 0) {
246            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
247                   ? frac_lsbm1 : 0);
248        } else {
249            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
250                   ? frac_lsbm1 : 0);
251        }
252        break;
253    case float_round_ties_away:
254        inc = frac_lsbm1;
255        break;
256    case float_round_to_zero:
257        overflow_norm = true;
258        inc = 0;
259        break;
260    case float_round_up:
261        inc = p->sign ? 0 : round_mask;
262        overflow_norm = p->sign;
263        break;
264    case float_round_down:
265        inc = p->sign ? round_mask : 0;
266        overflow_norm = !p->sign;
267        break;
268    case float_round_to_odd:
269        overflow_norm = true;
270        /* fall through */
271    case float_round_to_odd_inf:
272        if (N > 64 && frac_lsb == 0) {
273            inc = p->frac_hi & 1 ? 0 : round_mask;
274        } else {
275            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
276        }
277        break;
278    default:
279        g_assert_not_reached();
280    }
281
282    exp = p->exp + fmt->exp_bias;
283    if (likely(exp > 0)) {
284        if (p->frac_lo & round_mask) {
285            flags |= float_flag_inexact;
286            if (frac_addi(p, p, inc)) {
287                frac_shr(p, 1);
288                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
289                exp++;
290            }
291            p->frac_lo &= ~round_mask;
292        }
293
294        if (fmt->arm_althp) {
295            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
296            if (unlikely(exp > exp_max)) {
297                /* Overflow.  Return the maximum normal.  */
298                flags = float_flag_invalid;
299                exp = exp_max;
300                frac_allones(p);
301                p->frac_lo &= ~round_mask;
302            }
303        } else if (unlikely(exp >= exp_max)) {
304            flags |= float_flag_overflow;
305            if (s->rebias_overflow) {
306                exp -= fmt->exp_re_bias;
307            } else if (overflow_norm) {
308                flags |= float_flag_inexact;
309                exp = exp_max - 1;
310                frac_allones(p);
311                p->frac_lo &= ~round_mask;
312            } else {
313                flags |= float_flag_inexact;
314                p->cls = float_class_inf;
315                exp = exp_max;
316                frac_clear(p);
317            }
318        }
319        frac_shr(p, frac_shift);
320    } else if (unlikely(s->rebias_underflow)) {
321        flags |= float_flag_underflow;
322        exp += fmt->exp_re_bias;
323        if (p->frac_lo & round_mask) {
324            flags |= float_flag_inexact;
325            if (frac_addi(p, p, inc)) {
326                frac_shr(p, 1);
327                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
328                exp++;
329            }
330            p->frac_lo &= ~round_mask;
331        }
332        frac_shr(p, frac_shift);
333    } else if (s->flush_to_zero) {
334        flags |= float_flag_output_denormal;
335        p->cls = float_class_zero;
336        exp = 0;
337        frac_clear(p);
338    } else {
339        bool is_tiny = s->tininess_before_rounding || exp < 0;
340
341        if (!is_tiny) {
342            FloatPartsN discard;
343            is_tiny = !frac_addi(&discard, p, inc);
344        }
345
346        frac_shrjam(p, !fmt->m68k_denormal - exp);
347
348        if (p->frac_lo & round_mask) {
349            /* Need to recompute round-to-even/round-to-odd. */
350            switch (s->float_rounding_mode) {
351            case float_round_nearest_even:
352                if (N > 64 && frac_lsb == 0) {
353                    inc = ((p->frac_hi & 1) ||
354                           (p->frac_lo & round_mask) != frac_lsbm1
355                           ? frac_lsbm1 : 0);
356                } else {
357                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
358                           ? frac_lsbm1 : 0);
359                }
360                break;
361            case float_round_to_odd:
362            case float_round_to_odd_inf:
363                if (N > 64 && frac_lsb == 0) {
364                    inc = p->frac_hi & 1 ? 0 : round_mask;
365                } else {
366                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
367                }
368                break;
369            default:
370                break;
371            }
372            flags |= float_flag_inexact;
373            frac_addi(p, p, inc);
374            p->frac_lo &= ~round_mask;
375        }
376
377        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
378        frac_shr(p, frac_shift);
379
380        if (is_tiny && (flags & float_flag_inexact)) {
381            flags |= float_flag_underflow;
382        }
383        if (exp == 0 && frac_eqz(p)) {
384            p->cls = float_class_zero;
385        }
386    }
387    p->exp = exp;
388    float_raise(flags, s);
389}
390
391static void partsN(uncanon)(FloatPartsN *p, float_status *s,
392                            const FloatFmt *fmt)
393{
394    if (likely(p->cls == float_class_normal)) {
395        parts_uncanon_normal(p, s, fmt);
396    } else {
397        switch (p->cls) {
398        case float_class_zero:
399            p->exp = 0;
400            frac_clear(p);
401            return;
402        case float_class_inf:
403            g_assert(!fmt->arm_althp);
404            p->exp = fmt->exp_max;
405            frac_clear(p);
406            return;
407        case float_class_qnan:
408        case float_class_snan:
409            g_assert(!fmt->arm_althp);
410            p->exp = fmt->exp_max;
411            frac_shr(p, fmt->frac_shift);
412            return;
413        default:
414            break;
415        }
416        g_assert_not_reached();
417    }
418}
419
420/*
421 * Returns the result of adding or subtracting the values of the
422 * floating-point values `a' and `b'. The operation is performed
423 * according to the IEC/IEEE Standard for Binary Floating-Point
424 * Arithmetic.
425 */
426static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
427                                   float_status *s, bool subtract)
428{
429    bool b_sign = b->sign ^ subtract;
430    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
431
432    if (a->sign != b_sign) {
433        /* Subtraction */
434        if (likely(ab_mask == float_cmask_normal)) {
435            if (parts_sub_normal(a, b)) {
436                return a;
437            }
438            /* Subtract was exact, fall through to set sign. */
439            ab_mask = float_cmask_zero;
440        }
441
442        if (ab_mask == float_cmask_zero) {
443            a->sign = s->float_rounding_mode == float_round_down;
444            return a;
445        }
446
447        if (unlikely(ab_mask & float_cmask_anynan)) {
448            goto p_nan;
449        }
450
451        if (ab_mask & float_cmask_inf) {
452            if (a->cls != float_class_inf) {
453                /* N - Inf */
454                goto return_b;
455            }
456            if (b->cls != float_class_inf) {
457                /* Inf - N */
458                return a;
459            }
460            /* Inf - Inf */
461            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
462            parts_default_nan(a, s);
463            return a;
464        }
465    } else {
466        /* Addition */
467        if (likely(ab_mask == float_cmask_normal)) {
468            parts_add_normal(a, b);
469            return a;
470        }
471
472        if (ab_mask == float_cmask_zero) {
473            return a;
474        }
475
476        if (unlikely(ab_mask & float_cmask_anynan)) {
477            goto p_nan;
478        }
479
480        if (ab_mask & float_cmask_inf) {
481            a->cls = float_class_inf;
482            return a;
483        }
484    }
485
486    if (b->cls == float_class_zero) {
487        g_assert(a->cls == float_class_normal);
488        return a;
489    }
490
491    g_assert(a->cls == float_class_zero);
492    g_assert(b->cls == float_class_normal);
493 return_b:
494    b->sign = b_sign;
495    return b;
496
497 p_nan:
498    return parts_pick_nan(a, b, s);
499}
500
501/*
502 * Returns the result of multiplying the floating-point values `a' and
503 * `b'. The operation is performed according to the IEC/IEEE Standard
504 * for Binary Floating-Point Arithmetic.
505 */
506static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
507                                float_status *s)
508{
509    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
510    bool sign = a->sign ^ b->sign;
511
512    if (likely(ab_mask == float_cmask_normal)) {
513        FloatPartsW tmp;
514
515        frac_mulw(&tmp, a, b);
516        frac_truncjam(a, &tmp);
517
518        a->exp += b->exp + 1;
519        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
520            frac_add(a, a, a);
521            a->exp -= 1;
522        }
523
524        a->sign = sign;
525        return a;
526    }
527
528    /* Inf * Zero == NaN */
529    if (unlikely(ab_mask == float_cmask_infzero)) {
530        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
531        parts_default_nan(a, s);
532        return a;
533    }
534
535    if (unlikely(ab_mask & float_cmask_anynan)) {
536        return parts_pick_nan(a, b, s);
537    }
538
539    /* Multiply by 0 or Inf */
540    if (ab_mask & float_cmask_inf) {
541        a->cls = float_class_inf;
542        a->sign = sign;
543        return a;
544    }
545
546    g_assert(ab_mask & float_cmask_zero);
547    a->cls = float_class_zero;
548    a->sign = sign;
549    return a;
550}
551
552/*
553 * Returns the result of multiplying the floating-point values `a' and
554 * `b' then adding 'c', with no intermediate rounding step after the
555 * multiplication. The operation is performed according to the
556 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
557 * The flags argument allows the caller to select negation of the
558 * addend, the intermediate product, or the final result. (The
559 * difference between this and having the caller do a separate
560 * negation is that negating externally will flip the sign bit on NaNs.)
561 *
562 * Requires A and C extracted into a double-sized structure to provide the
563 * extra space for the widening multiply.
564 */
565static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
566                                   FloatPartsN *c, int flags, float_status *s)
567{
568    int ab_mask, abc_mask;
569    FloatPartsW p_widen, c_widen;
570
571    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
572    abc_mask = float_cmask(c->cls) | ab_mask;
573
574    /*
575     * It is implementation-defined whether the cases of (0,inf,qnan)
576     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
577     * they return if they do), so we have to hand this information
578     * off to the target-specific pick-a-NaN routine.
579     */
580    if (unlikely(abc_mask & float_cmask_anynan)) {
581        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
582    }
583
584    if (flags & float_muladd_negate_c) {
585        c->sign ^= 1;
586    }
587
588    /* Compute the sign of the product into A. */
589    a->sign ^= b->sign;
590    if (flags & float_muladd_negate_product) {
591        a->sign ^= 1;
592    }
593
594    if (unlikely(ab_mask != float_cmask_normal)) {
595        if (unlikely(ab_mask == float_cmask_infzero)) {
596            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
597            goto d_nan;
598        }
599
600        if (ab_mask & float_cmask_inf) {
601            if (c->cls == float_class_inf && a->sign != c->sign) {
602                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
603                goto d_nan;
604            }
605            goto return_inf;
606        }
607
608        g_assert(ab_mask & float_cmask_zero);
609        if (c->cls == float_class_normal) {
610            *a = *c;
611            goto return_normal;
612        }
613        if (c->cls == float_class_zero) {
614            if (a->sign != c->sign) {
615                goto return_sub_zero;
616            }
617            goto return_zero;
618        }
619        g_assert(c->cls == float_class_inf);
620    }
621
622    if (unlikely(c->cls == float_class_inf)) {
623        a->sign = c->sign;
624        goto return_inf;
625    }
626
627    /* Perform the multiplication step. */
628    p_widen.sign = a->sign;
629    p_widen.exp = a->exp + b->exp + 1;
630    frac_mulw(&p_widen, a, b);
631    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
632        frac_add(&p_widen, &p_widen, &p_widen);
633        p_widen.exp -= 1;
634    }
635
636    /* Perform the addition step. */
637    if (c->cls != float_class_zero) {
638        /* Zero-extend C to less significant bits. */
639        frac_widen(&c_widen, c);
640        c_widen.exp = c->exp;
641
642        if (a->sign == c->sign) {
643            parts_add_normal(&p_widen, &c_widen);
644        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
645            goto return_sub_zero;
646        }
647    }
648
649    /* Narrow with sticky bit, for proper rounding later. */
650    frac_truncjam(a, &p_widen);
651    a->sign = p_widen.sign;
652    a->exp = p_widen.exp;
653
654 return_normal:
655    if (flags & float_muladd_halve_result) {
656        a->exp -= 1;
657    }
658 finish_sign:
659    if (flags & float_muladd_negate_result) {
660        a->sign ^= 1;
661    }
662    return a;
663
664 return_sub_zero:
665    a->sign = s->float_rounding_mode == float_round_down;
666 return_zero:
667    a->cls = float_class_zero;
668    goto finish_sign;
669
670 return_inf:
671    a->cls = float_class_inf;
672    goto finish_sign;
673
674 d_nan:
675    parts_default_nan(a, s);
676    return a;
677}
678
679/*
680 * Returns the result of dividing the floating-point value `a' by the
681 * corresponding value `b'. The operation is performed according to
682 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
683 */
684static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
685                                float_status *s)
686{
687    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
688    bool sign = a->sign ^ b->sign;
689
690    if (likely(ab_mask == float_cmask_normal)) {
691        a->sign = sign;
692        a->exp -= b->exp + frac_div(a, b);
693        return a;
694    }
695
696    /* 0/0 or Inf/Inf => NaN */
697    if (unlikely(ab_mask == float_cmask_zero)) {
698        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
699        goto d_nan;
700    }
701    if (unlikely(ab_mask == float_cmask_inf)) {
702        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
703        goto d_nan;
704    }
705
706    /* All the NaN cases */
707    if (unlikely(ab_mask & float_cmask_anynan)) {
708        return parts_pick_nan(a, b, s);
709    }
710
711    a->sign = sign;
712
713    /* Inf / X */
714    if (a->cls == float_class_inf) {
715        return a;
716    }
717
718    /* 0 / X */
719    if (a->cls == float_class_zero) {
720        return a;
721    }
722
723    /* X / Inf */
724    if (b->cls == float_class_inf) {
725        a->cls = float_class_zero;
726        return a;
727    }
728
729    /* X / 0 => Inf */
730    g_assert(b->cls == float_class_zero);
731    float_raise(float_flag_divbyzero, s);
732    a->cls = float_class_inf;
733    return a;
734
735 d_nan:
736    parts_default_nan(a, s);
737    return a;
738}
739
740/*
741 * Floating point remainder, per IEC/IEEE, or modulus.
742 */
743static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
744                                   uint64_t *mod_quot, float_status *s)
745{
746    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
747
748    if (likely(ab_mask == float_cmask_normal)) {
749        frac_modrem(a, b, mod_quot);
750        return a;
751    }
752
753    if (mod_quot) {
754        *mod_quot = 0;
755    }
756
757    /* All the NaN cases */
758    if (unlikely(ab_mask & float_cmask_anynan)) {
759        return parts_pick_nan(a, b, s);
760    }
761
762    /* Inf % N; N % 0 */
763    if (a->cls == float_class_inf || b->cls == float_class_zero) {
764        float_raise(float_flag_invalid, s);
765        parts_default_nan(a, s);
766        return a;
767    }
768
769    /* N % Inf; 0 % N */
770    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
771    return a;
772}
773
774/*
775 * Square Root
776 *
777 * The base algorithm is lifted from
778 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
779 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
780 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
781 * and is thus MIT licenced.
782 */
783static void partsN(sqrt)(FloatPartsN *a, float_status *status,
784                         const FloatFmt *fmt)
785{
786    const uint32_t three32 = 3u << 30;
787    const uint64_t three64 = 3ull << 62;
788    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
789    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
790    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
791    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
792    uint64_t discard;
793    bool exp_odd;
794    size_t index;
795
796    if (unlikely(a->cls != float_class_normal)) {
797        switch (a->cls) {
798        case float_class_snan:
799        case float_class_qnan:
800            parts_return_nan(a, status);
801            return;
802        case float_class_zero:
803            return;
804        case float_class_inf:
805            if (unlikely(a->sign)) {
806                goto d_nan;
807            }
808            return;
809        default:
810            g_assert_not_reached();
811        }
812    }
813
814    if (unlikely(a->sign)) {
815        goto d_nan;
816    }
817
818    /*
819     * Argument reduction.
820     * x = 4^e frac; with integer e, and frac in [1, 4)
821     * m = frac fixed point at bit 62, since we're in base 4.
822     * If base-2 exponent is odd, exchange that for multiply by 2,
823     * which results in no shift.
824     */
825    exp_odd = a->exp & 1;
826    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
827    if (!exp_odd) {
828        frac_shr(a, 1);
829    }
830
831    /*
832     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
833     *
834     * Initial estimate:
835     * 7-bit lookup table (1-bit exponent and 6-bit significand).
836     *
837     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
838     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
839     * a table lookup is faster and needs one less iteration.
840     * The 7-bit table gives |e| < 0x1.fdp-9.
841     *
842     * A Newton-Raphson iteration for r is
843     *   s = m*r
844     *   d = s*r
845     *   u = 3 - d
846     *   r = r*u/2
847     *
848     * Fixed point representations:
849     *   m, s, d, u, three are all 2.30; r is 0.32
850     */
851    m64 = a->frac_hi;
852    m32 = m64 >> 32;
853
854    r32 = rsqrt_tab[index] << 16;
855    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
856
857    s32 = ((uint64_t)m32 * r32) >> 32;
858    d32 = ((uint64_t)s32 * r32) >> 32;
859    u32 = three32 - d32;
860
861    if (N == 64) {
862        /* float64 or smaller */
863
864        r32 = ((uint64_t)r32 * u32) >> 31;
865        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
866
867        s32 = ((uint64_t)m32 * r32) >> 32;
868        d32 = ((uint64_t)s32 * r32) >> 32;
869        u32 = three32 - d32;
870
871        if (fmt->frac_size <= 23) {
872            /* float32 or smaller */
873
874            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
875            s32 = (s32 - 1) >> 6;               /* 9.23 */
876            /* s < sqrt(m) < s + 0x1.08p-23 */
877
878            /* compute nearest rounded result to 2.23 bits */
879            uint32_t d0 = (m32 << 16) - s32 * s32;
880            uint32_t d1 = s32 - d0;
881            uint32_t d2 = d1 + s32 + 1;
882            s32 += d1 >> 31;
883            a->frac_hi = (uint64_t)s32 << (64 - 25);
884
885            /* increment or decrement for inexact */
886            if (d2 != 0) {
887                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
888            }
889            goto done;
890        }
891
892        /* float64 */
893
894        r64 = (uint64_t)r32 * u32 * 2;
895        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
896        mul64To128(m64, r64, &s64, &discard);
897        mul64To128(s64, r64, &d64, &discard);
898        u64 = three64 - d64;
899
900        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
901        s64 = (s64 - 2) >> 9;                  /* 12.52 */
902
903        /* Compute nearest rounded result */
904        uint64_t d0 = (m64 << 42) - s64 * s64;
905        uint64_t d1 = s64 - d0;
906        uint64_t d2 = d1 + s64 + 1;
907        s64 += d1 >> 63;
908        a->frac_hi = s64 << (64 - 54);
909
910        /* increment or decrement for inexact */
911        if (d2 != 0) {
912            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
913        }
914        goto done;
915    }
916
917    r64 = (uint64_t)r32 * u32 * 2;
918    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
919
920    mul64To128(m64, r64, &s64, &discard);
921    mul64To128(s64, r64, &d64, &discard);
922    u64 = three64 - d64;
923    mul64To128(u64, r64, &r64, &discard);
924    r64 <<= 1;
925    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
926
927    mul64To128(m64, r64, &s64, &discard);
928    mul64To128(s64, r64, &d64, &discard);
929    u64 = three64 - d64;
930    mul64To128(u64, r64, &rh, &rl);
931    add128(rh, rl, rh, rl, &rh, &rl);
932    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
933
934    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
935    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
936    sub128(three64, 0, dh, dl, &uh, &ul);
937    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
938    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
939
940    sub128(sh, sl, 0, 4, &sh, &sl);
941    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
942    /* s < sqrt(m) < s + 1ulp */
943
944    /* Compute nearest rounded result */
945    mul64To128(sl, sl, &d0h, &d0l);
946    d0h += 2 * sh * sl;
947    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
948    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
949    add128(sh, sl, 0, 1, &d2h, &d2l);
950    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
951    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
952    shift128Left(sh, sl, 128 - 114, &sh, &sl);
953
954    /* increment or decrement for inexact */
955    if (d2h | d2l) {
956        if ((int64_t)(d1h ^ d2h) < 0) {
957            sub128(sh, sl, 0, 1, &sh, &sl);
958        } else {
959            add128(sh, sl, 0, 1, &sh, &sl);
960        }
961    }
962    a->frac_lo = sl;
963    a->frac_hi = sh;
964
965 done:
966    /* Convert back from base 4 to base 2. */
967    a->exp >>= 1;
968    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
969        frac_add(a, a, a);
970    } else {
971        a->exp += 1;
972    }
973    return;
974
975 d_nan:
976    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
977    parts_default_nan(a, status);
978}
979
980/*
981 * Rounds the floating-point value `a' to an integer, and returns the
982 * result as a floating-point value. The operation is performed
983 * according to the IEC/IEEE Standard for Binary Floating-Point
984 * Arithmetic.
985 *
986 * parts_round_to_int_normal is an internal helper function for
987 * normal numbers only, returning true for inexact but not directly
988 * raising float_flag_inexact.
989 */
990static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
991                                        int scale, int frac_size)
992{
993    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
994    int shift_adj;
995
996    scale = MIN(MAX(scale, -0x10000), 0x10000);
997    a->exp += scale;
998
999    if (a->exp < 0) {
1000        bool one;
1001
1002        /* All fractional */
1003        switch (rmode) {
1004        case float_round_nearest_even:
1005            one = false;
1006            if (a->exp == -1) {
1007                FloatPartsN tmp;
1008                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
1009                frac_add(&tmp, a, a);
1010                /* Anything remaining means frac > 0.5. */
1011                one = !frac_eqz(&tmp);
1012            }
1013            break;
1014        case float_round_ties_away:
1015            one = a->exp == -1;
1016            break;
1017        case float_round_to_zero:
1018            one = false;
1019            break;
1020        case float_round_up:
1021            one = !a->sign;
1022            break;
1023        case float_round_down:
1024            one = a->sign;
1025            break;
1026        case float_round_to_odd:
1027            one = true;
1028            break;
1029        default:
1030            g_assert_not_reached();
1031        }
1032
1033        frac_clear(a);
1034        a->exp = 0;
1035        if (one) {
1036            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
1037        } else {
1038            a->cls = float_class_zero;
1039        }
1040        return true;
1041    }
1042
1043    if (a->exp >= frac_size) {
1044        /* All integral */
1045        return false;
1046    }
1047
1048    if (N > 64 && a->exp < N - 64) {
1049        /*
1050         * Rounding is not in the low word -- shift lsb to bit 2,
1051         * which leaves room for sticky and rounding bit.
1052         */
1053        shift_adj = (N - 1) - (a->exp + 2);
1054        frac_shrjam(a, shift_adj);
1055        frac_lsb = 1 << 2;
1056    } else {
1057        shift_adj = 0;
1058        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
1059    }
1060
1061    frac_lsbm1 = frac_lsb >> 1;
1062    rnd_mask = frac_lsb - 1;
1063    rnd_even_mask = rnd_mask | frac_lsb;
1064
1065    if (!(a->frac_lo & rnd_mask)) {
1066        /* Fractional bits already clear, undo the shift above. */
1067        frac_shl(a, shift_adj);
1068        return false;
1069    }
1070
1071    switch (rmode) {
1072    case float_round_nearest_even:
1073        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1074        break;
1075    case float_round_ties_away:
1076        inc = frac_lsbm1;
1077        break;
1078    case float_round_to_zero:
1079        inc = 0;
1080        break;
1081    case float_round_up:
1082        inc = a->sign ? 0 : rnd_mask;
1083        break;
1084    case float_round_down:
1085        inc = a->sign ? rnd_mask : 0;
1086        break;
1087    case float_round_to_odd:
1088        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
1089        break;
1090    default:
1091        g_assert_not_reached();
1092    }
1093
1094    if (shift_adj == 0) {
1095        if (frac_addi(a, a, inc)) {
1096            frac_shr(a, 1);
1097            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
1098            a->exp++;
1099        }
1100        a->frac_lo &= ~rnd_mask;
1101    } else {
1102        frac_addi(a, a, inc);
1103        a->frac_lo &= ~rnd_mask;
1104        /* Be careful shifting back, not to overflow */
1105        frac_shl(a, shift_adj - 1);
1106        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1107            a->exp++;
1108        } else {
1109            frac_add(a, a, a);
1110        }
1111    }
1112    return true;
1113}
1114
1115static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1116                                 int scale, float_status *s,
1117                                 const FloatFmt *fmt)
1118{
1119    switch (a->cls) {
1120    case float_class_qnan:
1121    case float_class_snan:
1122        parts_return_nan(a, s);
1123        break;
1124    case float_class_zero:
1125    case float_class_inf:
1126        break;
1127    case float_class_normal:
1128        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1129            float_raise(float_flag_inexact, s);
1130        }
1131        break;
1132    default:
1133        g_assert_not_reached();
1134    }
1135}
1136
1137/*
1138 * Returns the result of converting the floating-point value `a' to
1139 * the two's complement integer format. The conversion is performed
1140 * according to the IEC/IEEE Standard for Binary Floating-Point
1141 * Arithmetic---which means in particular that the conversion is
1142 * rounded according to the current rounding mode. If `a' is a NaN,
1143 * the largest positive integer is returned. Otherwise, if the
1144 * conversion overflows, the largest integer with the same sign as `a'
1145 * is returned.
1146 */
1147static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1148                                     int scale, int64_t min, int64_t max,
1149                                     float_status *s)
1150{
1151    int flags = 0;
1152    uint64_t r;
1153
1154    switch (p->cls) {
1155    case float_class_snan:
1156        flags |= float_flag_invalid_snan;
1157        /* fall through */
1158    case float_class_qnan:
1159        flags |= float_flag_invalid;
1160        r = max;
1161        break;
1162
1163    case float_class_inf:
1164        flags = float_flag_invalid | float_flag_invalid_cvti;
1165        r = p->sign ? min : max;
1166        break;
1167
1168    case float_class_zero:
1169        return 0;
1170
1171    case float_class_normal:
1172        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1173        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1174            flags = float_flag_inexact;
1175        }
1176
1177        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1178            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1179        } else {
1180            r = UINT64_MAX;
1181        }
1182        if (p->sign) {
1183            if (r <= -(uint64_t)min) {
1184                r = -r;
1185            } else {
1186                flags = float_flag_invalid | float_flag_invalid_cvti;
1187                r = min;
1188            }
1189        } else if (r > max) {
1190            flags = float_flag_invalid | float_flag_invalid_cvti;
1191            r = max;
1192        }
1193        break;
1194
1195    default:
1196        g_assert_not_reached();
1197    }
1198
1199    float_raise(flags, s);
1200    return r;
1201}
1202
1203/*
1204 *  Returns the result of converting the floating-point value `a' to
1205 *  the unsigned integer format. The conversion is performed according
1206 *  to the IEC/IEEE Standard for Binary Floating-Point
1207 *  Arithmetic---which means in particular that the conversion is
1208 *  rounded according to the current rounding mode. If `a' is a NaN,
1209 *  the largest unsigned integer is returned. Otherwise, if the
1210 *  conversion overflows, the largest unsigned integer is returned. If
1211 *  the 'a' is negative, the result is rounded and zero is returned;
1212 *  values that do not round to zero will raise the inexact exception
1213 *  flag.
1214 */
1215static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1216                                      int scale, uint64_t max, float_status *s)
1217{
1218    int flags = 0;
1219    uint64_t r;
1220
1221    switch (p->cls) {
1222    case float_class_snan:
1223        flags |= float_flag_invalid_snan;
1224        /* fall through */
1225    case float_class_qnan:
1226        flags |= float_flag_invalid;
1227        r = max;
1228        break;
1229
1230    case float_class_inf:
1231        flags = float_flag_invalid | float_flag_invalid_cvti;
1232        r = p->sign ? 0 : max;
1233        break;
1234
1235    case float_class_zero:
1236        return 0;
1237
1238    case float_class_normal:
1239        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1240        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1241            flags = float_flag_inexact;
1242            if (p->cls == float_class_zero) {
1243                r = 0;
1244                break;
1245            }
1246        }
1247
1248        if (p->sign) {
1249            flags = float_flag_invalid | float_flag_invalid_cvti;
1250            r = 0;
1251        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1252            flags = float_flag_invalid | float_flag_invalid_cvti;
1253            r = max;
1254        } else {
1255            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1256            if (r > max) {
1257                flags = float_flag_invalid | float_flag_invalid_cvti;
1258                r = max;
1259            }
1260        }
1261        break;
1262
1263    default:
1264        g_assert_not_reached();
1265    }
1266
1267    float_raise(flags, s);
1268    return r;
1269}
1270
1271/*
1272 * Like partsN(float_to_sint), except do not saturate the result.
1273 * Instead, return the rounded unbounded precision two's compliment result,
1274 * modulo 2**(bitsm1 + 1).
1275 */
1276static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
1277                                            FloatRoundMode rmode,
1278                                            int bitsm1, float_status *s)
1279{
1280    int flags = 0;
1281    uint64_t r;
1282    bool overflow = false;
1283
1284    switch (p->cls) {
1285    case float_class_snan:
1286        flags |= float_flag_invalid_snan;
1287        /* fall through */
1288    case float_class_qnan:
1289        flags |= float_flag_invalid;
1290        r = 0;
1291        break;
1292
1293    case float_class_inf:
1294        overflow = true;
1295        r = 0;
1296        break;
1297
1298    case float_class_zero:
1299        return 0;
1300
1301    case float_class_normal:
1302        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1303        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
1304            flags = float_flag_inexact;
1305        }
1306
1307        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1308            /*
1309             * Because we rounded to integral, and exp < 64,
1310             * we know frac_low is zero.
1311             */
1312            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1313            if (p->exp < bitsm1) {
1314                /* Result in range. */
1315            } else if (p->exp == bitsm1) {
1316                /* The only in-range value is INT_MIN. */
1317                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
1318            } else {
1319                overflow = true;
1320            }
1321        } else {
1322            /* Overflow, but there might still be bits to return. */
1323            int shl = p->exp - DECOMPOSED_BINARY_POINT;
1324            if (shl < N) {
1325                frac_shl(p, shl);
1326                r = p->frac_hi;
1327            } else {
1328                r = 0;
1329            }
1330            overflow = true;
1331        }
1332
1333        if (p->sign) {
1334            r = -r;
1335        }
1336        break;
1337
1338    default:
1339        g_assert_not_reached();
1340    }
1341
1342    if (overflow) {
1343        flags = float_flag_invalid | float_flag_invalid_cvti;
1344    }
1345    float_raise(flags, s);
1346    return r;
1347}
1348
1349/*
1350 * Integer to float conversions
1351 *
1352 * Returns the result of converting the two's complement integer `a'
1353 * to the floating-point format. The conversion is performed according
1354 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1355 */
1356static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1357                                  int scale, float_status *s)
1358{
1359    uint64_t f = a;
1360    int shift;
1361
1362    memset(p, 0, sizeof(*p));
1363
1364    if (a == 0) {
1365        p->cls = float_class_zero;
1366        return;
1367    }
1368
1369    p->cls = float_class_normal;
1370    if (a < 0) {
1371        f = -f;
1372        p->sign = true;
1373    }
1374    shift = clz64(f);
1375    scale = MIN(MAX(scale, -0x10000), 0x10000);
1376
1377    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1378    p->frac_hi = f << shift;
1379}
1380
1381/*
1382 * Unsigned Integer to float conversions
1383 *
1384 * Returns the result of converting the unsigned integer `a' to the
1385 * floating-point format. The conversion is performed according to the
1386 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1387 */
1388static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1389                                  int scale, float_status *status)
1390{
1391    memset(p, 0, sizeof(*p));
1392
1393    if (a == 0) {
1394        p->cls = float_class_zero;
1395    } else {
1396        int shift = clz64(a);
1397        scale = MIN(MAX(scale, -0x10000), 0x10000);
1398        p->cls = float_class_normal;
1399        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1400        p->frac_hi = a << shift;
1401    }
1402}
1403
1404/*
1405 * Float min/max.
1406 */
1407static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1408                                   float_status *s, int flags)
1409{
1410    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1411    int a_exp, b_exp, cmp;
1412
1413    if (unlikely(ab_mask & float_cmask_anynan)) {
1414        /*
1415         * For minNum/maxNum (IEEE 754-2008)
1416         * or minimumNumber/maximumNumber (IEEE 754-2019),
1417         * if one operand is a QNaN, and the other
1418         * operand is numerical, then return numerical argument.
1419         */
1420        if ((flags & (minmax_isnum | minmax_isnumber))
1421            && !(ab_mask & float_cmask_snan)
1422            && (ab_mask & ~float_cmask_qnan)) {
1423            return is_nan(a->cls) ? b : a;
1424        }
1425
1426        /*
1427         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1428         * are removed and replaced with minimum, minimumNumber, maximum
1429         * and maximumNumber.
1430         * minimumNumber/maximumNumber behavior for SNaN is changed to:
1431         *   If both operands are NaNs, a QNaN is returned.
1432         *   If either operand is a SNaN,
1433         *   an invalid operation exception is signaled,
1434         *   but unless both operands are NaNs,
1435         *   the SNaN is otherwise ignored and not converted to a QNaN.
1436         */
1437        if ((flags & minmax_isnumber)
1438            && (ab_mask & float_cmask_snan)
1439            && (ab_mask & ~float_cmask_anynan)) {
1440            float_raise(float_flag_invalid, s);
1441            return is_nan(a->cls) ? b : a;
1442        }
1443
1444        return parts_pick_nan(a, b, s);
1445    }
1446
1447    a_exp = a->exp;
1448    b_exp = b->exp;
1449
1450    if (unlikely(ab_mask != float_cmask_normal)) {
1451        switch (a->cls) {
1452        case float_class_normal:
1453            break;
1454        case float_class_inf:
1455            a_exp = INT16_MAX;
1456            break;
1457        case float_class_zero:
1458            a_exp = INT16_MIN;
1459            break;
1460        default:
1461            g_assert_not_reached();
1462        }
1463        switch (b->cls) {
1464        case float_class_normal:
1465            break;
1466        case float_class_inf:
1467            b_exp = INT16_MAX;
1468            break;
1469        case float_class_zero:
1470            b_exp = INT16_MIN;
1471            break;
1472        default:
1473            g_assert_not_reached();
1474        }
1475    }
1476
1477    /* Compare magnitudes. */
1478    cmp = a_exp - b_exp;
1479    if (cmp == 0) {
1480        cmp = frac_cmp(a, b);
1481    }
1482
1483    /*
1484     * Take the sign into account.
1485     * For ismag, only do this if the magnitudes are equal.
1486     */
1487    if (!(flags & minmax_ismag) || cmp == 0) {
1488        if (a->sign != b->sign) {
1489            /* For differing signs, the negative operand is less. */
1490            cmp = a->sign ? -1 : 1;
1491        } else if (a->sign) {
1492            /* For two negative operands, invert the magnitude comparison. */
1493            cmp = -cmp;
1494        }
1495    }
1496
1497    if (flags & minmax_ismin) {
1498        cmp = -cmp;
1499    }
1500    return cmp < 0 ? b : a;
1501}
1502
1503/*
1504 * Floating point compare
1505 */
1506static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1507                                     float_status *s, bool is_quiet)
1508{
1509    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1510
1511    if (likely(ab_mask == float_cmask_normal)) {
1512        FloatRelation cmp;
1513
1514        if (a->sign != b->sign) {
1515            goto a_sign;
1516        }
1517        if (a->exp == b->exp) {
1518            cmp = frac_cmp(a, b);
1519        } else if (a->exp < b->exp) {
1520            cmp = float_relation_less;
1521        } else {
1522            cmp = float_relation_greater;
1523        }
1524        if (a->sign) {
1525            cmp = -cmp;
1526        }
1527        return cmp;
1528    }
1529
1530    if (unlikely(ab_mask & float_cmask_anynan)) {
1531        if (ab_mask & float_cmask_snan) {
1532            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1533        } else if (!is_quiet) {
1534            float_raise(float_flag_invalid, s);
1535        }
1536        return float_relation_unordered;
1537    }
1538
1539    if (ab_mask & float_cmask_zero) {
1540        if (ab_mask == float_cmask_zero) {
1541            return float_relation_equal;
1542        } else if (a->cls == float_class_zero) {
1543            goto b_sign;
1544        } else {
1545            goto a_sign;
1546        }
1547    }
1548
1549    if (ab_mask == float_cmask_inf) {
1550        if (a->sign == b->sign) {
1551            return float_relation_equal;
1552        }
1553    } else if (b->cls == float_class_inf) {
1554        goto b_sign;
1555    } else {
1556        g_assert(a->cls == float_class_inf);
1557    }
1558
1559 a_sign:
1560    return a->sign ? float_relation_less : float_relation_greater;
1561 b_sign:
1562    return b->sign ? float_relation_greater : float_relation_less;
1563}
1564
1565/*
1566 * Multiply A by 2 raised to the power N.
1567 */
1568static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1569{
1570    switch (a->cls) {
1571    case float_class_snan:
1572    case float_class_qnan:
1573        parts_return_nan(a, s);
1574        break;
1575    case float_class_zero:
1576    case float_class_inf:
1577        break;
1578    case float_class_normal:
1579        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1580        break;
1581    default:
1582        g_assert_not_reached();
1583    }
1584}
1585
1586/*
1587 * Return log2(A)
1588 */
1589static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1590{
1591    uint64_t a0, a1, r, t, ign;
1592    FloatPartsN f;
1593    int i, n, a_exp, f_exp;
1594
1595    if (unlikely(a->cls != float_class_normal)) {
1596        switch (a->cls) {
1597        case float_class_snan:
1598        case float_class_qnan:
1599            parts_return_nan(a, s);
1600            return;
1601        case float_class_zero:
1602            float_raise(float_flag_divbyzero, s);
1603            /* log2(0) = -inf */
1604            a->cls = float_class_inf;
1605            a->sign = 1;
1606            return;
1607        case float_class_inf:
1608            if (unlikely(a->sign)) {
1609                goto d_nan;
1610            }
1611            return;
1612        default:
1613            break;
1614        }
1615        g_assert_not_reached();
1616    }
1617    if (unlikely(a->sign)) {
1618        goto d_nan;
1619    }
1620
1621    /* TODO: This algorithm looses bits too quickly for float128. */
1622    g_assert(N == 64);
1623
1624    a_exp = a->exp;
1625    f_exp = -1;
1626
1627    r = 0;
1628    t = DECOMPOSED_IMPLICIT_BIT;
1629    a0 = a->frac_hi;
1630    a1 = 0;
1631
1632    n = fmt->frac_size + 2;
1633    if (unlikely(a_exp == -1)) {
1634        /*
1635         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1636         * When the value is very close to 1.0, there are lots of 1's in
1637         * the msb parts of the fraction.  At the end, when we subtract
1638         * this value from -1.0, we can see a catastrophic loss of precision,
1639         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1640         * bits of y in the final result.  To minimize this, compute as many
1641         * digits as we can.
1642         * ??? This case needs another algorithm to avoid this.
1643         */
1644        n = fmt->frac_size * 2 + 2;
1645        /* Don't compute a value overlapping the sticky bit */
1646        n = MIN(n, 62);
1647    }
1648
1649    for (i = 0; i < n; i++) {
1650        if (a1) {
1651            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1652        } else if (a0 & 0xffffffffull) {
1653            mul64To128(a0, a0, &a0, &a1);
1654        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1655            a0 >>= 32;
1656            a0 *= a0;
1657        } else {
1658            goto exact;
1659        }
1660
1661        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1662            if (unlikely(a_exp == 0 && r == 0)) {
1663                /*
1664                 * When a_exp == 0, we're computing the log2 of a value
1665                 * [1.0,2.0).  When the value is very close to 1.0, there
1666                 * are lots of 0's in the msb parts of the fraction.
1667                 * We need to compute more digits to produce a correct
1668                 * result -- restart at the top of the fraction.
1669                 * ??? This is likely to lose precision quickly, as for
1670                 * float128; we may need another method.
1671                 */
1672                f_exp -= i;
1673                t = r = DECOMPOSED_IMPLICIT_BIT;
1674                i = 0;
1675            } else {
1676                r |= t;
1677            }
1678        } else {
1679            add128(a0, a1, a0, a1, &a0, &a1);
1680        }
1681        t >>= 1;
1682    }
1683
1684    /* Set sticky for inexact. */
1685    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1686
1687 exact:
1688    parts_sint_to_float(a, a_exp, 0, s);
1689    if (r == 0) {
1690        return;
1691    }
1692
1693    memset(&f, 0, sizeof(f));
1694    f.cls = float_class_normal;
1695    f.frac_hi = r;
1696    f.exp = f_exp - frac_normalize(&f);
1697
1698    if (a_exp < 0) {
1699        parts_sub_normal(a, &f);
1700    } else if (a_exp > 0) {
1701        parts_add_normal(a, &f);
1702    } else {
1703        *a = f;
1704    }
1705    return;
1706
1707 d_nan:
1708    float_raise(float_flag_invalid, s);
1709    parts_default_nan(a, s);
1710}
1711