xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision 664280abddcb3cacc9c6204706bb739fcc1316f7)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    bool have_snan = false;
43    FloatPartsN *ret;
44    int cmp;
45
46    if (is_snan(a->cls) || is_snan(b->cls)) {
47        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
48        have_snan = true;
49    }
50
51    if (s->default_nan_mode) {
52        parts_default_nan(a, s);
53        return a;
54    }
55
56    switch (s->float_2nan_prop_rule) {
57    case float_2nan_prop_s_ab:
58        if (have_snan) {
59            ret = is_snan(a->cls) ? a : b;
60            break;
61        }
62        /* fall through */
63    case float_2nan_prop_ab:
64        ret = is_nan(a->cls) ? a : b;
65        break;
66    case float_2nan_prop_s_ba:
67        if (have_snan) {
68            ret = is_snan(b->cls) ? b : a;
69            break;
70        }
71        /* fall through */
72    case float_2nan_prop_ba:
73        ret = is_nan(b->cls) ? b : a;
74        break;
75    case float_2nan_prop_x87:
76        /*
77         * This implements x87 NaN propagation rules:
78         * SNaN + QNaN => return the QNaN
79         * two SNaNs => return the one with the larger significand, silenced
80         * two QNaNs => return the one with the larger significand
81         * SNaN and a non-NaN => return the SNaN, silenced
82         * QNaN and a non-NaN => return the QNaN
83         *
84         * If we get down to comparing significands and they are the same,
85         * return the NaN with the positive sign bit (if any).
86         */
87        if (is_snan(a->cls)) {
88            if (!is_snan(b->cls)) {
89                ret = is_qnan(b->cls) ? b : a;
90                break;
91            }
92        } else if (is_qnan(a->cls)) {
93            if (is_snan(b->cls) || !is_qnan(b->cls)) {
94                ret = a;
95                break;
96            }
97        } else {
98            ret = b;
99            break;
100        }
101        cmp = frac_cmp(a, b);
102        if (cmp == 0) {
103            cmp = a->sign < b->sign;
104        }
105        ret = cmp > 0 ? a : b;
106        break;
107    default:
108        g_assert_not_reached();
109    }
110
111    if (is_snan(ret->cls)) {
112        parts_silence_nan(ret, s);
113    }
114    return ret;
115}
116
117static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
118                                            FloatPartsN *c, float_status *s,
119                                            int ab_mask, int abc_mask)
120{
121    bool infzero = (ab_mask == float_cmask_infzero);
122    bool have_snan = (abc_mask & float_cmask_snan);
123    FloatPartsN *ret;
124
125    if (unlikely(have_snan)) {
126        float_raise(float_flag_invalid | float_flag_invalid_snan, s);
127    }
128
129    if (infzero) {
130        /* This is (0 * inf) + NaN or (inf * 0) + NaN */
131        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
132    }
133
134    if (s->default_nan_mode) {
135        /*
136         * We guarantee not to require the target to tell us how to
137         * pick a NaN if we're always returning the default NaN.
138         * But if we're not in default-NaN mode then the target must
139         * specify.
140         */
141        goto default_nan;
142    } else if (infzero) {
143        /*
144         * Inf * 0 + NaN -- some implementations return the
145         * default NaN here, and some return the input NaN.
146         */
147        switch (s->float_infzeronan_rule) {
148        case float_infzeronan_dnan_never:
149            break;
150        case float_infzeronan_dnan_always:
151            goto default_nan;
152        case float_infzeronan_dnan_if_qnan:
153            if (is_qnan(c->cls)) {
154                goto default_nan;
155            }
156            break;
157        default:
158            g_assert_not_reached();
159        }
160        ret = c;
161    } else {
162        FloatPartsN *val[R_3NAN_1ST_MASK + 1] = { a, b, c };
163        Float3NaNPropRule rule = s->float_3nan_prop_rule;
164
165        assert(rule != float_3nan_prop_none);
166        if (have_snan && (rule & R_3NAN_SNAN_MASK)) {
167            /* We have at least one SNaN input and should prefer it */
168            do {
169                ret = val[rule & R_3NAN_1ST_MASK];
170                rule >>= R_3NAN_1ST_LENGTH;
171            } while (!is_snan(ret->cls));
172        } else {
173            do {
174                ret = val[rule & R_3NAN_1ST_MASK];
175                rule >>= R_3NAN_1ST_LENGTH;
176            } while (!is_nan(ret->cls));
177        }
178    }
179
180    if (is_snan(ret->cls)) {
181        parts_silence_nan(ret, s);
182    }
183    return ret;
184
185 default_nan:
186    parts_default_nan(a, s);
187    return a;
188}
189
190/*
191 * Canonicalize the FloatParts structure.  Determine the class,
192 * unbias the exponent, and normalize the fraction.
193 */
194static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
195                                 const FloatFmt *fmt)
196{
197    if (unlikely(p->exp == 0)) {
198        if (likely(frac_eqz(p))) {
199            p->cls = float_class_zero;
200        } else if (status->flush_inputs_to_zero) {
201            float_raise(float_flag_input_denormal_flushed, status);
202            p->cls = float_class_zero;
203            frac_clear(p);
204        } else {
205            int shift = frac_normalize(p);
206            p->cls = float_class_normal;
207            p->exp = fmt->frac_shift - fmt->exp_bias
208                   - shift + !fmt->m68k_denormal;
209        }
210    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
211        p->cls = float_class_normal;
212        p->exp -= fmt->exp_bias;
213        frac_shl(p, fmt->frac_shift);
214        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
215    } else if (likely(frac_eqz(p))) {
216        p->cls = float_class_inf;
217    } else {
218        frac_shl(p, fmt->frac_shift);
219        p->cls = (parts_is_snan_frac(p->frac_hi, status)
220                  ? float_class_snan : float_class_qnan);
221    }
222}
223
224/*
225 * Round and uncanonicalize a floating-point number by parts. There
226 * are FRAC_SHIFT bits that may require rounding at the bottom of the
227 * fraction; these bits will be removed. The exponent will be biased
228 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
229 */
230static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
231                                   const FloatFmt *fmt)
232{
233    const int exp_max = fmt->exp_max;
234    const int frac_shift = fmt->frac_shift;
235    const uint64_t round_mask = fmt->round_mask;
236    const uint64_t frac_lsb = round_mask + 1;
237    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
238    const uint64_t roundeven_mask = round_mask | frac_lsb;
239    uint64_t inc;
240    bool overflow_norm = false;
241    int exp, flags = 0;
242
243    switch (s->float_rounding_mode) {
244    case float_round_nearest_even_max:
245        overflow_norm = true;
246        /* fall through */
247    case float_round_nearest_even:
248        if (N > 64 && frac_lsb == 0) {
249            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
250                   ? frac_lsbm1 : 0);
251        } else {
252            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
253                   ? frac_lsbm1 : 0);
254        }
255        break;
256    case float_round_ties_away:
257        inc = frac_lsbm1;
258        break;
259    case float_round_to_zero:
260        overflow_norm = true;
261        inc = 0;
262        break;
263    case float_round_up:
264        inc = p->sign ? 0 : round_mask;
265        overflow_norm = p->sign;
266        break;
267    case float_round_down:
268        inc = p->sign ? round_mask : 0;
269        overflow_norm = !p->sign;
270        break;
271    case float_round_to_odd:
272        overflow_norm = true;
273        /* fall through */
274    case float_round_to_odd_inf:
275        if (N > 64 && frac_lsb == 0) {
276            inc = p->frac_hi & 1 ? 0 : round_mask;
277        } else {
278            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
279        }
280        break;
281    default:
282        g_assert_not_reached();
283    }
284
285    exp = p->exp + fmt->exp_bias;
286    if (likely(exp > 0)) {
287        if (p->frac_lo & round_mask) {
288            flags |= float_flag_inexact;
289            if (frac_addi(p, p, inc)) {
290                frac_shr(p, 1);
291                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
292                exp++;
293            }
294            p->frac_lo &= ~round_mask;
295        }
296
297        if (fmt->arm_althp) {
298            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
299            if (unlikely(exp > exp_max)) {
300                /* Overflow.  Return the maximum normal.  */
301                flags = float_flag_invalid;
302                exp = exp_max;
303                frac_allones(p);
304                p->frac_lo &= ~round_mask;
305            }
306        } else if (unlikely(exp >= exp_max)) {
307            flags |= float_flag_overflow;
308            if (s->rebias_overflow) {
309                exp -= fmt->exp_re_bias;
310            } else if (overflow_norm) {
311                flags |= float_flag_inexact;
312                exp = exp_max - 1;
313                frac_allones(p);
314                p->frac_lo &= ~round_mask;
315            } else {
316                flags |= float_flag_inexact;
317                p->cls = float_class_inf;
318                exp = exp_max;
319                frac_clear(p);
320            }
321        }
322        frac_shr(p, frac_shift);
323    } else if (unlikely(s->rebias_underflow)) {
324        flags |= float_flag_underflow;
325        exp += fmt->exp_re_bias;
326        if (p->frac_lo & round_mask) {
327            flags |= float_flag_inexact;
328            if (frac_addi(p, p, inc)) {
329                frac_shr(p, 1);
330                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
331                exp++;
332            }
333            p->frac_lo &= ~round_mask;
334        }
335        frac_shr(p, frac_shift);
336    } else if (s->flush_to_zero) {
337        flags |= float_flag_output_denormal_flushed;
338        p->cls = float_class_zero;
339        exp = 0;
340        frac_clear(p);
341    } else {
342        bool is_tiny = s->tininess_before_rounding || exp < 0;
343
344        if (!is_tiny) {
345            FloatPartsN discard;
346            is_tiny = !frac_addi(&discard, p, inc);
347        }
348
349        frac_shrjam(p, !fmt->m68k_denormal - exp);
350
351        if (p->frac_lo & round_mask) {
352            /* Need to recompute round-to-even/round-to-odd. */
353            switch (s->float_rounding_mode) {
354            case float_round_nearest_even:
355                if (N > 64 && frac_lsb == 0) {
356                    inc = ((p->frac_hi & 1) ||
357                           (p->frac_lo & round_mask) != frac_lsbm1
358                           ? frac_lsbm1 : 0);
359                } else {
360                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
361                           ? frac_lsbm1 : 0);
362                }
363                break;
364            case float_round_to_odd:
365            case float_round_to_odd_inf:
366                if (N > 64 && frac_lsb == 0) {
367                    inc = p->frac_hi & 1 ? 0 : round_mask;
368                } else {
369                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
370                }
371                break;
372            default:
373                break;
374            }
375            flags |= float_flag_inexact;
376            frac_addi(p, p, inc);
377            p->frac_lo &= ~round_mask;
378        }
379
380        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
381        frac_shr(p, frac_shift);
382
383        if (is_tiny && (flags & float_flag_inexact)) {
384            flags |= float_flag_underflow;
385        }
386        if (exp == 0 && frac_eqz(p)) {
387            p->cls = float_class_zero;
388        }
389    }
390    p->exp = exp;
391    float_raise(flags, s);
392}
393
394static void partsN(uncanon)(FloatPartsN *p, float_status *s,
395                            const FloatFmt *fmt)
396{
397    if (likely(p->cls == float_class_normal)) {
398        parts_uncanon_normal(p, s, fmt);
399    } else {
400        switch (p->cls) {
401        case float_class_zero:
402            p->exp = 0;
403            frac_clear(p);
404            return;
405        case float_class_inf:
406            g_assert(!fmt->arm_althp);
407            p->exp = fmt->exp_max;
408            frac_clear(p);
409            return;
410        case float_class_qnan:
411        case float_class_snan:
412            g_assert(!fmt->arm_althp);
413            p->exp = fmt->exp_max;
414            frac_shr(p, fmt->frac_shift);
415            return;
416        default:
417            break;
418        }
419        g_assert_not_reached();
420    }
421}
422
423/*
424 * Returns the result of adding or subtracting the values of the
425 * floating-point values `a' and `b'. The operation is performed
426 * according to the IEC/IEEE Standard for Binary Floating-Point
427 * Arithmetic.
428 */
429static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
430                                   float_status *s, bool subtract)
431{
432    bool b_sign = b->sign ^ subtract;
433    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
434
435    if (a->sign != b_sign) {
436        /* Subtraction */
437        if (likely(ab_mask == float_cmask_normal)) {
438            if (parts_sub_normal(a, b)) {
439                return a;
440            }
441            /* Subtract was exact, fall through to set sign. */
442            ab_mask = float_cmask_zero;
443        }
444
445        if (ab_mask == float_cmask_zero) {
446            a->sign = s->float_rounding_mode == float_round_down;
447            return a;
448        }
449
450        if (unlikely(ab_mask & float_cmask_anynan)) {
451            goto p_nan;
452        }
453
454        if (ab_mask & float_cmask_inf) {
455            if (a->cls != float_class_inf) {
456                /* N - Inf */
457                goto return_b;
458            }
459            if (b->cls != float_class_inf) {
460                /* Inf - N */
461                return a;
462            }
463            /* Inf - Inf */
464            float_raise(float_flag_invalid | float_flag_invalid_isi, s);
465            parts_default_nan(a, s);
466            return a;
467        }
468    } else {
469        /* Addition */
470        if (likely(ab_mask == float_cmask_normal)) {
471            parts_add_normal(a, b);
472            return a;
473        }
474
475        if (ab_mask == float_cmask_zero) {
476            return a;
477        }
478
479        if (unlikely(ab_mask & float_cmask_anynan)) {
480            goto p_nan;
481        }
482
483        if (ab_mask & float_cmask_inf) {
484            a->cls = float_class_inf;
485            return a;
486        }
487    }
488
489    if (b->cls == float_class_zero) {
490        g_assert(a->cls == float_class_normal);
491        return a;
492    }
493
494    g_assert(a->cls == float_class_zero);
495    g_assert(b->cls == float_class_normal);
496 return_b:
497    b->sign = b_sign;
498    return b;
499
500 p_nan:
501    return parts_pick_nan(a, b, s);
502}
503
504/*
505 * Returns the result of multiplying the floating-point values `a' and
506 * `b'. The operation is performed according to the IEC/IEEE Standard
507 * for Binary Floating-Point Arithmetic.
508 */
509static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
510                                float_status *s)
511{
512    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
513    bool sign = a->sign ^ b->sign;
514
515    if (likely(ab_mask == float_cmask_normal)) {
516        FloatPartsW tmp;
517
518        frac_mulw(&tmp, a, b);
519        frac_truncjam(a, &tmp);
520
521        a->exp += b->exp + 1;
522        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
523            frac_add(a, a, a);
524            a->exp -= 1;
525        }
526
527        a->sign = sign;
528        return a;
529    }
530
531    /* Inf * Zero == NaN */
532    if (unlikely(ab_mask == float_cmask_infzero)) {
533        float_raise(float_flag_invalid | float_flag_invalid_imz, s);
534        parts_default_nan(a, s);
535        return a;
536    }
537
538    if (unlikely(ab_mask & float_cmask_anynan)) {
539        return parts_pick_nan(a, b, s);
540    }
541
542    /* Multiply by 0 or Inf */
543    if (ab_mask & float_cmask_inf) {
544        a->cls = float_class_inf;
545        a->sign = sign;
546        return a;
547    }
548
549    g_assert(ab_mask & float_cmask_zero);
550    a->cls = float_class_zero;
551    a->sign = sign;
552    return a;
553}
554
555/*
556 * Returns the result of multiplying the floating-point values `a' and
557 * `b' then adding 'c', with no intermediate rounding step after the
558 * multiplication. The operation is performed according to the
559 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
560 * The flags argument allows the caller to select negation of the
561 * addend, the intermediate product, or the final result. (The
562 * difference between this and having the caller do a separate
563 * negation is that negating externally will flip the sign bit on NaNs.)
564 *
565 * Requires A and C extracted into a double-sized structure to provide the
566 * extra space for the widening multiply.
567 */
568static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
569                                          FloatPartsN *c, int scale,
570                                          int flags, float_status *s)
571{
572    int ab_mask, abc_mask;
573    FloatPartsW p_widen, c_widen;
574
575    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
576    abc_mask = float_cmask(c->cls) | ab_mask;
577
578    /*
579     * It is implementation-defined whether the cases of (0,inf,qnan)
580     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
581     * they return if they do), so we have to hand this information
582     * off to the target-specific pick-a-NaN routine.
583     */
584    if (unlikely(abc_mask & float_cmask_anynan)) {
585        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
586    }
587
588    if (flags & float_muladd_negate_c) {
589        c->sign ^= 1;
590    }
591
592    /* Compute the sign of the product into A. */
593    a->sign ^= b->sign;
594    if (flags & float_muladd_negate_product) {
595        a->sign ^= 1;
596    }
597
598    if (unlikely(ab_mask != float_cmask_normal)) {
599        if (unlikely(ab_mask == float_cmask_infzero)) {
600            float_raise(float_flag_invalid | float_flag_invalid_imz, s);
601            goto d_nan;
602        }
603
604        if (ab_mask & float_cmask_inf) {
605            if (c->cls == float_class_inf && a->sign != c->sign) {
606                float_raise(float_flag_invalid | float_flag_invalid_isi, s);
607                goto d_nan;
608            }
609            goto return_inf;
610        }
611
612        g_assert(ab_mask & float_cmask_zero);
613        if (c->cls == float_class_normal) {
614            *a = *c;
615            goto return_normal;
616        }
617        if (c->cls == float_class_zero) {
618            if (flags & float_muladd_suppress_add_product_zero) {
619                a->sign = c->sign;
620            } else if (a->sign != c->sign) {
621                goto return_sub_zero;
622            }
623            goto return_zero;
624        }
625        g_assert(c->cls == float_class_inf);
626    }
627
628    if (unlikely(c->cls == float_class_inf)) {
629        a->sign = c->sign;
630        goto return_inf;
631    }
632
633    /* Perform the multiplication step. */
634    p_widen.sign = a->sign;
635    p_widen.exp = a->exp + b->exp + 1;
636    frac_mulw(&p_widen, a, b);
637    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
638        frac_add(&p_widen, &p_widen, &p_widen);
639        p_widen.exp -= 1;
640    }
641
642    /* Perform the addition step. */
643    if (c->cls != float_class_zero) {
644        /* Zero-extend C to less significant bits. */
645        frac_widen(&c_widen, c);
646        c_widen.exp = c->exp;
647
648        if (a->sign == c->sign) {
649            parts_add_normal(&p_widen, &c_widen);
650        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
651            goto return_sub_zero;
652        }
653    }
654
655    /* Narrow with sticky bit, for proper rounding later. */
656    frac_truncjam(a, &p_widen);
657    a->sign = p_widen.sign;
658    a->exp = p_widen.exp;
659
660 return_normal:
661    a->exp += scale;
662 finish_sign:
663    if (flags & float_muladd_negate_result) {
664        a->sign ^= 1;
665    }
666    return a;
667
668 return_sub_zero:
669    a->sign = s->float_rounding_mode == float_round_down;
670 return_zero:
671    a->cls = float_class_zero;
672    goto finish_sign;
673
674 return_inf:
675    a->cls = float_class_inf;
676    goto finish_sign;
677
678 d_nan:
679    parts_default_nan(a, s);
680    return a;
681}
682
683/*
684 * Returns the result of dividing the floating-point value `a' by the
685 * corresponding value `b'. The operation is performed according to
686 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
687 */
688static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
689                                float_status *s)
690{
691    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
692    bool sign = a->sign ^ b->sign;
693
694    if (likely(ab_mask == float_cmask_normal)) {
695        a->sign = sign;
696        a->exp -= b->exp + frac_div(a, b);
697        return a;
698    }
699
700    /* 0/0 or Inf/Inf => NaN */
701    if (unlikely(ab_mask == float_cmask_zero)) {
702        float_raise(float_flag_invalid | float_flag_invalid_zdz, s);
703        goto d_nan;
704    }
705    if (unlikely(ab_mask == float_cmask_inf)) {
706        float_raise(float_flag_invalid | float_flag_invalid_idi, s);
707        goto d_nan;
708    }
709
710    /* All the NaN cases */
711    if (unlikely(ab_mask & float_cmask_anynan)) {
712        return parts_pick_nan(a, b, s);
713    }
714
715    a->sign = sign;
716
717    /* Inf / X */
718    if (a->cls == float_class_inf) {
719        return a;
720    }
721
722    /* 0 / X */
723    if (a->cls == float_class_zero) {
724        return a;
725    }
726
727    /* X / Inf */
728    if (b->cls == float_class_inf) {
729        a->cls = float_class_zero;
730        return a;
731    }
732
733    /* X / 0 => Inf */
734    g_assert(b->cls == float_class_zero);
735    float_raise(float_flag_divbyzero, s);
736    a->cls = float_class_inf;
737    return a;
738
739 d_nan:
740    parts_default_nan(a, s);
741    return a;
742}
743
744/*
745 * Floating point remainder, per IEC/IEEE, or modulus.
746 */
747static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
748                                   uint64_t *mod_quot, float_status *s)
749{
750    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
751
752    if (likely(ab_mask == float_cmask_normal)) {
753        frac_modrem(a, b, mod_quot);
754        return a;
755    }
756
757    if (mod_quot) {
758        *mod_quot = 0;
759    }
760
761    /* All the NaN cases */
762    if (unlikely(ab_mask & float_cmask_anynan)) {
763        return parts_pick_nan(a, b, s);
764    }
765
766    /* Inf % N; N % 0 */
767    if (a->cls == float_class_inf || b->cls == float_class_zero) {
768        float_raise(float_flag_invalid, s);
769        parts_default_nan(a, s);
770        return a;
771    }
772
773    /* N % Inf; 0 % N */
774    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
775    return a;
776}
777
778/*
779 * Square Root
780 *
781 * The base algorithm is lifted from
782 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
783 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
784 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
785 * and is thus MIT licenced.
786 */
787static void partsN(sqrt)(FloatPartsN *a, float_status *status,
788                         const FloatFmt *fmt)
789{
790    const uint32_t three32 = 3u << 30;
791    const uint64_t three64 = 3ull << 62;
792    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
793    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
794    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
795    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
796    uint64_t discard;
797    bool exp_odd;
798    size_t index;
799
800    if (unlikely(a->cls != float_class_normal)) {
801        switch (a->cls) {
802        case float_class_snan:
803        case float_class_qnan:
804            parts_return_nan(a, status);
805            return;
806        case float_class_zero:
807            return;
808        case float_class_inf:
809            if (unlikely(a->sign)) {
810                goto d_nan;
811            }
812            return;
813        default:
814            g_assert_not_reached();
815        }
816    }
817
818    if (unlikely(a->sign)) {
819        goto d_nan;
820    }
821
822    /*
823     * Argument reduction.
824     * x = 4^e frac; with integer e, and frac in [1, 4)
825     * m = frac fixed point at bit 62, since we're in base 4.
826     * If base-2 exponent is odd, exchange that for multiply by 2,
827     * which results in no shift.
828     */
829    exp_odd = a->exp & 1;
830    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
831    if (!exp_odd) {
832        frac_shr(a, 1);
833    }
834
835    /*
836     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
837     *
838     * Initial estimate:
839     * 7-bit lookup table (1-bit exponent and 6-bit significand).
840     *
841     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
842     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
843     * a table lookup is faster and needs one less iteration.
844     * The 7-bit table gives |e| < 0x1.fdp-9.
845     *
846     * A Newton-Raphson iteration for r is
847     *   s = m*r
848     *   d = s*r
849     *   u = 3 - d
850     *   r = r*u/2
851     *
852     * Fixed point representations:
853     *   m, s, d, u, three are all 2.30; r is 0.32
854     */
855    m64 = a->frac_hi;
856    m32 = m64 >> 32;
857
858    r32 = rsqrt_tab[index] << 16;
859    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
860
861    s32 = ((uint64_t)m32 * r32) >> 32;
862    d32 = ((uint64_t)s32 * r32) >> 32;
863    u32 = three32 - d32;
864
865    if (N == 64) {
866        /* float64 or smaller */
867
868        r32 = ((uint64_t)r32 * u32) >> 31;
869        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
870
871        s32 = ((uint64_t)m32 * r32) >> 32;
872        d32 = ((uint64_t)s32 * r32) >> 32;
873        u32 = three32 - d32;
874
875        if (fmt->frac_size <= 23) {
876            /* float32 or smaller */
877
878            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
879            s32 = (s32 - 1) >> 6;               /* 9.23 */
880            /* s < sqrt(m) < s + 0x1.08p-23 */
881
882            /* compute nearest rounded result to 2.23 bits */
883            uint32_t d0 = (m32 << 16) - s32 * s32;
884            uint32_t d1 = s32 - d0;
885            uint32_t d2 = d1 + s32 + 1;
886            s32 += d1 >> 31;
887            a->frac_hi = (uint64_t)s32 << (64 - 25);
888
889            /* increment or decrement for inexact */
890            if (d2 != 0) {
891                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
892            }
893            goto done;
894        }
895
896        /* float64 */
897
898        r64 = (uint64_t)r32 * u32 * 2;
899        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
900        mul64To128(m64, r64, &s64, &discard);
901        mul64To128(s64, r64, &d64, &discard);
902        u64 = three64 - d64;
903
904        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
905        s64 = (s64 - 2) >> 9;                  /* 12.52 */
906
907        /* Compute nearest rounded result */
908        uint64_t d0 = (m64 << 42) - s64 * s64;
909        uint64_t d1 = s64 - d0;
910        uint64_t d2 = d1 + s64 + 1;
911        s64 += d1 >> 63;
912        a->frac_hi = s64 << (64 - 54);
913
914        /* increment or decrement for inexact */
915        if (d2 != 0) {
916            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
917        }
918        goto done;
919    }
920
921    r64 = (uint64_t)r32 * u32 * 2;
922    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
923
924    mul64To128(m64, r64, &s64, &discard);
925    mul64To128(s64, r64, &d64, &discard);
926    u64 = three64 - d64;
927    mul64To128(u64, r64, &r64, &discard);
928    r64 <<= 1;
929    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
930
931    mul64To128(m64, r64, &s64, &discard);
932    mul64To128(s64, r64, &d64, &discard);
933    u64 = three64 - d64;
934    mul64To128(u64, r64, &rh, &rl);
935    add128(rh, rl, rh, rl, &rh, &rl);
936    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
937
938    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
939    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
940    sub128(three64, 0, dh, dl, &uh, &ul);
941    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
942    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
943
944    sub128(sh, sl, 0, 4, &sh, &sl);
945    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
946    /* s < sqrt(m) < s + 1ulp */
947
948    /* Compute nearest rounded result */
949    mul64To128(sl, sl, &d0h, &d0l);
950    d0h += 2 * sh * sl;
951    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
952    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
953    add128(sh, sl, 0, 1, &d2h, &d2l);
954    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
955    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
956    shift128Left(sh, sl, 128 - 114, &sh, &sl);
957
958    /* increment or decrement for inexact */
959    if (d2h | d2l) {
960        if ((int64_t)(d1h ^ d2h) < 0) {
961            sub128(sh, sl, 0, 1, &sh, &sl);
962        } else {
963            add128(sh, sl, 0, 1, &sh, &sl);
964        }
965    }
966    a->frac_lo = sl;
967    a->frac_hi = sh;
968
969 done:
970    /* Convert back from base 4 to base 2. */
971    a->exp >>= 1;
972    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
973        frac_add(a, a, a);
974    } else {
975        a->exp += 1;
976    }
977    return;
978
979 d_nan:
980    float_raise(float_flag_invalid | float_flag_invalid_sqrt, status);
981    parts_default_nan(a, status);
982}
983
984/*
985 * Rounds the floating-point value `a' to an integer, and returns the
986 * result as a floating-point value. The operation is performed
987 * according to the IEC/IEEE Standard for Binary Floating-Point
988 * Arithmetic.
989 *
990 * parts_round_to_int_normal is an internal helper function for
991 * normal numbers only, returning true for inexact but not directly
992 * raising float_flag_inexact.
993 */
994static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
995                                        int scale, int frac_size)
996{
997    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
998    int shift_adj;
999
1000    scale = MIN(MAX(scale, -0x10000), 0x10000);
1001    a->exp += scale;
1002
1003    if (a->exp < 0) {
1004        bool one;
1005
1006        /* All fractional */
1007        switch (rmode) {
1008        case float_round_nearest_even:
1009            one = false;
1010            if (a->exp == -1) {
1011                FloatPartsN tmp;
1012                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
1013                frac_add(&tmp, a, a);
1014                /* Anything remaining means frac > 0.5. */
1015                one = !frac_eqz(&tmp);
1016            }
1017            break;
1018        case float_round_ties_away:
1019            one = a->exp == -1;
1020            break;
1021        case float_round_to_zero:
1022            one = false;
1023            break;
1024        case float_round_up:
1025            one = !a->sign;
1026            break;
1027        case float_round_down:
1028            one = a->sign;
1029            break;
1030        case float_round_to_odd:
1031            one = true;
1032            break;
1033        default:
1034            g_assert_not_reached();
1035        }
1036
1037        frac_clear(a);
1038        a->exp = 0;
1039        if (one) {
1040            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
1041        } else {
1042            a->cls = float_class_zero;
1043        }
1044        return true;
1045    }
1046
1047    if (a->exp >= frac_size) {
1048        /* All integral */
1049        return false;
1050    }
1051
1052    if (N > 64 && a->exp < N - 64) {
1053        /*
1054         * Rounding is not in the low word -- shift lsb to bit 2,
1055         * which leaves room for sticky and rounding bit.
1056         */
1057        shift_adj = (N - 1) - (a->exp + 2);
1058        frac_shrjam(a, shift_adj);
1059        frac_lsb = 1 << 2;
1060    } else {
1061        shift_adj = 0;
1062        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
1063    }
1064
1065    frac_lsbm1 = frac_lsb >> 1;
1066    rnd_mask = frac_lsb - 1;
1067    rnd_even_mask = rnd_mask | frac_lsb;
1068
1069    if (!(a->frac_lo & rnd_mask)) {
1070        /* Fractional bits already clear, undo the shift above. */
1071        frac_shl(a, shift_adj);
1072        return false;
1073    }
1074
1075    switch (rmode) {
1076    case float_round_nearest_even:
1077        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1078        break;
1079    case float_round_ties_away:
1080        inc = frac_lsbm1;
1081        break;
1082    case float_round_to_zero:
1083        inc = 0;
1084        break;
1085    case float_round_up:
1086        inc = a->sign ? 0 : rnd_mask;
1087        break;
1088    case float_round_down:
1089        inc = a->sign ? rnd_mask : 0;
1090        break;
1091    case float_round_to_odd:
1092        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
1093        break;
1094    default:
1095        g_assert_not_reached();
1096    }
1097
1098    if (shift_adj == 0) {
1099        if (frac_addi(a, a, inc)) {
1100            frac_shr(a, 1);
1101            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
1102            a->exp++;
1103        }
1104        a->frac_lo &= ~rnd_mask;
1105    } else {
1106        frac_addi(a, a, inc);
1107        a->frac_lo &= ~rnd_mask;
1108        /* Be careful shifting back, not to overflow */
1109        frac_shl(a, shift_adj - 1);
1110        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
1111            a->exp++;
1112        } else {
1113            frac_add(a, a, a);
1114        }
1115    }
1116    return true;
1117}
1118
1119static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1120                                 int scale, float_status *s,
1121                                 const FloatFmt *fmt)
1122{
1123    switch (a->cls) {
1124    case float_class_qnan:
1125    case float_class_snan:
1126        parts_return_nan(a, s);
1127        break;
1128    case float_class_zero:
1129    case float_class_inf:
1130        break;
1131    case float_class_normal:
1132        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1133            float_raise(float_flag_inexact, s);
1134        }
1135        break;
1136    default:
1137        g_assert_not_reached();
1138    }
1139}
1140
1141/*
1142 * Returns the result of converting the floating-point value `a' to
1143 * the two's complement integer format. The conversion is performed
1144 * according to the IEC/IEEE Standard for Binary Floating-Point
1145 * Arithmetic---which means in particular that the conversion is
1146 * rounded according to the current rounding mode. If `a' is a NaN,
1147 * the largest positive integer is returned. Otherwise, if the
1148 * conversion overflows, the largest integer with the same sign as `a'
1149 * is returned.
1150 */
1151static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1152                                     int scale, int64_t min, int64_t max,
1153                                     float_status *s)
1154{
1155    int flags = 0;
1156    uint64_t r;
1157
1158    switch (p->cls) {
1159    case float_class_snan:
1160        flags |= float_flag_invalid_snan;
1161        /* fall through */
1162    case float_class_qnan:
1163        flags |= float_flag_invalid;
1164        r = max;
1165        break;
1166
1167    case float_class_inf:
1168        flags = float_flag_invalid | float_flag_invalid_cvti;
1169        r = p->sign ? min : max;
1170        break;
1171
1172    case float_class_zero:
1173        return 0;
1174
1175    case float_class_normal:
1176        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1177        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1178            flags = float_flag_inexact;
1179        }
1180
1181        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1182            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1183        } else {
1184            r = UINT64_MAX;
1185        }
1186        if (p->sign) {
1187            if (r <= -(uint64_t)min) {
1188                r = -r;
1189            } else {
1190                flags = float_flag_invalid | float_flag_invalid_cvti;
1191                r = min;
1192            }
1193        } else if (r > max) {
1194            flags = float_flag_invalid | float_flag_invalid_cvti;
1195            r = max;
1196        }
1197        break;
1198
1199    default:
1200        g_assert_not_reached();
1201    }
1202
1203    float_raise(flags, s);
1204    return r;
1205}
1206
1207/*
1208 *  Returns the result of converting the floating-point value `a' to
1209 *  the unsigned integer format. The conversion is performed according
1210 *  to the IEC/IEEE Standard for Binary Floating-Point
1211 *  Arithmetic---which means in particular that the conversion is
1212 *  rounded according to the current rounding mode. If `a' is a NaN,
1213 *  the largest unsigned integer is returned. Otherwise, if the
1214 *  conversion overflows, the largest unsigned integer is returned. If
1215 *  the 'a' is negative, the result is rounded and zero is returned;
1216 *  values that do not round to zero will raise the inexact exception
1217 *  flag.
1218 */
1219static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1220                                      int scale, uint64_t max, float_status *s)
1221{
1222    int flags = 0;
1223    uint64_t r;
1224
1225    switch (p->cls) {
1226    case float_class_snan:
1227        flags |= float_flag_invalid_snan;
1228        /* fall through */
1229    case float_class_qnan:
1230        flags |= float_flag_invalid;
1231        r = max;
1232        break;
1233
1234    case float_class_inf:
1235        flags = float_flag_invalid | float_flag_invalid_cvti;
1236        r = p->sign ? 0 : max;
1237        break;
1238
1239    case float_class_zero:
1240        return 0;
1241
1242    case float_class_normal:
1243        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1244        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1245            flags = float_flag_inexact;
1246            if (p->cls == float_class_zero) {
1247                r = 0;
1248                break;
1249            }
1250        }
1251
1252        if (p->sign) {
1253            flags = float_flag_invalid | float_flag_invalid_cvti;
1254            r = 0;
1255        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1256            flags = float_flag_invalid | float_flag_invalid_cvti;
1257            r = max;
1258        } else {
1259            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1260            if (r > max) {
1261                flags = float_flag_invalid | float_flag_invalid_cvti;
1262                r = max;
1263            }
1264        }
1265        break;
1266
1267    default:
1268        g_assert_not_reached();
1269    }
1270
1271    float_raise(flags, s);
1272    return r;
1273}
1274
1275/*
1276 * Like partsN(float_to_sint), except do not saturate the result.
1277 * Instead, return the rounded unbounded precision two's compliment result,
1278 * modulo 2**(bitsm1 + 1).
1279 */
1280static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
1281                                            FloatRoundMode rmode,
1282                                            int bitsm1, float_status *s)
1283{
1284    int flags = 0;
1285    uint64_t r;
1286    bool overflow = false;
1287
1288    switch (p->cls) {
1289    case float_class_snan:
1290        flags |= float_flag_invalid_snan;
1291        /* fall through */
1292    case float_class_qnan:
1293        flags |= float_flag_invalid;
1294        r = 0;
1295        break;
1296
1297    case float_class_inf:
1298        overflow = true;
1299        r = 0;
1300        break;
1301
1302    case float_class_zero:
1303        return 0;
1304
1305    case float_class_normal:
1306        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1307        if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
1308            flags = float_flag_inexact;
1309        }
1310
1311        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1312            /*
1313             * Because we rounded to integral, and exp < 64,
1314             * we know frac_low is zero.
1315             */
1316            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1317            if (p->exp < bitsm1) {
1318                /* Result in range. */
1319            } else if (p->exp == bitsm1) {
1320                /* The only in-range value is INT_MIN. */
1321                overflow = !p->sign || p->frac_hi != DECOMPOSED_IMPLICIT_BIT;
1322            } else {
1323                overflow = true;
1324            }
1325        } else {
1326            /* Overflow, but there might still be bits to return. */
1327            int shl = p->exp - DECOMPOSED_BINARY_POINT;
1328            if (shl < N) {
1329                frac_shl(p, shl);
1330                r = p->frac_hi;
1331            } else {
1332                r = 0;
1333            }
1334            overflow = true;
1335        }
1336
1337        if (p->sign) {
1338            r = -r;
1339        }
1340        break;
1341
1342    default:
1343        g_assert_not_reached();
1344    }
1345
1346    if (overflow) {
1347        flags = float_flag_invalid | float_flag_invalid_cvti;
1348    }
1349    float_raise(flags, s);
1350    return r;
1351}
1352
1353/*
1354 * Integer to float conversions
1355 *
1356 * Returns the result of converting the two's complement integer `a'
1357 * to the floating-point format. The conversion is performed according
1358 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1359 */
1360static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1361                                  int scale, float_status *s)
1362{
1363    uint64_t f = a;
1364    int shift;
1365
1366    memset(p, 0, sizeof(*p));
1367
1368    if (a == 0) {
1369        p->cls = float_class_zero;
1370        return;
1371    }
1372
1373    p->cls = float_class_normal;
1374    if (a < 0) {
1375        f = -f;
1376        p->sign = true;
1377    }
1378    shift = clz64(f);
1379    scale = MIN(MAX(scale, -0x10000), 0x10000);
1380
1381    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1382    p->frac_hi = f << shift;
1383}
1384
1385/*
1386 * Unsigned Integer to float conversions
1387 *
1388 * Returns the result of converting the unsigned integer `a' to the
1389 * floating-point format. The conversion is performed according to the
1390 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1391 */
1392static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1393                                  int scale, float_status *status)
1394{
1395    memset(p, 0, sizeof(*p));
1396
1397    if (a == 0) {
1398        p->cls = float_class_zero;
1399    } else {
1400        int shift = clz64(a);
1401        scale = MIN(MAX(scale, -0x10000), 0x10000);
1402        p->cls = float_class_normal;
1403        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1404        p->frac_hi = a << shift;
1405    }
1406}
1407
1408/*
1409 * Float min/max.
1410 */
1411static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1412                                   float_status *s, int flags)
1413{
1414    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1415    int a_exp, b_exp, cmp;
1416
1417    if (unlikely(ab_mask & float_cmask_anynan)) {
1418        /*
1419         * For minNum/maxNum (IEEE 754-2008)
1420         * or minimumNumber/maximumNumber (IEEE 754-2019),
1421         * if one operand is a QNaN, and the other
1422         * operand is numerical, then return numerical argument.
1423         */
1424        if ((flags & (minmax_isnum | minmax_isnumber))
1425            && !(ab_mask & float_cmask_snan)
1426            && (ab_mask & ~float_cmask_qnan)) {
1427            return is_nan(a->cls) ? b : a;
1428        }
1429
1430        /*
1431         * In IEEE 754-2019, minNum, maxNum, minNumMag and maxNumMag
1432         * are removed and replaced with minimum, minimumNumber, maximum
1433         * and maximumNumber.
1434         * minimumNumber/maximumNumber behavior for SNaN is changed to:
1435         *   If both operands are NaNs, a QNaN is returned.
1436         *   If either operand is a SNaN,
1437         *   an invalid operation exception is signaled,
1438         *   but unless both operands are NaNs,
1439         *   the SNaN is otherwise ignored and not converted to a QNaN.
1440         */
1441        if ((flags & minmax_isnumber)
1442            && (ab_mask & float_cmask_snan)
1443            && (ab_mask & ~float_cmask_anynan)) {
1444            float_raise(float_flag_invalid, s);
1445            return is_nan(a->cls) ? b : a;
1446        }
1447
1448        return parts_pick_nan(a, b, s);
1449    }
1450
1451    a_exp = a->exp;
1452    b_exp = b->exp;
1453
1454    if (unlikely(ab_mask != float_cmask_normal)) {
1455        switch (a->cls) {
1456        case float_class_normal:
1457            break;
1458        case float_class_inf:
1459            a_exp = INT16_MAX;
1460            break;
1461        case float_class_zero:
1462            a_exp = INT16_MIN;
1463            break;
1464        default:
1465            g_assert_not_reached();
1466        }
1467        switch (b->cls) {
1468        case float_class_normal:
1469            break;
1470        case float_class_inf:
1471            b_exp = INT16_MAX;
1472            break;
1473        case float_class_zero:
1474            b_exp = INT16_MIN;
1475            break;
1476        default:
1477            g_assert_not_reached();
1478        }
1479    }
1480
1481    /* Compare magnitudes. */
1482    cmp = a_exp - b_exp;
1483    if (cmp == 0) {
1484        cmp = frac_cmp(a, b);
1485    }
1486
1487    /*
1488     * Take the sign into account.
1489     * For ismag, only do this if the magnitudes are equal.
1490     */
1491    if (!(flags & minmax_ismag) || cmp == 0) {
1492        if (a->sign != b->sign) {
1493            /* For differing signs, the negative operand is less. */
1494            cmp = a->sign ? -1 : 1;
1495        } else if (a->sign) {
1496            /* For two negative operands, invert the magnitude comparison. */
1497            cmp = -cmp;
1498        }
1499    }
1500
1501    if (flags & minmax_ismin) {
1502        cmp = -cmp;
1503    }
1504    return cmp < 0 ? b : a;
1505}
1506
1507/*
1508 * Floating point compare
1509 */
1510static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1511                                     float_status *s, bool is_quiet)
1512{
1513    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1514
1515    if (likely(ab_mask == float_cmask_normal)) {
1516        FloatRelation cmp;
1517
1518        if (a->sign != b->sign) {
1519            goto a_sign;
1520        }
1521        if (a->exp == b->exp) {
1522            cmp = frac_cmp(a, b);
1523        } else if (a->exp < b->exp) {
1524            cmp = float_relation_less;
1525        } else {
1526            cmp = float_relation_greater;
1527        }
1528        if (a->sign) {
1529            cmp = -cmp;
1530        }
1531        return cmp;
1532    }
1533
1534    if (unlikely(ab_mask & float_cmask_anynan)) {
1535        if (ab_mask & float_cmask_snan) {
1536            float_raise(float_flag_invalid | float_flag_invalid_snan, s);
1537        } else if (!is_quiet) {
1538            float_raise(float_flag_invalid, s);
1539        }
1540        return float_relation_unordered;
1541    }
1542
1543    if (ab_mask & float_cmask_zero) {
1544        if (ab_mask == float_cmask_zero) {
1545            return float_relation_equal;
1546        } else if (a->cls == float_class_zero) {
1547            goto b_sign;
1548        } else {
1549            goto a_sign;
1550        }
1551    }
1552
1553    if (ab_mask == float_cmask_inf) {
1554        if (a->sign == b->sign) {
1555            return float_relation_equal;
1556        }
1557    } else if (b->cls == float_class_inf) {
1558        goto b_sign;
1559    } else {
1560        g_assert(a->cls == float_class_inf);
1561    }
1562
1563 a_sign:
1564    return a->sign ? float_relation_less : float_relation_greater;
1565 b_sign:
1566    return b->sign ? float_relation_greater : float_relation_less;
1567}
1568
1569/*
1570 * Multiply A by 2 raised to the power N.
1571 */
1572static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1573{
1574    switch (a->cls) {
1575    case float_class_snan:
1576    case float_class_qnan:
1577        parts_return_nan(a, s);
1578        break;
1579    case float_class_zero:
1580    case float_class_inf:
1581        break;
1582    case float_class_normal:
1583        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1584        break;
1585    default:
1586        g_assert_not_reached();
1587    }
1588}
1589
1590/*
1591 * Return log2(A)
1592 */
1593static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1594{
1595    uint64_t a0, a1, r, t, ign;
1596    FloatPartsN f;
1597    int i, n, a_exp, f_exp;
1598
1599    if (unlikely(a->cls != float_class_normal)) {
1600        switch (a->cls) {
1601        case float_class_snan:
1602        case float_class_qnan:
1603            parts_return_nan(a, s);
1604            return;
1605        case float_class_zero:
1606            float_raise(float_flag_divbyzero, s);
1607            /* log2(0) = -inf */
1608            a->cls = float_class_inf;
1609            a->sign = 1;
1610            return;
1611        case float_class_inf:
1612            if (unlikely(a->sign)) {
1613                goto d_nan;
1614            }
1615            return;
1616        default:
1617            break;
1618        }
1619        g_assert_not_reached();
1620    }
1621    if (unlikely(a->sign)) {
1622        goto d_nan;
1623    }
1624
1625    /* TODO: This algorithm looses bits too quickly for float128. */
1626    g_assert(N == 64);
1627
1628    a_exp = a->exp;
1629    f_exp = -1;
1630
1631    r = 0;
1632    t = DECOMPOSED_IMPLICIT_BIT;
1633    a0 = a->frac_hi;
1634    a1 = 0;
1635
1636    n = fmt->frac_size + 2;
1637    if (unlikely(a_exp == -1)) {
1638        /*
1639         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1640         * When the value is very close to 1.0, there are lots of 1's in
1641         * the msb parts of the fraction.  At the end, when we subtract
1642         * this value from -1.0, we can see a catastrophic loss of precision,
1643         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1644         * bits of y in the final result.  To minimize this, compute as many
1645         * digits as we can.
1646         * ??? This case needs another algorithm to avoid this.
1647         */
1648        n = fmt->frac_size * 2 + 2;
1649        /* Don't compute a value overlapping the sticky bit */
1650        n = MIN(n, 62);
1651    }
1652
1653    for (i = 0; i < n; i++) {
1654        if (a1) {
1655            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1656        } else if (a0 & 0xffffffffull) {
1657            mul64To128(a0, a0, &a0, &a1);
1658        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1659            a0 >>= 32;
1660            a0 *= a0;
1661        } else {
1662            goto exact;
1663        }
1664
1665        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1666            if (unlikely(a_exp == 0 && r == 0)) {
1667                /*
1668                 * When a_exp == 0, we're computing the log2 of a value
1669                 * [1.0,2.0).  When the value is very close to 1.0, there
1670                 * are lots of 0's in the msb parts of the fraction.
1671                 * We need to compute more digits to produce a correct
1672                 * result -- restart at the top of the fraction.
1673                 * ??? This is likely to lose precision quickly, as for
1674                 * float128; we may need another method.
1675                 */
1676                f_exp -= i;
1677                t = r = DECOMPOSED_IMPLICIT_BIT;
1678                i = 0;
1679            } else {
1680                r |= t;
1681            }
1682        } else {
1683            add128(a0, a1, a0, a1, &a0, &a1);
1684        }
1685        t >>= 1;
1686    }
1687
1688    /* Set sticky for inexact. */
1689    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1690
1691 exact:
1692    parts_sint_to_float(a, a_exp, 0, s);
1693    if (r == 0) {
1694        return;
1695    }
1696
1697    memset(&f, 0, sizeof(f));
1698    f.cls = float_class_normal;
1699    f.frac_hi = r;
1700    f.exp = f_exp - frac_normalize(&f);
1701
1702    if (a_exp < 0) {
1703        parts_sub_normal(a, &f);
1704    } else if (a_exp > 0) {
1705        parts_add_normal(a, &f);
1706    } else {
1707        *a = f;
1708    }
1709    return;
1710
1711 d_nan:
1712    float_raise(float_flag_invalid, s);
1713    parts_default_nan(a, s);
1714}
1715