xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision 9261b245)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    if (is_snan(a->cls) || is_snan(b->cls)) {
43        float_raise(float_flag_invalid, s);
44    }
45
46    if (s->default_nan_mode) {
47        parts_default_nan(a, s);
48    } else {
49        int cmp = frac_cmp(a, b);
50        if (cmp == 0) {
51            cmp = a->sign < b->sign;
52        }
53
54        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55            a = b;
56        }
57        if (is_snan(a->cls)) {
58            parts_silence_nan(a, s);
59        }
60    }
61    return a;
62}
63
64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65                                            FloatPartsN *c, float_status *s,
66                                            int ab_mask, int abc_mask)
67{
68    int which;
69
70    if (unlikely(abc_mask & float_cmask_snan)) {
71        float_raise(float_flag_invalid, s);
72    }
73
74    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75                          ab_mask == float_cmask_infzero, s);
76
77    if (s->default_nan_mode || which == 3) {
78        /*
79         * Note that this check is after pickNaNMulAdd so that function
80         * has an opportunity to set the Invalid flag for infzero.
81         */
82        parts_default_nan(a, s);
83        return a;
84    }
85
86    switch (which) {
87    case 0:
88        break;
89    case 1:
90        a = b;
91        break;
92    case 2:
93        a = c;
94        break;
95    default:
96        g_assert_not_reached();
97    }
98    if (is_snan(a->cls)) {
99        parts_silence_nan(a, s);
100    }
101    return a;
102}
103
104/*
105 * Canonicalize the FloatParts structure.  Determine the class,
106 * unbias the exponent, and normalize the fraction.
107 */
108static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109                                 const FloatFmt *fmt)
110{
111    if (unlikely(p->exp == 0)) {
112        if (likely(frac_eqz(p))) {
113            p->cls = float_class_zero;
114        } else if (status->flush_inputs_to_zero) {
115            float_raise(float_flag_input_denormal, status);
116            p->cls = float_class_zero;
117            frac_clear(p);
118        } else {
119            int shift = frac_normalize(p);
120            p->cls = float_class_normal;
121            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
122        }
123    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124        p->cls = float_class_normal;
125        p->exp -= fmt->exp_bias;
126        frac_shl(p, fmt->frac_shift);
127        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
128    } else if (likely(frac_eqz(p))) {
129        p->cls = float_class_inf;
130    } else {
131        frac_shl(p, fmt->frac_shift);
132        p->cls = (parts_is_snan_frac(p->frac_hi, status)
133                  ? float_class_snan : float_class_qnan);
134    }
135}
136
137/*
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
142 */
143static void partsN(uncanon)(FloatPartsN *p, float_status *s,
144                            const FloatFmt *fmt)
145{
146    const int exp_max = fmt->exp_max;
147    const int frac_shift = fmt->frac_shift;
148    const uint64_t frac_lsb = fmt->frac_lsb;
149    const uint64_t frac_lsbm1 = fmt->frac_lsbm1;
150    const uint64_t round_mask = fmt->round_mask;
151    const uint64_t roundeven_mask = fmt->roundeven_mask;
152    uint64_t inc;
153    bool overflow_norm;
154    int exp, flags = 0;
155
156    if (unlikely(p->cls != float_class_normal)) {
157        switch (p->cls) {
158        case float_class_zero:
159            p->exp = 0;
160            frac_clear(p);
161            return;
162        case float_class_inf:
163            g_assert(!fmt->arm_althp);
164            p->exp = fmt->exp_max;
165            frac_clear(p);
166            return;
167        case float_class_qnan:
168        case float_class_snan:
169            g_assert(!fmt->arm_althp);
170            p->exp = fmt->exp_max;
171            frac_shr(p, fmt->frac_shift);
172            return;
173        default:
174            break;
175        }
176        g_assert_not_reached();
177    }
178
179    overflow_norm = false;
180    switch (s->float_rounding_mode) {
181    case float_round_nearest_even:
182        inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
183        break;
184    case float_round_ties_away:
185        inc = frac_lsbm1;
186        break;
187    case float_round_to_zero:
188        overflow_norm = true;
189        inc = 0;
190        break;
191    case float_round_up:
192        inc = p->sign ? 0 : round_mask;
193        overflow_norm = p->sign;
194        break;
195    case float_round_down:
196        inc = p->sign ? round_mask : 0;
197        overflow_norm = !p->sign;
198        break;
199    case float_round_to_odd:
200        overflow_norm = true;
201        /* fall through */
202    case float_round_to_odd_inf:
203        inc = p->frac_lo & frac_lsb ? 0 : round_mask;
204        break;
205    default:
206        g_assert_not_reached();
207    }
208
209    exp = p->exp + fmt->exp_bias;
210    if (likely(exp > 0)) {
211        if (p->frac_lo & round_mask) {
212            flags |= float_flag_inexact;
213            if (frac_addi(p, p, inc)) {
214                frac_shr(p, 1);
215                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
216                exp++;
217            }
218        }
219        frac_shr(p, frac_shift);
220
221        if (fmt->arm_althp) {
222            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
223            if (unlikely(exp > exp_max)) {
224                /* Overflow.  Return the maximum normal.  */
225                flags = float_flag_invalid;
226                exp = exp_max;
227                frac_allones(p);
228            }
229        } else if (unlikely(exp >= exp_max)) {
230            flags |= float_flag_overflow | float_flag_inexact;
231            if (overflow_norm) {
232                exp = exp_max - 1;
233                frac_allones(p);
234            } else {
235                p->cls = float_class_inf;
236                exp = exp_max;
237                frac_clear(p);
238            }
239        }
240    } else if (s->flush_to_zero) {
241        flags |= float_flag_output_denormal;
242        p->cls = float_class_zero;
243        exp = 0;
244        frac_clear(p);
245    } else {
246        bool is_tiny = s->tininess_before_rounding || exp < 0;
247
248        if (!is_tiny) {
249            FloatPartsN discard;
250            is_tiny = !frac_addi(&discard, p, inc);
251        }
252
253        frac_shrjam(p, 1 - exp);
254
255        if (p->frac_lo & round_mask) {
256            /* Need to recompute round-to-even/round-to-odd. */
257            switch (s->float_rounding_mode) {
258            case float_round_nearest_even:
259                inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
260                       ? frac_lsbm1 : 0);
261                break;
262            case float_round_to_odd:
263            case float_round_to_odd_inf:
264                inc = p->frac_lo & frac_lsb ? 0 : round_mask;
265                break;
266            default:
267                break;
268            }
269            flags |= float_flag_inexact;
270            frac_addi(p, p, inc);
271        }
272
273        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
274        frac_shr(p, frac_shift);
275
276        if (is_tiny && (flags & float_flag_inexact)) {
277            flags |= float_flag_underflow;
278        }
279        if (exp == 0 && frac_eqz(p)) {
280            p->cls = float_class_zero;
281        }
282    }
283    p->exp = exp;
284    float_raise(flags, s);
285}
286
287/*
288 * Returns the result of adding or subtracting the values of the
289 * floating-point values `a' and `b'. The operation is performed
290 * according to the IEC/IEEE Standard for Binary Floating-Point
291 * Arithmetic.
292 */
293static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
294                                   float_status *s, bool subtract)
295{
296    bool b_sign = b->sign ^ subtract;
297    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
298
299    if (a->sign != b_sign) {
300        /* Subtraction */
301        if (likely(ab_mask == float_cmask_normal)) {
302            if (parts_sub_normal(a, b)) {
303                return a;
304            }
305            /* Subtract was exact, fall through to set sign. */
306            ab_mask = float_cmask_zero;
307        }
308
309        if (ab_mask == float_cmask_zero) {
310            a->sign = s->float_rounding_mode == float_round_down;
311            return a;
312        }
313
314        if (unlikely(ab_mask & float_cmask_anynan)) {
315            goto p_nan;
316        }
317
318        if (ab_mask & float_cmask_inf) {
319            if (a->cls != float_class_inf) {
320                /* N - Inf */
321                goto return_b;
322            }
323            if (b->cls != float_class_inf) {
324                /* Inf - N */
325                return a;
326            }
327            /* Inf - Inf */
328            float_raise(float_flag_invalid, s);
329            parts_default_nan(a, s);
330            return a;
331        }
332    } else {
333        /* Addition */
334        if (likely(ab_mask == float_cmask_normal)) {
335            parts_add_normal(a, b);
336            return a;
337        }
338
339        if (ab_mask == float_cmask_zero) {
340            return a;
341        }
342
343        if (unlikely(ab_mask & float_cmask_anynan)) {
344            goto p_nan;
345        }
346
347        if (ab_mask & float_cmask_inf) {
348            a->cls = float_class_inf;
349            return a;
350        }
351    }
352
353    if (b->cls == float_class_zero) {
354        g_assert(a->cls == float_class_normal);
355        return a;
356    }
357
358    g_assert(a->cls == float_class_zero);
359    g_assert(b->cls == float_class_normal);
360 return_b:
361    b->sign = b_sign;
362    return b;
363
364 p_nan:
365    return parts_pick_nan(a, b, s);
366}
367
368/*
369 * Returns the result of multiplying the floating-point values `a' and
370 * `b'. The operation is performed according to the IEC/IEEE Standard
371 * for Binary Floating-Point Arithmetic.
372 */
373static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
374                                float_status *s)
375{
376    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
377    bool sign = a->sign ^ b->sign;
378
379    if (likely(ab_mask == float_cmask_normal)) {
380        FloatPartsW tmp;
381
382        frac_mulw(&tmp, a, b);
383        frac_truncjam(a, &tmp);
384
385        a->exp += b->exp + 1;
386        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
387            frac_add(a, a, a);
388            a->exp -= 1;
389        }
390
391        a->sign = sign;
392        return a;
393    }
394
395    /* Inf * Zero == NaN */
396    if (unlikely(ab_mask == float_cmask_infzero)) {
397        float_raise(float_flag_invalid, s);
398        parts_default_nan(a, s);
399        return a;
400    }
401
402    if (unlikely(ab_mask & float_cmask_anynan)) {
403        return parts_pick_nan(a, b, s);
404    }
405
406    /* Multiply by 0 or Inf */
407    if (ab_mask & float_cmask_inf) {
408        a->cls = float_class_inf;
409        a->sign = sign;
410        return a;
411    }
412
413    g_assert(ab_mask & float_cmask_zero);
414    a->cls = float_class_zero;
415    a->sign = sign;
416    return a;
417}
418
419/*
420 * Returns the result of multiplying the floating-point values `a' and
421 * `b' then adding 'c', with no intermediate rounding step after the
422 * multiplication. The operation is performed according to the
423 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
424 * The flags argument allows the caller to select negation of the
425 * addend, the intermediate product, or the final result. (The
426 * difference between this and having the caller do a separate
427 * negation is that negating externally will flip the sign bit on NaNs.)
428 *
429 * Requires A and C extracted into a double-sized structure to provide the
430 * extra space for the widening multiply.
431 */
432static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
433                                   FloatPartsN *c, int flags, float_status *s)
434{
435    int ab_mask, abc_mask;
436    FloatPartsW p_widen, c_widen;
437
438    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
439    abc_mask = float_cmask(c->cls) | ab_mask;
440
441    /*
442     * It is implementation-defined whether the cases of (0,inf,qnan)
443     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
444     * they return if they do), so we have to hand this information
445     * off to the target-specific pick-a-NaN routine.
446     */
447    if (unlikely(abc_mask & float_cmask_anynan)) {
448        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
449    }
450
451    if (flags & float_muladd_negate_c) {
452        c->sign ^= 1;
453    }
454
455    /* Compute the sign of the product into A. */
456    a->sign ^= b->sign;
457    if (flags & float_muladd_negate_product) {
458        a->sign ^= 1;
459    }
460
461    if (unlikely(ab_mask != float_cmask_normal)) {
462        if (unlikely(ab_mask == float_cmask_infzero)) {
463            goto d_nan;
464        }
465
466        if (ab_mask & float_cmask_inf) {
467            if (c->cls == float_class_inf && a->sign != c->sign) {
468                goto d_nan;
469            }
470            goto return_inf;
471        }
472
473        g_assert(ab_mask & float_cmask_zero);
474        if (c->cls == float_class_normal) {
475            *a = *c;
476            goto return_normal;
477        }
478        if (c->cls == float_class_zero) {
479            if (a->sign != c->sign) {
480                goto return_sub_zero;
481            }
482            goto return_zero;
483        }
484        g_assert(c->cls == float_class_inf);
485    }
486
487    if (unlikely(c->cls == float_class_inf)) {
488        a->sign = c->sign;
489        goto return_inf;
490    }
491
492    /* Perform the multiplication step. */
493    p_widen.sign = a->sign;
494    p_widen.exp = a->exp + b->exp + 1;
495    frac_mulw(&p_widen, a, b);
496    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
497        frac_add(&p_widen, &p_widen, &p_widen);
498        p_widen.exp -= 1;
499    }
500
501    /* Perform the addition step. */
502    if (c->cls != float_class_zero) {
503        /* Zero-extend C to less significant bits. */
504        frac_widen(&c_widen, c);
505        c_widen.exp = c->exp;
506
507        if (a->sign == c->sign) {
508            parts_add_normal(&p_widen, &c_widen);
509        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
510            goto return_sub_zero;
511        }
512    }
513
514    /* Narrow with sticky bit, for proper rounding later. */
515    frac_truncjam(a, &p_widen);
516    a->sign = p_widen.sign;
517    a->exp = p_widen.exp;
518
519 return_normal:
520    if (flags & float_muladd_halve_result) {
521        a->exp -= 1;
522    }
523 finish_sign:
524    if (flags & float_muladd_negate_result) {
525        a->sign ^= 1;
526    }
527    return a;
528
529 return_sub_zero:
530    a->sign = s->float_rounding_mode == float_round_down;
531 return_zero:
532    a->cls = float_class_zero;
533    goto finish_sign;
534
535 return_inf:
536    a->cls = float_class_inf;
537    goto finish_sign;
538
539 d_nan:
540    float_raise(float_flag_invalid, s);
541    parts_default_nan(a, s);
542    return a;
543}
544
545/*
546 * Returns the result of dividing the floating-point value `a' by the
547 * corresponding value `b'. The operation is performed according to
548 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
549 */
550static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
551                                float_status *s)
552{
553    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
554    bool sign = a->sign ^ b->sign;
555
556    if (likely(ab_mask == float_cmask_normal)) {
557        a->sign = sign;
558        a->exp -= b->exp + frac_div(a, b);
559        return a;
560    }
561
562    /* 0/0 or Inf/Inf => NaN */
563    if (unlikely(ab_mask == float_cmask_zero) ||
564        unlikely(ab_mask == float_cmask_inf)) {
565        float_raise(float_flag_invalid, s);
566        parts_default_nan(a, s);
567        return a;
568    }
569
570    /* All the NaN cases */
571    if (unlikely(ab_mask & float_cmask_anynan)) {
572        return parts_pick_nan(a, b, s);
573    }
574
575    a->sign = sign;
576
577    /* Inf / X */
578    if (a->cls == float_class_inf) {
579        return a;
580    }
581
582    /* 0 / X */
583    if (a->cls == float_class_zero) {
584        return a;
585    }
586
587    /* X / Inf */
588    if (b->cls == float_class_inf) {
589        a->cls = float_class_zero;
590        return a;
591    }
592
593    /* X / 0 => Inf */
594    g_assert(b->cls == float_class_zero);
595    float_raise(float_flag_divbyzero, s);
596    a->cls = float_class_inf;
597    return a;
598}
599
600/*
601 * Square Root
602 *
603 * The base algorithm is lifted from
604 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
605 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
606 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
607 * and is thus MIT licenced.
608 */
609static void partsN(sqrt)(FloatPartsN *a, float_status *status,
610                         const FloatFmt *fmt)
611{
612    const uint32_t three32 = 3u << 30;
613    const uint64_t three64 = 3ull << 62;
614    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
615    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
616    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
617    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
618    uint64_t discard;
619    bool exp_odd;
620    size_t index;
621
622    if (unlikely(a->cls != float_class_normal)) {
623        switch (a->cls) {
624        case float_class_snan:
625        case float_class_qnan:
626            parts_return_nan(a, status);
627            return;
628        case float_class_zero:
629            return;
630        case float_class_inf:
631            if (unlikely(a->sign)) {
632                goto d_nan;
633            }
634            return;
635        default:
636            g_assert_not_reached();
637        }
638    }
639
640    if (unlikely(a->sign)) {
641        goto d_nan;
642    }
643
644    /*
645     * Argument reduction.
646     * x = 4^e frac; with integer e, and frac in [1, 4)
647     * m = frac fixed point at bit 62, since we're in base 4.
648     * If base-2 exponent is odd, exchange that for multiply by 2,
649     * which results in no shift.
650     */
651    exp_odd = a->exp & 1;
652    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
653    if (!exp_odd) {
654        frac_shr(a, 1);
655    }
656
657    /*
658     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
659     *
660     * Initial estimate:
661     * 7-bit lookup table (1-bit exponent and 6-bit significand).
662     *
663     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
664     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
665     * a table lookup is faster and needs one less iteration.
666     * The 7-bit table gives |e| < 0x1.fdp-9.
667     *
668     * A Newton-Raphson iteration for r is
669     *   s = m*r
670     *   d = s*r
671     *   u = 3 - d
672     *   r = r*u/2
673     *
674     * Fixed point representations:
675     *   m, s, d, u, three are all 2.30; r is 0.32
676     */
677    m64 = a->frac_hi;
678    m32 = m64 >> 32;
679
680    r32 = rsqrt_tab[index] << 16;
681    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
682
683    s32 = ((uint64_t)m32 * r32) >> 32;
684    d32 = ((uint64_t)s32 * r32) >> 32;
685    u32 = three32 - d32;
686
687    if (N == 64) {
688        /* float64 or smaller */
689
690        r32 = ((uint64_t)r32 * u32) >> 31;
691        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
692
693        s32 = ((uint64_t)m32 * r32) >> 32;
694        d32 = ((uint64_t)s32 * r32) >> 32;
695        u32 = three32 - d32;
696
697        if (fmt->frac_size <= 23) {
698            /* float32 or smaller */
699
700            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
701            s32 = (s32 - 1) >> 6;               /* 9.23 */
702            /* s < sqrt(m) < s + 0x1.08p-23 */
703
704            /* compute nearest rounded result to 2.23 bits */
705            uint32_t d0 = (m32 << 16) - s32 * s32;
706            uint32_t d1 = s32 - d0;
707            uint32_t d2 = d1 + s32 + 1;
708            s32 += d1 >> 31;
709            a->frac_hi = (uint64_t)s32 << (64 - 25);
710
711            /* increment or decrement for inexact */
712            if (d2 != 0) {
713                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
714            }
715            goto done;
716        }
717
718        /* float64 */
719
720        r64 = (uint64_t)r32 * u32 * 2;
721        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
722        mul64To128(m64, r64, &s64, &discard);
723        mul64To128(s64, r64, &d64, &discard);
724        u64 = three64 - d64;
725
726        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
727        s64 = (s64 - 2) >> 9;                  /* 12.52 */
728
729        /* Compute nearest rounded result */
730        uint64_t d0 = (m64 << 42) - s64 * s64;
731        uint64_t d1 = s64 - d0;
732        uint64_t d2 = d1 + s64 + 1;
733        s64 += d1 >> 63;
734        a->frac_hi = s64 << (64 - 54);
735
736        /* increment or decrement for inexact */
737        if (d2 != 0) {
738            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
739        }
740        goto done;
741    }
742
743    r64 = (uint64_t)r32 * u32 * 2;
744    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
745
746    mul64To128(m64, r64, &s64, &discard);
747    mul64To128(s64, r64, &d64, &discard);
748    u64 = three64 - d64;
749    mul64To128(u64, r64, &r64, &discard);
750    r64 <<= 1;
751    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
752
753    mul64To128(m64, r64, &s64, &discard);
754    mul64To128(s64, r64, &d64, &discard);
755    u64 = three64 - d64;
756    mul64To128(u64, r64, &rh, &rl);
757    add128(rh, rl, rh, rl, &rh, &rl);
758    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
759
760    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
761    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
762    sub128(three64, 0, dh, dl, &uh, &ul);
763    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
764    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
765
766    sub128(sh, sl, 0, 4, &sh, &sl);
767    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
768    /* s < sqrt(m) < s + 1ulp */
769
770    /* Compute nearest rounded result */
771    mul64To128(sl, sl, &d0h, &d0l);
772    d0h += 2 * sh * sl;
773    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
774    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
775    add128(sh, sl, 0, 1, &d2h, &d2l);
776    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
777    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
778    shift128Left(sh, sl, 128 - 114, &sh, &sl);
779
780    /* increment or decrement for inexact */
781    if (d2h | d2l) {
782        if ((int64_t)(d1h ^ d2h) < 0) {
783            sub128(sh, sl, 0, 1, &sh, &sl);
784        } else {
785            add128(sh, sl, 0, 1, &sh, &sl);
786        }
787    }
788    a->frac_lo = sl;
789    a->frac_hi = sh;
790
791 done:
792    /* Convert back from base 4 to base 2. */
793    a->exp >>= 1;
794    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
795        frac_add(a, a, a);
796    } else {
797        a->exp += 1;
798    }
799    return;
800
801 d_nan:
802    float_raise(float_flag_invalid, status);
803    parts_default_nan(a, status);
804}
805
806/*
807 * Rounds the floating-point value `a' to an integer, and returns the
808 * result as a floating-point value. The operation is performed
809 * according to the IEC/IEEE Standard for Binary Floating-Point
810 * Arithmetic.
811 *
812 * parts_round_to_int_normal is an internal helper function for
813 * normal numbers only, returning true for inexact but not directly
814 * raising float_flag_inexact.
815 */
816static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
817                                        int scale, int frac_size)
818{
819    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
820    int shift_adj;
821
822    scale = MIN(MAX(scale, -0x10000), 0x10000);
823    a->exp += scale;
824
825    if (a->exp < 0) {
826        bool one;
827
828        /* All fractional */
829        switch (rmode) {
830        case float_round_nearest_even:
831            one = false;
832            if (a->exp == -1) {
833                FloatPartsN tmp;
834                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
835                frac_add(&tmp, a, a);
836                /* Anything remaining means frac > 0.5. */
837                one = !frac_eqz(&tmp);
838            }
839            break;
840        case float_round_ties_away:
841            one = a->exp == -1;
842            break;
843        case float_round_to_zero:
844            one = false;
845            break;
846        case float_round_up:
847            one = !a->sign;
848            break;
849        case float_round_down:
850            one = a->sign;
851            break;
852        case float_round_to_odd:
853            one = true;
854            break;
855        default:
856            g_assert_not_reached();
857        }
858
859        frac_clear(a);
860        a->exp = 0;
861        if (one) {
862            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
863        } else {
864            a->cls = float_class_zero;
865        }
866        return true;
867    }
868
869    if (a->exp >= frac_size) {
870        /* All integral */
871        return false;
872    }
873
874    if (N > 64 && a->exp < N - 64) {
875        /*
876         * Rounding is not in the low word -- shift lsb to bit 2,
877         * which leaves room for sticky and rounding bit.
878         */
879        shift_adj = (N - 1) - (a->exp + 2);
880        frac_shrjam(a, shift_adj);
881        frac_lsb = 1 << 2;
882    } else {
883        shift_adj = 0;
884        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
885    }
886
887    frac_lsbm1 = frac_lsb >> 1;
888    rnd_mask = frac_lsb - 1;
889    rnd_even_mask = rnd_mask | frac_lsb;
890
891    if (!(a->frac_lo & rnd_mask)) {
892        /* Fractional bits already clear, undo the shift above. */
893        frac_shl(a, shift_adj);
894        return false;
895    }
896
897    switch (rmode) {
898    case float_round_nearest_even:
899        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
900        break;
901    case float_round_ties_away:
902        inc = frac_lsbm1;
903        break;
904    case float_round_to_zero:
905        inc = 0;
906        break;
907    case float_round_up:
908        inc = a->sign ? 0 : rnd_mask;
909        break;
910    case float_round_down:
911        inc = a->sign ? rnd_mask : 0;
912        break;
913    case float_round_to_odd:
914        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
915        break;
916    default:
917        g_assert_not_reached();
918    }
919
920    if (shift_adj == 0) {
921        if (frac_addi(a, a, inc)) {
922            frac_shr(a, 1);
923            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
924            a->exp++;
925        }
926        a->frac_lo &= ~rnd_mask;
927    } else {
928        frac_addi(a, a, inc);
929        a->frac_lo &= ~rnd_mask;
930        /* Be careful shifting back, not to overflow */
931        frac_shl(a, shift_adj - 1);
932        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
933            a->exp++;
934        } else {
935            frac_add(a, a, a);
936        }
937    }
938    return true;
939}
940
941static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
942                                 int scale, float_status *s,
943                                 const FloatFmt *fmt)
944{
945    switch (a->cls) {
946    case float_class_qnan:
947    case float_class_snan:
948        parts_return_nan(a, s);
949        break;
950    case float_class_zero:
951    case float_class_inf:
952        break;
953    case float_class_normal:
954        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
955            float_raise(float_flag_inexact, s);
956        }
957        break;
958    default:
959        g_assert_not_reached();
960    }
961}
962
963/*
964 * Returns the result of converting the floating-point value `a' to
965 * the two's complement integer format. The conversion is performed
966 * according to the IEC/IEEE Standard for Binary Floating-Point
967 * Arithmetic---which means in particular that the conversion is
968 * rounded according to the current rounding mode. If `a' is a NaN,
969 * the largest positive integer is returned. Otherwise, if the
970 * conversion overflows, the largest integer with the same sign as `a'
971 * is returned.
972 */
973static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
974                                     int scale, int64_t min, int64_t max,
975                                     float_status *s)
976{
977    int flags = 0;
978    uint64_t r;
979
980    switch (p->cls) {
981    case float_class_snan:
982    case float_class_qnan:
983        flags = float_flag_invalid;
984        r = max;
985        break;
986
987    case float_class_inf:
988        flags = float_flag_invalid;
989        r = p->sign ? min : max;
990        break;
991
992    case float_class_zero:
993        return 0;
994
995    case float_class_normal:
996        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
997        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
998            flags = float_flag_inexact;
999        }
1000
1001        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1002            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1003        } else {
1004            r = UINT64_MAX;
1005        }
1006        if (p->sign) {
1007            if (r <= -(uint64_t)min) {
1008                r = -r;
1009            } else {
1010                flags = float_flag_invalid;
1011                r = min;
1012            }
1013        } else if (r > max) {
1014            flags = float_flag_invalid;
1015            r = max;
1016        }
1017        break;
1018
1019    default:
1020        g_assert_not_reached();
1021    }
1022
1023    float_raise(flags, s);
1024    return r;
1025}
1026
1027/*
1028 *  Returns the result of converting the floating-point value `a' to
1029 *  the unsigned integer format. The conversion is performed according
1030 *  to the IEC/IEEE Standard for Binary Floating-Point
1031 *  Arithmetic---which means in particular that the conversion is
1032 *  rounded according to the current rounding mode. If `a' is a NaN,
1033 *  the largest unsigned integer is returned. Otherwise, if the
1034 *  conversion overflows, the largest unsigned integer is returned. If
1035 *  the 'a' is negative, the result is rounded and zero is returned;
1036 *  values that do not round to zero will raise the inexact exception
1037 *  flag.
1038 */
1039static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1040                                      int scale, uint64_t max, float_status *s)
1041{
1042    int flags = 0;
1043    uint64_t r;
1044
1045    switch (p->cls) {
1046    case float_class_snan:
1047    case float_class_qnan:
1048        flags = float_flag_invalid;
1049        r = max;
1050        break;
1051
1052    case float_class_inf:
1053        flags = float_flag_invalid;
1054        r = p->sign ? 0 : max;
1055        break;
1056
1057    case float_class_zero:
1058        return 0;
1059
1060    case float_class_normal:
1061        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1062        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1063            flags = float_flag_inexact;
1064            if (p->cls == float_class_zero) {
1065                r = 0;
1066                break;
1067            }
1068        }
1069
1070        if (p->sign) {
1071            flags = float_flag_invalid;
1072            r = 0;
1073        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1074            flags = float_flag_invalid;
1075            r = max;
1076        } else {
1077            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1078            if (r > max) {
1079                flags = float_flag_invalid;
1080                r = max;
1081            }
1082        }
1083        break;
1084
1085    default:
1086        g_assert_not_reached();
1087    }
1088
1089    float_raise(flags, s);
1090    return r;
1091}
1092
1093/*
1094 * Integer to float conversions
1095 *
1096 * Returns the result of converting the two's complement integer `a'
1097 * to the floating-point format. The conversion is performed according
1098 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1099 */
1100static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1101                                  int scale, float_status *s)
1102{
1103    uint64_t f = a;
1104    int shift;
1105
1106    memset(p, 0, sizeof(*p));
1107
1108    if (a == 0) {
1109        p->cls = float_class_zero;
1110        return;
1111    }
1112
1113    p->cls = float_class_normal;
1114    if (a < 0) {
1115        f = -f;
1116        p->sign = true;
1117    }
1118    shift = clz64(f);
1119    scale = MIN(MAX(scale, -0x10000), 0x10000);
1120
1121    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1122    p->frac_hi = f << shift;
1123}
1124
1125/*
1126 * Unsigned Integer to float conversions
1127 *
1128 * Returns the result of converting the unsigned integer `a' to the
1129 * floating-point format. The conversion is performed according to the
1130 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1131 */
1132static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1133                                  int scale, float_status *status)
1134{
1135    memset(p, 0, sizeof(*p));
1136
1137    if (a == 0) {
1138        p->cls = float_class_zero;
1139    } else {
1140        int shift = clz64(a);
1141        scale = MIN(MAX(scale, -0x10000), 0x10000);
1142        p->cls = float_class_normal;
1143        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1144        p->frac_hi = a << shift;
1145    }
1146}
1147
1148/*
1149 * Float min/max.
1150 */
1151static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1152                                   float_status *s, int flags)
1153{
1154    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1155    int a_exp, b_exp, cmp;
1156
1157    if (unlikely(ab_mask & float_cmask_anynan)) {
1158        /*
1159         * For minnum/maxnum, if one operand is a QNaN, and the other
1160         * operand is numerical, then return numerical argument.
1161         */
1162        if ((flags & minmax_isnum)
1163            && !(ab_mask & float_cmask_snan)
1164            && (ab_mask & ~float_cmask_qnan)) {
1165            return is_nan(a->cls) ? b : a;
1166        }
1167        return parts_pick_nan(a, b, s);
1168    }
1169
1170    a_exp = a->exp;
1171    b_exp = b->exp;
1172
1173    if (unlikely(ab_mask != float_cmask_normal)) {
1174        switch (a->cls) {
1175        case float_class_normal:
1176            break;
1177        case float_class_inf:
1178            a_exp = INT16_MAX;
1179            break;
1180        case float_class_zero:
1181            a_exp = INT16_MIN;
1182            break;
1183        default:
1184            g_assert_not_reached();
1185            break;
1186        }
1187        switch (b->cls) {
1188        case float_class_normal:
1189            break;
1190        case float_class_inf:
1191            b_exp = INT16_MAX;
1192            break;
1193        case float_class_zero:
1194            b_exp = INT16_MIN;
1195            break;
1196        default:
1197            g_assert_not_reached();
1198            break;
1199        }
1200    }
1201
1202    /* Compare magnitudes. */
1203    cmp = a_exp - b_exp;
1204    if (cmp == 0) {
1205        cmp = frac_cmp(a, b);
1206    }
1207
1208    /*
1209     * Take the sign into account.
1210     * For ismag, only do this if the magnitudes are equal.
1211     */
1212    if (!(flags & minmax_ismag) || cmp == 0) {
1213        if (a->sign != b->sign) {
1214            /* For differing signs, the negative operand is less. */
1215            cmp = a->sign ? -1 : 1;
1216        } else if (a->sign) {
1217            /* For two negative operands, invert the magnitude comparison. */
1218            cmp = -cmp;
1219        }
1220    }
1221
1222    if (flags & minmax_ismin) {
1223        cmp = -cmp;
1224    }
1225    return cmp < 0 ? b : a;
1226}
1227
1228/*
1229 * Floating point compare
1230 */
1231static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1232                                     float_status *s, bool is_quiet)
1233{
1234    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1235    int cmp;
1236
1237    if (likely(ab_mask == float_cmask_normal)) {
1238        if (a->sign != b->sign) {
1239            goto a_sign;
1240        }
1241        if (a->exp != b->exp) {
1242            cmp = a->exp < b->exp ? -1 : 1;
1243        } else {
1244            cmp = frac_cmp(a, b);
1245        }
1246        if (a->sign) {
1247            cmp = -cmp;
1248        }
1249        return cmp;
1250    }
1251
1252    if (unlikely(ab_mask & float_cmask_anynan)) {
1253        if (!is_quiet || (ab_mask & float_cmask_snan)) {
1254            float_raise(float_flag_invalid, s);
1255        }
1256        return float_relation_unordered;
1257    }
1258
1259    if (ab_mask & float_cmask_zero) {
1260        if (ab_mask == float_cmask_zero) {
1261            return float_relation_equal;
1262        } else if (a->cls == float_class_zero) {
1263            goto b_sign;
1264        } else {
1265            goto a_sign;
1266        }
1267    }
1268
1269    if (ab_mask == float_cmask_inf) {
1270        if (a->sign == b->sign) {
1271            return float_relation_equal;
1272        }
1273    } else if (b->cls == float_class_inf) {
1274        goto b_sign;
1275    } else {
1276        g_assert(a->cls == float_class_inf);
1277    }
1278
1279 a_sign:
1280    return a->sign ? float_relation_less : float_relation_greater;
1281 b_sign:
1282    return b->sign ? float_relation_greater : float_relation_less;
1283}
1284
1285/*
1286 * Multiply A by 2 raised to the power N.
1287 */
1288static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1289{
1290    switch (a->cls) {
1291    case float_class_snan:
1292    case float_class_qnan:
1293        parts_return_nan(a, s);
1294        break;
1295    case float_class_zero:
1296    case float_class_inf:
1297        break;
1298    case float_class_normal:
1299        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1300        break;
1301    default:
1302        g_assert_not_reached();
1303    }
1304}
1305