xref: /openbmc/qemu/fpu/softfloat-parts.c.inc (revision 05e391ae)
1/*
2 * QEMU float support
3 *
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 *  the SoftFloat-2a license
10 *  the BSD license
11 *  GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
16 */
17
18static void partsN(return_nan)(FloatPartsN *a, float_status *s)
19{
20    switch (a->cls) {
21    case float_class_snan:
22        float_raise(float_flag_invalid, s);
23        if (s->default_nan_mode) {
24            parts_default_nan(a, s);
25        } else {
26            parts_silence_nan(a, s);
27        }
28        break;
29    case float_class_qnan:
30        if (s->default_nan_mode) {
31            parts_default_nan(a, s);
32        }
33        break;
34    default:
35        g_assert_not_reached();
36    }
37}
38
39static FloatPartsN *partsN(pick_nan)(FloatPartsN *a, FloatPartsN *b,
40                                     float_status *s)
41{
42    if (is_snan(a->cls) || is_snan(b->cls)) {
43        float_raise(float_flag_invalid, s);
44    }
45
46    if (s->default_nan_mode) {
47        parts_default_nan(a, s);
48    } else {
49        int cmp = frac_cmp(a, b);
50        if (cmp == 0) {
51            cmp = a->sign < b->sign;
52        }
53
54        if (pickNaN(a->cls, b->cls, cmp > 0, s)) {
55            a = b;
56        }
57        if (is_snan(a->cls)) {
58            parts_silence_nan(a, s);
59        }
60    }
61    return a;
62}
63
64static FloatPartsN *partsN(pick_nan_muladd)(FloatPartsN *a, FloatPartsN *b,
65                                            FloatPartsN *c, float_status *s,
66                                            int ab_mask, int abc_mask)
67{
68    int which;
69
70    if (unlikely(abc_mask & float_cmask_snan)) {
71        float_raise(float_flag_invalid, s);
72    }
73
74    which = pickNaNMulAdd(a->cls, b->cls, c->cls,
75                          ab_mask == float_cmask_infzero, s);
76
77    if (s->default_nan_mode || which == 3) {
78        /*
79         * Note that this check is after pickNaNMulAdd so that function
80         * has an opportunity to set the Invalid flag for infzero.
81         */
82        parts_default_nan(a, s);
83        return a;
84    }
85
86    switch (which) {
87    case 0:
88        break;
89    case 1:
90        a = b;
91        break;
92    case 2:
93        a = c;
94        break;
95    default:
96        g_assert_not_reached();
97    }
98    if (is_snan(a->cls)) {
99        parts_silence_nan(a, s);
100    }
101    return a;
102}
103
104/*
105 * Canonicalize the FloatParts structure.  Determine the class,
106 * unbias the exponent, and normalize the fraction.
107 */
108static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
109                                 const FloatFmt *fmt)
110{
111    if (unlikely(p->exp == 0)) {
112        if (likely(frac_eqz(p))) {
113            p->cls = float_class_zero;
114        } else if (status->flush_inputs_to_zero) {
115            float_raise(float_flag_input_denormal, status);
116            p->cls = float_class_zero;
117            frac_clear(p);
118        } else {
119            int shift = frac_normalize(p);
120            p->cls = float_class_normal;
121            p->exp = fmt->frac_shift - fmt->exp_bias - shift + 1;
122        }
123    } else if (likely(p->exp < fmt->exp_max) || fmt->arm_althp) {
124        p->cls = float_class_normal;
125        p->exp -= fmt->exp_bias;
126        frac_shl(p, fmt->frac_shift);
127        p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
128    } else if (likely(frac_eqz(p))) {
129        p->cls = float_class_inf;
130    } else {
131        frac_shl(p, fmt->frac_shift);
132        p->cls = (parts_is_snan_frac(p->frac_hi, status)
133                  ? float_class_snan : float_class_qnan);
134    }
135}
136
137/*
138 * Round and uncanonicalize a floating-point number by parts. There
139 * are FRAC_SHIFT bits that may require rounding at the bottom of the
140 * fraction; these bits will be removed. The exponent will be biased
141 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
142 */
143static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
144                                   const FloatFmt *fmt)
145{
146    const int exp_max = fmt->exp_max;
147    const int frac_shift = fmt->frac_shift;
148    const uint64_t round_mask = fmt->round_mask;
149    const uint64_t frac_lsb = round_mask + 1;
150    const uint64_t frac_lsbm1 = round_mask ^ (round_mask >> 1);
151    const uint64_t roundeven_mask = round_mask | frac_lsb;
152    uint64_t inc;
153    bool overflow_norm = false;
154    int exp, flags = 0;
155
156    switch (s->float_rounding_mode) {
157    case float_round_nearest_even:
158        if (N > 64 && frac_lsb == 0) {
159            inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
160                   ? frac_lsbm1 : 0);
161        } else {
162            inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
163                   ? frac_lsbm1 : 0);
164        }
165        break;
166    case float_round_ties_away:
167        inc = frac_lsbm1;
168        break;
169    case float_round_to_zero:
170        overflow_norm = true;
171        inc = 0;
172        break;
173    case float_round_up:
174        inc = p->sign ? 0 : round_mask;
175        overflow_norm = p->sign;
176        break;
177    case float_round_down:
178        inc = p->sign ? round_mask : 0;
179        overflow_norm = !p->sign;
180        break;
181    case float_round_to_odd:
182        overflow_norm = true;
183        /* fall through */
184    case float_round_to_odd_inf:
185        if (N > 64 && frac_lsb == 0) {
186            inc = p->frac_hi & 1 ? 0 : round_mask;
187        } else {
188            inc = p->frac_lo & frac_lsb ? 0 : round_mask;
189        }
190        break;
191    default:
192        g_assert_not_reached();
193    }
194
195    exp = p->exp + fmt->exp_bias;
196    if (likely(exp > 0)) {
197        if (p->frac_lo & round_mask) {
198            flags |= float_flag_inexact;
199            if (frac_addi(p, p, inc)) {
200                frac_shr(p, 1);
201                p->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
202                exp++;
203            }
204            p->frac_lo &= ~round_mask;
205        }
206
207        if (fmt->arm_althp) {
208            /* ARM Alt HP eschews Inf and NaN for a wider exponent.  */
209            if (unlikely(exp > exp_max)) {
210                /* Overflow.  Return the maximum normal.  */
211                flags = float_flag_invalid;
212                exp = exp_max;
213                frac_allones(p);
214                p->frac_lo &= ~round_mask;
215            }
216        } else if (unlikely(exp >= exp_max)) {
217            flags |= float_flag_overflow | float_flag_inexact;
218            if (overflow_norm) {
219                exp = exp_max - 1;
220                frac_allones(p);
221                p->frac_lo &= ~round_mask;
222            } else {
223                p->cls = float_class_inf;
224                exp = exp_max;
225                frac_clear(p);
226            }
227        }
228        frac_shr(p, frac_shift);
229    } else if (s->flush_to_zero) {
230        flags |= float_flag_output_denormal;
231        p->cls = float_class_zero;
232        exp = 0;
233        frac_clear(p);
234    } else {
235        bool is_tiny = s->tininess_before_rounding || exp < 0;
236
237        if (!is_tiny) {
238            FloatPartsN discard;
239            is_tiny = !frac_addi(&discard, p, inc);
240        }
241
242        frac_shrjam(p, 1 - exp);
243
244        if (p->frac_lo & round_mask) {
245            /* Need to recompute round-to-even/round-to-odd. */
246            switch (s->float_rounding_mode) {
247            case float_round_nearest_even:
248                if (N > 64 && frac_lsb == 0) {
249                    inc = ((p->frac_hi & 1) ||
250                           (p->frac_lo & round_mask) != frac_lsbm1
251                           ? frac_lsbm1 : 0);
252                } else {
253                    inc = ((p->frac_lo & roundeven_mask) != frac_lsbm1
254                           ? frac_lsbm1 : 0);
255                }
256                break;
257            case float_round_to_odd:
258            case float_round_to_odd_inf:
259                if (N > 64 && frac_lsb == 0) {
260                    inc = p->frac_hi & 1 ? 0 : round_mask;
261                } else {
262                    inc = p->frac_lo & frac_lsb ? 0 : round_mask;
263                }
264                break;
265            default:
266                break;
267            }
268            flags |= float_flag_inexact;
269            frac_addi(p, p, inc);
270            p->frac_lo &= ~round_mask;
271        }
272
273        exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) != 0;
274        frac_shr(p, frac_shift);
275
276        if (is_tiny && (flags & float_flag_inexact)) {
277            flags |= float_flag_underflow;
278        }
279        if (exp == 0 && frac_eqz(p)) {
280            p->cls = float_class_zero;
281        }
282    }
283    p->exp = exp;
284    float_raise(flags, s);
285}
286
287static void partsN(uncanon)(FloatPartsN *p, float_status *s,
288                            const FloatFmt *fmt)
289{
290    if (likely(p->cls == float_class_normal)) {
291        parts_uncanon_normal(p, s, fmt);
292    } else {
293        switch (p->cls) {
294        case float_class_zero:
295            p->exp = 0;
296            frac_clear(p);
297            return;
298        case float_class_inf:
299            g_assert(!fmt->arm_althp);
300            p->exp = fmt->exp_max;
301            frac_clear(p);
302            return;
303        case float_class_qnan:
304        case float_class_snan:
305            g_assert(!fmt->arm_althp);
306            p->exp = fmt->exp_max;
307            frac_shr(p, fmt->frac_shift);
308            return;
309        default:
310            break;
311        }
312        g_assert_not_reached();
313    }
314}
315
316/*
317 * Returns the result of adding or subtracting the values of the
318 * floating-point values `a' and `b'. The operation is performed
319 * according to the IEC/IEEE Standard for Binary Floating-Point
320 * Arithmetic.
321 */
322static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
323                                   float_status *s, bool subtract)
324{
325    bool b_sign = b->sign ^ subtract;
326    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
327
328    if (a->sign != b_sign) {
329        /* Subtraction */
330        if (likely(ab_mask == float_cmask_normal)) {
331            if (parts_sub_normal(a, b)) {
332                return a;
333            }
334            /* Subtract was exact, fall through to set sign. */
335            ab_mask = float_cmask_zero;
336        }
337
338        if (ab_mask == float_cmask_zero) {
339            a->sign = s->float_rounding_mode == float_round_down;
340            return a;
341        }
342
343        if (unlikely(ab_mask & float_cmask_anynan)) {
344            goto p_nan;
345        }
346
347        if (ab_mask & float_cmask_inf) {
348            if (a->cls != float_class_inf) {
349                /* N - Inf */
350                goto return_b;
351            }
352            if (b->cls != float_class_inf) {
353                /* Inf - N */
354                return a;
355            }
356            /* Inf - Inf */
357            float_raise(float_flag_invalid, s);
358            parts_default_nan(a, s);
359            return a;
360        }
361    } else {
362        /* Addition */
363        if (likely(ab_mask == float_cmask_normal)) {
364            parts_add_normal(a, b);
365            return a;
366        }
367
368        if (ab_mask == float_cmask_zero) {
369            return a;
370        }
371
372        if (unlikely(ab_mask & float_cmask_anynan)) {
373            goto p_nan;
374        }
375
376        if (ab_mask & float_cmask_inf) {
377            a->cls = float_class_inf;
378            return a;
379        }
380    }
381
382    if (b->cls == float_class_zero) {
383        g_assert(a->cls == float_class_normal);
384        return a;
385    }
386
387    g_assert(a->cls == float_class_zero);
388    g_assert(b->cls == float_class_normal);
389 return_b:
390    b->sign = b_sign;
391    return b;
392
393 p_nan:
394    return parts_pick_nan(a, b, s);
395}
396
397/*
398 * Returns the result of multiplying the floating-point values `a' and
399 * `b'. The operation is performed according to the IEC/IEEE Standard
400 * for Binary Floating-Point Arithmetic.
401 */
402static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
403                                float_status *s)
404{
405    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
406    bool sign = a->sign ^ b->sign;
407
408    if (likely(ab_mask == float_cmask_normal)) {
409        FloatPartsW tmp;
410
411        frac_mulw(&tmp, a, b);
412        frac_truncjam(a, &tmp);
413
414        a->exp += b->exp + 1;
415        if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
416            frac_add(a, a, a);
417            a->exp -= 1;
418        }
419
420        a->sign = sign;
421        return a;
422    }
423
424    /* Inf * Zero == NaN */
425    if (unlikely(ab_mask == float_cmask_infzero)) {
426        float_raise(float_flag_invalid, s);
427        parts_default_nan(a, s);
428        return a;
429    }
430
431    if (unlikely(ab_mask & float_cmask_anynan)) {
432        return parts_pick_nan(a, b, s);
433    }
434
435    /* Multiply by 0 or Inf */
436    if (ab_mask & float_cmask_inf) {
437        a->cls = float_class_inf;
438        a->sign = sign;
439        return a;
440    }
441
442    g_assert(ab_mask & float_cmask_zero);
443    a->cls = float_class_zero;
444    a->sign = sign;
445    return a;
446}
447
448/*
449 * Returns the result of multiplying the floating-point values `a' and
450 * `b' then adding 'c', with no intermediate rounding step after the
451 * multiplication. The operation is performed according to the
452 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
453 * The flags argument allows the caller to select negation of the
454 * addend, the intermediate product, or the final result. (The
455 * difference between this and having the caller do a separate
456 * negation is that negating externally will flip the sign bit on NaNs.)
457 *
458 * Requires A and C extracted into a double-sized structure to provide the
459 * extra space for the widening multiply.
460 */
461static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
462                                   FloatPartsN *c, int flags, float_status *s)
463{
464    int ab_mask, abc_mask;
465    FloatPartsW p_widen, c_widen;
466
467    ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
468    abc_mask = float_cmask(c->cls) | ab_mask;
469
470    /*
471     * It is implementation-defined whether the cases of (0,inf,qnan)
472     * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
473     * they return if they do), so we have to hand this information
474     * off to the target-specific pick-a-NaN routine.
475     */
476    if (unlikely(abc_mask & float_cmask_anynan)) {
477        return parts_pick_nan_muladd(a, b, c, s, ab_mask, abc_mask);
478    }
479
480    if (flags & float_muladd_negate_c) {
481        c->sign ^= 1;
482    }
483
484    /* Compute the sign of the product into A. */
485    a->sign ^= b->sign;
486    if (flags & float_muladd_negate_product) {
487        a->sign ^= 1;
488    }
489
490    if (unlikely(ab_mask != float_cmask_normal)) {
491        if (unlikely(ab_mask == float_cmask_infzero)) {
492            goto d_nan;
493        }
494
495        if (ab_mask & float_cmask_inf) {
496            if (c->cls == float_class_inf && a->sign != c->sign) {
497                goto d_nan;
498            }
499            goto return_inf;
500        }
501
502        g_assert(ab_mask & float_cmask_zero);
503        if (c->cls == float_class_normal) {
504            *a = *c;
505            goto return_normal;
506        }
507        if (c->cls == float_class_zero) {
508            if (a->sign != c->sign) {
509                goto return_sub_zero;
510            }
511            goto return_zero;
512        }
513        g_assert(c->cls == float_class_inf);
514    }
515
516    if (unlikely(c->cls == float_class_inf)) {
517        a->sign = c->sign;
518        goto return_inf;
519    }
520
521    /* Perform the multiplication step. */
522    p_widen.sign = a->sign;
523    p_widen.exp = a->exp + b->exp + 1;
524    frac_mulw(&p_widen, a, b);
525    if (!(p_widen.frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
526        frac_add(&p_widen, &p_widen, &p_widen);
527        p_widen.exp -= 1;
528    }
529
530    /* Perform the addition step. */
531    if (c->cls != float_class_zero) {
532        /* Zero-extend C to less significant bits. */
533        frac_widen(&c_widen, c);
534        c_widen.exp = c->exp;
535
536        if (a->sign == c->sign) {
537            parts_add_normal(&p_widen, &c_widen);
538        } else if (!parts_sub_normal(&p_widen, &c_widen)) {
539            goto return_sub_zero;
540        }
541    }
542
543    /* Narrow with sticky bit, for proper rounding later. */
544    frac_truncjam(a, &p_widen);
545    a->sign = p_widen.sign;
546    a->exp = p_widen.exp;
547
548 return_normal:
549    if (flags & float_muladd_halve_result) {
550        a->exp -= 1;
551    }
552 finish_sign:
553    if (flags & float_muladd_negate_result) {
554        a->sign ^= 1;
555    }
556    return a;
557
558 return_sub_zero:
559    a->sign = s->float_rounding_mode == float_round_down;
560 return_zero:
561    a->cls = float_class_zero;
562    goto finish_sign;
563
564 return_inf:
565    a->cls = float_class_inf;
566    goto finish_sign;
567
568 d_nan:
569    float_raise(float_flag_invalid, s);
570    parts_default_nan(a, s);
571    return a;
572}
573
574/*
575 * Returns the result of dividing the floating-point value `a' by the
576 * corresponding value `b'. The operation is performed according to
577 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
578 */
579static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
580                                float_status *s)
581{
582    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
583    bool sign = a->sign ^ b->sign;
584
585    if (likely(ab_mask == float_cmask_normal)) {
586        a->sign = sign;
587        a->exp -= b->exp + frac_div(a, b);
588        return a;
589    }
590
591    /* 0/0 or Inf/Inf => NaN */
592    if (unlikely(ab_mask == float_cmask_zero) ||
593        unlikely(ab_mask == float_cmask_inf)) {
594        float_raise(float_flag_invalid, s);
595        parts_default_nan(a, s);
596        return a;
597    }
598
599    /* All the NaN cases */
600    if (unlikely(ab_mask & float_cmask_anynan)) {
601        return parts_pick_nan(a, b, s);
602    }
603
604    a->sign = sign;
605
606    /* Inf / X */
607    if (a->cls == float_class_inf) {
608        return a;
609    }
610
611    /* 0 / X */
612    if (a->cls == float_class_zero) {
613        return a;
614    }
615
616    /* X / Inf */
617    if (b->cls == float_class_inf) {
618        a->cls = float_class_zero;
619        return a;
620    }
621
622    /* X / 0 => Inf */
623    g_assert(b->cls == float_class_zero);
624    float_raise(float_flag_divbyzero, s);
625    a->cls = float_class_inf;
626    return a;
627}
628
629/*
630 * Floating point remainder, per IEC/IEEE, or modulus.
631 */
632static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
633                                   uint64_t *mod_quot, float_status *s)
634{
635    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
636
637    if (likely(ab_mask == float_cmask_normal)) {
638        frac_modrem(a, b, mod_quot);
639        return a;
640    }
641
642    if (mod_quot) {
643        *mod_quot = 0;
644    }
645
646    /* All the NaN cases */
647    if (unlikely(ab_mask & float_cmask_anynan)) {
648        return parts_pick_nan(a, b, s);
649    }
650
651    /* Inf % N; N % 0 */
652    if (a->cls == float_class_inf || b->cls == float_class_zero) {
653        float_raise(float_flag_invalid, s);
654        parts_default_nan(a, s);
655        return a;
656    }
657
658    /* N % Inf; 0 % N */
659    g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
660    return a;
661}
662
663/*
664 * Square Root
665 *
666 * The base algorithm is lifted from
667 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtf.c
668 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrt.c
669 * https://git.musl-libc.org/cgit/musl/tree/src/math/sqrtl.c
670 * and is thus MIT licenced.
671 */
672static void partsN(sqrt)(FloatPartsN *a, float_status *status,
673                         const FloatFmt *fmt)
674{
675    const uint32_t three32 = 3u << 30;
676    const uint64_t three64 = 3ull << 62;
677    uint32_t d32, m32, r32, s32, u32;            /* 32-bit computation */
678    uint64_t d64, m64, r64, s64, u64;            /* 64-bit computation */
679    uint64_t dh, dl, rh, rl, sh, sl, uh, ul;     /* 128-bit computation */
680    uint64_t d0h, d0l, d1h, d1l, d2h, d2l;
681    uint64_t discard;
682    bool exp_odd;
683    size_t index;
684
685    if (unlikely(a->cls != float_class_normal)) {
686        switch (a->cls) {
687        case float_class_snan:
688        case float_class_qnan:
689            parts_return_nan(a, status);
690            return;
691        case float_class_zero:
692            return;
693        case float_class_inf:
694            if (unlikely(a->sign)) {
695                goto d_nan;
696            }
697            return;
698        default:
699            g_assert_not_reached();
700        }
701    }
702
703    if (unlikely(a->sign)) {
704        goto d_nan;
705    }
706
707    /*
708     * Argument reduction.
709     * x = 4^e frac; with integer e, and frac in [1, 4)
710     * m = frac fixed point at bit 62, since we're in base 4.
711     * If base-2 exponent is odd, exchange that for multiply by 2,
712     * which results in no shift.
713     */
714    exp_odd = a->exp & 1;
715    index = extract64(a->frac_hi, 57, 6) | (!exp_odd << 6);
716    if (!exp_odd) {
717        frac_shr(a, 1);
718    }
719
720    /*
721     * Approximate r ~= 1/sqrt(m) and s ~= sqrt(m) when m in [1, 4).
722     *
723     * Initial estimate:
724     * 7-bit lookup table (1-bit exponent and 6-bit significand).
725     *
726     * The relative error (e = r0*sqrt(m)-1) of a linear estimate
727     * (r0 = a*m + b) is |e| < 0.085955 ~ 0x1.6p-4 at best;
728     * a table lookup is faster and needs one less iteration.
729     * The 7-bit table gives |e| < 0x1.fdp-9.
730     *
731     * A Newton-Raphson iteration for r is
732     *   s = m*r
733     *   d = s*r
734     *   u = 3 - d
735     *   r = r*u/2
736     *
737     * Fixed point representations:
738     *   m, s, d, u, three are all 2.30; r is 0.32
739     */
740    m64 = a->frac_hi;
741    m32 = m64 >> 32;
742
743    r32 = rsqrt_tab[index] << 16;
744    /* |r*sqrt(m) - 1| < 0x1.FDp-9 */
745
746    s32 = ((uint64_t)m32 * r32) >> 32;
747    d32 = ((uint64_t)s32 * r32) >> 32;
748    u32 = three32 - d32;
749
750    if (N == 64) {
751        /* float64 or smaller */
752
753        r32 = ((uint64_t)r32 * u32) >> 31;
754        /* |r*sqrt(m) - 1| < 0x1.7Bp-16 */
755
756        s32 = ((uint64_t)m32 * r32) >> 32;
757        d32 = ((uint64_t)s32 * r32) >> 32;
758        u32 = three32 - d32;
759
760        if (fmt->frac_size <= 23) {
761            /* float32 or smaller */
762
763            s32 = ((uint64_t)s32 * u32) >> 32;  /* 3.29 */
764            s32 = (s32 - 1) >> 6;               /* 9.23 */
765            /* s < sqrt(m) < s + 0x1.08p-23 */
766
767            /* compute nearest rounded result to 2.23 bits */
768            uint32_t d0 = (m32 << 16) - s32 * s32;
769            uint32_t d1 = s32 - d0;
770            uint32_t d2 = d1 + s32 + 1;
771            s32 += d1 >> 31;
772            a->frac_hi = (uint64_t)s32 << (64 - 25);
773
774            /* increment or decrement for inexact */
775            if (d2 != 0) {
776                a->frac_hi += ((int32_t)(d1 ^ d2) < 0 ? -1 : 1);
777            }
778            goto done;
779        }
780
781        /* float64 */
782
783        r64 = (uint64_t)r32 * u32 * 2;
784        /* |r*sqrt(m) - 1| < 0x1.37-p29; convert to 64-bit arithmetic */
785        mul64To128(m64, r64, &s64, &discard);
786        mul64To128(s64, r64, &d64, &discard);
787        u64 = three64 - d64;
788
789        mul64To128(s64, u64, &s64, &discard);  /* 3.61 */
790        s64 = (s64 - 2) >> 9;                  /* 12.52 */
791
792        /* Compute nearest rounded result */
793        uint64_t d0 = (m64 << 42) - s64 * s64;
794        uint64_t d1 = s64 - d0;
795        uint64_t d2 = d1 + s64 + 1;
796        s64 += d1 >> 63;
797        a->frac_hi = s64 << (64 - 54);
798
799        /* increment or decrement for inexact */
800        if (d2 != 0) {
801            a->frac_hi += ((int64_t)(d1 ^ d2) < 0 ? -1 : 1);
802        }
803        goto done;
804    }
805
806    r64 = (uint64_t)r32 * u32 * 2;
807    /* |r*sqrt(m) - 1| < 0x1.7Bp-16; convert to 64-bit arithmetic */
808
809    mul64To128(m64, r64, &s64, &discard);
810    mul64To128(s64, r64, &d64, &discard);
811    u64 = three64 - d64;
812    mul64To128(u64, r64, &r64, &discard);
813    r64 <<= 1;
814    /* |r*sqrt(m) - 1| < 0x1.a5p-31 */
815
816    mul64To128(m64, r64, &s64, &discard);
817    mul64To128(s64, r64, &d64, &discard);
818    u64 = three64 - d64;
819    mul64To128(u64, r64, &rh, &rl);
820    add128(rh, rl, rh, rl, &rh, &rl);
821    /* |r*sqrt(m) - 1| < 0x1.c001p-59; change to 128-bit arithmetic */
822
823    mul128To256(a->frac_hi, a->frac_lo, rh, rl, &sh, &sl, &discard, &discard);
824    mul128To256(sh, sl, rh, rl, &dh, &dl, &discard, &discard);
825    sub128(three64, 0, dh, dl, &uh, &ul);
826    mul128To256(uh, ul, sh, sl, &sh, &sl, &discard, &discard);  /* 3.125 */
827    /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
828
829    sub128(sh, sl, 0, 4, &sh, &sl);
830    shift128Right(sh, sl, 13, &sh, &sl);  /* 16.112 */
831    /* s < sqrt(m) < s + 1ulp */
832
833    /* Compute nearest rounded result */
834    mul64To128(sl, sl, &d0h, &d0l);
835    d0h += 2 * sh * sl;
836    sub128(a->frac_lo << 34, 0, d0h, d0l, &d0h, &d0l);
837    sub128(sh, sl, d0h, d0l, &d1h, &d1l);
838    add128(sh, sl, 0, 1, &d2h, &d2l);
839    add128(d2h, d2l, d1h, d1l, &d2h, &d2l);
840    add128(sh, sl, 0, d1h >> 63, &sh, &sl);
841    shift128Left(sh, sl, 128 - 114, &sh, &sl);
842
843    /* increment or decrement for inexact */
844    if (d2h | d2l) {
845        if ((int64_t)(d1h ^ d2h) < 0) {
846            sub128(sh, sl, 0, 1, &sh, &sl);
847        } else {
848            add128(sh, sl, 0, 1, &sh, &sl);
849        }
850    }
851    a->frac_lo = sl;
852    a->frac_hi = sh;
853
854 done:
855    /* Convert back from base 4 to base 2. */
856    a->exp >>= 1;
857    if (!(a->frac_hi & DECOMPOSED_IMPLICIT_BIT)) {
858        frac_add(a, a, a);
859    } else {
860        a->exp += 1;
861    }
862    return;
863
864 d_nan:
865    float_raise(float_flag_invalid, status);
866    parts_default_nan(a, status);
867}
868
869/*
870 * Rounds the floating-point value `a' to an integer, and returns the
871 * result as a floating-point value. The operation is performed
872 * according to the IEC/IEEE Standard for Binary Floating-Point
873 * Arithmetic.
874 *
875 * parts_round_to_int_normal is an internal helper function for
876 * normal numbers only, returning true for inexact but not directly
877 * raising float_flag_inexact.
878 */
879static bool partsN(round_to_int_normal)(FloatPartsN *a, FloatRoundMode rmode,
880                                        int scale, int frac_size)
881{
882    uint64_t frac_lsb, frac_lsbm1, rnd_even_mask, rnd_mask, inc;
883    int shift_adj;
884
885    scale = MIN(MAX(scale, -0x10000), 0x10000);
886    a->exp += scale;
887
888    if (a->exp < 0) {
889        bool one;
890
891        /* All fractional */
892        switch (rmode) {
893        case float_round_nearest_even:
894            one = false;
895            if (a->exp == -1) {
896                FloatPartsN tmp;
897                /* Shift left one, discarding DECOMPOSED_IMPLICIT_BIT */
898                frac_add(&tmp, a, a);
899                /* Anything remaining means frac > 0.5. */
900                one = !frac_eqz(&tmp);
901            }
902            break;
903        case float_round_ties_away:
904            one = a->exp == -1;
905            break;
906        case float_round_to_zero:
907            one = false;
908            break;
909        case float_round_up:
910            one = !a->sign;
911            break;
912        case float_round_down:
913            one = a->sign;
914            break;
915        case float_round_to_odd:
916            one = true;
917            break;
918        default:
919            g_assert_not_reached();
920        }
921
922        frac_clear(a);
923        a->exp = 0;
924        if (one) {
925            a->frac_hi = DECOMPOSED_IMPLICIT_BIT;
926        } else {
927            a->cls = float_class_zero;
928        }
929        return true;
930    }
931
932    if (a->exp >= frac_size) {
933        /* All integral */
934        return false;
935    }
936
937    if (N > 64 && a->exp < N - 64) {
938        /*
939         * Rounding is not in the low word -- shift lsb to bit 2,
940         * which leaves room for sticky and rounding bit.
941         */
942        shift_adj = (N - 1) - (a->exp + 2);
943        frac_shrjam(a, shift_adj);
944        frac_lsb = 1 << 2;
945    } else {
946        shift_adj = 0;
947        frac_lsb = DECOMPOSED_IMPLICIT_BIT >> (a->exp & 63);
948    }
949
950    frac_lsbm1 = frac_lsb >> 1;
951    rnd_mask = frac_lsb - 1;
952    rnd_even_mask = rnd_mask | frac_lsb;
953
954    if (!(a->frac_lo & rnd_mask)) {
955        /* Fractional bits already clear, undo the shift above. */
956        frac_shl(a, shift_adj);
957        return false;
958    }
959
960    switch (rmode) {
961    case float_round_nearest_even:
962        inc = ((a->frac_lo & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
963        break;
964    case float_round_ties_away:
965        inc = frac_lsbm1;
966        break;
967    case float_round_to_zero:
968        inc = 0;
969        break;
970    case float_round_up:
971        inc = a->sign ? 0 : rnd_mask;
972        break;
973    case float_round_down:
974        inc = a->sign ? rnd_mask : 0;
975        break;
976    case float_round_to_odd:
977        inc = a->frac_lo & frac_lsb ? 0 : rnd_mask;
978        break;
979    default:
980        g_assert_not_reached();
981    }
982
983    if (shift_adj == 0) {
984        if (frac_addi(a, a, inc)) {
985            frac_shr(a, 1);
986            a->frac_hi |= DECOMPOSED_IMPLICIT_BIT;
987            a->exp++;
988        }
989        a->frac_lo &= ~rnd_mask;
990    } else {
991        frac_addi(a, a, inc);
992        a->frac_lo &= ~rnd_mask;
993        /* Be careful shifting back, not to overflow */
994        frac_shl(a, shift_adj - 1);
995        if (a->frac_hi & DECOMPOSED_IMPLICIT_BIT) {
996            a->exp++;
997        } else {
998            frac_add(a, a, a);
999        }
1000    }
1001    return true;
1002}
1003
1004static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
1005                                 int scale, float_status *s,
1006                                 const FloatFmt *fmt)
1007{
1008    switch (a->cls) {
1009    case float_class_qnan:
1010    case float_class_snan:
1011        parts_return_nan(a, s);
1012        break;
1013    case float_class_zero:
1014    case float_class_inf:
1015        break;
1016    case float_class_normal:
1017        if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
1018            float_raise(float_flag_inexact, s);
1019        }
1020        break;
1021    default:
1022        g_assert_not_reached();
1023    }
1024}
1025
1026/*
1027 * Returns the result of converting the floating-point value `a' to
1028 * the two's complement integer format. The conversion is performed
1029 * according to the IEC/IEEE Standard for Binary Floating-Point
1030 * Arithmetic---which means in particular that the conversion is
1031 * rounded according to the current rounding mode. If `a' is a NaN,
1032 * the largest positive integer is returned. Otherwise, if the
1033 * conversion overflows, the largest integer with the same sign as `a'
1034 * is returned.
1035 */
1036static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
1037                                     int scale, int64_t min, int64_t max,
1038                                     float_status *s)
1039{
1040    int flags = 0;
1041    uint64_t r;
1042
1043    switch (p->cls) {
1044    case float_class_snan:
1045    case float_class_qnan:
1046        flags = float_flag_invalid;
1047        r = max;
1048        break;
1049
1050    case float_class_inf:
1051        flags = float_flag_invalid;
1052        r = p->sign ? min : max;
1053        break;
1054
1055    case float_class_zero:
1056        return 0;
1057
1058    case float_class_normal:
1059        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1060        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1061            flags = float_flag_inexact;
1062        }
1063
1064        if (p->exp <= DECOMPOSED_BINARY_POINT) {
1065            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1066        } else {
1067            r = UINT64_MAX;
1068        }
1069        if (p->sign) {
1070            if (r <= -(uint64_t)min) {
1071                r = -r;
1072            } else {
1073                flags = float_flag_invalid;
1074                r = min;
1075            }
1076        } else if (r > max) {
1077            flags = float_flag_invalid;
1078            r = max;
1079        }
1080        break;
1081
1082    default:
1083        g_assert_not_reached();
1084    }
1085
1086    float_raise(flags, s);
1087    return r;
1088}
1089
1090/*
1091 *  Returns the result of converting the floating-point value `a' to
1092 *  the unsigned integer format. The conversion is performed according
1093 *  to the IEC/IEEE Standard for Binary Floating-Point
1094 *  Arithmetic---which means in particular that the conversion is
1095 *  rounded according to the current rounding mode. If `a' is a NaN,
1096 *  the largest unsigned integer is returned. Otherwise, if the
1097 *  conversion overflows, the largest unsigned integer is returned. If
1098 *  the 'a' is negative, the result is rounded and zero is returned;
1099 *  values that do not round to zero will raise the inexact exception
1100 *  flag.
1101 */
1102static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
1103                                      int scale, uint64_t max, float_status *s)
1104{
1105    int flags = 0;
1106    uint64_t r;
1107
1108    switch (p->cls) {
1109    case float_class_snan:
1110    case float_class_qnan:
1111        flags = float_flag_invalid;
1112        r = max;
1113        break;
1114
1115    case float_class_inf:
1116        flags = float_flag_invalid;
1117        r = p->sign ? 0 : max;
1118        break;
1119
1120    case float_class_zero:
1121        return 0;
1122
1123    case float_class_normal:
1124        /* TODO: N - 2 is frac_size for rounding; could use input fmt. */
1125        if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
1126            flags = float_flag_inexact;
1127            if (p->cls == float_class_zero) {
1128                r = 0;
1129                break;
1130            }
1131        }
1132
1133        if (p->sign) {
1134            flags = float_flag_invalid;
1135            r = 0;
1136        } else if (p->exp > DECOMPOSED_BINARY_POINT) {
1137            flags = float_flag_invalid;
1138            r = max;
1139        } else {
1140            r = p->frac_hi >> (DECOMPOSED_BINARY_POINT - p->exp);
1141            if (r > max) {
1142                flags = float_flag_invalid;
1143                r = max;
1144            }
1145        }
1146        break;
1147
1148    default:
1149        g_assert_not_reached();
1150    }
1151
1152    float_raise(flags, s);
1153    return r;
1154}
1155
1156/*
1157 * Integer to float conversions
1158 *
1159 * Returns the result of converting the two's complement integer `a'
1160 * to the floating-point format. The conversion is performed according
1161 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1162 */
1163static void partsN(sint_to_float)(FloatPartsN *p, int64_t a,
1164                                  int scale, float_status *s)
1165{
1166    uint64_t f = a;
1167    int shift;
1168
1169    memset(p, 0, sizeof(*p));
1170
1171    if (a == 0) {
1172        p->cls = float_class_zero;
1173        return;
1174    }
1175
1176    p->cls = float_class_normal;
1177    if (a < 0) {
1178        f = -f;
1179        p->sign = true;
1180    }
1181    shift = clz64(f);
1182    scale = MIN(MAX(scale, -0x10000), 0x10000);
1183
1184    p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1185    p->frac_hi = f << shift;
1186}
1187
1188/*
1189 * Unsigned Integer to float conversions
1190 *
1191 * Returns the result of converting the unsigned integer `a' to the
1192 * floating-point format. The conversion is performed according to the
1193 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1194 */
1195static void partsN(uint_to_float)(FloatPartsN *p, uint64_t a,
1196                                  int scale, float_status *status)
1197{
1198    memset(p, 0, sizeof(*p));
1199
1200    if (a == 0) {
1201        p->cls = float_class_zero;
1202    } else {
1203        int shift = clz64(a);
1204        scale = MIN(MAX(scale, -0x10000), 0x10000);
1205        p->cls = float_class_normal;
1206        p->exp = DECOMPOSED_BINARY_POINT - shift + scale;
1207        p->frac_hi = a << shift;
1208    }
1209}
1210
1211/*
1212 * Float min/max.
1213 */
1214static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
1215                                   float_status *s, int flags)
1216{
1217    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1218    int a_exp, b_exp, cmp;
1219
1220    if (unlikely(ab_mask & float_cmask_anynan)) {
1221        /*
1222         * For minnum/maxnum, if one operand is a QNaN, and the other
1223         * operand is numerical, then return numerical argument.
1224         */
1225        if ((flags & minmax_isnum)
1226            && !(ab_mask & float_cmask_snan)
1227            && (ab_mask & ~float_cmask_qnan)) {
1228            return is_nan(a->cls) ? b : a;
1229        }
1230        return parts_pick_nan(a, b, s);
1231    }
1232
1233    a_exp = a->exp;
1234    b_exp = b->exp;
1235
1236    if (unlikely(ab_mask != float_cmask_normal)) {
1237        switch (a->cls) {
1238        case float_class_normal:
1239            break;
1240        case float_class_inf:
1241            a_exp = INT16_MAX;
1242            break;
1243        case float_class_zero:
1244            a_exp = INT16_MIN;
1245            break;
1246        default:
1247            g_assert_not_reached();
1248            break;
1249        }
1250        switch (b->cls) {
1251        case float_class_normal:
1252            break;
1253        case float_class_inf:
1254            b_exp = INT16_MAX;
1255            break;
1256        case float_class_zero:
1257            b_exp = INT16_MIN;
1258            break;
1259        default:
1260            g_assert_not_reached();
1261            break;
1262        }
1263    }
1264
1265    /* Compare magnitudes. */
1266    cmp = a_exp - b_exp;
1267    if (cmp == 0) {
1268        cmp = frac_cmp(a, b);
1269    }
1270
1271    /*
1272     * Take the sign into account.
1273     * For ismag, only do this if the magnitudes are equal.
1274     */
1275    if (!(flags & minmax_ismag) || cmp == 0) {
1276        if (a->sign != b->sign) {
1277            /* For differing signs, the negative operand is less. */
1278            cmp = a->sign ? -1 : 1;
1279        } else if (a->sign) {
1280            /* For two negative operands, invert the magnitude comparison. */
1281            cmp = -cmp;
1282        }
1283    }
1284
1285    if (flags & minmax_ismin) {
1286        cmp = -cmp;
1287    }
1288    return cmp < 0 ? b : a;
1289}
1290
1291/*
1292 * Floating point compare
1293 */
1294static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
1295                                     float_status *s, bool is_quiet)
1296{
1297    int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
1298    int cmp;
1299
1300    if (likely(ab_mask == float_cmask_normal)) {
1301        if (a->sign != b->sign) {
1302            goto a_sign;
1303        }
1304        if (a->exp != b->exp) {
1305            cmp = a->exp < b->exp ? -1 : 1;
1306        } else {
1307            cmp = frac_cmp(a, b);
1308        }
1309        if (a->sign) {
1310            cmp = -cmp;
1311        }
1312        return cmp;
1313    }
1314
1315    if (unlikely(ab_mask & float_cmask_anynan)) {
1316        if (!is_quiet || (ab_mask & float_cmask_snan)) {
1317            float_raise(float_flag_invalid, s);
1318        }
1319        return float_relation_unordered;
1320    }
1321
1322    if (ab_mask & float_cmask_zero) {
1323        if (ab_mask == float_cmask_zero) {
1324            return float_relation_equal;
1325        } else if (a->cls == float_class_zero) {
1326            goto b_sign;
1327        } else {
1328            goto a_sign;
1329        }
1330    }
1331
1332    if (ab_mask == float_cmask_inf) {
1333        if (a->sign == b->sign) {
1334            return float_relation_equal;
1335        }
1336    } else if (b->cls == float_class_inf) {
1337        goto b_sign;
1338    } else {
1339        g_assert(a->cls == float_class_inf);
1340    }
1341
1342 a_sign:
1343    return a->sign ? float_relation_less : float_relation_greater;
1344 b_sign:
1345    return b->sign ? float_relation_greater : float_relation_less;
1346}
1347
1348/*
1349 * Multiply A by 2 raised to the power N.
1350 */
1351static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
1352{
1353    switch (a->cls) {
1354    case float_class_snan:
1355    case float_class_qnan:
1356        parts_return_nan(a, s);
1357        break;
1358    case float_class_zero:
1359    case float_class_inf:
1360        break;
1361    case float_class_normal:
1362        a->exp += MIN(MAX(n, -0x10000), 0x10000);
1363        break;
1364    default:
1365        g_assert_not_reached();
1366    }
1367}
1368
1369/*
1370 * Return log2(A)
1371 */
1372static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
1373{
1374    uint64_t a0, a1, r, t, ign;
1375    FloatPartsN f;
1376    int i, n, a_exp, f_exp;
1377
1378    if (unlikely(a->cls != float_class_normal)) {
1379        switch (a->cls) {
1380        case float_class_snan:
1381        case float_class_qnan:
1382            parts_return_nan(a, s);
1383            return;
1384        case float_class_zero:
1385            /* log2(0) = -inf */
1386            a->cls = float_class_inf;
1387            a->sign = 1;
1388            return;
1389        case float_class_inf:
1390            if (unlikely(a->sign)) {
1391                goto d_nan;
1392            }
1393            return;
1394        default:
1395            break;
1396        }
1397        g_assert_not_reached();
1398    }
1399    if (unlikely(a->sign)) {
1400        goto d_nan;
1401    }
1402
1403    /* TODO: This algorithm looses bits too quickly for float128. */
1404    g_assert(N == 64);
1405
1406    a_exp = a->exp;
1407    f_exp = -1;
1408
1409    r = 0;
1410    t = DECOMPOSED_IMPLICIT_BIT;
1411    a0 = a->frac_hi;
1412    a1 = 0;
1413
1414    n = fmt->frac_size + 2;
1415    if (unlikely(a_exp == -1)) {
1416        /*
1417         * When a_exp == -1, we're computing the log2 of a value [0.5,1.0).
1418         * When the value is very close to 1.0, there are lots of 1's in
1419         * the msb parts of the fraction.  At the end, when we subtract
1420         * this value from -1.0, we can see a catastrophic loss of precision,
1421         * as 0x800..000 - 0x7ff..ffx becomes 0x000..00y, leaving only the
1422         * bits of y in the final result.  To minimize this, compute as many
1423         * digits as we can.
1424         * ??? This case needs another algorithm to avoid this.
1425         */
1426        n = fmt->frac_size * 2 + 2;
1427        /* Don't compute a value overlapping the sticky bit */
1428        n = MIN(n, 62);
1429    }
1430
1431    for (i = 0; i < n; i++) {
1432        if (a1) {
1433            mul128To256(a0, a1, a0, a1, &a0, &a1, &ign, &ign);
1434        } else if (a0 & 0xffffffffull) {
1435            mul64To128(a0, a0, &a0, &a1);
1436        } else if (a0 & ~DECOMPOSED_IMPLICIT_BIT) {
1437            a0 >>= 32;
1438            a0 *= a0;
1439        } else {
1440            goto exact;
1441        }
1442
1443        if (a0 & DECOMPOSED_IMPLICIT_BIT) {
1444            if (unlikely(a_exp == 0 && r == 0)) {
1445                /*
1446                 * When a_exp == 0, we're computing the log2 of a value
1447                 * [1.0,2.0).  When the value is very close to 1.0, there
1448                 * are lots of 0's in the msb parts of the fraction.
1449                 * We need to compute more digits to produce a correct
1450                 * result -- restart at the top of the fraction.
1451                 * ??? This is likely to lose precision quickly, as for
1452                 * float128; we may need another method.
1453                 */
1454                f_exp -= i;
1455                t = r = DECOMPOSED_IMPLICIT_BIT;
1456                i = 0;
1457            } else {
1458                r |= t;
1459            }
1460        } else {
1461            add128(a0, a1, a0, a1, &a0, &a1);
1462        }
1463        t >>= 1;
1464    }
1465
1466    /* Set sticky for inexact. */
1467    r |= (a1 || a0 & ~DECOMPOSED_IMPLICIT_BIT);
1468
1469 exact:
1470    parts_sint_to_float(a, a_exp, 0, s);
1471    if (r == 0) {
1472        return;
1473    }
1474
1475    memset(&f, 0, sizeof(f));
1476    f.cls = float_class_normal;
1477    f.frac_hi = r;
1478    f.exp = f_exp - frac_normalize(&f);
1479
1480    if (a_exp < 0) {
1481        parts_sub_normal(a, &f);
1482    } else if (a_exp > 0) {
1483        parts_add_normal(a, &f);
1484    } else {
1485        *a = f;
1486    }
1487    return;
1488
1489 d_nan:
1490    float_raise(float_flag_invalid, s);
1491    parts_default_nan(a, s);
1492}
1493