xref: /openbmc/qemu/target/i386/tcg/fpu_helper.c (revision 7604bbc2d87d153e65e38cf2d671a5a9a35917b1)
/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
191b248f14SClaudio Fontana 
201b248f14SClaudio Fontana #include "qemu/osdep.h"
211b248f14SClaudio Fontana #include <math.h>
221b248f14SClaudio Fontana #include "cpu.h"
2348e5c98aSDavid Edmondson #include "tcg-cpu.h"
247e17a524SPhilippe Mathieu-Daudé #include "exec/exec-all.h"
2509b07f28SPhilippe Mathieu-Daudé #include "exec/cpu_ldst.h"
261b248f14SClaudio Fontana #include "exec/helper-proto.h"
271b248f14SClaudio Fontana #include "fpu/softfloat.h"
281b248f14SClaudio Fontana #include "fpu/softfloat-macros.h"
29ed69e831SClaudio Fontana #include "helper-tcg.h"
30d3e8b648SRichard Henderson #include "access.h"
311b248f14SClaudio Fontana 
/*
 * x87 register shorthands.  Every macro expands to an lvalue and expects a
 * variable named `env` (CPUX86State *) to be in scope at the point of use.
 */
#define FT0    (env->ft0)                                /* scratch operand */
#define ST0    (env->fpregs[env->fpstt].d)               /* top of stack */
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)   /* n-th from top, mod 8 */
#define ST1    ST(1)
37ed69e831SClaudio Fontana 
/* Rounding-control field of the FPU control word (two bits at bit 10). */
#define FPU_RC_SHIFT        10
#define FPU_RC_MASK         (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR         (0 << FPU_RC_SHIFT)   /* 0x000 */
#define FPU_RC_DOWN         (1 << FPU_RC_SHIFT)   /* 0x400 */
#define FPU_RC_UP           (2 << FPU_RC_SHIFT)   /* 0x800 */
#define FPU_RC_CHOP         (3 << FPU_RC_SHIFT)   /* 0xc00 */
441b248f14SClaudio Fontana 
/* 2^63 written as a double constant. */
#define MAXTAN 9223372036854775808.0
461b248f14SClaudio Fontana 
/*
 * The following deal with x86 long double-precision numbers.
 *
 * The accessor macros take an object that has `.l.upper` (sign bit plus
 * 15-bit exponent) and `.l.lower` (mantissa) members.  The argument is
 * parenthesised so that any expression of that type works, e.g.
 * EXPD(*ptr), not just a plain variable name.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
/* Exponent field: low 15 bits of the upper word. */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
/* Sign bit (0x8000 if negative, 0 otherwise). */
#define SIGND(fp)       (((fp).l.upper) & 0x8000)
/* Mantissa word; still usable as an lvalue. */
#define MANTD(fp)       ((fp).l.lower)
/*
 * Overwrite the exponent field with the bias while keeping the sign bit.
 * Expands to a single parenthesised assignment expression; note that `fp`
 * is evaluated twice, so it must not have side effects.
 */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
541b248f14SClaudio Fontana 
/* FPU status word bits. */
#define FPUS_IE 0x0001  /* bit 0 */
#define FPUS_DE 0x0002  /* bit 1 */
#define FPUS_ZE 0x0004  /* bit 2 */
#define FPUS_OE 0x0008  /* bit 3 */
#define FPUS_UE 0x0010  /* bit 4 */
#define FPUS_PE 0x0020  /* bit 5 */
#define FPUS_SF 0x0040  /* bit 6 */
#define FPUS_SE 0x0080  /* bit 7 */
#define FPUS_B  0x8000  /* bit 15 */

/* Exception-mask field of the FPU control word (low six bits). */
#define FPUC_EM 0x3f
661b248f14SClaudio Fontana 
/*
 * Extended-precision constants, given as (sign/exponent word, mantissa).
 * Several come in pairs whose mantissas differ by one unit in the last
 * place; the helpers below pick between them based on the rounding mode.
 */
#define floatx80_lg2    make_floatx80(0x3ffd, 0x9a209a84fbcff799ULL)
#define floatx80_lg2_d  make_floatx80(0x3ffd, 0x9a209a84fbcff798ULL)
#define floatx80_l2e    make_floatx80(0x3fff, 0xb8aa3b295c17f0bcULL)
#define floatx80_l2e_d  make_floatx80(0x3fff, 0xb8aa3b295c17f0bbULL)
#define floatx80_l2t    make_floatx80(0x4000, 0xd49a784bcd1b8afeULL)
#define floatx80_l2t_u  make_floatx80(0x4000, 0xd49a784bcd1b8affULL)
#define floatx80_ln2_d  make_floatx80(0x3ffe, 0xb17217f7d1cf79abULL)
#define floatx80_pi_d   make_floatx80(0x4000, 0xc90fdaa22168c234ULL)
751b248f14SClaudio Fontana 
761b248f14SClaudio Fontana static inline void fpush(CPUX86State *env)
771b248f14SClaudio Fontana {
781b248f14SClaudio Fontana     env->fpstt = (env->fpstt - 1) & 7;
791b248f14SClaudio Fontana     env->fptags[env->fpstt] = 0; /* validate stack entry */
801b248f14SClaudio Fontana }
811b248f14SClaudio Fontana 
821b248f14SClaudio Fontana static inline void fpop(CPUX86State *env)
831b248f14SClaudio Fontana {
841b248f14SClaudio Fontana     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
851b248f14SClaudio Fontana     env->fpstt = (env->fpstt + 1) & 7;
861b248f14SClaudio Fontana }
871b248f14SClaudio Fontana 
88d3e8b648SRichard Henderson static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
891b248f14SClaudio Fontana {
901b248f14SClaudio Fontana     CPU_LDoubleU temp;
911b248f14SClaudio Fontana 
92d3e8b648SRichard Henderson     temp.l.lower = access_ldq(ac, ptr);
93d3e8b648SRichard Henderson     temp.l.upper = access_ldw(ac, ptr + 8);
941b248f14SClaudio Fontana     return temp.d;
951b248f14SClaudio Fontana }
961b248f14SClaudio Fontana 
97d3e8b648SRichard Henderson static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
981b248f14SClaudio Fontana {
991b248f14SClaudio Fontana     CPU_LDoubleU temp;
1001b248f14SClaudio Fontana 
1011b248f14SClaudio Fontana     temp.d = f;
102d3e8b648SRichard Henderson     access_stq(ac, ptr, temp.l.lower);
103d3e8b648SRichard Henderson     access_stw(ac, ptr + 8, temp.l.upper);
1041b248f14SClaudio Fontana }
1051b248f14SClaudio Fontana 
1061b248f14SClaudio Fontana /* x87 FPU helpers */
1071b248f14SClaudio Fontana 
1081b248f14SClaudio Fontana static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
1091b248f14SClaudio Fontana {
1101b248f14SClaudio Fontana     union {
1111b248f14SClaudio Fontana         float64 f64;
1121b248f14SClaudio Fontana         double d;
1131b248f14SClaudio Fontana     } u;
1141b248f14SClaudio Fontana 
1151b248f14SClaudio Fontana     u.f64 = floatx80_to_float64(a, &env->fp_status);
1161b248f14SClaudio Fontana     return u.d;
1171b248f14SClaudio Fontana }
1181b248f14SClaudio Fontana 
1191b248f14SClaudio Fontana static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
1201b248f14SClaudio Fontana {
1211b248f14SClaudio Fontana     union {
1221b248f14SClaudio Fontana         float64 f64;
1231b248f14SClaudio Fontana         double d;
1241b248f14SClaudio Fontana     } u;
1251b248f14SClaudio Fontana 
1261b248f14SClaudio Fontana     u.d = a;
1271b248f14SClaudio Fontana     return float64_to_floatx80(u.f64, &env->fp_status);
1281b248f14SClaudio Fontana }
1291b248f14SClaudio Fontana 
1301b248f14SClaudio Fontana static void fpu_set_exception(CPUX86State *env, int mask)
1311b248f14SClaudio Fontana {
1321b248f14SClaudio Fontana     env->fpus |= mask;
1331b248f14SClaudio Fontana     if (env->fpus & (~env->fpuc & FPUC_EM)) {
1341b248f14SClaudio Fontana         env->fpus |= FPUS_SE | FPUS_B;
1351b248f14SClaudio Fontana     }
1361b248f14SClaudio Fontana }
1371b248f14SClaudio Fontana 
1381b248f14SClaudio Fontana static inline uint8_t save_exception_flags(CPUX86State *env)
1391b248f14SClaudio Fontana {
1401b248f14SClaudio Fontana     uint8_t old_flags = get_float_exception_flags(&env->fp_status);
1411b248f14SClaudio Fontana     set_float_exception_flags(0, &env->fp_status);
1421b248f14SClaudio Fontana     return old_flags;
1431b248f14SClaudio Fontana }
1441b248f14SClaudio Fontana 
1451b248f14SClaudio Fontana static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
1461b248f14SClaudio Fontana {
1471b248f14SClaudio Fontana     uint8_t new_flags = get_float_exception_flags(&env->fp_status);
1481b248f14SClaudio Fontana     float_raise(old_flags, &env->fp_status);
1491b248f14SClaudio Fontana     fpu_set_exception(env,
1501b248f14SClaudio Fontana                       ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
1511b248f14SClaudio Fontana                        (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
1521b248f14SClaudio Fontana                        (new_flags & float_flag_overflow ? FPUS_OE : 0) |
1531b248f14SClaudio Fontana                        (new_flags & float_flag_underflow ? FPUS_UE : 0) |
1541b248f14SClaudio Fontana                        (new_flags & float_flag_inexact ? FPUS_PE : 0) |
1551b248f14SClaudio Fontana                        (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
1561b248f14SClaudio Fontana }
1571b248f14SClaudio Fontana 
1581b248f14SClaudio Fontana static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
1591b248f14SClaudio Fontana {
1601b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1611b248f14SClaudio Fontana     floatx80 ret = floatx80_div(a, b, &env->fp_status);
1621b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
1631b248f14SClaudio Fontana     return ret;
1641b248f14SClaudio Fontana }
1651b248f14SClaudio Fontana 
1661b248f14SClaudio Fontana static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
1671b248f14SClaudio Fontana {
1681b248f14SClaudio Fontana     if (env->cr[0] & CR0_NE_MASK) {
1691b248f14SClaudio Fontana         raise_exception_ra(env, EXCP10_COPR, retaddr);
1701b248f14SClaudio Fontana     }
1711b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY)
17283a3d9c7SClaudio Fontana     else {
17383a3d9c7SClaudio Fontana         fpu_check_raise_ferr_irq(env);
1741b248f14SClaudio Fontana     }
1751b248f14SClaudio Fontana #endif
1761b248f14SClaudio Fontana }
1771b248f14SClaudio Fontana 
1781b248f14SClaudio Fontana void helper_flds_FT0(CPUX86State *env, uint32_t val)
1791b248f14SClaudio Fontana {
1801b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1811b248f14SClaudio Fontana     union {
1821b248f14SClaudio Fontana         float32 f;
1831b248f14SClaudio Fontana         uint32_t i;
1841b248f14SClaudio Fontana     } u;
1851b248f14SClaudio Fontana 
1861b248f14SClaudio Fontana     u.i = val;
1871b248f14SClaudio Fontana     FT0 = float32_to_floatx80(u.f, &env->fp_status);
1881b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
1891b248f14SClaudio Fontana }
1901b248f14SClaudio Fontana 
1911b248f14SClaudio Fontana void helper_fldl_FT0(CPUX86State *env, uint64_t val)
1921b248f14SClaudio Fontana {
1931b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1941b248f14SClaudio Fontana     union {
1951b248f14SClaudio Fontana         float64 f;
1961b248f14SClaudio Fontana         uint64_t i;
1971b248f14SClaudio Fontana     } u;
1981b248f14SClaudio Fontana 
1991b248f14SClaudio Fontana     u.i = val;
2001b248f14SClaudio Fontana     FT0 = float64_to_floatx80(u.f, &env->fp_status);
2011b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2021b248f14SClaudio Fontana }
2031b248f14SClaudio Fontana 
2041b248f14SClaudio Fontana void helper_fildl_FT0(CPUX86State *env, int32_t val)
2051b248f14SClaudio Fontana {
2061b248f14SClaudio Fontana     FT0 = int32_to_floatx80(val, &env->fp_status);
2071b248f14SClaudio Fontana }
2081b248f14SClaudio Fontana 
2091b248f14SClaudio Fontana void helper_flds_ST0(CPUX86State *env, uint32_t val)
2101b248f14SClaudio Fontana {
2111b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2121b248f14SClaudio Fontana     int new_fpstt;
2131b248f14SClaudio Fontana     union {
2141b248f14SClaudio Fontana         float32 f;
2151b248f14SClaudio Fontana         uint32_t i;
2161b248f14SClaudio Fontana     } u;
2171b248f14SClaudio Fontana 
2181b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2191b248f14SClaudio Fontana     u.i = val;
2201b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
2211b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2221b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
2231b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2241b248f14SClaudio Fontana }
2251b248f14SClaudio Fontana 
2261b248f14SClaudio Fontana void helper_fldl_ST0(CPUX86State *env, uint64_t val)
2271b248f14SClaudio Fontana {
2281b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2291b248f14SClaudio Fontana     int new_fpstt;
2301b248f14SClaudio Fontana     union {
2311b248f14SClaudio Fontana         float64 f;
2321b248f14SClaudio Fontana         uint64_t i;
2331b248f14SClaudio Fontana     } u;
2341b248f14SClaudio Fontana 
2351b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2361b248f14SClaudio Fontana     u.i = val;
2371b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
2381b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2391b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
2401b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2411b248f14SClaudio Fontana }
2421b248f14SClaudio Fontana 
243276de33fSAlex Bennée static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
244276de33fSAlex Bennée {
245276de33fSAlex Bennée     FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
246276de33fSAlex Bennée     set_floatx80_rounding_precision(floatx80_precision_x, st);
247276de33fSAlex Bennée     return old;
248276de33fSAlex Bennée }
249276de33fSAlex Bennée 
2501b248f14SClaudio Fontana void helper_fildl_ST0(CPUX86State *env, int32_t val)
2511b248f14SClaudio Fontana {
2521b248f14SClaudio Fontana     int new_fpstt;
253276de33fSAlex Bennée     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
2541b248f14SClaudio Fontana 
2551b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2561b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
2571b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2581b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
259276de33fSAlex Bennée 
260276de33fSAlex Bennée     set_floatx80_rounding_precision(old, &env->fp_status);
2611b248f14SClaudio Fontana }
2621b248f14SClaudio Fontana 
2631b248f14SClaudio Fontana void helper_fildll_ST0(CPUX86State *env, int64_t val)
2641b248f14SClaudio Fontana {
2651b248f14SClaudio Fontana     int new_fpstt;
266276de33fSAlex Bennée     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
2671b248f14SClaudio Fontana 
2681b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2691b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
2701b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2711b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
272276de33fSAlex Bennée 
273276de33fSAlex Bennée     set_floatx80_rounding_precision(old, &env->fp_status);
2741b248f14SClaudio Fontana }
2751b248f14SClaudio Fontana 
2761b248f14SClaudio Fontana uint32_t helper_fsts_ST0(CPUX86State *env)
2771b248f14SClaudio Fontana {
2781b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2791b248f14SClaudio Fontana     union {
2801b248f14SClaudio Fontana         float32 f;
2811b248f14SClaudio Fontana         uint32_t i;
2821b248f14SClaudio Fontana     } u;
2831b248f14SClaudio Fontana 
2841b248f14SClaudio Fontana     u.f = floatx80_to_float32(ST0, &env->fp_status);
2851b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2861b248f14SClaudio Fontana     return u.i;
2871b248f14SClaudio Fontana }
2881b248f14SClaudio Fontana 
2891b248f14SClaudio Fontana uint64_t helper_fstl_ST0(CPUX86State *env)
2901b248f14SClaudio Fontana {
2911b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2921b248f14SClaudio Fontana     union {
2931b248f14SClaudio Fontana         float64 f;
2941b248f14SClaudio Fontana         uint64_t i;
2951b248f14SClaudio Fontana     } u;
2961b248f14SClaudio Fontana 
2971b248f14SClaudio Fontana     u.f = floatx80_to_float64(ST0, &env->fp_status);
2981b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2991b248f14SClaudio Fontana     return u.i;
3001b248f14SClaudio Fontana }
3011b248f14SClaudio Fontana 
3021b248f14SClaudio Fontana int32_t helper_fist_ST0(CPUX86State *env)
3031b248f14SClaudio Fontana {
3041b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3051b248f14SClaudio Fontana     int32_t val;
3061b248f14SClaudio Fontana 
3071b248f14SClaudio Fontana     val = floatx80_to_int32(ST0, &env->fp_status);
3081b248f14SClaudio Fontana     if (val != (int16_t)val) {
3091b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
3101b248f14SClaudio Fontana         val = -32768;
3111b248f14SClaudio Fontana     }
3121b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3131b248f14SClaudio Fontana     return val;
3141b248f14SClaudio Fontana }
3151b248f14SClaudio Fontana 
3161b248f14SClaudio Fontana int32_t helper_fistl_ST0(CPUX86State *env)
3171b248f14SClaudio Fontana {
3181b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3191b248f14SClaudio Fontana     int32_t val;
3201b248f14SClaudio Fontana 
3211b248f14SClaudio Fontana     val = floatx80_to_int32(ST0, &env->fp_status);
3221b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3231b248f14SClaudio Fontana         val = 0x80000000;
3241b248f14SClaudio Fontana     }
3251b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3261b248f14SClaudio Fontana     return val;
3271b248f14SClaudio Fontana }
3281b248f14SClaudio Fontana 
3291b248f14SClaudio Fontana int64_t helper_fistll_ST0(CPUX86State *env)
3301b248f14SClaudio Fontana {
3311b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3321b248f14SClaudio Fontana     int64_t val;
3331b248f14SClaudio Fontana 
3341b248f14SClaudio Fontana     val = floatx80_to_int64(ST0, &env->fp_status);
3351b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3361b248f14SClaudio Fontana         val = 0x8000000000000000ULL;
3371b248f14SClaudio Fontana     }
3381b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3391b248f14SClaudio Fontana     return val;
3401b248f14SClaudio Fontana }
3411b248f14SClaudio Fontana 
3421b248f14SClaudio Fontana int32_t helper_fistt_ST0(CPUX86State *env)
3431b248f14SClaudio Fontana {
3441b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3451b248f14SClaudio Fontana     int32_t val;
3461b248f14SClaudio Fontana 
3471b248f14SClaudio Fontana     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
3481b248f14SClaudio Fontana     if (val != (int16_t)val) {
3491b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
3501b248f14SClaudio Fontana         val = -32768;
3511b248f14SClaudio Fontana     }
3521b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3531b248f14SClaudio Fontana     return val;
3541b248f14SClaudio Fontana }
3551b248f14SClaudio Fontana 
3561b248f14SClaudio Fontana int32_t helper_fisttl_ST0(CPUX86State *env)
3571b248f14SClaudio Fontana {
3581b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3591b248f14SClaudio Fontana     int32_t val;
3601b248f14SClaudio Fontana 
3611b248f14SClaudio Fontana     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
3621b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3631b248f14SClaudio Fontana         val = 0x80000000;
3641b248f14SClaudio Fontana     }
3651b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3661b248f14SClaudio Fontana     return val;
3671b248f14SClaudio Fontana }
3681b248f14SClaudio Fontana 
3691b248f14SClaudio Fontana int64_t helper_fisttll_ST0(CPUX86State *env)
3701b248f14SClaudio Fontana {
3711b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3721b248f14SClaudio Fontana     int64_t val;
3731b248f14SClaudio Fontana 
3741b248f14SClaudio Fontana     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
3751b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3761b248f14SClaudio Fontana         val = 0x8000000000000000ULL;
3771b248f14SClaudio Fontana     }
3781b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3791b248f14SClaudio Fontana     return val;
3801b248f14SClaudio Fontana }
3811b248f14SClaudio Fontana 
3821b248f14SClaudio Fontana void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
3831b248f14SClaudio Fontana {
3841b248f14SClaudio Fontana     int new_fpstt;
385d3e8b648SRichard Henderson     X86Access ac;
386d3e8b648SRichard Henderson 
387d3e8b648SRichard Henderson     access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
3881b248f14SClaudio Fontana 
3891b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
390d3e8b648SRichard Henderson     env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
3911b248f14SClaudio Fontana     env->fpstt = new_fpstt;
3921b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
3931b248f14SClaudio Fontana }
3941b248f14SClaudio Fontana 
3951b248f14SClaudio Fontana void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
3961b248f14SClaudio Fontana {
397d3e8b648SRichard Henderson     X86Access ac;
398d3e8b648SRichard Henderson 
399d3e8b648SRichard Henderson     access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
400d3e8b648SRichard Henderson     do_fstt(&ac, ptr, ST0);
4011b248f14SClaudio Fontana }
4021b248f14SClaudio Fontana 
4031b248f14SClaudio Fontana void helper_fpush(CPUX86State *env)
4041b248f14SClaudio Fontana {
4051b248f14SClaudio Fontana     fpush(env);
4061b248f14SClaudio Fontana }
4071b248f14SClaudio Fontana 
4081b248f14SClaudio Fontana void helper_fpop(CPUX86State *env)
4091b248f14SClaudio Fontana {
4101b248f14SClaudio Fontana     fpop(env);
4111b248f14SClaudio Fontana }
4121b248f14SClaudio Fontana 
4131b248f14SClaudio Fontana void helper_fdecstp(CPUX86State *env)
4141b248f14SClaudio Fontana {
4151b248f14SClaudio Fontana     env->fpstt = (env->fpstt - 1) & 7;
4161b248f14SClaudio Fontana     env->fpus &= ~0x4700;
4171b248f14SClaudio Fontana }
4181b248f14SClaudio Fontana 
4191b248f14SClaudio Fontana void helper_fincstp(CPUX86State *env)
4201b248f14SClaudio Fontana {
4211b248f14SClaudio Fontana     env->fpstt = (env->fpstt + 1) & 7;
4221b248f14SClaudio Fontana     env->fpus &= ~0x4700;
4231b248f14SClaudio Fontana }
4241b248f14SClaudio Fontana 
4251b248f14SClaudio Fontana /* FPU move */
4261b248f14SClaudio Fontana 
4271b248f14SClaudio Fontana void helper_ffree_STN(CPUX86State *env, int st_index)
4281b248f14SClaudio Fontana {
4291b248f14SClaudio Fontana     env->fptags[(env->fpstt + st_index) & 7] = 1;
4301b248f14SClaudio Fontana }
4311b248f14SClaudio Fontana 
4321b248f14SClaudio Fontana void helper_fmov_ST0_FT0(CPUX86State *env)
4331b248f14SClaudio Fontana {
4341b248f14SClaudio Fontana     ST0 = FT0;
4351b248f14SClaudio Fontana }
4361b248f14SClaudio Fontana 
4371b248f14SClaudio Fontana void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
4381b248f14SClaudio Fontana {
4391b248f14SClaudio Fontana     FT0 = ST(st_index);
4401b248f14SClaudio Fontana }
4411b248f14SClaudio Fontana 
4421b248f14SClaudio Fontana void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
4431b248f14SClaudio Fontana {
4441b248f14SClaudio Fontana     ST0 = ST(st_index);
4451b248f14SClaudio Fontana }
4461b248f14SClaudio Fontana 
4471b248f14SClaudio Fontana void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
4481b248f14SClaudio Fontana {
4491b248f14SClaudio Fontana     ST(st_index) = ST0;
4501b248f14SClaudio Fontana }
4511b248f14SClaudio Fontana 
4521b248f14SClaudio Fontana void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
4531b248f14SClaudio Fontana {
4541b248f14SClaudio Fontana     floatx80 tmp;
4551b248f14SClaudio Fontana 
4561b248f14SClaudio Fontana     tmp = ST(st_index);
4571b248f14SClaudio Fontana     ST(st_index) = ST0;
4581b248f14SClaudio Fontana     ST0 = tmp;
4591b248f14SClaudio Fontana }
4601b248f14SClaudio Fontana 
4611b248f14SClaudio Fontana /* FPU operations */
4621b248f14SClaudio Fontana 
/*
 * Status-word condition-code patterns for the compare helpers, indexed by
 * (comparison result + 1).
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
4641b248f14SClaudio Fontana 
4651b248f14SClaudio Fontana void helper_fcom_ST0_FT0(CPUX86State *env)
4661b248f14SClaudio Fontana {
4671b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4681b248f14SClaudio Fontana     FloatRelation ret;
4691b248f14SClaudio Fontana 
4701b248f14SClaudio Fontana     ret = floatx80_compare(ST0, FT0, &env->fp_status);
4711b248f14SClaudio Fontana     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
4721b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4731b248f14SClaudio Fontana }
4741b248f14SClaudio Fontana 
4751b248f14SClaudio Fontana void helper_fucom_ST0_FT0(CPUX86State *env)
4761b248f14SClaudio Fontana {
4771b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4781b248f14SClaudio Fontana     FloatRelation ret;
4791b248f14SClaudio Fontana 
4801b248f14SClaudio Fontana     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
4811b248f14SClaudio Fontana     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
4821b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4831b248f14SClaudio Fontana }
4841b248f14SClaudio Fontana 
4851b248f14SClaudio Fontana static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
4861b248f14SClaudio Fontana 
4871b248f14SClaudio Fontana void helper_fcomi_ST0_FT0(CPUX86State *env)
4881b248f14SClaudio Fontana {
4891b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4901b248f14SClaudio Fontana     int eflags;
4911b248f14SClaudio Fontana     FloatRelation ret;
4921b248f14SClaudio Fontana 
4931b248f14SClaudio Fontana     ret = floatx80_compare(ST0, FT0, &env->fp_status);
4942455e9cfSPaolo Bonzini     eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
4952455e9cfSPaolo Bonzini     CC_SRC = eflags | fcomi_ccval[ret + 1];
496abdcc5c8SPaolo Bonzini     CC_OP = CC_OP_EFLAGS;
4971b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4981b248f14SClaudio Fontana }
4991b248f14SClaudio Fontana 
5001b248f14SClaudio Fontana void helper_fucomi_ST0_FT0(CPUX86State *env)
5011b248f14SClaudio Fontana {
5021b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5031b248f14SClaudio Fontana     int eflags;
5041b248f14SClaudio Fontana     FloatRelation ret;
5051b248f14SClaudio Fontana 
5061b248f14SClaudio Fontana     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
5072455e9cfSPaolo Bonzini     eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
5082455e9cfSPaolo Bonzini     CC_SRC = eflags | fcomi_ccval[ret + 1];
509abdcc5c8SPaolo Bonzini     CC_OP = CC_OP_EFLAGS;
5101b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5111b248f14SClaudio Fontana }
5121b248f14SClaudio Fontana 
5131b248f14SClaudio Fontana void helper_fadd_ST0_FT0(CPUX86State *env)
5141b248f14SClaudio Fontana {
5151b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5161b248f14SClaudio Fontana     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
5171b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5181b248f14SClaudio Fontana }
5191b248f14SClaudio Fontana 
5201b248f14SClaudio Fontana void helper_fmul_ST0_FT0(CPUX86State *env)
5211b248f14SClaudio Fontana {
5221b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5231b248f14SClaudio Fontana     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
5241b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5251b248f14SClaudio Fontana }
5261b248f14SClaudio Fontana 
5271b248f14SClaudio Fontana void helper_fsub_ST0_FT0(CPUX86State *env)
5281b248f14SClaudio Fontana {
5291b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5301b248f14SClaudio Fontana     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
5311b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5321b248f14SClaudio Fontana }
5331b248f14SClaudio Fontana 
5341b248f14SClaudio Fontana void helper_fsubr_ST0_FT0(CPUX86State *env)
5351b248f14SClaudio Fontana {
5361b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5371b248f14SClaudio Fontana     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
5381b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5391b248f14SClaudio Fontana }
5401b248f14SClaudio Fontana 
5411b248f14SClaudio Fontana void helper_fdiv_ST0_FT0(CPUX86State *env)
5421b248f14SClaudio Fontana {
5431b248f14SClaudio Fontana     ST0 = helper_fdiv(env, ST0, FT0);
5441b248f14SClaudio Fontana }
5451b248f14SClaudio Fontana 
5461b248f14SClaudio Fontana void helper_fdivr_ST0_FT0(CPUX86State *env)
5471b248f14SClaudio Fontana {
5481b248f14SClaudio Fontana     ST0 = helper_fdiv(env, FT0, ST0);
5491b248f14SClaudio Fontana }
5501b248f14SClaudio Fontana 
5511b248f14SClaudio Fontana /* fp operations between STN and ST0 */
5521b248f14SClaudio Fontana 
5531b248f14SClaudio Fontana void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
5541b248f14SClaudio Fontana {
5551b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5561b248f14SClaudio Fontana     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
5571b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5581b248f14SClaudio Fontana }
5591b248f14SClaudio Fontana 
5601b248f14SClaudio Fontana void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
5611b248f14SClaudio Fontana {
5621b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5631b248f14SClaudio Fontana     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
5641b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5651b248f14SClaudio Fontana }
5661b248f14SClaudio Fontana 
5671b248f14SClaudio Fontana void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
5681b248f14SClaudio Fontana {
5691b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5701b248f14SClaudio Fontana     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
5711b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5721b248f14SClaudio Fontana }
5731b248f14SClaudio Fontana 
5741b248f14SClaudio Fontana void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
5751b248f14SClaudio Fontana {
5761b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5771b248f14SClaudio Fontana     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
5781b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5791b248f14SClaudio Fontana }
5801b248f14SClaudio Fontana 
5811b248f14SClaudio Fontana void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
5821b248f14SClaudio Fontana {
5831b248f14SClaudio Fontana     floatx80 *p;
5841b248f14SClaudio Fontana 
5851b248f14SClaudio Fontana     p = &ST(st_index);
5861b248f14SClaudio Fontana     *p = helper_fdiv(env, *p, ST0);
5871b248f14SClaudio Fontana }
5881b248f14SClaudio Fontana 
5891b248f14SClaudio Fontana void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
5901b248f14SClaudio Fontana {
5911b248f14SClaudio Fontana     floatx80 *p;
5921b248f14SClaudio Fontana 
5931b248f14SClaudio Fontana     p = &ST(st_index);
5941b248f14SClaudio Fontana     *p = helper_fdiv(env, ST0, *p);
5951b248f14SClaudio Fontana }
5961b248f14SClaudio Fontana 
5971b248f14SClaudio Fontana /* misc FPU operations */
5981b248f14SClaudio Fontana void helper_fchs_ST0(CPUX86State *env)
5991b248f14SClaudio Fontana {
6001b248f14SClaudio Fontana     ST0 = floatx80_chs(ST0);
6011b248f14SClaudio Fontana }
6021b248f14SClaudio Fontana 
6031b248f14SClaudio Fontana void helper_fabs_ST0(CPUX86State *env)
6041b248f14SClaudio Fontana {
6051b248f14SClaudio Fontana     ST0 = floatx80_abs(ST0);
6061b248f14SClaudio Fontana }
6071b248f14SClaudio Fontana 
6081b248f14SClaudio Fontana void helper_fld1_ST0(CPUX86State *env)
6091b248f14SClaudio Fontana {
6101b248f14SClaudio Fontana     ST0 = floatx80_one;
6111b248f14SClaudio Fontana }
6121b248f14SClaudio Fontana 
6131b248f14SClaudio Fontana void helper_fldl2t_ST0(CPUX86State *env)
6141b248f14SClaudio Fontana {
6151b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6161b248f14SClaudio Fontana     case FPU_RC_UP:
6171b248f14SClaudio Fontana         ST0 = floatx80_l2t_u;
6181b248f14SClaudio Fontana         break;
6191b248f14SClaudio Fontana     default:
6201b248f14SClaudio Fontana         ST0 = floatx80_l2t;
6211b248f14SClaudio Fontana         break;
6221b248f14SClaudio Fontana     }
6231b248f14SClaudio Fontana }
6241b248f14SClaudio Fontana 
6251b248f14SClaudio Fontana void helper_fldl2e_ST0(CPUX86State *env)
6261b248f14SClaudio Fontana {
6271b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6281b248f14SClaudio Fontana     case FPU_RC_DOWN:
6291b248f14SClaudio Fontana     case FPU_RC_CHOP:
6301b248f14SClaudio Fontana         ST0 = floatx80_l2e_d;
6311b248f14SClaudio Fontana         break;
6321b248f14SClaudio Fontana     default:
6331b248f14SClaudio Fontana         ST0 = floatx80_l2e;
6341b248f14SClaudio Fontana         break;
6351b248f14SClaudio Fontana     }
6361b248f14SClaudio Fontana }
6371b248f14SClaudio Fontana 
6381b248f14SClaudio Fontana void helper_fldpi_ST0(CPUX86State *env)
6391b248f14SClaudio Fontana {
6401b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6411b248f14SClaudio Fontana     case FPU_RC_DOWN:
6421b248f14SClaudio Fontana     case FPU_RC_CHOP:
6431b248f14SClaudio Fontana         ST0 = floatx80_pi_d;
6441b248f14SClaudio Fontana         break;
6451b248f14SClaudio Fontana     default:
6461b248f14SClaudio Fontana         ST0 = floatx80_pi;
6471b248f14SClaudio Fontana         break;
6481b248f14SClaudio Fontana     }
6491b248f14SClaudio Fontana }
6501b248f14SClaudio Fontana 
6511b248f14SClaudio Fontana void helper_fldlg2_ST0(CPUX86State *env)
6521b248f14SClaudio Fontana {
6531b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6541b248f14SClaudio Fontana     case FPU_RC_DOWN:
6551b248f14SClaudio Fontana     case FPU_RC_CHOP:
6561b248f14SClaudio Fontana         ST0 = floatx80_lg2_d;
6571b248f14SClaudio Fontana         break;
6581b248f14SClaudio Fontana     default:
6591b248f14SClaudio Fontana         ST0 = floatx80_lg2;
6601b248f14SClaudio Fontana         break;
6611b248f14SClaudio Fontana     }
6621b248f14SClaudio Fontana }
6631b248f14SClaudio Fontana 
6641b248f14SClaudio Fontana void helper_fldln2_ST0(CPUX86State *env)
6651b248f14SClaudio Fontana {
6661b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6671b248f14SClaudio Fontana     case FPU_RC_DOWN:
6681b248f14SClaudio Fontana     case FPU_RC_CHOP:
6691b248f14SClaudio Fontana         ST0 = floatx80_ln2_d;
6701b248f14SClaudio Fontana         break;
6711b248f14SClaudio Fontana     default:
6721b248f14SClaudio Fontana         ST0 = floatx80_ln2;
6731b248f14SClaudio Fontana         break;
6741b248f14SClaudio Fontana     }
6751b248f14SClaudio Fontana }
6761b248f14SClaudio Fontana 
6771b248f14SClaudio Fontana void helper_fldz_ST0(CPUX86State *env)
6781b248f14SClaudio Fontana {
6791b248f14SClaudio Fontana     ST0 = floatx80_zero;
6801b248f14SClaudio Fontana }
6811b248f14SClaudio Fontana 
6821b248f14SClaudio Fontana void helper_fldz_FT0(CPUX86State *env)
6831b248f14SClaudio Fontana {
6841b248f14SClaudio Fontana     FT0 = floatx80_zero;
6851b248f14SClaudio Fontana }
6861b248f14SClaudio Fontana 
6871b248f14SClaudio Fontana uint32_t helper_fnstsw(CPUX86State *env)
6881b248f14SClaudio Fontana {
6891b248f14SClaudio Fontana     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
6901b248f14SClaudio Fontana }
6911b248f14SClaudio Fontana 
6921b248f14SClaudio Fontana uint32_t helper_fnstcw(CPUX86State *env)
6931b248f14SClaudio Fontana {
6941b248f14SClaudio Fontana     return env->fpuc;
6951b248f14SClaudio Fontana }
6961b248f14SClaudio Fontana 
697314d3effSPaolo Bonzini static void set_x86_rounding_mode(unsigned mode, float_status *status)
698314d3effSPaolo Bonzini {
699314d3effSPaolo Bonzini     static FloatRoundMode x86_round_mode[4] = {
700314d3effSPaolo Bonzini         float_round_nearest_even,
701314d3effSPaolo Bonzini         float_round_down,
702314d3effSPaolo Bonzini         float_round_up,
703314d3effSPaolo Bonzini         float_round_to_zero
704314d3effSPaolo Bonzini     };
705314d3effSPaolo Bonzini     assert(mode < ARRAY_SIZE(x86_round_mode));
706314d3effSPaolo Bonzini     set_float_rounding_mode(x86_round_mode[mode], status);
707314d3effSPaolo Bonzini }
708314d3effSPaolo Bonzini 
7091b248f14SClaudio Fontana void update_fp_status(CPUX86State *env)
7101b248f14SClaudio Fontana {
711314d3effSPaolo Bonzini     int rnd_mode;
7128da5f1dbSRichard Henderson     FloatX80RoundPrec rnd_prec;
7131b248f14SClaudio Fontana 
7141b248f14SClaudio Fontana     /* set rounding mode */
715314d3effSPaolo Bonzini     rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
716314d3effSPaolo Bonzini     set_x86_rounding_mode(rnd_mode, &env->fp_status);
7178da5f1dbSRichard Henderson 
7181b248f14SClaudio Fontana     switch ((env->fpuc >> 8) & 3) {
7191b248f14SClaudio Fontana     case 0:
7208da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_s;
7211b248f14SClaudio Fontana         break;
7221b248f14SClaudio Fontana     case 2:
7238da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_d;
7241b248f14SClaudio Fontana         break;
7251b248f14SClaudio Fontana     case 3:
7261b248f14SClaudio Fontana     default:
7278da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_x;
7281b248f14SClaudio Fontana         break;
7291b248f14SClaudio Fontana     }
7308da5f1dbSRichard Henderson     set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
7311b248f14SClaudio Fontana }
7321b248f14SClaudio Fontana 
7331b248f14SClaudio Fontana void helper_fldcw(CPUX86State *env, uint32_t val)
7341b248f14SClaudio Fontana {
7351b248f14SClaudio Fontana     cpu_set_fpuc(env, val);
7361b248f14SClaudio Fontana }
7371b248f14SClaudio Fontana 
7381b248f14SClaudio Fontana void helper_fclex(CPUX86State *env)
7391b248f14SClaudio Fontana {
7401b248f14SClaudio Fontana     env->fpus &= 0x7f00;
7411b248f14SClaudio Fontana }
7421b248f14SClaudio Fontana 
7431b248f14SClaudio Fontana void helper_fwait(CPUX86State *env)
7441b248f14SClaudio Fontana {
7451b248f14SClaudio Fontana     if (env->fpus & FPUS_SE) {
7461b248f14SClaudio Fontana         fpu_raise_exception(env, GETPC());
7471b248f14SClaudio Fontana     }
7481b248f14SClaudio Fontana }
7491b248f14SClaudio Fontana 
750bbdda9b7SRichard Henderson static void do_fninit(CPUX86State *env)
7511b248f14SClaudio Fontana {
7521b248f14SClaudio Fontana     env->fpus = 0;
7531b248f14SClaudio Fontana     env->fpstt = 0;
75484abdd7dSZiqiao Kong     env->fpcs = 0;
75584abdd7dSZiqiao Kong     env->fpds = 0;
75684abdd7dSZiqiao Kong     env->fpip = 0;
75784abdd7dSZiqiao Kong     env->fpdp = 0;
7581b248f14SClaudio Fontana     cpu_set_fpuc(env, 0x37f);
7591b248f14SClaudio Fontana     env->fptags[0] = 1;
7601b248f14SClaudio Fontana     env->fptags[1] = 1;
7611b248f14SClaudio Fontana     env->fptags[2] = 1;
7621b248f14SClaudio Fontana     env->fptags[3] = 1;
7631b248f14SClaudio Fontana     env->fptags[4] = 1;
7641b248f14SClaudio Fontana     env->fptags[5] = 1;
7651b248f14SClaudio Fontana     env->fptags[6] = 1;
7661b248f14SClaudio Fontana     env->fptags[7] = 1;
7671b248f14SClaudio Fontana }
7681b248f14SClaudio Fontana 
769bbdda9b7SRichard Henderson void helper_fninit(CPUX86State *env)
770bbdda9b7SRichard Henderson {
771bbdda9b7SRichard Henderson     do_fninit(env);
772bbdda9b7SRichard Henderson }
773bbdda9b7SRichard Henderson 
7741b248f14SClaudio Fontana /* BCD ops */
7751b248f14SClaudio Fontana 
7761b248f14SClaudio Fontana void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
7771b248f14SClaudio Fontana {
7784526f58aSRichard Henderson     X86Access ac;
7791b248f14SClaudio Fontana     floatx80 tmp;
7801b248f14SClaudio Fontana     uint64_t val;
7811b248f14SClaudio Fontana     unsigned int v;
7821b248f14SClaudio Fontana     int i;
7831b248f14SClaudio Fontana 
7844526f58aSRichard Henderson     access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
7854526f58aSRichard Henderson 
7861b248f14SClaudio Fontana     val = 0;
7871b248f14SClaudio Fontana     for (i = 8; i >= 0; i--) {
7884526f58aSRichard Henderson         v = access_ldb(&ac, ptr + i);
7891b248f14SClaudio Fontana         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
7901b248f14SClaudio Fontana     }
7911b248f14SClaudio Fontana     tmp = int64_to_floatx80(val, &env->fp_status);
7924526f58aSRichard Henderson     if (access_ldb(&ac, ptr + 9) & 0x80) {
7931b248f14SClaudio Fontana         tmp = floatx80_chs(tmp);
7941b248f14SClaudio Fontana     }
7951b248f14SClaudio Fontana     fpush(env);
7961b248f14SClaudio Fontana     ST0 = tmp;
7971b248f14SClaudio Fontana }
7981b248f14SClaudio Fontana 
7991b248f14SClaudio Fontana void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
8001b248f14SClaudio Fontana {
8011b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
8021b248f14SClaudio Fontana     int v;
8031b248f14SClaudio Fontana     target_ulong mem_ref, mem_end;
8041b248f14SClaudio Fontana     int64_t val;
8051b248f14SClaudio Fontana     CPU_LDoubleU temp;
8064526f58aSRichard Henderson     X86Access ac;
8071b248f14SClaudio Fontana 
8084526f58aSRichard Henderson     access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
8091b248f14SClaudio Fontana     temp.d = ST0;
8101b248f14SClaudio Fontana 
8111b248f14SClaudio Fontana     val = floatx80_to_int64(ST0, &env->fp_status);
8121b248f14SClaudio Fontana     mem_ref = ptr;
8131b248f14SClaudio Fontana     if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
8141b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
8151b248f14SClaudio Fontana         while (mem_ref < ptr + 7) {
8164526f58aSRichard Henderson             access_stb(&ac, mem_ref++, 0);
8171b248f14SClaudio Fontana         }
8184526f58aSRichard Henderson         access_stb(&ac, mem_ref++, 0xc0);
8194526f58aSRichard Henderson         access_stb(&ac, mem_ref++, 0xff);
8204526f58aSRichard Henderson         access_stb(&ac, mem_ref++, 0xff);
8211b248f14SClaudio Fontana         merge_exception_flags(env, old_flags);
8221b248f14SClaudio Fontana         return;
8231b248f14SClaudio Fontana     }
8241b248f14SClaudio Fontana     mem_end = mem_ref + 9;
8251b248f14SClaudio Fontana     if (SIGND(temp)) {
8264526f58aSRichard Henderson         access_stb(&ac, mem_end, 0x80);
8271b248f14SClaudio Fontana         val = -val;
8281b248f14SClaudio Fontana     } else {
8294526f58aSRichard Henderson         access_stb(&ac, mem_end, 0x00);
8301b248f14SClaudio Fontana     }
8311b248f14SClaudio Fontana     while (mem_ref < mem_end) {
8321b248f14SClaudio Fontana         if (val == 0) {
8331b248f14SClaudio Fontana             break;
8341b248f14SClaudio Fontana         }
8351b248f14SClaudio Fontana         v = val % 100;
8361b248f14SClaudio Fontana         val = val / 100;
8371b248f14SClaudio Fontana         v = ((v / 10) << 4) | (v % 10);
8384526f58aSRichard Henderson         access_stb(&ac, mem_ref++, v);
8391b248f14SClaudio Fontana     }
8401b248f14SClaudio Fontana     while (mem_ref < mem_end) {
8414526f58aSRichard Henderson         access_stb(&ac, mem_ref++, 0);
8421b248f14SClaudio Fontana     }
8431b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
8441b248f14SClaudio Fontana }
8451b248f14SClaudio Fontana 
/*
 * log(2) significand to 128 bits, split into its upper and lower
 * 64-bit halves.
 */
#define ln2_sig_high    0xb17217f7d1cf79abULL
#define ln2_sig_low     0xc9e3b39803f2f6afULL
8491b248f14SClaudio Fontana 
/*
 * Coefficients of the polynomial that approximates (2^x - 1) / x for x
 * in [-1/64, 1/64].  f2xm1_coeff_0 has a companion f2xm1_coeff_0_low
 * term; the remaining coefficients are single floatx80 values.
 */
#define f2xm1_coeff_0       make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low   make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1       make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2       make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3       make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4       make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5       make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6       make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7       make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
8631b248f14SClaudio Fontana 
8641b248f14SClaudio Fontana struct f2xm1_data {
8651b248f14SClaudio Fontana     /*
8661b248f14SClaudio Fontana      * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
8671b248f14SClaudio Fontana      * are very close to exact floatx80 values.
8681b248f14SClaudio Fontana      */
8691b248f14SClaudio Fontana     floatx80 t;
8701b248f14SClaudio Fontana     /* The value of 2^t.  */
8711b248f14SClaudio Fontana     floatx80 exp2;
8721b248f14SClaudio Fontana     /* The value of 2^t - 1.  */
8731b248f14SClaudio Fontana     floatx80 exp2m1;
8741b248f14SClaudio Fontana };
8751b248f14SClaudio Fontana 
8761b248f14SClaudio Fontana static const struct f2xm1_data f2xm1_table[65] = {
8771b248f14SClaudio Fontana     { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
8781b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
8791b248f14SClaudio Fontana       make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
8801b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
8811b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
8821b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
8831b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
8841b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
8851b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
8861b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
8871b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
8881b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
8891b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
8901b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
8911b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
8921b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
8931b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
8941b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
8951b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
8961b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
8971b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
8981b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
8991b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
9001b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
9011b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
9021b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
9031b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
9041b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
9051b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
9061b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
9071b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
9081b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
9091b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
9101b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
9111b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
9121b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
9131b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
9141b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
9151b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
9161b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
9171b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
9181b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
9191b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
9201b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
9211b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
9221b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
9231b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
9241b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
9251b248f14SClaudio Fontana     { make_floatx80_init(0xbffe, 0x800000000000227dULL),
9261b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
9271b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
9281b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
9291b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
9301b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
9311b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
9321b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
9331b248f14SClaudio Fontana       make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
9341b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
9351b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
9361b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
9371b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
9381b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
9391b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
9401b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
9411b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
9421b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
9431b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
9441b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
9451b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
9461b248f14SClaudio Fontana     { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
9471b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
9481b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
9491b248f14SClaudio Fontana     { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
9501b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
9511b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
9521b248f14SClaudio Fontana     { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
9531b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
9541b248f14SClaudio Fontana       make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
9551b248f14SClaudio Fontana     { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
9561b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
9571b248f14SClaudio Fontana       make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
9581b248f14SClaudio Fontana     { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
9591b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
9601b248f14SClaudio Fontana       make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
9611b248f14SClaudio Fontana     { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
9621b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
9631b248f14SClaudio Fontana       make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
9641b248f14SClaudio Fontana     { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
9651b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
9661b248f14SClaudio Fontana       make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
9671b248f14SClaudio Fontana     { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
9681b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
9691b248f14SClaudio Fontana       make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
9701b248f14SClaudio Fontana     { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
9711b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
9721b248f14SClaudio Fontana       make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
9731b248f14SClaudio Fontana     { floatx80_zero_init,
9741b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x8000000000000000ULL),
9751b248f14SClaudio Fontana       floatx80_zero_init },
9761b248f14SClaudio Fontana     { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
9771b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
9781b248f14SClaudio Fontana       make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
9791b248f14SClaudio Fontana     { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
9801b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
9811b248f14SClaudio Fontana       make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
9821b248f14SClaudio Fontana     { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
9831b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
9841b248f14SClaudio Fontana       make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
9851b248f14SClaudio Fontana     { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
9861b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
9871b248f14SClaudio Fontana       make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
9881b248f14SClaudio Fontana     { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
9891b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
9901b248f14SClaudio Fontana       make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
9911b248f14SClaudio Fontana     { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
9921b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
9931b248f14SClaudio Fontana       make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
9941b248f14SClaudio Fontana     { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
9951b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
9961b248f14SClaudio Fontana       make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
9971b248f14SClaudio Fontana     { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
9981b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
9991b248f14SClaudio Fontana       make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
10001b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
10011b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
10021b248f14SClaudio Fontana       make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
10031b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
10041b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
10051b248f14SClaudio Fontana       make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
10061b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
10071b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
10081b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
10091b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
10101b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
10111b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
10121b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
10131b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
10141b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
10151b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
10161b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
10171b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
10181b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
10191b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
10201b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
10211b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
10221b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
10231b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
10241b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
10251b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
10261b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
10271b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
10281b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
10291b248f14SClaudio Fontana       make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
10301b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
10311b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
10321b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
10331b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
10341b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
10351b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
10361b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
10371b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
10381b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
10391b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
10401b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
10411b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
10421b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
10431b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
10441b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
10451b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
10461b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
10471b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
10481b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
10491b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
10501b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
10511b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
10521b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
10531b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
10541b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
10551b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
10561b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
10571b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
10581b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
10591b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
10601b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
10611b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
10621b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
10631b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
10641b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
10651b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
10661b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
10671b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
10681b248f14SClaudio Fontana       make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
10691b248f14SClaudio Fontana     { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
10701b248f14SClaudio Fontana       make_floatx80_init(0x4000, 0x8000000000000000ULL),
10711b248f14SClaudio Fontana       make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
10721b248f14SClaudio Fontana };
10731b248f14SClaudio Fontana 
10741b248f14SClaudio Fontana void helper_f2xm1(CPUX86State *env)
10751b248f14SClaudio Fontana {
10761b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
10771b248f14SClaudio Fontana     uint64_t sig = extractFloatx80Frac(ST0);
10781b248f14SClaudio Fontana     int32_t exp = extractFloatx80Exp(ST0);
10791b248f14SClaudio Fontana     bool sign = extractFloatx80Sign(ST0);
10801b248f14SClaudio Fontana 
10811b248f14SClaudio Fontana     if (floatx80_invalid_encoding(ST0)) {
10821b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
10831b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
10841b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
10851b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
10861b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
10871b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
10881b248f14SClaudio Fontana         }
10891b248f14SClaudio Fontana     } else if (exp > 0x3fff ||
10901b248f14SClaudio Fontana                (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
10911b248f14SClaudio Fontana         /* Out of range for the instruction, treat as invalid.  */
10921b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
10931b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
10941b248f14SClaudio Fontana     } else if (exp == 0x3fff) {
10951b248f14SClaudio Fontana         /* Argument 1 or -1, exact result 1 or -0.5.  */
10961b248f14SClaudio Fontana         if (sign) {
10971b248f14SClaudio Fontana             ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
10981b248f14SClaudio Fontana         }
10991b248f14SClaudio Fontana     } else if (exp < 0x3fb0) {
11001b248f14SClaudio Fontana         if (!floatx80_is_zero(ST0)) {
11011b248f14SClaudio Fontana             /*
11021b248f14SClaudio Fontana              * Multiplying the argument by an extra-precision version
11031b248f14SClaudio Fontana              * of log(2) is sufficiently precise.  Zero arguments are
11041b248f14SClaudio Fontana              * returned unchanged.
11051b248f14SClaudio Fontana              */
11061b248f14SClaudio Fontana             uint64_t sig0, sig1, sig2;
11071b248f14SClaudio Fontana             if (exp == 0) {
11081b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(sig, &exp, &sig);
11091b248f14SClaudio Fontana             }
11101b248f14SClaudio Fontana             mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
11111b248f14SClaudio Fontana                             &sig2);
11121b248f14SClaudio Fontana             /* This result is inexact.  */
11131b248f14SClaudio Fontana             sig1 |= 1;
11148da5f1dbSRichard Henderson             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
11158da5f1dbSRichard Henderson                                                 sign, exp, sig0, sig1,
11161b248f14SClaudio Fontana                                                 &env->fp_status);
11171b248f14SClaudio Fontana         }
11181b248f14SClaudio Fontana     } else {
11191b248f14SClaudio Fontana         floatx80 tmp, y, accum;
11201b248f14SClaudio Fontana         bool asign, bsign;
11211b248f14SClaudio Fontana         int32_t n, aexp, bexp;
11221b248f14SClaudio Fontana         uint64_t asig0, asig1, asig2, bsig0, bsig1;
11231b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
11248da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
11258da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
11261b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
11278da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
11281b248f14SClaudio Fontana 
11291b248f14SClaudio Fontana         /* Find the nearest multiple of 1/32 to the argument.  */
11301b248f14SClaudio Fontana         tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
11311b248f14SClaudio Fontana         n = 32 + floatx80_to_int32(tmp, &env->fp_status);
11321b248f14SClaudio Fontana         y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
11331b248f14SClaudio Fontana 
11341b248f14SClaudio Fontana         if (floatx80_is_zero(y)) {
11351b248f14SClaudio Fontana             /*
11361b248f14SClaudio Fontana              * Use the value of 2^t - 1 from the table, to avoid
11371b248f14SClaudio Fontana              * needing to special-case zero as a result of
11381b248f14SClaudio Fontana              * multiplication below.
11391b248f14SClaudio Fontana              */
11401b248f14SClaudio Fontana             ST0 = f2xm1_table[n].t;
11411b248f14SClaudio Fontana             set_float_exception_flags(float_flag_inexact, &env->fp_status);
11421b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
11431b248f14SClaudio Fontana         } else {
11441b248f14SClaudio Fontana             /*
11451b248f14SClaudio Fontana              * Compute the lower parts of a polynomial expansion for
11461b248f14SClaudio Fontana              * (2^y - 1) / y.
11471b248f14SClaudio Fontana              */
11481b248f14SClaudio Fontana             accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
11491b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
11501b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11511b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
11521b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11531b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
11541b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11551b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
11561b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11571b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
11581b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11591b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
11601b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11611b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
11621b248f14SClaudio Fontana 
11631b248f14SClaudio Fontana             /*
11641b248f14SClaudio Fontana              * The full polynomial expansion is f2xm1_coeff_0 + accum
11651b248f14SClaudio Fontana              * (where accum has much lower magnitude, and so, in
11661b248f14SClaudio Fontana              * particular, carry out of the addition is not possible).
11671b248f14SClaudio Fontana              * (This expansion is only accurate to about 70 bits, not
11681b248f14SClaudio Fontana              * 128 bits.)
11691b248f14SClaudio Fontana              */
11701b248f14SClaudio Fontana             aexp = extractFloatx80Exp(f2xm1_coeff_0);
11711b248f14SClaudio Fontana             asign = extractFloatx80Sign(f2xm1_coeff_0);
11721b248f14SClaudio Fontana             shift128RightJamming(extractFloatx80Frac(accum), 0,
11731b248f14SClaudio Fontana                                  aexp - extractFloatx80Exp(accum),
11741b248f14SClaudio Fontana                                  &asig0, &asig1);
11751b248f14SClaudio Fontana             bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
11761b248f14SClaudio Fontana             bsig1 = 0;
11771b248f14SClaudio Fontana             if (asign == extractFloatx80Sign(accum)) {
11781b248f14SClaudio Fontana                 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
11791b248f14SClaudio Fontana             } else {
11801b248f14SClaudio Fontana                 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
11811b248f14SClaudio Fontana             }
11821b248f14SClaudio Fontana             /* And thus compute an approximation to 2^y - 1.  */
11831b248f14SClaudio Fontana             mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
11841b248f14SClaudio Fontana                             &asig0, &asig1, &asig2);
11851b248f14SClaudio Fontana             aexp += extractFloatx80Exp(y) - 0x3ffe;
11861b248f14SClaudio Fontana             asign ^= extractFloatx80Sign(y);
11871b248f14SClaudio Fontana             if (n != 32) {
11881b248f14SClaudio Fontana                 /*
11891b248f14SClaudio Fontana                  * Multiply this by the precomputed value of 2^t and
11901b248f14SClaudio Fontana                  * add that of 2^t - 1.
11911b248f14SClaudio Fontana                  */
11921b248f14SClaudio Fontana                 mul128By64To192(asig0, asig1,
11931b248f14SClaudio Fontana                                 extractFloatx80Frac(f2xm1_table[n].exp2),
11941b248f14SClaudio Fontana                                 &asig0, &asig1, &asig2);
11951b248f14SClaudio Fontana                 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
11961b248f14SClaudio Fontana                 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
11971b248f14SClaudio Fontana                 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
11981b248f14SClaudio Fontana                 bsig1 = 0;
11991b248f14SClaudio Fontana                 if (bexp < aexp) {
12001b248f14SClaudio Fontana                     shift128RightJamming(bsig0, bsig1, aexp - bexp,
12011b248f14SClaudio Fontana                                          &bsig0, &bsig1);
12021b248f14SClaudio Fontana                 } else if (aexp < bexp) {
12031b248f14SClaudio Fontana                     shift128RightJamming(asig0, asig1, bexp - aexp,
12041b248f14SClaudio Fontana                                          &asig0, &asig1);
12051b248f14SClaudio Fontana                     aexp = bexp;
12061b248f14SClaudio Fontana                 }
12071b248f14SClaudio Fontana                 /* The sign of 2^t - 1 is always that of the result.  */
12081b248f14SClaudio Fontana                 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
12091b248f14SClaudio Fontana                 if (asign == bsign) {
12101b248f14SClaudio Fontana                     /* Avoid possible carry out of the addition.  */
12111b248f14SClaudio Fontana                     shift128RightJamming(asig0, asig1, 1,
12121b248f14SClaudio Fontana                                          &asig0, &asig1);
12131b248f14SClaudio Fontana                     shift128RightJamming(bsig0, bsig1, 1,
12141b248f14SClaudio Fontana                                          &bsig0, &bsig1);
12151b248f14SClaudio Fontana                     ++aexp;
12161b248f14SClaudio Fontana                     add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
12171b248f14SClaudio Fontana                 } else {
12181b248f14SClaudio Fontana                     sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
12191b248f14SClaudio Fontana                     asign = bsign;
12201b248f14SClaudio Fontana                 }
12211b248f14SClaudio Fontana             }
12221b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
12231b248f14SClaudio Fontana             /* This result is inexact.  */
12241b248f14SClaudio Fontana             asig1 |= 1;
12258da5f1dbSRichard Henderson             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
12268da5f1dbSRichard Henderson                                                 asign, aexp, asig0, asig1,
12271b248f14SClaudio Fontana                                                 &env->fp_status);
12281b248f14SClaudio Fontana         }
12291b248f14SClaudio Fontana 
12301b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
12311b248f14SClaudio Fontana     }
12321b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
12331b248f14SClaudio Fontana }
12341b248f14SClaudio Fontana 
12351b248f14SClaudio Fontana void helper_fptan(CPUX86State *env)
12361b248f14SClaudio Fontana {
12371b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
12381b248f14SClaudio Fontana 
12391b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
12401b248f14SClaudio Fontana         env->fpus |= 0x400;
12411b248f14SClaudio Fontana     } else {
12421b248f14SClaudio Fontana         fptemp = tan(fptemp);
12431b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, fptemp);
12441b248f14SClaudio Fontana         fpush(env);
12451b248f14SClaudio Fontana         ST0 = floatx80_one;
12461b248f14SClaudio Fontana         env->fpus &= ~0x400; /* C2 <-- 0 */
12471b248f14SClaudio Fontana         /* the above code is for |arg| < 2**52 only */
12481b248f14SClaudio Fontana     }
12491b248f14SClaudio Fontana }
12501b248f14SClaudio Fontana 
/*
 * pi/4, pi/2, 3pi/4 and pi, each given as an exponent plus a 128-bit
 * significand split into high and low 64-bit halves.
 *
 * pi/4, pi/2 and pi differ only by powers of two, so they share a
 * single significand and differ only in the exponent.
 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

#define pi_2_exp        0x3fff
#define pi_2_sig_high   pi_4_sig_high
#define pi_2_sig_low    pi_4_sig_low

#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

#define pi_exp          0x4000
#define pi_sig_high     pi_4_sig_high
#define pi_sig_low      pi_4_sig_low
12641b248f14SClaudio Fontana 
/*
 * Coefficients of a minimax polynomial approximating atan(x) on
 * [-1/16, 1/16].  Only odd powers of x appear in the polynomial.
 *
 * Other approximations in this file carry a separate low part for
 * their leading coefficient; that is unnecessary here because the
 * leading coefficient of this approximation is very close to exactly 1.
 *
 * The remaining values are near (but not equal to) the atan Taylor
 * coefficients, as noted beside each one.
 */
#define fpatan_coeff_0 \
    make_floatx80(0x3fff, 0x8000000000000000ULL) /* 1 */
#define fpatan_coeff_1 \
    make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) /* about -1/3 */
#define fpatan_coeff_2 \
    make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) /* about 1/5 */
#define fpatan_coeff_3 \
    make_floatx80(0xbffc, 0x92492491fbab2e66ULL) /* about -1/7 */
#define fpatan_coeff_4 \
    make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) /* about 1/9 */
#define fpatan_coeff_5 \
    make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) /* about -1/11 */
#define fpatan_coeff_6 \
    make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) /* roughly 1/13 */
12801b248f14SClaudio Fontana 
12811b248f14SClaudio Fontana struct fpatan_data {
12821b248f14SClaudio Fontana     /* High and low parts of atan(x).  */
12831b248f14SClaudio Fontana     floatx80 atan_high, atan_low;
12841b248f14SClaudio Fontana };
12851b248f14SClaudio Fontana 
12861b248f14SClaudio Fontana static const struct fpatan_data fpatan_table[9] = {
12871b248f14SClaudio Fontana     { floatx80_zero_init,
12881b248f14SClaudio Fontana       floatx80_zero_init },
12891b248f14SClaudio Fontana     { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
12901b248f14SClaudio Fontana       make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
12911b248f14SClaudio Fontana     { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
12921b248f14SClaudio Fontana       make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
12931b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
12941b248f14SClaudio Fontana       make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
12951b248f14SClaudio Fontana     { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
12961b248f14SClaudio Fontana       make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
12971b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
12981b248f14SClaudio Fontana       make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
12991b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
13001b248f14SClaudio Fontana       make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
13011b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
13021b248f14SClaudio Fontana       make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
13031b248f14SClaudio Fontana     { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
13041b248f14SClaudio Fontana       make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
13051b248f14SClaudio Fontana };
13061b248f14SClaudio Fontana 
13071b248f14SClaudio Fontana void helper_fpatan(CPUX86State *env)
13081b248f14SClaudio Fontana {
13091b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
13101b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
13111b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
13121b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
13131b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
13141b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
13151b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
13161b248f14SClaudio Fontana 
13171b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
13181b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13191b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
13201b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
13211b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13221b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
13231b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
13241b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
13251b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13261b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
13271b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
13281b248f14SClaudio Fontana         ST1 = ST0;
13291b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
13301b248f14SClaudio Fontana         /* Pass this NaN through.  */
13311b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST1) && !arg0_sign) {
13321b248f14SClaudio Fontana         /* Pass this zero through.  */
13331b248f14SClaudio Fontana     } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
13341b248f14SClaudio Fontana                  arg0_exp - arg1_exp >= 80) &&
13351b248f14SClaudio Fontana                !arg0_sign) {
13361b248f14SClaudio Fontana         /*
13371b248f14SClaudio Fontana          * Dividing ST1 by ST0 gives the correct result up to
13381b248f14SClaudio Fontana          * rounding, and avoids spurious underflow exceptions that
13391b248f14SClaudio Fontana          * might result from passing some small values through the
13401b248f14SClaudio Fontana          * polynomial approximation, but if a finite nonzero result of
13411b248f14SClaudio Fontana          * division is exact, the result of fpatan is still inexact
13421b248f14SClaudio Fontana          * (and underflowing where appropriate).
13431b248f14SClaudio Fontana          */
13448da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
13458da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
13468da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
13471b248f14SClaudio Fontana         ST1 = floatx80_div(ST1, ST0, &env->fp_status);
13481b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
13491b248f14SClaudio Fontana         if (!floatx80_is_zero(ST1) &&
13501b248f14SClaudio Fontana             !(get_float_exception_flags(&env->fp_status) &
13511b248f14SClaudio Fontana               float_flag_inexact)) {
13521b248f14SClaudio Fontana             /*
13531b248f14SClaudio Fontana              * The mathematical result is very slightly closer to zero
13541b248f14SClaudio Fontana              * than this exact result.  Round a value with the
13551b248f14SClaudio Fontana              * significand adjusted accordingly to get the correct
13561b248f14SClaudio Fontana              * exceptions, and possibly an adjusted result depending
13571b248f14SClaudio Fontana              * on the rounding mode.
13581b248f14SClaudio Fontana              */
13591b248f14SClaudio Fontana             uint64_t sig = extractFloatx80Frac(ST1);
13601b248f14SClaudio Fontana             int32_t exp = extractFloatx80Exp(ST1);
13611b248f14SClaudio Fontana             bool sign = extractFloatx80Sign(ST1);
13621b248f14SClaudio Fontana             if (exp == 0) {
13631b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(sig, &exp, &sig);
13641b248f14SClaudio Fontana             }
13658da5f1dbSRichard Henderson             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
13668da5f1dbSRichard Henderson                                                 sign, exp, sig - 1,
13671b248f14SClaudio Fontana                                                 -1, &env->fp_status);
13681b248f14SClaudio Fontana         }
13691b248f14SClaudio Fontana     } else {
13701b248f14SClaudio Fontana         /* The result is inexact.  */
13711b248f14SClaudio Fontana         bool rsign = arg1_sign;
13721b248f14SClaudio Fontana         int32_t rexp;
13731b248f14SClaudio Fontana         uint64_t rsig0, rsig1;
13741b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
13751b248f14SClaudio Fontana             /*
13761b248f14SClaudio Fontana              * ST0 is negative.  The result is pi with the sign of
13771b248f14SClaudio Fontana              * ST1.
13781b248f14SClaudio Fontana              */
13791b248f14SClaudio Fontana             rexp = pi_exp;
13801b248f14SClaudio Fontana             rsig0 = pi_sig_high;
13811b248f14SClaudio Fontana             rsig1 = pi_sig_low;
13821b248f14SClaudio Fontana         } else if (floatx80_is_infinity(ST1)) {
13831b248f14SClaudio Fontana             if (floatx80_is_infinity(ST0)) {
13841b248f14SClaudio Fontana                 if (arg0_sign) {
13851b248f14SClaudio Fontana                     rexp = pi_34_exp;
13861b248f14SClaudio Fontana                     rsig0 = pi_34_sig_high;
13871b248f14SClaudio Fontana                     rsig1 = pi_34_sig_low;
13881b248f14SClaudio Fontana                 } else {
13891b248f14SClaudio Fontana                     rexp = pi_4_exp;
13901b248f14SClaudio Fontana                     rsig0 = pi_4_sig_high;
13911b248f14SClaudio Fontana                     rsig1 = pi_4_sig_low;
13921b248f14SClaudio Fontana                 }
13931b248f14SClaudio Fontana             } else {
13941b248f14SClaudio Fontana                 rexp = pi_2_exp;
13951b248f14SClaudio Fontana                 rsig0 = pi_2_sig_high;
13961b248f14SClaudio Fontana                 rsig1 = pi_2_sig_low;
13971b248f14SClaudio Fontana             }
13981b248f14SClaudio Fontana         } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
13991b248f14SClaudio Fontana             rexp = pi_2_exp;
14001b248f14SClaudio Fontana             rsig0 = pi_2_sig_high;
14011b248f14SClaudio Fontana             rsig1 = pi_2_sig_low;
14021b248f14SClaudio Fontana         } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
14031b248f14SClaudio Fontana             /* ST0 is negative.  */
14041b248f14SClaudio Fontana             rexp = pi_exp;
14051b248f14SClaudio Fontana             rsig0 = pi_sig_high;
14061b248f14SClaudio Fontana             rsig1 = pi_sig_low;
14071b248f14SClaudio Fontana         } else {
14081b248f14SClaudio Fontana             /*
14091b248f14SClaudio Fontana              * ST0 and ST1 are finite, nonzero and with exponents not
14101b248f14SClaudio Fontana              * too far apart.
14111b248f14SClaudio Fontana              */
14121b248f14SClaudio Fontana             int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
14131b248f14SClaudio Fontana             int32_t azexp, axexp;
14141b248f14SClaudio Fontana             bool adj_sub, ysign, zsign;
14151b248f14SClaudio Fontana             uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
14161b248f14SClaudio Fontana             uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
14171b248f14SClaudio Fontana             uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
14181b248f14SClaudio Fontana             uint64_t azsig0, azsig1;
14191b248f14SClaudio Fontana             uint64_t azsig2, azsig3, axsig0, axsig1;
14201b248f14SClaudio Fontana             floatx80 x8;
14211b248f14SClaudio Fontana             FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
14228da5f1dbSRichard Henderson             FloatX80RoundPrec save_prec =
14238da5f1dbSRichard Henderson                 env->fp_status.floatx80_rounding_precision;
14241b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = float_round_nearest_even;
14258da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
14261b248f14SClaudio Fontana 
14271b248f14SClaudio Fontana             if (arg0_exp == 0) {
14281b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
14291b248f14SClaudio Fontana             }
14301b248f14SClaudio Fontana             if (arg1_exp == 0) {
14311b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
14321b248f14SClaudio Fontana             }
14331b248f14SClaudio Fontana             if (arg0_exp > arg1_exp ||
14341b248f14SClaudio Fontana                 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
14351b248f14SClaudio Fontana                 /* Work with abs(ST1) / abs(ST0).  */
14361b248f14SClaudio Fontana                 num_exp = arg1_exp;
14371b248f14SClaudio Fontana                 num_sig = arg1_sig;
14381b248f14SClaudio Fontana                 den_exp = arg0_exp;
14391b248f14SClaudio Fontana                 den_sig = arg0_sig;
14401b248f14SClaudio Fontana                 if (arg0_sign) {
14411b248f14SClaudio Fontana                     /* The result is subtracted from pi.  */
14421b248f14SClaudio Fontana                     adj_exp = pi_exp;
14431b248f14SClaudio Fontana                     adj_sig0 = pi_sig_high;
14441b248f14SClaudio Fontana                     adj_sig1 = pi_sig_low;
14451b248f14SClaudio Fontana                     adj_sub = true;
14461b248f14SClaudio Fontana                 } else {
14471b248f14SClaudio Fontana                     /* The result is used as-is.  */
14481b248f14SClaudio Fontana                     adj_exp = 0;
14491b248f14SClaudio Fontana                     adj_sig0 = 0;
14501b248f14SClaudio Fontana                     adj_sig1 = 0;
14511b248f14SClaudio Fontana                     adj_sub = false;
14521b248f14SClaudio Fontana                 }
14531b248f14SClaudio Fontana             } else {
14541b248f14SClaudio Fontana                 /* Work with abs(ST0) / abs(ST1).  */
14551b248f14SClaudio Fontana                 num_exp = arg0_exp;
14561b248f14SClaudio Fontana                 num_sig = arg0_sig;
14571b248f14SClaudio Fontana                 den_exp = arg1_exp;
14581b248f14SClaudio Fontana                 den_sig = arg1_sig;
14591b248f14SClaudio Fontana                 /* The result is added to or subtracted from pi/2.  */
14601b248f14SClaudio Fontana                 adj_exp = pi_2_exp;
14611b248f14SClaudio Fontana                 adj_sig0 = pi_2_sig_high;
14621b248f14SClaudio Fontana                 adj_sig1 = pi_2_sig_low;
14631b248f14SClaudio Fontana                 adj_sub = !arg0_sign;
14641b248f14SClaudio Fontana             }
14651b248f14SClaudio Fontana 
14661b248f14SClaudio Fontana             /*
14671b248f14SClaudio Fontana              * Compute x = num/den, where 0 < x <= 1 and x is not too
14681b248f14SClaudio Fontana              * small.
14691b248f14SClaudio Fontana              */
14701b248f14SClaudio Fontana             xexp = num_exp - den_exp + 0x3ffe;
14711b248f14SClaudio Fontana             remsig0 = num_sig;
14721b248f14SClaudio Fontana             remsig1 = 0;
14731b248f14SClaudio Fontana             if (den_sig <= remsig0) {
14741b248f14SClaudio Fontana                 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
14751b248f14SClaudio Fontana                 ++xexp;
14761b248f14SClaudio Fontana             }
14771b248f14SClaudio Fontana             xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
14781b248f14SClaudio Fontana             mul64To128(den_sig, xsig0, &msig0, &msig1);
14791b248f14SClaudio Fontana             sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
14801b248f14SClaudio Fontana             while ((int64_t) remsig0 < 0) {
14811b248f14SClaudio Fontana                 --xsig0;
14821b248f14SClaudio Fontana                 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
14831b248f14SClaudio Fontana             }
14841b248f14SClaudio Fontana             xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
14851b248f14SClaudio Fontana             /*
14861b248f14SClaudio Fontana              * No need to correct any estimation error in xsig1; even
14871b248f14SClaudio Fontana              * with such error, it is accurate enough.
14881b248f14SClaudio Fontana              */
14891b248f14SClaudio Fontana 
14901b248f14SClaudio Fontana             /*
14911b248f14SClaudio Fontana              * Split x as x = t + y, where t = n/8 is the nearest
14921b248f14SClaudio Fontana              * multiple of 1/8 to x.
14931b248f14SClaudio Fontana              */
14948da5f1dbSRichard Henderson             x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
14958da5f1dbSRichard Henderson                                                false, xexp + 3, xsig0,
14961b248f14SClaudio Fontana                                                xsig1, &env->fp_status);
14971b248f14SClaudio Fontana             n = floatx80_to_int32(x8, &env->fp_status);
14981b248f14SClaudio Fontana             if (n == 0) {
14991b248f14SClaudio Fontana                 ysign = false;
15001b248f14SClaudio Fontana                 yexp = xexp;
15011b248f14SClaudio Fontana                 ysig0 = xsig0;
15021b248f14SClaudio Fontana                 ysig1 = xsig1;
15031b248f14SClaudio Fontana                 texp = 0;
15041b248f14SClaudio Fontana                 tsig = 0;
15051b248f14SClaudio Fontana             } else {
15061b248f14SClaudio Fontana                 int shift = clz32(n) + 32;
15071b248f14SClaudio Fontana                 texp = 0x403b - shift;
15081b248f14SClaudio Fontana                 tsig = n;
15091b248f14SClaudio Fontana                 tsig <<= shift;
15101b248f14SClaudio Fontana                 if (texp == xexp) {
15111b248f14SClaudio Fontana                     sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
15121b248f14SClaudio Fontana                     if ((int64_t) ysig0 >= 0) {
15131b248f14SClaudio Fontana                         ysign = false;
15141b248f14SClaudio Fontana                         if (ysig0 == 0) {
15151b248f14SClaudio Fontana                             if (ysig1 == 0) {
15161b248f14SClaudio Fontana                                 yexp = 0;
15171b248f14SClaudio Fontana                             } else {
15181b248f14SClaudio Fontana                                 shift = clz64(ysig1) + 64;
15191b248f14SClaudio Fontana                                 yexp = xexp - shift;
15201b248f14SClaudio Fontana                                 shift128Left(ysig0, ysig1, shift,
15211b248f14SClaudio Fontana                                              &ysig0, &ysig1);
15221b248f14SClaudio Fontana                             }
15231b248f14SClaudio Fontana                         } else {
15241b248f14SClaudio Fontana                             shift = clz64(ysig0);
15251b248f14SClaudio Fontana                             yexp = xexp - shift;
15261b248f14SClaudio Fontana                             shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15271b248f14SClaudio Fontana                         }
15281b248f14SClaudio Fontana                     } else {
15291b248f14SClaudio Fontana                         ysign = true;
15301b248f14SClaudio Fontana                         sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
15311b248f14SClaudio Fontana                         if (ysig0 == 0) {
15321b248f14SClaudio Fontana                             shift = clz64(ysig1) + 64;
15331b248f14SClaudio Fontana                         } else {
15341b248f14SClaudio Fontana                             shift = clz64(ysig0);
15351b248f14SClaudio Fontana                         }
15361b248f14SClaudio Fontana                         yexp = xexp - shift;
15371b248f14SClaudio Fontana                         shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15381b248f14SClaudio Fontana                     }
15391b248f14SClaudio Fontana                 } else {
15401b248f14SClaudio Fontana                     /*
15411b248f14SClaudio Fontana                      * t's exponent must be greater than x's because t
15421b248f14SClaudio Fontana                      * is positive and the nearest multiple of 1/8 to
15431b248f14SClaudio Fontana                      * x, and if x has a greater exponent, the power
15441b248f14SClaudio Fontana                      * of 2 with that exponent is also a multiple of
15451b248f14SClaudio Fontana                      * 1/8.
15461b248f14SClaudio Fontana                      */
15471b248f14SClaudio Fontana                     uint64_t usig0, usig1;
15481b248f14SClaudio Fontana                     shift128RightJamming(xsig0, xsig1, texp - xexp,
15491b248f14SClaudio Fontana                                          &usig0, &usig1);
15501b248f14SClaudio Fontana                     ysign = true;
15511b248f14SClaudio Fontana                     sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
15521b248f14SClaudio Fontana                     if (ysig0 == 0) {
15531b248f14SClaudio Fontana                         shift = clz64(ysig1) + 64;
15541b248f14SClaudio Fontana                     } else {
15551b248f14SClaudio Fontana                         shift = clz64(ysig0);
15561b248f14SClaudio Fontana                     }
15571b248f14SClaudio Fontana                     yexp = texp - shift;
15581b248f14SClaudio Fontana                     shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15591b248f14SClaudio Fontana                 }
15601b248f14SClaudio Fontana             }
15611b248f14SClaudio Fontana 
15621b248f14SClaudio Fontana             /*
15631b248f14SClaudio Fontana              * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
15641b248f14SClaudio Fontana              * arctan(z).
15651b248f14SClaudio Fontana              */
15661b248f14SClaudio Fontana             zsign = ysign;
15671b248f14SClaudio Fontana             if (texp == 0 || yexp == 0) {
15681b248f14SClaudio Fontana                 zexp = yexp;
15691b248f14SClaudio Fontana                 zsig0 = ysig0;
15701b248f14SClaudio Fontana                 zsig1 = ysig1;
15711b248f14SClaudio Fontana             } else {
15721b248f14SClaudio Fontana                 /*
15731b248f14SClaudio Fontana                  * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
15741b248f14SClaudio Fontana                  */
15751b248f14SClaudio Fontana                 int32_t dexp = texp + xexp - 0x3ffe;
15761b248f14SClaudio Fontana                 uint64_t dsig0, dsig1, dsig2;
15771b248f14SClaudio Fontana                 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
15781b248f14SClaudio Fontana                 /*
15791b248f14SClaudio Fontana                  * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
15801b248f14SClaudio Fontana                  * bit).  Add 1 to produce the denominator 1+tx.
15811b248f14SClaudio Fontana                  */
15821b248f14SClaudio Fontana                 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
15831b248f14SClaudio Fontana                                      &dsig0, &dsig1);
15841b248f14SClaudio Fontana                 dsig0 |= 0x8000000000000000ULL;
15851b248f14SClaudio Fontana                 zexp = yexp - 1;
15861b248f14SClaudio Fontana                 remsig0 = ysig0;
15871b248f14SClaudio Fontana                 remsig1 = ysig1;
15881b248f14SClaudio Fontana                 remsig2 = 0;
15891b248f14SClaudio Fontana                 if (dsig0 <= remsig0) {
15901b248f14SClaudio Fontana                     shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
15911b248f14SClaudio Fontana                     ++zexp;
15921b248f14SClaudio Fontana                 }
15931b248f14SClaudio Fontana                 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
15941b248f14SClaudio Fontana                 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
15951b248f14SClaudio Fontana                 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
15961b248f14SClaudio Fontana                        &remsig0, &remsig1, &remsig2);
15971b248f14SClaudio Fontana                 while ((int64_t) remsig0 < 0) {
15981b248f14SClaudio Fontana                     --zsig0;
15991b248f14SClaudio Fontana                     add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
16001b248f14SClaudio Fontana                            &remsig0, &remsig1, &remsig2);
16011b248f14SClaudio Fontana                 }
16021b248f14SClaudio Fontana                 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
16031b248f14SClaudio Fontana                 /* No need to correct any estimation error in zsig1.  */
16041b248f14SClaudio Fontana             }
16051b248f14SClaudio Fontana 
16061b248f14SClaudio Fontana             if (zexp == 0) {
16071b248f14SClaudio Fontana                 azexp = 0;
16081b248f14SClaudio Fontana                 azsig0 = 0;
16091b248f14SClaudio Fontana                 azsig1 = 0;
16101b248f14SClaudio Fontana             } else {
16111b248f14SClaudio Fontana                 floatx80 z2, accum;
16121b248f14SClaudio Fontana                 uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
16131b248f14SClaudio Fontana                 /* Compute z^2.  */
16141b248f14SClaudio Fontana                 mul128To256(zsig0, zsig1, zsig0, zsig1,
16151b248f14SClaudio Fontana                             &z2sig0, &z2sig1, &z2sig2, &z2sig3);
16168da5f1dbSRichard Henderson                 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
16171b248f14SClaudio Fontana                                                    zexp + zexp - 0x3ffe,
16181b248f14SClaudio Fontana                                                    z2sig0, z2sig1,
16191b248f14SClaudio Fontana                                                    &env->fp_status);
16201b248f14SClaudio Fontana 
16211b248f14SClaudio Fontana                 /* Compute the lower parts of the polynomial expansion.  */
16221b248f14SClaudio Fontana                 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
16231b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
16241b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16251b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
16261b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16271b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
16281b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16291b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
16301b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16311b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
16321b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16331b248f14SClaudio Fontana 
16341b248f14SClaudio Fontana                 /*
16351b248f14SClaudio Fontana                  * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
16361b248f14SClaudio Fontana                  * fpatan_coeff_0 is 1, and accum is negative and much smaller.
16371b248f14SClaudio Fontana                  */
16381b248f14SClaudio Fontana                 aexp = extractFloatx80Exp(fpatan_coeff_0);
16391b248f14SClaudio Fontana                 shift128RightJamming(extractFloatx80Frac(accum), 0,
16401b248f14SClaudio Fontana                                      aexp - extractFloatx80Exp(accum),
16411b248f14SClaudio Fontana                                      &asig0, &asig1);
16421b248f14SClaudio Fontana                 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
16431b248f14SClaudio Fontana                        &asig0, &asig1);
16441b248f14SClaudio Fontana                 /* Multiply by z to compute arctan(z).  */
16451b248f14SClaudio Fontana                 azexp = aexp + zexp - 0x3ffe;
16461b248f14SClaudio Fontana                 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
16471b248f14SClaudio Fontana                             &azsig2, &azsig3);
16481b248f14SClaudio Fontana             }
16491b248f14SClaudio Fontana 
16501b248f14SClaudio Fontana             /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
16511b248f14SClaudio Fontana             if (texp == 0) {
16521b248f14SClaudio Fontana                 /* z is positive.  */
16531b248f14SClaudio Fontana                 axexp = azexp;
16541b248f14SClaudio Fontana                 axsig0 = azsig0;
16551b248f14SClaudio Fontana                 axsig1 = azsig1;
16561b248f14SClaudio Fontana             } else {
16571b248f14SClaudio Fontana                 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
16581b248f14SClaudio Fontana                 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
16591b248f14SClaudio Fontana                 uint64_t low_sig0 =
16601b248f14SClaudio Fontana                     extractFloatx80Frac(fpatan_table[n].atan_low);
16611b248f14SClaudio Fontana                 uint64_t low_sig1 = 0;
16621b248f14SClaudio Fontana                 axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
16631b248f14SClaudio Fontana                 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
16641b248f14SClaudio Fontana                 axsig1 = 0;
16651b248f14SClaudio Fontana                 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
16661b248f14SClaudio Fontana                                      &low_sig0, &low_sig1);
16671b248f14SClaudio Fontana                 if (low_sign) {
16681b248f14SClaudio Fontana                     sub128(axsig0, axsig1, low_sig0, low_sig1,
16691b248f14SClaudio Fontana                            &axsig0, &axsig1);
16701b248f14SClaudio Fontana                 } else {
16711b248f14SClaudio Fontana                     add128(axsig0, axsig1, low_sig0, low_sig1,
16721b248f14SClaudio Fontana                            &axsig0, &axsig1);
16731b248f14SClaudio Fontana                 }
16741b248f14SClaudio Fontana                 if (azexp >= axexp) {
16751b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
16761b248f14SClaudio Fontana                                          &axsig0, &axsig1);
16771b248f14SClaudio Fontana                     axexp = azexp + 1;
16781b248f14SClaudio Fontana                     shift128RightJamming(azsig0, azsig1, 1,
16791b248f14SClaudio Fontana                                          &azsig0, &azsig1);
16801b248f14SClaudio Fontana                 } else {
16811b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, 1,
16821b248f14SClaudio Fontana                                          &axsig0, &axsig1);
16831b248f14SClaudio Fontana                     shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
16841b248f14SClaudio Fontana                                          &azsig0, &azsig1);
16851b248f14SClaudio Fontana                     ++axexp;
16861b248f14SClaudio Fontana                 }
16871b248f14SClaudio Fontana                 if (zsign) {
16881b248f14SClaudio Fontana                     sub128(axsig0, axsig1, azsig0, azsig1,
16891b248f14SClaudio Fontana                            &axsig0, &axsig1);
16901b248f14SClaudio Fontana                 } else {
16911b248f14SClaudio Fontana                     add128(axsig0, axsig1, azsig0, azsig1,
16921b248f14SClaudio Fontana                            &axsig0, &axsig1);
16931b248f14SClaudio Fontana                 }
16941b248f14SClaudio Fontana             }
16951b248f14SClaudio Fontana 
16961b248f14SClaudio Fontana             if (adj_exp == 0) {
16971b248f14SClaudio Fontana                 rexp = axexp;
16981b248f14SClaudio Fontana                 rsig0 = axsig0;
16991b248f14SClaudio Fontana                 rsig1 = axsig1;
17001b248f14SClaudio Fontana             } else {
17011b248f14SClaudio Fontana                 /*
17021b248f14SClaudio Fontana                  * Add or subtract arctan(x) (exponent axexp,
17031b248f14SClaudio Fontana                  * significand axsig0 and axsig1, positive, not
17041b248f14SClaudio Fontana                  * necessarily normalized) to the number given by
17051b248f14SClaudio Fontana                  * adj_exp, adj_sig0 and adj_sig1, according to
17061b248f14SClaudio Fontana                  * adj_sub.
17071b248f14SClaudio Fontana                  */
17081b248f14SClaudio Fontana                 if (adj_exp >= axexp) {
17091b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
17101b248f14SClaudio Fontana                                          &axsig0, &axsig1);
17111b248f14SClaudio Fontana                     rexp = adj_exp + 1;
17121b248f14SClaudio Fontana                     shift128RightJamming(adj_sig0, adj_sig1, 1,
17131b248f14SClaudio Fontana                                          &adj_sig0, &adj_sig1);
17141b248f14SClaudio Fontana                 } else {
17151b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, 1,
17161b248f14SClaudio Fontana                                          &axsig0, &axsig1);
17171b248f14SClaudio Fontana                     shift128RightJamming(adj_sig0, adj_sig1,
17181b248f14SClaudio Fontana                                          axexp - adj_exp + 1,
17191b248f14SClaudio Fontana                                          &adj_sig0, &adj_sig1);
17201b248f14SClaudio Fontana                     rexp = axexp + 1;
17211b248f14SClaudio Fontana                 }
17221b248f14SClaudio Fontana                 if (adj_sub) {
17231b248f14SClaudio Fontana                     sub128(adj_sig0, adj_sig1, axsig0, axsig1,
17241b248f14SClaudio Fontana                            &rsig0, &rsig1);
17251b248f14SClaudio Fontana                 } else {
17261b248f14SClaudio Fontana                     add128(adj_sig0, adj_sig1, axsig0, axsig1,
17271b248f14SClaudio Fontana                            &rsig0, &rsig1);
17281b248f14SClaudio Fontana                 }
17291b248f14SClaudio Fontana             }
17301b248f14SClaudio Fontana 
17311b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
17321b248f14SClaudio Fontana             env->fp_status.floatx80_rounding_precision = save_prec;
17331b248f14SClaudio Fontana         }
17341b248f14SClaudio Fontana         /* This result is inexact.  */
17351b248f14SClaudio Fontana         rsig1 |= 1;
17368da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
17371b248f14SClaudio Fontana                                             rsig0, rsig1, &env->fp_status);
17381b248f14SClaudio Fontana     }
17391b248f14SClaudio Fontana 
17401b248f14SClaudio Fontana     fpop(env);
17411b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
17421b248f14SClaudio Fontana }
17431b248f14SClaudio Fontana 
17441b248f14SClaudio Fontana void helper_fxtract(CPUX86State *env)
17451b248f14SClaudio Fontana {
17461b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
17471b248f14SClaudio Fontana     CPU_LDoubleU temp;
17481b248f14SClaudio Fontana 
17491b248f14SClaudio Fontana     temp.d = ST0;
17501b248f14SClaudio Fontana 
17511b248f14SClaudio Fontana     if (floatx80_is_zero(ST0)) {
17521b248f14SClaudio Fontana         /* Easy way to generate -inf and raising division by 0 exception */
17531b248f14SClaudio Fontana         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
17541b248f14SClaudio Fontana                            &env->fp_status);
17551b248f14SClaudio Fontana         fpush(env);
17561b248f14SClaudio Fontana         ST0 = temp.d;
17571b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0)) {
17581b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
17591b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
17601b248f14SClaudio Fontana         fpush(env);
17611b248f14SClaudio Fontana         ST0 = ST1;
17621b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
17631b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
17641b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
17651b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
17661b248f14SClaudio Fontana         }
17671b248f14SClaudio Fontana         fpush(env);
17681b248f14SClaudio Fontana         ST0 = ST1;
17691b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST0)) {
17701b248f14SClaudio Fontana         fpush(env);
17711b248f14SClaudio Fontana         ST0 = ST1;
17721b248f14SClaudio Fontana         ST1 = floatx80_infinity;
17731b248f14SClaudio Fontana     } else {
17741b248f14SClaudio Fontana         int expdif;
17751b248f14SClaudio Fontana 
17761b248f14SClaudio Fontana         if (EXPD(temp) == 0) {
17771b248f14SClaudio Fontana             int shift = clz64(temp.l.lower);
17781b248f14SClaudio Fontana             temp.l.lower <<= shift;
17791b248f14SClaudio Fontana             expdif = 1 - EXPBIAS - shift;
17801b248f14SClaudio Fontana             float_raise(float_flag_input_denormal, &env->fp_status);
17811b248f14SClaudio Fontana         } else {
17821b248f14SClaudio Fontana             expdif = EXPD(temp) - EXPBIAS;
17831b248f14SClaudio Fontana         }
17841b248f14SClaudio Fontana         /* DP exponent bias */
17851b248f14SClaudio Fontana         ST0 = int32_to_floatx80(expdif, &env->fp_status);
17861b248f14SClaudio Fontana         fpush(env);
17871b248f14SClaudio Fontana         BIASEXPONENT(temp);
17881b248f14SClaudio Fontana         ST0 = temp.d;
17891b248f14SClaudio Fontana     }
17901b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
17911b248f14SClaudio Fontana }
17921b248f14SClaudio Fontana 
17931b248f14SClaudio Fontana static void helper_fprem_common(CPUX86State *env, bool mod)
17941b248f14SClaudio Fontana {
17951b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
17961b248f14SClaudio Fontana     uint64_t quotient;
17971b248f14SClaudio Fontana     CPU_LDoubleU temp0, temp1;
17981b248f14SClaudio Fontana     int exp0, exp1, expdiff;
17991b248f14SClaudio Fontana 
18001b248f14SClaudio Fontana     temp0.d = ST0;
18011b248f14SClaudio Fontana     temp1.d = ST1;
18021b248f14SClaudio Fontana     exp0 = EXPD(temp0);
18031b248f14SClaudio Fontana     exp1 = EXPD(temp1);
18041b248f14SClaudio Fontana 
18051b248f14SClaudio Fontana     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
18061b248f14SClaudio Fontana     if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
18071b248f14SClaudio Fontana         exp0 == 0x7fff || exp1 == 0x7fff ||
18081b248f14SClaudio Fontana         floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
18091b248f14SClaudio Fontana         ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
18101b248f14SClaudio Fontana     } else {
18111b248f14SClaudio Fontana         if (exp0 == 0) {
18121b248f14SClaudio Fontana             exp0 = 1 - clz64(temp0.l.lower);
18131b248f14SClaudio Fontana         }
18141b248f14SClaudio Fontana         if (exp1 == 0) {
18151b248f14SClaudio Fontana             exp1 = 1 - clz64(temp1.l.lower);
18161b248f14SClaudio Fontana         }
18171b248f14SClaudio Fontana         expdiff = exp0 - exp1;
18181b248f14SClaudio Fontana         if (expdiff < 64) {
18191b248f14SClaudio Fontana             ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
18201b248f14SClaudio Fontana             env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
18211b248f14SClaudio Fontana             env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
18221b248f14SClaudio Fontana             env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
18231b248f14SClaudio Fontana         } else {
18241b248f14SClaudio Fontana             /*
18251b248f14SClaudio Fontana              * Partial remainder.  This choice of how many bits to
18261b248f14SClaudio Fontana              * process at once is specified in AMD instruction set
18271b248f14SClaudio Fontana              * manuals, and empirically is followed by Intel
18281b248f14SClaudio Fontana              * processors as well; it ensures that the final remainder
18291b248f14SClaudio Fontana              * operation in a loop does produce the correct low three
18301b248f14SClaudio Fontana              * bits of the quotient.  AMD manuals specify that the
18311b248f14SClaudio Fontana              * flags other than C2 are cleared, and empirically Intel
18321b248f14SClaudio Fontana              * processors clear them as well.
18331b248f14SClaudio Fontana              */
18341b248f14SClaudio Fontana             int n = 32 + (expdiff % 32);
18351b248f14SClaudio Fontana             temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
18361b248f14SClaudio Fontana             ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
18371b248f14SClaudio Fontana             env->fpus |= 0x400;  /* C2 <-- 1 */
18381b248f14SClaudio Fontana         }
18391b248f14SClaudio Fontana     }
18401b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
18411b248f14SClaudio Fontana }
18421b248f14SClaudio Fontana 
18431b248f14SClaudio Fontana void helper_fprem1(CPUX86State *env)
18441b248f14SClaudio Fontana {
18451b248f14SClaudio Fontana     helper_fprem_common(env, false);
18461b248f14SClaudio Fontana }
18471b248f14SClaudio Fontana 
18481b248f14SClaudio Fontana void helper_fprem(CPUX86State *env)
18491b248f14SClaudio Fontana {
18501b248f14SClaudio Fontana     helper_fprem_common(env, true);
18511b248f14SClaudio Fontana }
18521b248f14SClaudio Fontana 
/* 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * helper_fyl2x_common() evaluates fyl2x_coeff_9 down to fyl2x_coeff_1
 * by Horner's rule in the squared argument, adds fyl2x_coeff_0_low to
 * that accumulator, and then combines the result with fyl2x_coeff_0
 * using 128-bit integer arithmetic.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
18741b248f14SClaudio Fontana 
18751b248f14SClaudio Fontana /*
18761b248f14SClaudio Fontana  * Compute an approximation of log2(1+arg), where 1+arg is in the
18771b248f14SClaudio Fontana  * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
18781b248f14SClaudio Fontana  * function is called, rounding precision is set to 80 and the
18791b248f14SClaudio Fontana  * round-to-nearest mode is in effect.  arg must not be exactly zero,
18801b248f14SClaudio Fontana  * and must not be so close to zero that underflow might occur.
18811b248f14SClaudio Fontana  */
18821b248f14SClaudio Fontana static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
18831b248f14SClaudio Fontana                                 uint64_t *sig0, uint64_t *sig1)
18841b248f14SClaudio Fontana {
18851b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(arg);
18861b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(arg);
18871b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(arg);
18881b248f14SClaudio Fontana     bool asign;
18891b248f14SClaudio Fontana     int32_t dexp, texp, aexp;
18901b248f14SClaudio Fontana     uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
18911b248f14SClaudio Fontana     uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
18921b248f14SClaudio Fontana     uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
18931b248f14SClaudio Fontana     floatx80 t2, accum;
18941b248f14SClaudio Fontana 
18951b248f14SClaudio Fontana     /*
18961b248f14SClaudio Fontana      * Compute an approximation of arg/(2+arg), with extra precision,
18971b248f14SClaudio Fontana      * as the argument to a polynomial approximation.  The extra
18981b248f14SClaudio Fontana      * precision is only needed for the first term of the
18991b248f14SClaudio Fontana      * approximation, with subsequent terms being significantly
19001b248f14SClaudio Fontana      * smaller; the approximation only uses odd exponents, and the
19011b248f14SClaudio Fontana      * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
19021b248f14SClaudio Fontana      */
19031b248f14SClaudio Fontana     if (arg0_sign) {
19041b248f14SClaudio Fontana         dexp = 0x3fff;
19051b248f14SClaudio Fontana         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
19061b248f14SClaudio Fontana         sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
19071b248f14SClaudio Fontana     } else {
19081b248f14SClaudio Fontana         dexp = 0x4000;
19091b248f14SClaudio Fontana         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
19101b248f14SClaudio Fontana         dsig0 |= 0x8000000000000000ULL;
19111b248f14SClaudio Fontana     }
19121b248f14SClaudio Fontana     texp = arg0_exp - dexp + 0x3ffe;
19131b248f14SClaudio Fontana     rsig0 = arg0_sig;
19141b248f14SClaudio Fontana     rsig1 = 0;
19151b248f14SClaudio Fontana     rsig2 = 0;
19161b248f14SClaudio Fontana     if (dsig0 <= rsig0) {
19171b248f14SClaudio Fontana         shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
19181b248f14SClaudio Fontana         ++texp;
19191b248f14SClaudio Fontana     }
19201b248f14SClaudio Fontana     tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
19211b248f14SClaudio Fontana     mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
19221b248f14SClaudio Fontana     sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
19231b248f14SClaudio Fontana            &rsig0, &rsig1, &rsig2);
19241b248f14SClaudio Fontana     while ((int64_t) rsig0 < 0) {
19251b248f14SClaudio Fontana         --tsig0;
19261b248f14SClaudio Fontana         add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
19271b248f14SClaudio Fontana                &rsig0, &rsig1, &rsig2);
19281b248f14SClaudio Fontana     }
19291b248f14SClaudio Fontana     tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
19301b248f14SClaudio Fontana     /*
19311b248f14SClaudio Fontana      * No need to correct any estimation error in tsig1; even with
19321b248f14SClaudio Fontana      * such error, it is accurate enough.  Now compute the square of
19331b248f14SClaudio Fontana      * that approximation.
19341b248f14SClaudio Fontana      */
19351b248f14SClaudio Fontana     mul128To256(tsig0, tsig1, tsig0, tsig1,
19361b248f14SClaudio Fontana                 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
19378da5f1dbSRichard Henderson     t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
19388da5f1dbSRichard Henderson                                        texp + texp - 0x3ffe,
19391b248f14SClaudio Fontana                                        t2sig0, t2sig1, &env->fp_status);
19401b248f14SClaudio Fontana 
19411b248f14SClaudio Fontana     /* Compute the lower parts of the polynomial expansion.  */
19421b248f14SClaudio Fontana     accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
19431b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
19441b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19451b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
19461b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19471b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
19481b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19491b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
19501b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19511b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
19521b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19531b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
19541b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19551b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
19561b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19571b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
19581b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19591b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
19601b248f14SClaudio Fontana 
19611b248f14SClaudio Fontana     /*
19621b248f14SClaudio Fontana      * The full polynomial expansion is fyl2x_coeff_0 + accum (where
19631b248f14SClaudio Fontana      * accum has much lower magnitude, and so, in particular, carry
19641b248f14SClaudio Fontana      * out of the addition is not possible), multiplied by t.  (This
19651b248f14SClaudio Fontana      * expansion is only accurate to about 70 bits, not 128 bits.)
19661b248f14SClaudio Fontana      */
19671b248f14SClaudio Fontana     aexp = extractFloatx80Exp(fyl2x_coeff_0);
19681b248f14SClaudio Fontana     asign = extractFloatx80Sign(fyl2x_coeff_0);
19691b248f14SClaudio Fontana     shift128RightJamming(extractFloatx80Frac(accum), 0,
19701b248f14SClaudio Fontana                          aexp - extractFloatx80Exp(accum),
19711b248f14SClaudio Fontana                          &asig0, &asig1);
19721b248f14SClaudio Fontana     bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
19731b248f14SClaudio Fontana     bsig1 = 0;
19741b248f14SClaudio Fontana     if (asign == extractFloatx80Sign(accum)) {
19751b248f14SClaudio Fontana         add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
19761b248f14SClaudio Fontana     } else {
19771b248f14SClaudio Fontana         sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
19781b248f14SClaudio Fontana     }
19791b248f14SClaudio Fontana     /* Multiply by t to compute the required result.  */
19801b248f14SClaudio Fontana     mul128To256(asig0, asig1, tsig0, tsig1,
19811b248f14SClaudio Fontana                 &asig0, &asig1, &asig2, &asig3);
19821b248f14SClaudio Fontana     aexp += texp - 0x3ffe;
19831b248f14SClaudio Fontana     *exp = aexp;
19841b248f14SClaudio Fontana     *sig0 = asig0;
19851b248f14SClaudio Fontana     *sig1 = asig1;
19861b248f14SClaudio Fontana }
19871b248f14SClaudio Fontana 
19881b248f14SClaudio Fontana void helper_fyl2xp1(CPUX86State *env)
19891b248f14SClaudio Fontana {
19901b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
19911b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
19921b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
19931b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
19941b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
19951b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
19961b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
19971b248f14SClaudio Fontana 
19981b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
19991b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20001b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
20011b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
20021b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20031b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
20041b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
20051b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
20061b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20071b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
20081b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
20091b248f14SClaudio Fontana         ST1 = ST0;
20101b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
20111b248f14SClaudio Fontana         /* Pass this NaN through.  */
20121b248f14SClaudio Fontana     } else if (arg0_exp > 0x3ffd ||
20131b248f14SClaudio Fontana                (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
20141b248f14SClaudio Fontana                                                   0x95f619980c4336f7ULL :
20151b248f14SClaudio Fontana                                                   0xd413cccfe7799211ULL))) {
20161b248f14SClaudio Fontana         /*
20171b248f14SClaudio Fontana          * Out of range for the instruction (ST0 must have absolute
20181b248f14SClaudio Fontana          * value less than 1 - sqrt(2)/2 = 0.292..., according to
20191b248f14SClaudio Fontana          * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
20201b248f14SClaudio Fontana          * to sqrt(2) - 1, which we allow here), treat as invalid.
20211b248f14SClaudio Fontana          */
20221b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20231b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
20241b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
20251b248f14SClaudio Fontana                arg1_exp == 0x7fff) {
20261b248f14SClaudio Fontana         /*
20271b248f14SClaudio Fontana          * One argument is zero, or multiplying by infinity; correct
20281b248f14SClaudio Fontana          * result is exact and can be obtained by multiplying the
20291b248f14SClaudio Fontana          * arguments.
20301b248f14SClaudio Fontana          */
20311b248f14SClaudio Fontana         ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
20321b248f14SClaudio Fontana     } else if (arg0_exp < 0x3fb0) {
20331b248f14SClaudio Fontana         /*
20341b248f14SClaudio Fontana          * Multiplying both arguments and an extra-precision version
20351b248f14SClaudio Fontana          * of log2(e) is sufficiently precise.
20361b248f14SClaudio Fontana          */
20371b248f14SClaudio Fontana         uint64_t sig0, sig1, sig2;
20381b248f14SClaudio Fontana         int32_t exp;
20391b248f14SClaudio Fontana         if (arg0_exp == 0) {
20401b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
20411b248f14SClaudio Fontana         }
20421b248f14SClaudio Fontana         if (arg1_exp == 0) {
20431b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
20441b248f14SClaudio Fontana         }
20451b248f14SClaudio Fontana         mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
20461b248f14SClaudio Fontana                         &sig0, &sig1, &sig2);
20471b248f14SClaudio Fontana         exp = arg0_exp + 1;
20481b248f14SClaudio Fontana         mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
20491b248f14SClaudio Fontana         exp += arg1_exp - 0x3ffe;
20501b248f14SClaudio Fontana         /* This result is inexact.  */
20511b248f14SClaudio Fontana         sig1 |= 1;
20528da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
20538da5f1dbSRichard Henderson                                             arg0_sign ^ arg1_sign, exp,
20541b248f14SClaudio Fontana                                             sig0, sig1, &env->fp_status);
20551b248f14SClaudio Fontana     } else {
20561b248f14SClaudio Fontana         int32_t aexp;
20571b248f14SClaudio Fontana         uint64_t asig0, asig1, asig2;
20581b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
20598da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
20608da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
20611b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
20628da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
20631b248f14SClaudio Fontana 
20641b248f14SClaudio Fontana         helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
20651b248f14SClaudio Fontana         /*
20661b248f14SClaudio Fontana          * Multiply by the second argument to compute the required
20671b248f14SClaudio Fontana          * result.
20681b248f14SClaudio Fontana          */
20691b248f14SClaudio Fontana         if (arg1_exp == 0) {
20701b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
20711b248f14SClaudio Fontana         }
20721b248f14SClaudio Fontana         mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
20731b248f14SClaudio Fontana         aexp += arg1_exp - 0x3ffe;
20741b248f14SClaudio Fontana         /* This result is inexact.  */
20751b248f14SClaudio Fontana         asig1 |= 1;
20761b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = save_mode;
20778da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
20788da5f1dbSRichard Henderson                                             arg0_sign ^ arg1_sign, aexp,
20791b248f14SClaudio Fontana                                             asig0, asig1, &env->fp_status);
20801b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
20811b248f14SClaudio Fontana     }
20821b248f14SClaudio Fontana     fpop(env);
20831b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
20841b248f14SClaudio Fontana }
20851b248f14SClaudio Fontana 
20861b248f14SClaudio Fontana void helper_fyl2x(CPUX86State *env)
20871b248f14SClaudio Fontana {
20881b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
20891b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
20901b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
20911b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
20921b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
20931b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
20941b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
20951b248f14SClaudio Fontana 
20961b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
20971b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20981b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
20991b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
21001b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
21011b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
21021b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
21031b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
21041b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
21051b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
21061b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
21071b248f14SClaudio Fontana         ST1 = ST0;
21081b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
21091b248f14SClaudio Fontana         /* Pass this NaN through.  */
21101b248f14SClaudio Fontana     } else if (arg0_sign && !floatx80_is_zero(ST0)) {
21111b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
21121b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
21131b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST1)) {
21141b248f14SClaudio Fontana         FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
21151b248f14SClaudio Fontana                                              &env->fp_status);
21161b248f14SClaudio Fontana         switch (cmp) {
21171b248f14SClaudio Fontana         case float_relation_less:
21181b248f14SClaudio Fontana             ST1 = floatx80_chs(ST1);
21191b248f14SClaudio Fontana             break;
21201b248f14SClaudio Fontana         case float_relation_greater:
21211b248f14SClaudio Fontana             /* Result is infinity of the same sign as ST1.  */
21221b248f14SClaudio Fontana             break;
21231b248f14SClaudio Fontana         default:
21241b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21251b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21261b248f14SClaudio Fontana             break;
21271b248f14SClaudio Fontana         }
21281b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST0)) {
21291b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
21301b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21311b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21321b248f14SClaudio Fontana         } else if (arg1_sign) {
21331b248f14SClaudio Fontana             ST1 = floatx80_chs(ST0);
21341b248f14SClaudio Fontana         } else {
21351b248f14SClaudio Fontana             ST1 = ST0;
21361b248f14SClaudio Fontana         }
21371b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST0)) {
21381b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
21391b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21401b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21411b248f14SClaudio Fontana         } else {
21421b248f14SClaudio Fontana             /* Result is infinity with opposite sign to ST1.  */
21431b248f14SClaudio Fontana             float_raise(float_flag_divbyzero, &env->fp_status);
21441b248f14SClaudio Fontana             ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
21451b248f14SClaudio Fontana                                 0x8000000000000000ULL);
21461b248f14SClaudio Fontana         }
21471b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST1)) {
21481b248f14SClaudio Fontana         if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
21491b248f14SClaudio Fontana             ST1 = floatx80_chs(ST1);
21501b248f14SClaudio Fontana         }
21511b248f14SClaudio Fontana         /* Otherwise, ST1 is already the correct result.  */
21521b248f14SClaudio Fontana     } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
21531b248f14SClaudio Fontana         if (arg1_sign) {
21541b248f14SClaudio Fontana             ST1 = floatx80_chs(floatx80_zero);
21551b248f14SClaudio Fontana         } else {
21561b248f14SClaudio Fontana             ST1 = floatx80_zero;
21571b248f14SClaudio Fontana         }
21581b248f14SClaudio Fontana     } else {
21591b248f14SClaudio Fontana         int32_t int_exp;
21601b248f14SClaudio Fontana         floatx80 arg0_m1;
21611b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
21628da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
21638da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
21641b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
21658da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
21661b248f14SClaudio Fontana 
21671b248f14SClaudio Fontana         if (arg0_exp == 0) {
21681b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
21691b248f14SClaudio Fontana         }
21701b248f14SClaudio Fontana         if (arg1_exp == 0) {
21711b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
21721b248f14SClaudio Fontana         }
21731b248f14SClaudio Fontana         int_exp = arg0_exp - 0x3fff;
21741b248f14SClaudio Fontana         if (arg0_sig > 0xb504f333f9de6484ULL) {
21751b248f14SClaudio Fontana             ++int_exp;
21761b248f14SClaudio Fontana         }
21771b248f14SClaudio Fontana         arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
21781b248f14SClaudio Fontana                                                &env->fp_status),
21791b248f14SClaudio Fontana                                floatx80_one, &env->fp_status);
21801b248f14SClaudio Fontana         if (floatx80_is_zero(arg0_m1)) {
21811b248f14SClaudio Fontana             /* Exact power of 2; multiply by ST1.  */
21821b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
21831b248f14SClaudio Fontana             ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
21841b248f14SClaudio Fontana                                ST1, &env->fp_status);
21851b248f14SClaudio Fontana         } else {
21861b248f14SClaudio Fontana             bool asign = extractFloatx80Sign(arg0_m1);
21871b248f14SClaudio Fontana             int32_t aexp;
21881b248f14SClaudio Fontana             uint64_t asig0, asig1, asig2;
21891b248f14SClaudio Fontana             helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
21901b248f14SClaudio Fontana             if (int_exp != 0) {
21911b248f14SClaudio Fontana                 bool isign = (int_exp < 0);
21921b248f14SClaudio Fontana                 int32_t iexp;
21931b248f14SClaudio Fontana                 uint64_t isig;
21941b248f14SClaudio Fontana                 int shift;
21951b248f14SClaudio Fontana                 int_exp = isign ? -int_exp : int_exp;
21961b248f14SClaudio Fontana                 shift = clz32(int_exp) + 32;
21971b248f14SClaudio Fontana                 isig = int_exp;
21981b248f14SClaudio Fontana                 isig <<= shift;
21991b248f14SClaudio Fontana                 iexp = 0x403e - shift;
22001b248f14SClaudio Fontana                 shift128RightJamming(asig0, asig1, iexp - aexp,
22011b248f14SClaudio Fontana                                      &asig0, &asig1);
22021b248f14SClaudio Fontana                 if (asign == isign) {
22031b248f14SClaudio Fontana                     add128(isig, 0, asig0, asig1, &asig0, &asig1);
22041b248f14SClaudio Fontana                 } else {
22051b248f14SClaudio Fontana                     sub128(isig, 0, asig0, asig1, &asig0, &asig1);
22061b248f14SClaudio Fontana                 }
22071b248f14SClaudio Fontana                 aexp = iexp;
22081b248f14SClaudio Fontana                 asign = isign;
22091b248f14SClaudio Fontana             }
22101b248f14SClaudio Fontana             /*
22111b248f14SClaudio Fontana              * Multiply by the second argument to compute the required
22121b248f14SClaudio Fontana              * result.
22131b248f14SClaudio Fontana              */
22141b248f14SClaudio Fontana             if (arg1_exp == 0) {
22151b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
22161b248f14SClaudio Fontana             }
22171b248f14SClaudio Fontana             mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
22181b248f14SClaudio Fontana             aexp += arg1_exp - 0x3ffe;
22191b248f14SClaudio Fontana             /* This result is inexact.  */
22201b248f14SClaudio Fontana             asig1 |= 1;
22211b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
22228da5f1dbSRichard Henderson             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
22238da5f1dbSRichard Henderson                                                 asign ^ arg1_sign, aexp,
22241b248f14SClaudio Fontana                                                 asig0, asig1, &env->fp_status);
22251b248f14SClaudio Fontana         }
22261b248f14SClaudio Fontana 
22271b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
22281b248f14SClaudio Fontana     }
22291b248f14SClaudio Fontana     fpop(env);
22301b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22311b248f14SClaudio Fontana }
22321b248f14SClaudio Fontana 
22331b248f14SClaudio Fontana void helper_fsqrt(CPUX86State *env)
22341b248f14SClaudio Fontana {
22351b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22361b248f14SClaudio Fontana     if (floatx80_is_neg(ST0)) {
22371b248f14SClaudio Fontana         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
22381b248f14SClaudio Fontana         env->fpus |= 0x400;
22391b248f14SClaudio Fontana     }
22401b248f14SClaudio Fontana     ST0 = floatx80_sqrt(ST0, &env->fp_status);
22411b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22421b248f14SClaudio Fontana }
22431b248f14SClaudio Fontana 
22441b248f14SClaudio Fontana void helper_fsincos(CPUX86State *env)
22451b248f14SClaudio Fontana {
22461b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
22471b248f14SClaudio Fontana 
22481b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
22491b248f14SClaudio Fontana         env->fpus |= 0x400;
22501b248f14SClaudio Fontana     } else {
22511b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, sin(fptemp));
22521b248f14SClaudio Fontana         fpush(env);
22531b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, cos(fptemp));
22541b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
22551b248f14SClaudio Fontana         /* the above code is for |arg| < 2**63 only */
22561b248f14SClaudio Fontana     }
22571b248f14SClaudio Fontana }
22581b248f14SClaudio Fontana 
22591b248f14SClaudio Fontana void helper_frndint(CPUX86State *env)
22601b248f14SClaudio Fontana {
22611b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22621b248f14SClaudio Fontana     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
22631b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22641b248f14SClaudio Fontana }
22651b248f14SClaudio Fontana 
22661b248f14SClaudio Fontana void helper_fscale(CPUX86State *env)
22671b248f14SClaudio Fontana {
22681b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22691b248f14SClaudio Fontana     if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
22701b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
22711b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
22721b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
22731b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
22741b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
22751b248f14SClaudio Fontana         }
22761b248f14SClaudio Fontana         ST0 = ST1;
22771b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
22781b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
22791b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
22801b248f14SClaudio Fontana         }
22811b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST1) &&
22821b248f14SClaudio Fontana                !floatx80_invalid_encoding(ST0) &&
22831b248f14SClaudio Fontana                !floatx80_is_any_nan(ST0)) {
22841b248f14SClaudio Fontana         if (floatx80_is_neg(ST1)) {
22851b248f14SClaudio Fontana             if (floatx80_is_infinity(ST0)) {
22861b248f14SClaudio Fontana                 float_raise(float_flag_invalid, &env->fp_status);
22871b248f14SClaudio Fontana                 ST0 = floatx80_default_nan(&env->fp_status);
22881b248f14SClaudio Fontana             } else {
22891b248f14SClaudio Fontana                 ST0 = (floatx80_is_neg(ST0) ?
22901b248f14SClaudio Fontana                        floatx80_chs(floatx80_zero) :
22911b248f14SClaudio Fontana                        floatx80_zero);
22921b248f14SClaudio Fontana             }
22931b248f14SClaudio Fontana         } else {
22941b248f14SClaudio Fontana             if (floatx80_is_zero(ST0)) {
22951b248f14SClaudio Fontana                 float_raise(float_flag_invalid, &env->fp_status);
22961b248f14SClaudio Fontana                 ST0 = floatx80_default_nan(&env->fp_status);
22971b248f14SClaudio Fontana             } else {
22981b248f14SClaudio Fontana                 ST0 = (floatx80_is_neg(ST0) ?
22991b248f14SClaudio Fontana                        floatx80_chs(floatx80_infinity) :
23001b248f14SClaudio Fontana                        floatx80_infinity);
23011b248f14SClaudio Fontana             }
23021b248f14SClaudio Fontana         }
23031b248f14SClaudio Fontana     } else {
23041b248f14SClaudio Fontana         int n;
23058da5f1dbSRichard Henderson         FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
23061b248f14SClaudio Fontana         uint8_t save_flags = get_float_exception_flags(&env->fp_status);
23071b248f14SClaudio Fontana         set_float_exception_flags(0, &env->fp_status);
23081b248f14SClaudio Fontana         n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
23091b248f14SClaudio Fontana         set_float_exception_flags(save_flags, &env->fp_status);
23108da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
23111b248f14SClaudio Fontana         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
23121b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save;
23131b248f14SClaudio Fontana     }
23141b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
23151b248f14SClaudio Fontana }
23161b248f14SClaudio Fontana 
23171b248f14SClaudio Fontana void helper_fsin(CPUX86State *env)
23181b248f14SClaudio Fontana {
23191b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
23201b248f14SClaudio Fontana 
23211b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
23221b248f14SClaudio Fontana         env->fpus |= 0x400;
23231b248f14SClaudio Fontana     } else {
23241b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, sin(fptemp));
23251b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
23261b248f14SClaudio Fontana         /* the above code is for |arg| < 2**53 only */
23271b248f14SClaudio Fontana     }
23281b248f14SClaudio Fontana }
23291b248f14SClaudio Fontana 
23301b248f14SClaudio Fontana void helper_fcos(CPUX86State *env)
23311b248f14SClaudio Fontana {
23321b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
23331b248f14SClaudio Fontana 
23341b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
23351b248f14SClaudio Fontana         env->fpus |= 0x400;
23361b248f14SClaudio Fontana     } else {
23371b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, cos(fptemp));
23381b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
23391b248f14SClaudio Fontana         /* the above code is for |arg| < 2**63 only */
23401b248f14SClaudio Fontana     }
23411b248f14SClaudio Fontana }
23421b248f14SClaudio Fontana 
23431b248f14SClaudio Fontana void helper_fxam_ST0(CPUX86State *env)
23441b248f14SClaudio Fontana {
23451b248f14SClaudio Fontana     CPU_LDoubleU temp;
23461b248f14SClaudio Fontana     int expdif;
23471b248f14SClaudio Fontana 
23481b248f14SClaudio Fontana     temp.d = ST0;
23491b248f14SClaudio Fontana 
23501b248f14SClaudio Fontana     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
23511b248f14SClaudio Fontana     if (SIGND(temp)) {
23521b248f14SClaudio Fontana         env->fpus |= 0x200; /* C1 <-- 1 */
23531b248f14SClaudio Fontana     }
23541b248f14SClaudio Fontana 
23551b248f14SClaudio Fontana     if (env->fptags[env->fpstt]) {
23561b248f14SClaudio Fontana         env->fpus |= 0x4100; /* Empty */
23571b248f14SClaudio Fontana         return;
23581b248f14SClaudio Fontana     }
23591b248f14SClaudio Fontana 
23601b248f14SClaudio Fontana     expdif = EXPD(temp);
23611b248f14SClaudio Fontana     if (expdif == MAXEXPD) {
23621b248f14SClaudio Fontana         if (MANTD(temp) == 0x8000000000000000ULL) {
23631b248f14SClaudio Fontana             env->fpus |= 0x500; /* Infinity */
23641b248f14SClaudio Fontana         } else if (MANTD(temp) & 0x8000000000000000ULL) {
23651b248f14SClaudio Fontana             env->fpus |= 0x100; /* NaN */
23661b248f14SClaudio Fontana         }
23671b248f14SClaudio Fontana     } else if (expdif == 0) {
23681b248f14SClaudio Fontana         if (MANTD(temp) == 0) {
23691b248f14SClaudio Fontana             env->fpus |=  0x4000; /* Zero */
23701b248f14SClaudio Fontana         } else {
23711b248f14SClaudio Fontana             env->fpus |= 0x4400; /* Denormal */
23721b248f14SClaudio Fontana         }
23731b248f14SClaudio Fontana     } else if (MANTD(temp) & 0x8000000000000000ULL) {
23741b248f14SClaudio Fontana         env->fpus |= 0x400;
23751b248f14SClaudio Fontana     }
23761b248f14SClaudio Fontana }
23771b248f14SClaudio Fontana 
2378505e2ef7SRichard Henderson static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
23791b248f14SClaudio Fontana {
2380505e2ef7SRichard Henderson     CPUX86State *env = ac->env;
23811b248f14SClaudio Fontana     int fpus, fptag, exp, i;
23821b248f14SClaudio Fontana     uint64_t mant;
23831b248f14SClaudio Fontana     CPU_LDoubleU tmp;
23841b248f14SClaudio Fontana 
23851b248f14SClaudio Fontana     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
23861b248f14SClaudio Fontana     fptag = 0;
23871b248f14SClaudio Fontana     for (i = 7; i >= 0; i--) {
23881b248f14SClaudio Fontana         fptag <<= 2;
23891b248f14SClaudio Fontana         if (env->fptags[i]) {
23901b248f14SClaudio Fontana             fptag |= 3;
23911b248f14SClaudio Fontana         } else {
23921b248f14SClaudio Fontana             tmp.d = env->fpregs[i].d;
23931b248f14SClaudio Fontana             exp = EXPD(tmp);
23941b248f14SClaudio Fontana             mant = MANTD(tmp);
23951b248f14SClaudio Fontana             if (exp == 0 && mant == 0) {
23961b248f14SClaudio Fontana                 /* zero */
23971b248f14SClaudio Fontana                 fptag |= 1;
23981b248f14SClaudio Fontana             } else if (exp == 0 || exp == MAXEXPD
23991b248f14SClaudio Fontana                        || (mant & (1LL << 63)) == 0) {
24001b248f14SClaudio Fontana                 /* NaNs, infinity, denormal */
24011b248f14SClaudio Fontana                 fptag |= 2;
24021b248f14SClaudio Fontana             }
24031b248f14SClaudio Fontana         }
24041b248f14SClaudio Fontana     }
24051b248f14SClaudio Fontana     if (data32) {
24061b248f14SClaudio Fontana         /* 32 bit */
2407505e2ef7SRichard Henderson         access_stl(ac, ptr, env->fpuc);
2408505e2ef7SRichard Henderson         access_stl(ac, ptr + 4, fpus);
2409505e2ef7SRichard Henderson         access_stl(ac, ptr + 8, fptag);
2410505e2ef7SRichard Henderson         access_stl(ac, ptr + 12, env->fpip); /* fpip */
2411505e2ef7SRichard Henderson         access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
2412505e2ef7SRichard Henderson         access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
2413505e2ef7SRichard Henderson         access_stl(ac, ptr + 24, env->fpds); /* fpos */
24141b248f14SClaudio Fontana     } else {
24151b248f14SClaudio Fontana         /* 16 bit */
2416505e2ef7SRichard Henderson         access_stw(ac, ptr, env->fpuc);
2417505e2ef7SRichard Henderson         access_stw(ac, ptr + 2, fpus);
2418505e2ef7SRichard Henderson         access_stw(ac, ptr + 4, fptag);
2419505e2ef7SRichard Henderson         access_stw(ac, ptr + 6, env->fpip);
2420505e2ef7SRichard Henderson         access_stw(ac, ptr + 8, env->fpcs);
2421505e2ef7SRichard Henderson         access_stw(ac, ptr + 10, env->fpdp);
2422505e2ef7SRichard Henderson         access_stw(ac, ptr + 12, env->fpds);
24231b248f14SClaudio Fontana     }
24241b248f14SClaudio Fontana }
24251b248f14SClaudio Fontana 
24261b248f14SClaudio Fontana void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
24271b248f14SClaudio Fontana {
2428505e2ef7SRichard Henderson     X86Access ac;
2429505e2ef7SRichard Henderson 
2430505e2ef7SRichard Henderson     access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
2431505e2ef7SRichard Henderson     do_fstenv(&ac, ptr, data32);
24321b248f14SClaudio Fontana }
24331b248f14SClaudio Fontana 
24341b248f14SClaudio Fontana static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
24351b248f14SClaudio Fontana {
24361b248f14SClaudio Fontana     env->fpstt = (fpus >> 11) & 7;
24371b248f14SClaudio Fontana     env->fpus = fpus & ~0x3800 & ~FPUS_B;
24381b248f14SClaudio Fontana     env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
24391b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY)
24401b248f14SClaudio Fontana     if (!(env->fpus & FPUS_SE)) {
24411b248f14SClaudio Fontana         /*
24421b248f14SClaudio Fontana          * Here the processor deasserts FERR#; in response, the chipset deasserts
24431b248f14SClaudio Fontana          * IGNNE#.
24441b248f14SClaudio Fontana          */
24451b248f14SClaudio Fontana         cpu_clear_ignne();
24461b248f14SClaudio Fontana     }
24471b248f14SClaudio Fontana #endif
24481b248f14SClaudio Fontana }
24491b248f14SClaudio Fontana 
2450bc13c2ddSRichard Henderson static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
24511b248f14SClaudio Fontana {
24521b248f14SClaudio Fontana     int i, fpus, fptag;
2453bc13c2ddSRichard Henderson     CPUX86State *env = ac->env;
24541b248f14SClaudio Fontana 
2455bc13c2ddSRichard Henderson     cpu_set_fpuc(env, access_ldw(ac, ptr));
2456bc13c2ddSRichard Henderson     fpus = access_ldw(ac, ptr + (2 << data32));
2457bc13c2ddSRichard Henderson     fptag = access_ldw(ac, ptr + (4 << data32));
2458bc13c2ddSRichard Henderson 
24591b248f14SClaudio Fontana     cpu_set_fpus(env, fpus);
24601b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
24611b248f14SClaudio Fontana         env->fptags[i] = ((fptag & 3) == 3);
24621b248f14SClaudio Fontana         fptag >>= 2;
24631b248f14SClaudio Fontana     }
24641b248f14SClaudio Fontana }
24651b248f14SClaudio Fontana 
24661b248f14SClaudio Fontana void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
24671b248f14SClaudio Fontana {
2468bc13c2ddSRichard Henderson     X86Access ac;
2469bc13c2ddSRichard Henderson 
2470bc13c2ddSRichard Henderson     access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
2471bc13c2ddSRichard Henderson     do_fldenv(&ac, ptr, data32);
24721b248f14SClaudio Fontana }
24731b248f14SClaudio Fontana 
247494f60f8fSRichard Henderson static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
24751b248f14SClaudio Fontana {
247694f60f8fSRichard Henderson     CPUX86State *env = ac->env;
24771b248f14SClaudio Fontana 
247894f60f8fSRichard Henderson     do_fstenv(ac, ptr, data32);
247994f60f8fSRichard Henderson     ptr += 14 << data32;
24801b248f14SClaudio Fontana 
248194f60f8fSRichard Henderson     for (int i = 0; i < 8; i++) {
248294f60f8fSRichard Henderson         floatx80 tmp = ST(i);
248394f60f8fSRichard Henderson         do_fstt(ac, ptr, tmp);
24841b248f14SClaudio Fontana         ptr += 10;
24851b248f14SClaudio Fontana     }
24861b248f14SClaudio Fontana 
2487bbdda9b7SRichard Henderson     do_fninit(env);
24881b248f14SClaudio Fontana }
24891b248f14SClaudio Fontana 
24900ac2b197SRichard Henderson void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
24910ac2b197SRichard Henderson {
249294f60f8fSRichard Henderson     int size = (14 << data32) + 80;
249394f60f8fSRichard Henderson     X86Access ac;
249494f60f8fSRichard Henderson 
249594f60f8fSRichard Henderson     access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC());
249694f60f8fSRichard Henderson     do_fsave(&ac, ptr, data32);
24970ac2b197SRichard Henderson }
24980ac2b197SRichard Henderson 
249994f60f8fSRichard Henderson static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
25001b248f14SClaudio Fontana {
250194f60f8fSRichard Henderson     CPUX86State *env = ac->env;
25021b248f14SClaudio Fontana 
250394f60f8fSRichard Henderson     do_fldenv(ac, ptr, data32);
250494f60f8fSRichard Henderson     ptr += 14 << data32;
25051b248f14SClaudio Fontana 
250694f60f8fSRichard Henderson     for (int i = 0; i < 8; i++) {
250794f60f8fSRichard Henderson         floatx80 tmp = do_fldt(ac, ptr);
25081b248f14SClaudio Fontana         ST(i) = tmp;
25091b248f14SClaudio Fontana         ptr += 10;
25101b248f14SClaudio Fontana     }
25111b248f14SClaudio Fontana }
25121b248f14SClaudio Fontana 
25130ac2b197SRichard Henderson void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
25140ac2b197SRichard Henderson {
251594f60f8fSRichard Henderson     int size = (14 << data32) + 80;
251694f60f8fSRichard Henderson     X86Access ac;
251794f60f8fSRichard Henderson 
251894f60f8fSRichard Henderson     access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC());
251994f60f8fSRichard Henderson     do_frstor(&ac, ptr, data32);
25200ac2b197SRichard Henderson }
25210ac2b197SRichard Henderson 
/* Byte offset of @field within X86XSaveArea. */
#define XO(field)  offsetof(X86XSaveArea, field)
25231b248f14SClaudio Fontana 
2524b7e6d3adSRichard Henderson static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
25251b248f14SClaudio Fontana {
2526b7e6d3adSRichard Henderson     CPUX86State *env = ac->env;
25271b248f14SClaudio Fontana     int fpus, fptag, i;
25281b248f14SClaudio Fontana     target_ulong addr;
25291b248f14SClaudio Fontana 
25301b248f14SClaudio Fontana     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
25311b248f14SClaudio Fontana     fptag = 0;
25321b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
25331b248f14SClaudio Fontana         fptag |= (env->fptags[i] << i);
25341b248f14SClaudio Fontana     }
25351b248f14SClaudio Fontana 
2536b7e6d3adSRichard Henderson     access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
2537b7e6d3adSRichard Henderson     access_stw(ac, ptr + XO(legacy.fsw), fpus);
2538b7e6d3adSRichard Henderson     access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);
25391b248f14SClaudio Fontana 
25401b248f14SClaudio Fontana     /* In 32-bit mode this is eip, sel, dp, sel.
25411b248f14SClaudio Fontana        In 64-bit mode this is rip, rdp.
25421b248f14SClaudio Fontana        But in either case we don't write actual data, just zeros.  */
2543b7e6d3adSRichard Henderson     access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
2544b7e6d3adSRichard Henderson     access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */
25451b248f14SClaudio Fontana 
25461b248f14SClaudio Fontana     addr = ptr + XO(legacy.fpregs);
2547d3e8b648SRichard Henderson 
25481b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
25491b248f14SClaudio Fontana         floatx80 tmp = ST(i);
2550b7e6d3adSRichard Henderson         do_fstt(ac, addr, tmp);
25511b248f14SClaudio Fontana         addr += 16;
25521b248f14SClaudio Fontana     }
25531b248f14SClaudio Fontana }
25541b248f14SClaudio Fontana 
2555b7e6d3adSRichard Henderson static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
25561b248f14SClaudio Fontana {
2557b7e6d3adSRichard Henderson     CPUX86State *env = ac->env;
2558b7e6d3adSRichard Henderson 
25591b248f14SClaudio Fontana     update_mxcsr_from_sse_status(env);
2560b7e6d3adSRichard Henderson     access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
2561b7e6d3adSRichard Henderson     access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
25621b248f14SClaudio Fontana }
25631b248f14SClaudio Fontana 
2564b7e6d3adSRichard Henderson static void do_xsave_sse(X86Access *ac, target_ulong ptr)
25651b248f14SClaudio Fontana {
2566b7e6d3adSRichard Henderson     CPUX86State *env = ac->env;
25671b248f14SClaudio Fontana     int i, nb_xmm_regs;
25681b248f14SClaudio Fontana     target_ulong addr;
25691b248f14SClaudio Fontana 
25701b248f14SClaudio Fontana     if (env->hflags & HF_CS64_MASK) {
25711b248f14SClaudio Fontana         nb_xmm_regs = 16;
25721b248f14SClaudio Fontana     } else {
25731b248f14SClaudio Fontana         nb_xmm_regs = 8;
25741b248f14SClaudio Fontana     }
25751b248f14SClaudio Fontana 
25761b248f14SClaudio Fontana     addr = ptr + XO(legacy.xmm_regs);
25771b248f14SClaudio Fontana     for (i = 0; i < nb_xmm_regs; i++) {
2578b7e6d3adSRichard Henderson         access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
2579b7e6d3adSRichard Henderson         access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
25801b248f14SClaudio Fontana         addr += 16;
25811b248f14SClaudio Fontana     }
25821b248f14SClaudio Fontana }
25831b248f14SClaudio Fontana 
25846b1b736bSRichard Henderson static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
258589254431SPaolo Bonzini {
25866b1b736bSRichard Henderson     CPUX86State *env = ac->env;
258789254431SPaolo Bonzini     int i, nb_xmm_regs;
258889254431SPaolo Bonzini 
258989254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
259089254431SPaolo Bonzini         nb_xmm_regs = 16;
259189254431SPaolo Bonzini     } else {
259289254431SPaolo Bonzini         nb_xmm_regs = 8;
259389254431SPaolo Bonzini     }
259489254431SPaolo Bonzini 
259589254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
25966b1b736bSRichard Henderson         access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
25976b1b736bSRichard Henderson         access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
259889254431SPaolo Bonzini     }
259989254431SPaolo Bonzini }
260089254431SPaolo Bonzini 
26016b1b736bSRichard Henderson static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
26021b248f14SClaudio Fontana {
26036b1b736bSRichard Henderson     CPUX86State *env = ac->env;
26041b248f14SClaudio Fontana     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
26051b248f14SClaudio Fontana     int i;
26061b248f14SClaudio Fontana 
26071b248f14SClaudio Fontana     for (i = 0; i < 4; i++, addr += 16) {
26086b1b736bSRichard Henderson         access_stq(ac, addr, env->bnd_regs[i].lb);
26096b1b736bSRichard Henderson         access_stq(ac, addr + 8, env->bnd_regs[i].ub);
26101b248f14SClaudio Fontana     }
26111b248f14SClaudio Fontana }
26121b248f14SClaudio Fontana 
26136b1b736bSRichard Henderson static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
26141b248f14SClaudio Fontana {
26156b1b736bSRichard Henderson     CPUX86State *env = ac->env;
26166b1b736bSRichard Henderson 
26176b1b736bSRichard Henderson     access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
26186b1b736bSRichard Henderson                env->bndcs_regs.cfgu);
26196b1b736bSRichard Henderson     access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
26206b1b736bSRichard Henderson                env->bndcs_regs.sts);
26211b248f14SClaudio Fontana }
26221b248f14SClaudio Fontana 
26236b1b736bSRichard Henderson static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
26241b248f14SClaudio Fontana {
26256b1b736bSRichard Henderson     access_stq(ac, ptr, ac->env->pkru);
26261b248f14SClaudio Fontana }
26271b248f14SClaudio Fontana 
26286d030aabSRichard Henderson static void do_fxsave(X86Access *ac, target_ulong ptr)
26291b248f14SClaudio Fontana {
26306d030aabSRichard Henderson     CPUX86State *env = ac->env;
26311b248f14SClaudio Fontana 
26326d030aabSRichard Henderson     do_xsave_fpu(ac, ptr);
26331b248f14SClaudio Fontana     if (env->cr[4] & CR4_OSFXSR_MASK) {
26346d030aabSRichard Henderson         do_xsave_mxcsr(ac, ptr);
26351b248f14SClaudio Fontana         /* Fast FXSAVE leaves out the XMM registers */
26361b248f14SClaudio Fontana         if (!(env->efer & MSR_EFER_FFXSR)
26371b248f14SClaudio Fontana             || (env->hflags & HF_CPL_MASK)
26381b248f14SClaudio Fontana             || !(env->hflags & HF_LMA_MASK)) {
26396d030aabSRichard Henderson             do_xsave_sse(ac, ptr);
26401b248f14SClaudio Fontana         }
26411b248f14SClaudio Fontana     }
26421b248f14SClaudio Fontana }
26431b248f14SClaudio Fontana 
26440ac2b197SRichard Henderson void helper_fxsave(CPUX86State *env, target_ulong ptr)
26450ac2b197SRichard Henderson {
26466d030aabSRichard Henderson     uintptr_t ra = GETPC();
2647b7e6d3adSRichard Henderson     X86Access ac;
2648b7e6d3adSRichard Henderson 
26491b248f14SClaudio Fontana     /* The operand must be 16 byte aligned */
26501b248f14SClaudio Fontana     if (ptr & 0xf) {
26511b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
26521b248f14SClaudio Fontana     }
26531b248f14SClaudio Fontana 
2654b7e6d3adSRichard Henderson     access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
2655b7e6d3adSRichard Henderson                    MMU_DATA_STORE, ra);
26566d030aabSRichard Henderson     do_fxsave(&ac, ptr);
26570ac2b197SRichard Henderson }
26580ac2b197SRichard Henderson 
26591b248f14SClaudio Fontana static uint64_t get_xinuse(CPUX86State *env)
26601b248f14SClaudio Fontana {
26611b248f14SClaudio Fontana     uint64_t inuse = -1;
26621b248f14SClaudio Fontana 
26631b248f14SClaudio Fontana     /* For the most part, we don't track XINUSE.  We could calculate it
26641b248f14SClaudio Fontana        here for all components, but it's probably less work to simply
26651b248f14SClaudio Fontana        indicate in use.  That said, the state of BNDREGS is important
26661b248f14SClaudio Fontana        enough to track in HFLAGS, so we might as well use that here.  */
26671b248f14SClaudio Fontana     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
26681b248f14SClaudio Fontana        inuse &= ~XSTATE_BNDREGS_MASK;
26691b248f14SClaudio Fontana     }
26701b248f14SClaudio Fontana     return inuse;
26711b248f14SClaudio Fontana }
26721b248f14SClaudio Fontana 
2673c6e6d150SRichard Henderson static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
2674c6e6d150SRichard Henderson                             uint64_t inuse, uint64_t opt)
26751b248f14SClaudio Fontana {
26761b248f14SClaudio Fontana     uint64_t old_bv, new_bv;
26771b248f14SClaudio Fontana 
26781b248f14SClaudio Fontana     if (opt & XSTATE_FP_MASK) {
2679c6e6d150SRichard Henderson         do_xsave_fpu(ac, ptr);
26801b248f14SClaudio Fontana     }
26811b248f14SClaudio Fontana     if (rfbm & XSTATE_SSE_MASK) {
26821b248f14SClaudio Fontana         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
2683c6e6d150SRichard Henderson         do_xsave_mxcsr(ac, ptr);
26841b248f14SClaudio Fontana     }
26851b248f14SClaudio Fontana     if (opt & XSTATE_SSE_MASK) {
2686c6e6d150SRichard Henderson         do_xsave_sse(ac, ptr);
26871b248f14SClaudio Fontana     }
268889254431SPaolo Bonzini     if (opt & XSTATE_YMM_MASK) {
2689c6e6d150SRichard Henderson         do_xsave_ymmh(ac, ptr + XO(avx_state));
269089254431SPaolo Bonzini     }
26911b248f14SClaudio Fontana     if (opt & XSTATE_BNDREGS_MASK) {
2692c6e6d150SRichard Henderson         do_xsave_bndregs(ac, ptr + XO(bndreg_state));
26931b248f14SClaudio Fontana     }
26941b248f14SClaudio Fontana     if (opt & XSTATE_BNDCSR_MASK) {
2695c6e6d150SRichard Henderson         do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
26961b248f14SClaudio Fontana     }
26971b248f14SClaudio Fontana     if (opt & XSTATE_PKRU_MASK) {
2698c6e6d150SRichard Henderson         do_xsave_pkru(ac, ptr + XO(pkru_state));
26991b248f14SClaudio Fontana     }
27001b248f14SClaudio Fontana 
27011b248f14SClaudio Fontana     /* Update the XSTATE_BV field.  */
2702c6e6d150SRichard Henderson     old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
27031b248f14SClaudio Fontana     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
2704c6e6d150SRichard Henderson     access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
27051b248f14SClaudio Fontana }
27061b248f14SClaudio Fontana 
2707a8f68831SRichard Henderson static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
2708a8f68831SRichard Henderson {
27091b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
27101b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
27111b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, ra);
27121b248f14SClaudio Fontana     }
27131b248f14SClaudio Fontana 
27141b248f14SClaudio Fontana     /* The operand must be 64 byte aligned.  */
27151b248f14SClaudio Fontana     if (ptr & 63) {
27161b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
27171b248f14SClaudio Fontana     }
2718a8f68831SRichard Henderson }
2719a8f68831SRichard Henderson 
2720c6e6d150SRichard Henderson static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
2721c6e6d150SRichard Henderson                      uint64_t inuse, uint64_t opt, uintptr_t ra)
27221b248f14SClaudio Fontana {
2723c6e6d150SRichard Henderson     X86Access ac;
2724c6e6d150SRichard Henderson     unsigned size;
2725a8f68831SRichard Henderson 
2726a8f68831SRichard Henderson     do_xsave_chk(env, ptr, ra);
27271b248f14SClaudio Fontana 
27281b248f14SClaudio Fontana     /* Never save anything not enabled by XCR0.  */
27291b248f14SClaudio Fontana     rfbm &= env->xcr0;
27301b248f14SClaudio Fontana     opt &= rfbm;
2731c6e6d150SRichard Henderson     size = xsave_area_size(opt, false);
27321b248f14SClaudio Fontana 
2733c6e6d150SRichard Henderson     access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
2734c6e6d150SRichard Henderson     do_xsave_access(&ac, ptr, rfbm, inuse, opt);
27351b248f14SClaudio Fontana }
27361b248f14SClaudio Fontana 
27371b248f14SClaudio Fontana void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
27381b248f14SClaudio Fontana {
2739c6e6d150SRichard Henderson     do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
27401b248f14SClaudio Fontana }
27411b248f14SClaudio Fontana 
27421b248f14SClaudio Fontana void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
27431b248f14SClaudio Fontana {
27441b248f14SClaudio Fontana     uint64_t inuse = get_xinuse(env);
27451b248f14SClaudio Fontana     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
27461b248f14SClaudio Fontana }
27471b248f14SClaudio Fontana 
2748e41d2eafSRichard Henderson static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
27491b248f14SClaudio Fontana {
2750e41d2eafSRichard Henderson     CPUX86State *env = ac->env;
27511b248f14SClaudio Fontana     int i, fpuc, fpus, fptag;
27521b248f14SClaudio Fontana     target_ulong addr;
27531b248f14SClaudio Fontana 
2754e41d2eafSRichard Henderson     fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
2755e41d2eafSRichard Henderson     fpus = access_ldw(ac, ptr + XO(legacy.fsw));
2756e41d2eafSRichard Henderson     fptag = access_ldw(ac, ptr + XO(legacy.ftw));
27571b248f14SClaudio Fontana     cpu_set_fpuc(env, fpuc);
27581b248f14SClaudio Fontana     cpu_set_fpus(env, fpus);
2759e41d2eafSRichard Henderson 
27601b248f14SClaudio Fontana     fptag ^= 0xff;
27611b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
27621b248f14SClaudio Fontana         env->fptags[i] = ((fptag >> i) & 1);
27631b248f14SClaudio Fontana     }
27641b248f14SClaudio Fontana 
27651b248f14SClaudio Fontana     addr = ptr + XO(legacy.fpregs);
2766d3e8b648SRichard Henderson 
27671b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
2768e41d2eafSRichard Henderson         floatx80 tmp = do_fldt(ac, addr);
27691b248f14SClaudio Fontana         ST(i) = tmp;
27701b248f14SClaudio Fontana         addr += 16;
27711b248f14SClaudio Fontana     }
27721b248f14SClaudio Fontana }
27731b248f14SClaudio Fontana 
2774e41d2eafSRichard Henderson static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
27751b248f14SClaudio Fontana {
2776e41d2eafSRichard Henderson     CPUX86State *env = ac->env;
2777e41d2eafSRichard Henderson     cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
27781b248f14SClaudio Fontana }
27791b248f14SClaudio Fontana 
2780e41d2eafSRichard Henderson static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
27811b248f14SClaudio Fontana {
2782e41d2eafSRichard Henderson     CPUX86State *env = ac->env;
27831b248f14SClaudio Fontana     int i, nb_xmm_regs;
27841b248f14SClaudio Fontana     target_ulong addr;
27851b248f14SClaudio Fontana 
27861b248f14SClaudio Fontana     if (env->hflags & HF_CS64_MASK) {
27871b248f14SClaudio Fontana         nb_xmm_regs = 16;
27881b248f14SClaudio Fontana     } else {
27891b248f14SClaudio Fontana         nb_xmm_regs = 8;
27901b248f14SClaudio Fontana     }
27911b248f14SClaudio Fontana 
27921b248f14SClaudio Fontana     addr = ptr + XO(legacy.xmm_regs);
27931b248f14SClaudio Fontana     for (i = 0; i < nb_xmm_regs; i++) {
2794e41d2eafSRichard Henderson         env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
2795e41d2eafSRichard Henderson         env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
27961b248f14SClaudio Fontana         addr += 16;
27971b248f14SClaudio Fontana     }
27981b248f14SClaudio Fontana }
27991b248f14SClaudio Fontana 
280089254431SPaolo Bonzini static void do_clear_sse(CPUX86State *env)
280189254431SPaolo Bonzini {
280289254431SPaolo Bonzini     int i, nb_xmm_regs;
280389254431SPaolo Bonzini 
280489254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
280589254431SPaolo Bonzini         nb_xmm_regs = 16;
280689254431SPaolo Bonzini     } else {
280789254431SPaolo Bonzini         nb_xmm_regs = 8;
280889254431SPaolo Bonzini     }
280989254431SPaolo Bonzini 
281089254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++) {
281189254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(0) = 0;
281289254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(1) = 0;
281389254431SPaolo Bonzini     }
281489254431SPaolo Bonzini }
281589254431SPaolo Bonzini 
281658955a96SRichard Henderson static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
281789254431SPaolo Bonzini {
281858955a96SRichard Henderson     CPUX86State *env = ac->env;
281989254431SPaolo Bonzini     int i, nb_xmm_regs;
282089254431SPaolo Bonzini 
282189254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
282289254431SPaolo Bonzini         nb_xmm_regs = 16;
282389254431SPaolo Bonzini     } else {
282489254431SPaolo Bonzini         nb_xmm_regs = 8;
282589254431SPaolo Bonzini     }
282689254431SPaolo Bonzini 
282789254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
282858955a96SRichard Henderson         env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
282958955a96SRichard Henderson         env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
283089254431SPaolo Bonzini     }
283189254431SPaolo Bonzini }
283289254431SPaolo Bonzini 
283389254431SPaolo Bonzini static void do_clear_ymmh(CPUX86State *env)
283489254431SPaolo Bonzini {
283589254431SPaolo Bonzini     int i, nb_xmm_regs;
283689254431SPaolo Bonzini 
283789254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
283889254431SPaolo Bonzini         nb_xmm_regs = 16;
283989254431SPaolo Bonzini     } else {
284089254431SPaolo Bonzini         nb_xmm_regs = 8;
284189254431SPaolo Bonzini     }
284289254431SPaolo Bonzini 
284389254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++) {
284489254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(2) = 0;
284589254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(3) = 0;
284689254431SPaolo Bonzini     }
284789254431SPaolo Bonzini }
284889254431SPaolo Bonzini 
284958955a96SRichard Henderson static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
28501b248f14SClaudio Fontana {
285158955a96SRichard Henderson     CPUX86State *env = ac->env;
28521b248f14SClaudio Fontana     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
28531b248f14SClaudio Fontana     int i;
28541b248f14SClaudio Fontana 
28551b248f14SClaudio Fontana     for (i = 0; i < 4; i++, addr += 16) {
285658955a96SRichard Henderson         env->bnd_regs[i].lb = access_ldq(ac, addr);
285758955a96SRichard Henderson         env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
28581b248f14SClaudio Fontana     }
28591b248f14SClaudio Fontana }
28601b248f14SClaudio Fontana 
286158955a96SRichard Henderson static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
28621b248f14SClaudio Fontana {
286358955a96SRichard Henderson     CPUX86State *env = ac->env;
286458955a96SRichard Henderson 
28651b248f14SClaudio Fontana     /* FIXME: Extend highest implemented bit of linear address.  */
28661b248f14SClaudio Fontana     env->bndcs_regs.cfgu
286758955a96SRichard Henderson         = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
28681b248f14SClaudio Fontana     env->bndcs_regs.sts
286958955a96SRichard Henderson         = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
28701b248f14SClaudio Fontana }
28711b248f14SClaudio Fontana 
287258955a96SRichard Henderson static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
28731b248f14SClaudio Fontana {
287458955a96SRichard Henderson     ac->env->pkru = access_ldq(ac, ptr);
28751b248f14SClaudio Fontana }
28761b248f14SClaudio Fontana 
28776d030aabSRichard Henderson static void do_fxrstor(X86Access *ac, target_ulong ptr)
28781b248f14SClaudio Fontana {
28796d030aabSRichard Henderson     CPUX86State *env = ac->env;
28801b248f14SClaudio Fontana 
28816d030aabSRichard Henderson     do_xrstor_fpu(ac, ptr);
28821b248f14SClaudio Fontana     if (env->cr[4] & CR4_OSFXSR_MASK) {
28836d030aabSRichard Henderson         do_xrstor_mxcsr(ac, ptr);
28841b248f14SClaudio Fontana         /* Fast FXRSTOR leaves out the XMM registers */
28851b248f14SClaudio Fontana         if (!(env->efer & MSR_EFER_FFXSR)
28861b248f14SClaudio Fontana             || (env->hflags & HF_CPL_MASK)
28871b248f14SClaudio Fontana             || !(env->hflags & HF_LMA_MASK)) {
28886d030aabSRichard Henderson             do_xrstor_sse(ac, ptr);
28891b248f14SClaudio Fontana         }
28901b248f14SClaudio Fontana     }
28911b248f14SClaudio Fontana }
28921b248f14SClaudio Fontana 
28930ac2b197SRichard Henderson void helper_fxrstor(CPUX86State *env, target_ulong ptr)
28940ac2b197SRichard Henderson {
28956d030aabSRichard Henderson     uintptr_t ra = GETPC();
2896e41d2eafSRichard Henderson     X86Access ac;
2897e41d2eafSRichard Henderson 
28981b248f14SClaudio Fontana     /* The operand must be 16 byte aligned */
28991b248f14SClaudio Fontana     if (ptr & 0xf) {
29001b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
29010ac2b197SRichard Henderson     }
29020ac2b197SRichard Henderson 
2903e41d2eafSRichard Henderson     access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
2904e41d2eafSRichard Henderson                    MMU_DATA_LOAD, ra);
29056d030aabSRichard Henderson     do_fxrstor(&ac, ptr);
29061b248f14SClaudio Fontana }
29071b248f14SClaudio Fontana 
2908d5dc3a92SRichard Henderson static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
2909d5dc3a92SRichard Henderson                                 target_ulong ptr)
29101b248f14SClaudio Fontana {
29111b248f14SClaudio Fontana     uint64_t xstate_bv, xcomp_bv, reserve0;
29121b248f14SClaudio Fontana 
2913d5dc3a92SRichard Henderson     xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
2914d5dc3a92SRichard Henderson     xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
2915d5dc3a92SRichard Henderson     reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
2916d5dc3a92SRichard Henderson     *pxsbv = xstate_bv;
29171b248f14SClaudio Fontana 
2918d5dc3a92SRichard Henderson     /*
2919d5dc3a92SRichard Henderson      * XCOMP_BV bit 63 indicates compact form, which we do not support,
2920d5dc3a92SRichard Henderson      * and thus must raise #GP.  That leaves us in standard form.
2921d5dc3a92SRichard Henderson      * In standard form, bytes 23:8 must be zero -- which is both
2922d5dc3a92SRichard Henderson      * XCOMP_BV and the following 64-bit field.
2923d5dc3a92SRichard Henderson      */
2924d5dc3a92SRichard Henderson     if (xcomp_bv || reserve0) {
2925d5dc3a92SRichard Henderson         return false;
29261b248f14SClaudio Fontana     }
29271b248f14SClaudio Fontana 
29281b248f14SClaudio Fontana     /* The XSTATE_BV field must not set bits not present in XCR0.  */
2929d5dc3a92SRichard Henderson     return (xstate_bv & ~ac->env->xcr0) == 0;
29301b248f14SClaudio Fontana }
29311b248f14SClaudio Fontana 
2932d5dc3a92SRichard Henderson static void do_xrstor(X86Access *ac, target_ulong ptr,
2933d5dc3a92SRichard Henderson                       uint64_t rfbm, uint64_t xstate_bv)
2934d5dc3a92SRichard Henderson {
2935d5dc3a92SRichard Henderson     CPUX86State *env = ac->env;
29361b248f14SClaudio Fontana 
29371b248f14SClaudio Fontana     if (rfbm & XSTATE_FP_MASK) {
29381b248f14SClaudio Fontana         if (xstate_bv & XSTATE_FP_MASK) {
2939d5dc3a92SRichard Henderson             do_xrstor_fpu(ac, ptr);
29401b248f14SClaudio Fontana         } else {
2941bbdda9b7SRichard Henderson             do_fninit(env);
29421b248f14SClaudio Fontana             memset(env->fpregs, 0, sizeof(env->fpregs));
29431b248f14SClaudio Fontana         }
29441b248f14SClaudio Fontana     }
29451b248f14SClaudio Fontana     if (rfbm & XSTATE_SSE_MASK) {
29461b248f14SClaudio Fontana         /* Note that the standard form of XRSTOR loads MXCSR from memory
29471b248f14SClaudio Fontana            whether or not the XSTATE_BV bit is set.  */
2948d5dc3a92SRichard Henderson         do_xrstor_mxcsr(ac, ptr);
29491b248f14SClaudio Fontana         if (xstate_bv & XSTATE_SSE_MASK) {
2950d5dc3a92SRichard Henderson             do_xrstor_sse(ac, ptr);
29511b248f14SClaudio Fontana         } else {
295289254431SPaolo Bonzini             do_clear_sse(env);
295389254431SPaolo Bonzini         }
295489254431SPaolo Bonzini     }
295589254431SPaolo Bonzini     if (rfbm & XSTATE_YMM_MASK) {
295689254431SPaolo Bonzini         if (xstate_bv & XSTATE_YMM_MASK) {
2957d5dc3a92SRichard Henderson             do_xrstor_ymmh(ac, ptr + XO(avx_state));
295889254431SPaolo Bonzini         } else {
295989254431SPaolo Bonzini             do_clear_ymmh(env);
29601b248f14SClaudio Fontana         }
29611b248f14SClaudio Fontana     }
29621b248f14SClaudio Fontana     if (rfbm & XSTATE_BNDREGS_MASK) {
29631b248f14SClaudio Fontana         if (xstate_bv & XSTATE_BNDREGS_MASK) {
2964d5dc3a92SRichard Henderson             do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
29651b248f14SClaudio Fontana             env->hflags |= HF_MPX_IU_MASK;
29661b248f14SClaudio Fontana         } else {
29671b248f14SClaudio Fontana             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
29681b248f14SClaudio Fontana             env->hflags &= ~HF_MPX_IU_MASK;
29691b248f14SClaudio Fontana         }
29701b248f14SClaudio Fontana     }
29711b248f14SClaudio Fontana     if (rfbm & XSTATE_BNDCSR_MASK) {
29721b248f14SClaudio Fontana         if (xstate_bv & XSTATE_BNDCSR_MASK) {
2973d5dc3a92SRichard Henderson             do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
29741b248f14SClaudio Fontana         } else {
29751b248f14SClaudio Fontana             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
29761b248f14SClaudio Fontana         }
29771b248f14SClaudio Fontana         cpu_sync_bndcs_hflags(env);
29781b248f14SClaudio Fontana     }
29791b248f14SClaudio Fontana     if (rfbm & XSTATE_PKRU_MASK) {
29801b248f14SClaudio Fontana         uint64_t old_pkru = env->pkru;
29811b248f14SClaudio Fontana         if (xstate_bv & XSTATE_PKRU_MASK) {
2982d5dc3a92SRichard Henderson             do_xrstor_pkru(ac, ptr + XO(pkru_state));
29831b248f14SClaudio Fontana         } else {
29841b248f14SClaudio Fontana             env->pkru = 0;
29851b248f14SClaudio Fontana         }
29861b248f14SClaudio Fontana         if (env->pkru != old_pkru) {
29871b248f14SClaudio Fontana             CPUState *cs = env_cpu(env);
29881b248f14SClaudio Fontana             tlb_flush(cs);
29891b248f14SClaudio Fontana         }
29901b248f14SClaudio Fontana     }
29911b248f14SClaudio Fontana }
29921b248f14SClaudio Fontana 
29931b248f14SClaudio Fontana #undef XO
29941b248f14SClaudio Fontana 
29955d245678SPaolo Bonzini void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
29965d245678SPaolo Bonzini {
2997a8f68831SRichard Henderson     uintptr_t ra = GETPC();
2998d5dc3a92SRichard Henderson     X86Access ac;
2999d5dc3a92SRichard Henderson     uint64_t xstate_bv;
3000d5dc3a92SRichard Henderson     unsigned size, size_ext;
3001a8f68831SRichard Henderson 
3002a8f68831SRichard Henderson     do_xsave_chk(env, ptr, ra);
3003d5dc3a92SRichard Henderson 
3004d5dc3a92SRichard Henderson     /* Begin with just the minimum size to validate the header. */
3005d5dc3a92SRichard Henderson     size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
3006d5dc3a92SRichard Henderson     access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
3007d5dc3a92SRichard Henderson     if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
3008d5dc3a92SRichard Henderson         raise_exception_ra(env, EXCP0D_GPF, ra);
3009d5dc3a92SRichard Henderson     }
3010d5dc3a92SRichard Henderson 
3011d5dc3a92SRichard Henderson     rfbm &= env->xcr0;
3012d5dc3a92SRichard Henderson     size_ext = xsave_area_size(rfbm & xstate_bv, false);
3013d5dc3a92SRichard Henderson     if (size < size_ext) {
3014d5dc3a92SRichard Henderson         /* TODO: See if existing page probe has covered extra size. */
3015d5dc3a92SRichard Henderson         access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
3016d5dc3a92SRichard Henderson     }
3017d5dc3a92SRichard Henderson 
3018d5dc3a92SRichard Henderson     do_xrstor(&ac, ptr, rfbm, xstate_bv);
30195d245678SPaolo Bonzini }
30205d245678SPaolo Bonzini 
30215d245678SPaolo Bonzini #if defined(CONFIG_USER_ONLY)
302276d8d0f8SRichard Henderson void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
30235d245678SPaolo Bonzini {
302476d8d0f8SRichard Henderson     X86Access ac = {
302576d8d0f8SRichard Henderson         .haddr1 = host,
302676d8d0f8SRichard Henderson         .size = 4 * 7 + 8 * 10,
302776d8d0f8SRichard Henderson         .env = env,
302876d8d0f8SRichard Henderson     };
302994f60f8fSRichard Henderson 
303076d8d0f8SRichard Henderson     assert(ac.size <= len);
303176d8d0f8SRichard Henderson     do_fsave(&ac, 0, true);
30325d245678SPaolo Bonzini }
30335d245678SPaolo Bonzini 
303476d8d0f8SRichard Henderson void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
30355d245678SPaolo Bonzini {
303676d8d0f8SRichard Henderson     X86Access ac = {
303776d8d0f8SRichard Henderson         .haddr1 = host,
303876d8d0f8SRichard Henderson         .size = 4 * 7 + 8 * 10,
303976d8d0f8SRichard Henderson         .env = env,
304076d8d0f8SRichard Henderson     };
304194f60f8fSRichard Henderson 
304276d8d0f8SRichard Henderson     assert(ac.size <= len);
304376d8d0f8SRichard Henderson     do_frstor(&ac, 0, true);
30445d245678SPaolo Bonzini }
30455d245678SPaolo Bonzini 
30469c2fb9e1SRichard Henderson void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
30475d245678SPaolo Bonzini {
30489c2fb9e1SRichard Henderson     X86Access ac = {
30499c2fb9e1SRichard Henderson         .haddr1 = host,
30509c2fb9e1SRichard Henderson         .size = sizeof(X86LegacyXSaveArea),
30519c2fb9e1SRichard Henderson         .env = env,
30529c2fb9e1SRichard Henderson     };
30536d030aabSRichard Henderson 
30549c2fb9e1SRichard Henderson     assert(ac.size <= len);
30559c2fb9e1SRichard Henderson     do_fxsave(&ac, 0);
30565d245678SPaolo Bonzini }
30575d245678SPaolo Bonzini 
30589c2fb9e1SRichard Henderson void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
30595d245678SPaolo Bonzini {
30609c2fb9e1SRichard Henderson     X86Access ac = {
30619c2fb9e1SRichard Henderson         .haddr1 = host,
30629c2fb9e1SRichard Henderson         .size = sizeof(X86LegacyXSaveArea),
30639c2fb9e1SRichard Henderson         .env = env,
30649c2fb9e1SRichard Henderson     };
30656d030aabSRichard Henderson 
30669c2fb9e1SRichard Henderson     assert(ac.size <= len);
30679c2fb9e1SRichard Henderson     do_fxrstor(&ac, 0);
30685d245678SPaolo Bonzini }
30695d245678SPaolo Bonzini 
3070701890bdSRichard Henderson void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
30715d245678SPaolo Bonzini {
3072701890bdSRichard Henderson     X86Access ac = {
3073701890bdSRichard Henderson         .haddr1 = host,
3074701890bdSRichard Henderson         .env = env,
3075701890bdSRichard Henderson     };
3076c6e6d150SRichard Henderson 
3077c6e6d150SRichard Henderson     /*
3078c6e6d150SRichard Henderson      * Since this is only called from user-level signal handling,
3079c6e6d150SRichard Henderson      * we should have done the job correctly there.
3080c6e6d150SRichard Henderson      */
3081c6e6d150SRichard Henderson     assert((rfbm & ~env->xcr0) == 0);
3082701890bdSRichard Henderson     ac.size = xsave_area_size(rfbm, false);
3083701890bdSRichard Henderson     assert(ac.size <= len);
3084701890bdSRichard Henderson     do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
30855d245678SPaolo Bonzini }
30865d245678SPaolo Bonzini 
3087701890bdSRichard Henderson bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
30885d245678SPaolo Bonzini {
3089701890bdSRichard Henderson     X86Access ac = {
3090701890bdSRichard Henderson         .haddr1 = host,
3091701890bdSRichard Henderson         .env = env,
3092701890bdSRichard Henderson     };
3093d5dc3a92SRichard Henderson     uint64_t xstate_bv;
3094d5dc3a92SRichard Henderson 
3095d5dc3a92SRichard Henderson     /*
3096d5dc3a92SRichard Henderson      * Since this is only called from user-level signal handling,
3097d5dc3a92SRichard Henderson      * we should have done the job correctly there.
3098d5dc3a92SRichard Henderson      */
3099d5dc3a92SRichard Henderson     assert((rfbm & ~env->xcr0) == 0);
3100701890bdSRichard Henderson     ac.size = xsave_area_size(rfbm, false);
3101701890bdSRichard Henderson     assert(ac.size <= len);
3102d5dc3a92SRichard Henderson 
3103701890bdSRichard Henderson     if (!valid_xrstor_header(&ac, &xstate_bv, 0)) {
3104701890bdSRichard Henderson         return false;
3105d5dc3a92SRichard Henderson     }
3106701890bdSRichard Henderson     do_xrstor(&ac, 0, rfbm, xstate_bv);
3107701890bdSRichard Henderson     return true;
31085d245678SPaolo Bonzini }
31095d245678SPaolo Bonzini #endif
31105d245678SPaolo Bonzini 
31111b248f14SClaudio Fontana uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
31121b248f14SClaudio Fontana {
31131b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
31141b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
31151b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
31161b248f14SClaudio Fontana     }
31171b248f14SClaudio Fontana 
31181b248f14SClaudio Fontana     switch (ecx) {
31191b248f14SClaudio Fontana     case 0:
31201b248f14SClaudio Fontana         return env->xcr0;
31211b248f14SClaudio Fontana     case 1:
31221b248f14SClaudio Fontana         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
31231b248f14SClaudio Fontana             return env->xcr0 & get_xinuse(env);
31241b248f14SClaudio Fontana         }
31251b248f14SClaudio Fontana         break;
31261b248f14SClaudio Fontana     }
31271b248f14SClaudio Fontana     raise_exception_ra(env, EXCP0D_GPF, GETPC());
31281b248f14SClaudio Fontana }
31291b248f14SClaudio Fontana 
31301b248f14SClaudio Fontana void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
31311b248f14SClaudio Fontana {
31321b248f14SClaudio Fontana     uint32_t dummy, ena_lo, ena_hi;
31331b248f14SClaudio Fontana     uint64_t ena;
31341b248f14SClaudio Fontana 
31351b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
31361b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
31371b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
31381b248f14SClaudio Fontana     }
31391b248f14SClaudio Fontana 
31401b248f14SClaudio Fontana     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
31411b248f14SClaudio Fontana     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
31421b248f14SClaudio Fontana         goto do_gpf;
31431b248f14SClaudio Fontana     }
31441b248f14SClaudio Fontana 
3145*7604bbc2SPaolo Bonzini     /* SSE can be disabled, but only if AVX is disabled too.  */
3146*7604bbc2SPaolo Bonzini     if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
3147*7604bbc2SPaolo Bonzini         goto do_gpf;
3148*7604bbc2SPaolo Bonzini     }
3149*7604bbc2SPaolo Bonzini 
31501b248f14SClaudio Fontana     /* Disallow enabling unimplemented features.  */
31511b248f14SClaudio Fontana     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
31521b248f14SClaudio Fontana     ena = ((uint64_t)ena_hi << 32) | ena_lo;
31531b248f14SClaudio Fontana     if (mask & ~ena) {
31541b248f14SClaudio Fontana         goto do_gpf;
31551b248f14SClaudio Fontana     }
31561b248f14SClaudio Fontana 
31571b248f14SClaudio Fontana     /* Disallow enabling only half of MPX.  */
31581b248f14SClaudio Fontana     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
31591b248f14SClaudio Fontana         & XSTATE_BNDCSR_MASK) {
31601b248f14SClaudio Fontana         goto do_gpf;
31611b248f14SClaudio Fontana     }
31621b248f14SClaudio Fontana 
31631b248f14SClaudio Fontana     env->xcr0 = mask;
31641b248f14SClaudio Fontana     cpu_sync_bndcs_hflags(env);
3165608db8dbSPaul Brook     cpu_sync_avx_hflag(env);
31661b248f14SClaudio Fontana     return;
31671b248f14SClaudio Fontana 
31681b248f14SClaudio Fontana  do_gpf:
31691b248f14SClaudio Fontana     raise_exception_ra(env, EXCP0D_GPF, GETPC());
31701b248f14SClaudio Fontana }
31711b248f14SClaudio Fontana 
31721b248f14SClaudio Fontana /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
31741b248f14SClaudio Fontana 
/* MXCSR control fields decoded by update_mxcsr_status(). */
#define SSE_DAZ             (1 << 6)    /* 0x0040: denormals are zero */
#define SSE_RC_SHIFT        13          /* rounding-mode field position */
#define SSE_RC_MASK         (3 << SSE_RC_SHIFT)
#define SSE_FZ              (1 << 15)   /* 0x8000: flush to zero */
31791b248f14SClaudio Fontana 
31801b248f14SClaudio Fontana void update_mxcsr_status(CPUX86State *env)
31811b248f14SClaudio Fontana {
31821b248f14SClaudio Fontana     uint32_t mxcsr = env->mxcsr;
31831b248f14SClaudio Fontana     int rnd_type;
31841b248f14SClaudio Fontana 
31851b248f14SClaudio Fontana     /* set rounding mode */
3186314d3effSPaolo Bonzini     rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
3187314d3effSPaolo Bonzini     set_x86_rounding_mode(rnd_type, &env->sse_status);
31881b248f14SClaudio Fontana 
31891b248f14SClaudio Fontana     /* Set exception flags.  */
31901b248f14SClaudio Fontana     set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
31911b248f14SClaudio Fontana                               (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
31921b248f14SClaudio Fontana                               (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
31931b248f14SClaudio Fontana                               (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
31941b248f14SClaudio Fontana                               (mxcsr & FPUS_PE ? float_flag_inexact : 0),
31951b248f14SClaudio Fontana                               &env->sse_status);
31961b248f14SClaudio Fontana 
31971b248f14SClaudio Fontana     /* set denormals are zero */
31981b248f14SClaudio Fontana     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
31991b248f14SClaudio Fontana 
32001b248f14SClaudio Fontana     /* set flush to zero */
32011b248f14SClaudio Fontana     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
32021b248f14SClaudio Fontana }
32031b248f14SClaudio Fontana 
32041b248f14SClaudio Fontana void update_mxcsr_from_sse_status(CPUX86State *env)
32051b248f14SClaudio Fontana {
32061b248f14SClaudio Fontana     uint8_t flags = get_float_exception_flags(&env->sse_status);
32071b248f14SClaudio Fontana     /*
32081b248f14SClaudio Fontana      * The MXCSR denormal flag has opposite semantics to
32091b248f14SClaudio Fontana      * float_flag_input_denormal (the softfloat code sets that flag
32101b248f14SClaudio Fontana      * only when flushing input denormals to zero, but SSE sets it
32111b248f14SClaudio Fontana      * only when not flushing them to zero), so is not converted
32121b248f14SClaudio Fontana      * here.
32131b248f14SClaudio Fontana      */
32141b248f14SClaudio Fontana     env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
32151b248f14SClaudio Fontana                    (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
32161b248f14SClaudio Fontana                    (flags & float_flag_overflow ? FPUS_OE : 0) |
32171b248f14SClaudio Fontana                    (flags & float_flag_underflow ? FPUS_UE : 0) |
32181b248f14SClaudio Fontana                    (flags & float_flag_inexact ? FPUS_PE : 0) |
32191b248f14SClaudio Fontana                    (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
32201b248f14SClaudio Fontana                     0));
32211b248f14SClaudio Fontana }
32221b248f14SClaudio Fontana 
32231b248f14SClaudio Fontana void helper_update_mxcsr(CPUX86State *env)
32241b248f14SClaudio Fontana {
32251b248f14SClaudio Fontana     update_mxcsr_from_sse_status(env);
32261b248f14SClaudio Fontana }
32271b248f14SClaudio Fontana 
32281b248f14SClaudio Fontana void helper_ldmxcsr(CPUX86State *env, uint32_t val)
32291b248f14SClaudio Fontana {
32301b248f14SClaudio Fontana     cpu_set_mxcsr(env, val);
32311b248f14SClaudio Fontana }
32321b248f14SClaudio Fontana 
32331b248f14SClaudio Fontana void helper_enter_mmx(CPUX86State *env)
32341b248f14SClaudio Fontana {
32351b248f14SClaudio Fontana     env->fpstt = 0;
32361b248f14SClaudio Fontana     *(uint32_t *)(env->fptags) = 0;
32371b248f14SClaudio Fontana     *(uint32_t *)(env->fptags + 4) = 0;
32381b248f14SClaudio Fontana }
32391b248f14SClaudio Fontana 
32401b248f14SClaudio Fontana void helper_emms(CPUX86State *env)
32411b248f14SClaudio Fontana {
32421b248f14SClaudio Fontana     /* set to empty state */
32431b248f14SClaudio Fontana     *(uint32_t *)(env->fptags) = 0x01010101;
32441b248f14SClaudio Fontana     *(uint32_t *)(env->fptags + 4) = 0x01010101;
32451b248f14SClaudio Fontana }
32461b248f14SClaudio Fontana 
32471b248f14SClaudio Fontana #define SHIFT 0
32481b248f14SClaudio Fontana #include "ops_sse.h"
32491b248f14SClaudio Fontana 
32501b248f14SClaudio Fontana #define SHIFT 1
32511b248f14SClaudio Fontana #include "ops_sse.h"
3252b98f886cSPaolo Bonzini 
3253b98f886cSPaolo Bonzini #define SHIFT 2
3254b98f886cSPaolo Bonzini #include "ops_sse.h"
3255