11b248f14SClaudio Fontana /* 21b248f14SClaudio Fontana * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 31b248f14SClaudio Fontana * 41b248f14SClaudio Fontana * Copyright (c) 2003 Fabrice Bellard 51b248f14SClaudio Fontana * 61b248f14SClaudio Fontana * This library is free software; you can redistribute it and/or 71b248f14SClaudio Fontana * modify it under the terms of the GNU Lesser General Public 81b248f14SClaudio Fontana * License as published by the Free Software Foundation; either 91b248f14SClaudio Fontana * version 2.1 of the License, or (at your option) any later version. 101b248f14SClaudio Fontana * 111b248f14SClaudio Fontana * This library is distributed in the hope that it will be useful, 121b248f14SClaudio Fontana * but WITHOUT ANY WARRANTY; without even the implied warranty of 131b248f14SClaudio Fontana * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 141b248f14SClaudio Fontana * Lesser General Public License for more details. 151b248f14SClaudio Fontana * 161b248f14SClaudio Fontana * You should have received a copy of the GNU Lesser General Public 171b248f14SClaudio Fontana * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
181b248f14SClaudio Fontana */ 191b248f14SClaudio Fontana 201b248f14SClaudio Fontana #include "qemu/osdep.h" 211b248f14SClaudio Fontana #include <math.h> 221b248f14SClaudio Fontana #include "cpu.h" 2348e5c98aSDavid Edmondson #include "tcg-cpu.h" 247e17a524SPhilippe Mathieu-Daudé #include "exec/exec-all.h" 2509b07f28SPhilippe Mathieu-Daudé #include "exec/cpu_ldst.h" 261b248f14SClaudio Fontana #include "exec/helper-proto.h" 271b248f14SClaudio Fontana #include "fpu/softfloat.h" 281b248f14SClaudio Fontana #include "fpu/softfloat-macros.h" 29ed69e831SClaudio Fontana #include "helper-tcg.h" 301b248f14SClaudio Fontana 31ed69e831SClaudio Fontana /* float macros */ 32ed69e831SClaudio Fontana #define FT0 (env->ft0) 33ed69e831SClaudio Fontana #define ST0 (env->fpregs[env->fpstt].d) 34ed69e831SClaudio Fontana #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 35ed69e831SClaudio Fontana #define ST1 ST(1) 36ed69e831SClaudio Fontana 37314d3effSPaolo Bonzini #define FPU_RC_SHIFT 10 38314d3effSPaolo Bonzini #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 391b248f14SClaudio Fontana #define FPU_RC_NEAR 0x000 401b248f14SClaudio Fontana #define FPU_RC_DOWN 0x400 411b248f14SClaudio Fontana #define FPU_RC_UP 0x800 421b248f14SClaudio Fontana #define FPU_RC_CHOP 0xc00 431b248f14SClaudio Fontana 441b248f14SClaudio Fontana #define MAXTAN 9223372036854775808.0 451b248f14SClaudio Fontana 461b248f14SClaudio Fontana /* the following deal with x86 long double-precision numbers */ 471b248f14SClaudio Fontana #define MAXEXPD 0x7fff 481b248f14SClaudio Fontana #define EXPBIAS 16383 491b248f14SClaudio Fontana #define EXPD(fp) (fp.l.upper & 0x7fff) 501b248f14SClaudio Fontana #define SIGND(fp) ((fp.l.upper) & 0x8000) 511b248f14SClaudio Fontana #define MANTD(fp) (fp.l.lower) 521b248f14SClaudio Fontana #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 531b248f14SClaudio Fontana 541b248f14SClaudio Fontana #define FPUS_IE (1 << 0) 551b248f14SClaudio Fontana #define FPUS_DE (1 << 1) 
561b248f14SClaudio Fontana #define FPUS_ZE (1 << 2) 571b248f14SClaudio Fontana #define FPUS_OE (1 << 3) 581b248f14SClaudio Fontana #define FPUS_UE (1 << 4) 591b248f14SClaudio Fontana #define FPUS_PE (1 << 5) 601b248f14SClaudio Fontana #define FPUS_SF (1 << 6) 611b248f14SClaudio Fontana #define FPUS_SE (1 << 7) 621b248f14SClaudio Fontana #define FPUS_B (1 << 15) 631b248f14SClaudio Fontana 641b248f14SClaudio Fontana #define FPUC_EM 0x3f 651b248f14SClaudio Fontana 661b248f14SClaudio Fontana #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 671b248f14SClaudio Fontana #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 681b248f14SClaudio Fontana #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 691b248f14SClaudio Fontana #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 701b248f14SClaudio Fontana #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 711b248f14SClaudio Fontana #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 721b248f14SClaudio Fontana #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 731b248f14SClaudio Fontana #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 741b248f14SClaudio Fontana 751b248f14SClaudio Fontana static inline void fpush(CPUX86State *env) 761b248f14SClaudio Fontana { 771b248f14SClaudio Fontana env->fpstt = (env->fpstt - 1) & 7; 781b248f14SClaudio Fontana env->fptags[env->fpstt] = 0; /* validate stack entry */ 791b248f14SClaudio Fontana } 801b248f14SClaudio Fontana 811b248f14SClaudio Fontana static inline void fpop(CPUX86State *env) 821b248f14SClaudio Fontana { 831b248f14SClaudio Fontana env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 841b248f14SClaudio Fontana env->fpstt = (env->fpstt + 1) & 7; 851b248f14SClaudio Fontana } 861b248f14SClaudio Fontana 87e3a69234SRichard Henderson static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 881b248f14SClaudio Fontana { 891b248f14SClaudio 
Fontana CPU_LDoubleU temp; 901b248f14SClaudio Fontana 911b248f14SClaudio Fontana temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 921b248f14SClaudio Fontana temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 931b248f14SClaudio Fontana return temp.d; 941b248f14SClaudio Fontana } 951b248f14SClaudio Fontana 96e3a69234SRichard Henderson static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 971b248f14SClaudio Fontana uintptr_t retaddr) 981b248f14SClaudio Fontana { 991b248f14SClaudio Fontana CPU_LDoubleU temp; 1001b248f14SClaudio Fontana 1011b248f14SClaudio Fontana temp.d = f; 1021b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 1031b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 1041b248f14SClaudio Fontana } 1051b248f14SClaudio Fontana 1061b248f14SClaudio Fontana /* x87 FPU helpers */ 1071b248f14SClaudio Fontana 1081b248f14SClaudio Fontana static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 1091b248f14SClaudio Fontana { 1101b248f14SClaudio Fontana union { 1111b248f14SClaudio Fontana float64 f64; 1121b248f14SClaudio Fontana double d; 1131b248f14SClaudio Fontana } u; 1141b248f14SClaudio Fontana 1151b248f14SClaudio Fontana u.f64 = floatx80_to_float64(a, &env->fp_status); 1161b248f14SClaudio Fontana return u.d; 1171b248f14SClaudio Fontana } 1181b248f14SClaudio Fontana 1191b248f14SClaudio Fontana static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 1201b248f14SClaudio Fontana { 1211b248f14SClaudio Fontana union { 1221b248f14SClaudio Fontana float64 f64; 1231b248f14SClaudio Fontana double d; 1241b248f14SClaudio Fontana } u; 1251b248f14SClaudio Fontana 1261b248f14SClaudio Fontana u.d = a; 1271b248f14SClaudio Fontana return float64_to_floatx80(u.f64, &env->fp_status); 1281b248f14SClaudio Fontana } 1291b248f14SClaudio Fontana 1301b248f14SClaudio Fontana static void fpu_set_exception(CPUX86State *env, int mask) 1311b248f14SClaudio Fontana { 1321b248f14SClaudio Fontana 
env->fpus |= mask; 1331b248f14SClaudio Fontana if (env->fpus & (~env->fpuc & FPUC_EM)) { 1341b248f14SClaudio Fontana env->fpus |= FPUS_SE | FPUS_B; 1351b248f14SClaudio Fontana } 1361b248f14SClaudio Fontana } 1371b248f14SClaudio Fontana 1381b248f14SClaudio Fontana static inline uint8_t save_exception_flags(CPUX86State *env) 1391b248f14SClaudio Fontana { 1401b248f14SClaudio Fontana uint8_t old_flags = get_float_exception_flags(&env->fp_status); 1411b248f14SClaudio Fontana set_float_exception_flags(0, &env->fp_status); 1421b248f14SClaudio Fontana return old_flags; 1431b248f14SClaudio Fontana } 1441b248f14SClaudio Fontana 1451b248f14SClaudio Fontana static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 1461b248f14SClaudio Fontana { 1471b248f14SClaudio Fontana uint8_t new_flags = get_float_exception_flags(&env->fp_status); 1481b248f14SClaudio Fontana float_raise(old_flags, &env->fp_status); 1491b248f14SClaudio Fontana fpu_set_exception(env, 1501b248f14SClaudio Fontana ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 1511b248f14SClaudio Fontana (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 1521b248f14SClaudio Fontana (new_flags & float_flag_overflow ? FPUS_OE : 0) | 1531b248f14SClaudio Fontana (new_flags & float_flag_underflow ? FPUS_UE : 0) | 1541b248f14SClaudio Fontana (new_flags & float_flag_inexact ? FPUS_PE : 0) | 1551b248f14SClaudio Fontana (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); 1561b248f14SClaudio Fontana } 1571b248f14SClaudio Fontana 1581b248f14SClaudio Fontana static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 1591b248f14SClaudio Fontana { 1601b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1611b248f14SClaudio Fontana floatx80 ret = floatx80_div(a, b, &env->fp_status); 1621b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1631b248f14SClaudio Fontana return ret; 1641b248f14SClaudio Fontana } 1651b248f14SClaudio Fontana 1661b248f14SClaudio Fontana static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 1671b248f14SClaudio Fontana { 1681b248f14SClaudio Fontana if (env->cr[0] & CR0_NE_MASK) { 1691b248f14SClaudio Fontana raise_exception_ra(env, EXCP10_COPR, retaddr); 1701b248f14SClaudio Fontana } 1711b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY) 17283a3d9c7SClaudio Fontana else { 17383a3d9c7SClaudio Fontana fpu_check_raise_ferr_irq(env); 1741b248f14SClaudio Fontana } 1751b248f14SClaudio Fontana #endif 1761b248f14SClaudio Fontana } 1771b248f14SClaudio Fontana 1781b248f14SClaudio Fontana void helper_flds_FT0(CPUX86State *env, uint32_t val) 1791b248f14SClaudio Fontana { 1801b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1811b248f14SClaudio Fontana union { 1821b248f14SClaudio Fontana float32 f; 1831b248f14SClaudio Fontana uint32_t i; 1841b248f14SClaudio Fontana } u; 1851b248f14SClaudio Fontana 1861b248f14SClaudio Fontana u.i = val; 1871b248f14SClaudio Fontana FT0 = float32_to_floatx80(u.f, &env->fp_status); 1881b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1891b248f14SClaudio Fontana } 1901b248f14SClaudio Fontana 1911b248f14SClaudio Fontana void helper_fldl_FT0(CPUX86State *env, uint64_t val) 1921b248f14SClaudio Fontana { 1931b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1941b248f14SClaudio Fontana union { 1951b248f14SClaudio Fontana float64 f; 1961b248f14SClaudio Fontana uint64_t 
i; 1971b248f14SClaudio Fontana } u; 1981b248f14SClaudio Fontana 1991b248f14SClaudio Fontana u.i = val; 2001b248f14SClaudio Fontana FT0 = float64_to_floatx80(u.f, &env->fp_status); 2011b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2021b248f14SClaudio Fontana } 2031b248f14SClaudio Fontana 2041b248f14SClaudio Fontana void helper_fildl_FT0(CPUX86State *env, int32_t val) 2051b248f14SClaudio Fontana { 2061b248f14SClaudio Fontana FT0 = int32_to_floatx80(val, &env->fp_status); 2071b248f14SClaudio Fontana } 2081b248f14SClaudio Fontana 2091b248f14SClaudio Fontana void helper_flds_ST0(CPUX86State *env, uint32_t val) 2101b248f14SClaudio Fontana { 2111b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2121b248f14SClaudio Fontana int new_fpstt; 2131b248f14SClaudio Fontana union { 2141b248f14SClaudio Fontana float32 f; 2151b248f14SClaudio Fontana uint32_t i; 2161b248f14SClaudio Fontana } u; 2171b248f14SClaudio Fontana 2181b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 2191b248f14SClaudio Fontana u.i = val; 2201b248f14SClaudio Fontana env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 2211b248f14SClaudio Fontana env->fpstt = new_fpstt; 2221b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 2231b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2241b248f14SClaudio Fontana } 2251b248f14SClaudio Fontana 2261b248f14SClaudio Fontana void helper_fldl_ST0(CPUX86State *env, uint64_t val) 2271b248f14SClaudio Fontana { 2281b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2291b248f14SClaudio Fontana int new_fpstt; 2301b248f14SClaudio Fontana union { 2311b248f14SClaudio Fontana float64 f; 2321b248f14SClaudio Fontana uint64_t i; 2331b248f14SClaudio Fontana } u; 2341b248f14SClaudio Fontana 2351b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 2361b248f14SClaudio Fontana u.i = val; 2371b248f14SClaudio Fontana env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, 
&env->fp_status); 2381b248f14SClaudio Fontana env->fpstt = new_fpstt; 2391b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 2401b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2411b248f14SClaudio Fontana } 2421b248f14SClaudio Fontana 243276de33fSAlex Bennée static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 244276de33fSAlex Bennée { 245276de33fSAlex Bennée FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 246276de33fSAlex Bennée set_floatx80_rounding_precision(floatx80_precision_x, st); 247276de33fSAlex Bennée return old; 248276de33fSAlex Bennée } 249276de33fSAlex Bennée 2501b248f14SClaudio Fontana void helper_fildl_ST0(CPUX86State *env, int32_t val) 2511b248f14SClaudio Fontana { 2521b248f14SClaudio Fontana int new_fpstt; 253276de33fSAlex Bennée FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 2541b248f14SClaudio Fontana 2551b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 2561b248f14SClaudio Fontana env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 2571b248f14SClaudio Fontana env->fpstt = new_fpstt; 2581b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 259276de33fSAlex Bennée 260276de33fSAlex Bennée set_floatx80_rounding_precision(old, &env->fp_status); 2611b248f14SClaudio Fontana } 2621b248f14SClaudio Fontana 2631b248f14SClaudio Fontana void helper_fildll_ST0(CPUX86State *env, int64_t val) 2641b248f14SClaudio Fontana { 2651b248f14SClaudio Fontana int new_fpstt; 266276de33fSAlex Bennée FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 2671b248f14SClaudio Fontana 2681b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 2691b248f14SClaudio Fontana env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 2701b248f14SClaudio Fontana env->fpstt = new_fpstt; 2711b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 272276de33fSAlex Bennée 273276de33fSAlex Bennée 
set_floatx80_rounding_precision(old, &env->fp_status); 2741b248f14SClaudio Fontana } 2751b248f14SClaudio Fontana 2761b248f14SClaudio Fontana uint32_t helper_fsts_ST0(CPUX86State *env) 2771b248f14SClaudio Fontana { 2781b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2791b248f14SClaudio Fontana union { 2801b248f14SClaudio Fontana float32 f; 2811b248f14SClaudio Fontana uint32_t i; 2821b248f14SClaudio Fontana } u; 2831b248f14SClaudio Fontana 2841b248f14SClaudio Fontana u.f = floatx80_to_float32(ST0, &env->fp_status); 2851b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2861b248f14SClaudio Fontana return u.i; 2871b248f14SClaudio Fontana } 2881b248f14SClaudio Fontana 2891b248f14SClaudio Fontana uint64_t helper_fstl_ST0(CPUX86State *env) 2901b248f14SClaudio Fontana { 2911b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2921b248f14SClaudio Fontana union { 2931b248f14SClaudio Fontana float64 f; 2941b248f14SClaudio Fontana uint64_t i; 2951b248f14SClaudio Fontana } u; 2961b248f14SClaudio Fontana 2971b248f14SClaudio Fontana u.f = floatx80_to_float64(ST0, &env->fp_status); 2981b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2991b248f14SClaudio Fontana return u.i; 3001b248f14SClaudio Fontana } 3011b248f14SClaudio Fontana 3021b248f14SClaudio Fontana int32_t helper_fist_ST0(CPUX86State *env) 3031b248f14SClaudio Fontana { 3041b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3051b248f14SClaudio Fontana int32_t val; 3061b248f14SClaudio Fontana 3071b248f14SClaudio Fontana val = floatx80_to_int32(ST0, &env->fp_status); 3081b248f14SClaudio Fontana if (val != (int16_t)val) { 3091b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 3101b248f14SClaudio Fontana val = -32768; 3111b248f14SClaudio Fontana } 3121b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3131b248f14SClaudio Fontana return val; 3141b248f14SClaudio Fontana } 3151b248f14SClaudio 
Fontana 3161b248f14SClaudio Fontana int32_t helper_fistl_ST0(CPUX86State *env) 3171b248f14SClaudio Fontana { 3181b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3191b248f14SClaudio Fontana int32_t val; 3201b248f14SClaudio Fontana 3211b248f14SClaudio Fontana val = floatx80_to_int32(ST0, &env->fp_status); 3221b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 3231b248f14SClaudio Fontana val = 0x80000000; 3241b248f14SClaudio Fontana } 3251b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3261b248f14SClaudio Fontana return val; 3271b248f14SClaudio Fontana } 3281b248f14SClaudio Fontana 3291b248f14SClaudio Fontana int64_t helper_fistll_ST0(CPUX86State *env) 3301b248f14SClaudio Fontana { 3311b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3321b248f14SClaudio Fontana int64_t val; 3331b248f14SClaudio Fontana 3341b248f14SClaudio Fontana val = floatx80_to_int64(ST0, &env->fp_status); 3351b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 3361b248f14SClaudio Fontana val = 0x8000000000000000ULL; 3371b248f14SClaudio Fontana } 3381b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3391b248f14SClaudio Fontana return val; 3401b248f14SClaudio Fontana } 3411b248f14SClaudio Fontana 3421b248f14SClaudio Fontana int32_t helper_fistt_ST0(CPUX86State *env) 3431b248f14SClaudio Fontana { 3441b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3451b248f14SClaudio Fontana int32_t val; 3461b248f14SClaudio Fontana 3471b248f14SClaudio Fontana val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 3481b248f14SClaudio Fontana if (val != (int16_t)val) { 3491b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 3501b248f14SClaudio Fontana val = -32768; 3511b248f14SClaudio Fontana } 3521b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3531b248f14SClaudio Fontana return val; 
3541b248f14SClaudio Fontana } 3551b248f14SClaudio Fontana 3561b248f14SClaudio Fontana int32_t helper_fisttl_ST0(CPUX86State *env) 3571b248f14SClaudio Fontana { 3581b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3591b248f14SClaudio Fontana int32_t val; 3601b248f14SClaudio Fontana 3611b248f14SClaudio Fontana val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 3621b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 3631b248f14SClaudio Fontana val = 0x80000000; 3641b248f14SClaudio Fontana } 3651b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3661b248f14SClaudio Fontana return val; 3671b248f14SClaudio Fontana } 3681b248f14SClaudio Fontana 3691b248f14SClaudio Fontana int64_t helper_fisttll_ST0(CPUX86State *env) 3701b248f14SClaudio Fontana { 3711b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 3721b248f14SClaudio Fontana int64_t val; 3731b248f14SClaudio Fontana 3741b248f14SClaudio Fontana val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 3751b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 3761b248f14SClaudio Fontana val = 0x8000000000000000ULL; 3771b248f14SClaudio Fontana } 3781b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 3791b248f14SClaudio Fontana return val; 3801b248f14SClaudio Fontana } 3811b248f14SClaudio Fontana 3821b248f14SClaudio Fontana void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 3831b248f14SClaudio Fontana { 3841b248f14SClaudio Fontana int new_fpstt; 3851b248f14SClaudio Fontana 3861b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 387e3a69234SRichard Henderson env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 3881b248f14SClaudio Fontana env->fpstt = new_fpstt; 3891b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 3901b248f14SClaudio Fontana } 3911b248f14SClaudio Fontana 3921b248f14SClaudio Fontana void 
helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 3931b248f14SClaudio Fontana { 394e3a69234SRichard Henderson do_fstt(env, ST0, ptr, GETPC()); 3951b248f14SClaudio Fontana } 3961b248f14SClaudio Fontana 3971b248f14SClaudio Fontana void helper_fpush(CPUX86State *env) 3981b248f14SClaudio Fontana { 3991b248f14SClaudio Fontana fpush(env); 4001b248f14SClaudio Fontana } 4011b248f14SClaudio Fontana 4021b248f14SClaudio Fontana void helper_fpop(CPUX86State *env) 4031b248f14SClaudio Fontana { 4041b248f14SClaudio Fontana fpop(env); 4051b248f14SClaudio Fontana } 4061b248f14SClaudio Fontana 4071b248f14SClaudio Fontana void helper_fdecstp(CPUX86State *env) 4081b248f14SClaudio Fontana { 4091b248f14SClaudio Fontana env->fpstt = (env->fpstt - 1) & 7; 4101b248f14SClaudio Fontana env->fpus &= ~0x4700; 4111b248f14SClaudio Fontana } 4121b248f14SClaudio Fontana 4131b248f14SClaudio Fontana void helper_fincstp(CPUX86State *env) 4141b248f14SClaudio Fontana { 4151b248f14SClaudio Fontana env->fpstt = (env->fpstt + 1) & 7; 4161b248f14SClaudio Fontana env->fpus &= ~0x4700; 4171b248f14SClaudio Fontana } 4181b248f14SClaudio Fontana 4191b248f14SClaudio Fontana /* FPU move */ 4201b248f14SClaudio Fontana 4211b248f14SClaudio Fontana void helper_ffree_STN(CPUX86State *env, int st_index) 4221b248f14SClaudio Fontana { 4231b248f14SClaudio Fontana env->fptags[(env->fpstt + st_index) & 7] = 1; 4241b248f14SClaudio Fontana } 4251b248f14SClaudio Fontana 4261b248f14SClaudio Fontana void helper_fmov_ST0_FT0(CPUX86State *env) 4271b248f14SClaudio Fontana { 4281b248f14SClaudio Fontana ST0 = FT0; 4291b248f14SClaudio Fontana } 4301b248f14SClaudio Fontana 4311b248f14SClaudio Fontana void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 4321b248f14SClaudio Fontana { 4331b248f14SClaudio Fontana FT0 = ST(st_index); 4341b248f14SClaudio Fontana } 4351b248f14SClaudio Fontana 4361b248f14SClaudio Fontana void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 4371b248f14SClaudio Fontana { 4381b248f14SClaudio Fontana ST0 
= ST(st_index); 4391b248f14SClaudio Fontana } 4401b248f14SClaudio Fontana 4411b248f14SClaudio Fontana void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 4421b248f14SClaudio Fontana { 4431b248f14SClaudio Fontana ST(st_index) = ST0; 4441b248f14SClaudio Fontana } 4451b248f14SClaudio Fontana 4461b248f14SClaudio Fontana void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 4471b248f14SClaudio Fontana { 4481b248f14SClaudio Fontana floatx80 tmp; 4491b248f14SClaudio Fontana 4501b248f14SClaudio Fontana tmp = ST(st_index); 4511b248f14SClaudio Fontana ST(st_index) = ST0; 4521b248f14SClaudio Fontana ST0 = tmp; 4531b248f14SClaudio Fontana } 4541b248f14SClaudio Fontana 4551b248f14SClaudio Fontana /* FPU operations */ 4561b248f14SClaudio Fontana 4571b248f14SClaudio Fontana static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 4581b248f14SClaudio Fontana 4591b248f14SClaudio Fontana void helper_fcom_ST0_FT0(CPUX86State *env) 4601b248f14SClaudio Fontana { 4611b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 4621b248f14SClaudio Fontana FloatRelation ret; 4631b248f14SClaudio Fontana 4641b248f14SClaudio Fontana ret = floatx80_compare(ST0, FT0, &env->fp_status); 4651b248f14SClaudio Fontana env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 4661b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 4671b248f14SClaudio Fontana } 4681b248f14SClaudio Fontana 4691b248f14SClaudio Fontana void helper_fucom_ST0_FT0(CPUX86State *env) 4701b248f14SClaudio Fontana { 4711b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 4721b248f14SClaudio Fontana FloatRelation ret; 4731b248f14SClaudio Fontana 4741b248f14SClaudio Fontana ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 4751b248f14SClaudio Fontana env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 4761b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 4771b248f14SClaudio Fontana } 4781b248f14SClaudio Fontana 4791b248f14SClaudio Fontana static 
const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 4801b248f14SClaudio Fontana 4811b248f14SClaudio Fontana void helper_fcomi_ST0_FT0(CPUX86State *env) 4821b248f14SClaudio Fontana { 4831b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 4841b248f14SClaudio Fontana int eflags; 4851b248f14SClaudio Fontana FloatRelation ret; 4861b248f14SClaudio Fontana 4871b248f14SClaudio Fontana ret = floatx80_compare(ST0, FT0, &env->fp_status); 4882455e9cfSPaolo Bonzini eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 4892455e9cfSPaolo Bonzini CC_SRC = eflags | fcomi_ccval[ret + 1]; 490*abdcc5c8SPaolo Bonzini CC_OP = CC_OP_EFLAGS; 4911b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 4921b248f14SClaudio Fontana } 4931b248f14SClaudio Fontana 4941b248f14SClaudio Fontana void helper_fucomi_ST0_FT0(CPUX86State *env) 4951b248f14SClaudio Fontana { 4961b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 4971b248f14SClaudio Fontana int eflags; 4981b248f14SClaudio Fontana FloatRelation ret; 4991b248f14SClaudio Fontana 5001b248f14SClaudio Fontana ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 5012455e9cfSPaolo Bonzini eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 5022455e9cfSPaolo Bonzini CC_SRC = eflags | fcomi_ccval[ret + 1]; 503*abdcc5c8SPaolo Bonzini CC_OP = CC_OP_EFLAGS; 5041b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5051b248f14SClaudio Fontana } 5061b248f14SClaudio Fontana 5071b248f14SClaudio Fontana void helper_fadd_ST0_FT0(CPUX86State *env) 5081b248f14SClaudio Fontana { 5091b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5101b248f14SClaudio Fontana ST0 = floatx80_add(ST0, FT0, &env->fp_status); 5111b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5121b248f14SClaudio Fontana } 5131b248f14SClaudio Fontana 5141b248f14SClaudio Fontana void helper_fmul_ST0_FT0(CPUX86State *env) 5151b248f14SClaudio Fontana { 5161b248f14SClaudio Fontana 
uint8_t old_flags = save_exception_flags(env); 5171b248f14SClaudio Fontana ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 5181b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5191b248f14SClaudio Fontana } 5201b248f14SClaudio Fontana 5211b248f14SClaudio Fontana void helper_fsub_ST0_FT0(CPUX86State *env) 5221b248f14SClaudio Fontana { 5231b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5241b248f14SClaudio Fontana ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 5251b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5261b248f14SClaudio Fontana } 5271b248f14SClaudio Fontana 5281b248f14SClaudio Fontana void helper_fsubr_ST0_FT0(CPUX86State *env) 5291b248f14SClaudio Fontana { 5301b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5311b248f14SClaudio Fontana ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 5321b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5331b248f14SClaudio Fontana } 5341b248f14SClaudio Fontana 5351b248f14SClaudio Fontana void helper_fdiv_ST0_FT0(CPUX86State *env) 5361b248f14SClaudio Fontana { 5371b248f14SClaudio Fontana ST0 = helper_fdiv(env, ST0, FT0); 5381b248f14SClaudio Fontana } 5391b248f14SClaudio Fontana 5401b248f14SClaudio Fontana void helper_fdivr_ST0_FT0(CPUX86State *env) 5411b248f14SClaudio Fontana { 5421b248f14SClaudio Fontana ST0 = helper_fdiv(env, FT0, ST0); 5431b248f14SClaudio Fontana } 5441b248f14SClaudio Fontana 5451b248f14SClaudio Fontana /* fp operations between STN and ST0 */ 5461b248f14SClaudio Fontana 5471b248f14SClaudio Fontana void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 5481b248f14SClaudio Fontana { 5491b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5501b248f14SClaudio Fontana ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 5511b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5521b248f14SClaudio Fontana } 5531b248f14SClaudio Fontana 5541b248f14SClaudio Fontana void 
helper_fmul_STN_ST0(CPUX86State *env, int st_index) 5551b248f14SClaudio Fontana { 5561b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5571b248f14SClaudio Fontana ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 5581b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5591b248f14SClaudio Fontana } 5601b248f14SClaudio Fontana 5611b248f14SClaudio Fontana void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 5621b248f14SClaudio Fontana { 5631b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5641b248f14SClaudio Fontana ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 5651b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5661b248f14SClaudio Fontana } 5671b248f14SClaudio Fontana 5681b248f14SClaudio Fontana void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 5691b248f14SClaudio Fontana { 5701b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 5711b248f14SClaudio Fontana ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 5721b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 5731b248f14SClaudio Fontana } 5741b248f14SClaudio Fontana 5751b248f14SClaudio Fontana void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 5761b248f14SClaudio Fontana { 5771b248f14SClaudio Fontana floatx80 *p; 5781b248f14SClaudio Fontana 5791b248f14SClaudio Fontana p = &ST(st_index); 5801b248f14SClaudio Fontana *p = helper_fdiv(env, *p, ST0); 5811b248f14SClaudio Fontana } 5821b248f14SClaudio Fontana 5831b248f14SClaudio Fontana void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 5841b248f14SClaudio Fontana { 5851b248f14SClaudio Fontana floatx80 *p; 5861b248f14SClaudio Fontana 5871b248f14SClaudio Fontana p = &ST(st_index); 5881b248f14SClaudio Fontana *p = helper_fdiv(env, ST0, *p); 5891b248f14SClaudio Fontana } 5901b248f14SClaudio Fontana 5911b248f14SClaudio Fontana /* misc FPU operations */ 5921b248f14SClaudio Fontana void 
helper_fchs_ST0(CPUX86State *env) 5931b248f14SClaudio Fontana { 5941b248f14SClaudio Fontana ST0 = floatx80_chs(ST0); 5951b248f14SClaudio Fontana } 5961b248f14SClaudio Fontana 5971b248f14SClaudio Fontana void helper_fabs_ST0(CPUX86State *env) 5981b248f14SClaudio Fontana { 5991b248f14SClaudio Fontana ST0 = floatx80_abs(ST0); 6001b248f14SClaudio Fontana } 6011b248f14SClaudio Fontana 6021b248f14SClaudio Fontana void helper_fld1_ST0(CPUX86State *env) 6031b248f14SClaudio Fontana { 6041b248f14SClaudio Fontana ST0 = floatx80_one; 6051b248f14SClaudio Fontana } 6061b248f14SClaudio Fontana 6071b248f14SClaudio Fontana void helper_fldl2t_ST0(CPUX86State *env) 6081b248f14SClaudio Fontana { 6091b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 6101b248f14SClaudio Fontana case FPU_RC_UP: 6111b248f14SClaudio Fontana ST0 = floatx80_l2t_u; 6121b248f14SClaudio Fontana break; 6131b248f14SClaudio Fontana default: 6141b248f14SClaudio Fontana ST0 = floatx80_l2t; 6151b248f14SClaudio Fontana break; 6161b248f14SClaudio Fontana } 6171b248f14SClaudio Fontana } 6181b248f14SClaudio Fontana 6191b248f14SClaudio Fontana void helper_fldl2e_ST0(CPUX86State *env) 6201b248f14SClaudio Fontana { 6211b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 6221b248f14SClaudio Fontana case FPU_RC_DOWN: 6231b248f14SClaudio Fontana case FPU_RC_CHOP: 6241b248f14SClaudio Fontana ST0 = floatx80_l2e_d; 6251b248f14SClaudio Fontana break; 6261b248f14SClaudio Fontana default: 6271b248f14SClaudio Fontana ST0 = floatx80_l2e; 6281b248f14SClaudio Fontana break; 6291b248f14SClaudio Fontana } 6301b248f14SClaudio Fontana } 6311b248f14SClaudio Fontana 6321b248f14SClaudio Fontana void helper_fldpi_ST0(CPUX86State *env) 6331b248f14SClaudio Fontana { 6341b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 6351b248f14SClaudio Fontana case FPU_RC_DOWN: 6361b248f14SClaudio Fontana case FPU_RC_CHOP: 6371b248f14SClaudio Fontana ST0 = floatx80_pi_d; 6381b248f14SClaudio Fontana break; 6391b248f14SClaudio Fontana 
default: 6401b248f14SClaudio Fontana ST0 = floatx80_pi; 6411b248f14SClaudio Fontana break; 6421b248f14SClaudio Fontana } 6431b248f14SClaudio Fontana } 6441b248f14SClaudio Fontana 6451b248f14SClaudio Fontana void helper_fldlg2_ST0(CPUX86State *env) 6461b248f14SClaudio Fontana { 6471b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 6481b248f14SClaudio Fontana case FPU_RC_DOWN: 6491b248f14SClaudio Fontana case FPU_RC_CHOP: 6501b248f14SClaudio Fontana ST0 = floatx80_lg2_d; 6511b248f14SClaudio Fontana break; 6521b248f14SClaudio Fontana default: 6531b248f14SClaudio Fontana ST0 = floatx80_lg2; 6541b248f14SClaudio Fontana break; 6551b248f14SClaudio Fontana } 6561b248f14SClaudio Fontana } 6571b248f14SClaudio Fontana 6581b248f14SClaudio Fontana void helper_fldln2_ST0(CPUX86State *env) 6591b248f14SClaudio Fontana { 6601b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 6611b248f14SClaudio Fontana case FPU_RC_DOWN: 6621b248f14SClaudio Fontana case FPU_RC_CHOP: 6631b248f14SClaudio Fontana ST0 = floatx80_ln2_d; 6641b248f14SClaudio Fontana break; 6651b248f14SClaudio Fontana default: 6661b248f14SClaudio Fontana ST0 = floatx80_ln2; 6671b248f14SClaudio Fontana break; 6681b248f14SClaudio Fontana } 6691b248f14SClaudio Fontana } 6701b248f14SClaudio Fontana 6711b248f14SClaudio Fontana void helper_fldz_ST0(CPUX86State *env) 6721b248f14SClaudio Fontana { 6731b248f14SClaudio Fontana ST0 = floatx80_zero; 6741b248f14SClaudio Fontana } 6751b248f14SClaudio Fontana 6761b248f14SClaudio Fontana void helper_fldz_FT0(CPUX86State *env) 6771b248f14SClaudio Fontana { 6781b248f14SClaudio Fontana FT0 = floatx80_zero; 6791b248f14SClaudio Fontana } 6801b248f14SClaudio Fontana 6811b248f14SClaudio Fontana uint32_t helper_fnstsw(CPUX86State *env) 6821b248f14SClaudio Fontana { 6831b248f14SClaudio Fontana return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 6841b248f14SClaudio Fontana } 6851b248f14SClaudio Fontana 6861b248f14SClaudio Fontana uint32_t helper_fnstcw(CPUX86State *env) 
6871b248f14SClaudio Fontana { 6881b248f14SClaudio Fontana return env->fpuc; 6891b248f14SClaudio Fontana } 6901b248f14SClaudio Fontana 691314d3effSPaolo Bonzini static void set_x86_rounding_mode(unsigned mode, float_status *status) 692314d3effSPaolo Bonzini { 693314d3effSPaolo Bonzini static FloatRoundMode x86_round_mode[4] = { 694314d3effSPaolo Bonzini float_round_nearest_even, 695314d3effSPaolo Bonzini float_round_down, 696314d3effSPaolo Bonzini float_round_up, 697314d3effSPaolo Bonzini float_round_to_zero 698314d3effSPaolo Bonzini }; 699314d3effSPaolo Bonzini assert(mode < ARRAY_SIZE(x86_round_mode)); 700314d3effSPaolo Bonzini set_float_rounding_mode(x86_round_mode[mode], status); 701314d3effSPaolo Bonzini } 702314d3effSPaolo Bonzini 7031b248f14SClaudio Fontana void update_fp_status(CPUX86State *env) 7041b248f14SClaudio Fontana { 705314d3effSPaolo Bonzini int rnd_mode; 7068da5f1dbSRichard Henderson FloatX80RoundPrec rnd_prec; 7071b248f14SClaudio Fontana 7081b248f14SClaudio Fontana /* set rounding mode */ 709314d3effSPaolo Bonzini rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 710314d3effSPaolo Bonzini set_x86_rounding_mode(rnd_mode, &env->fp_status); 7118da5f1dbSRichard Henderson 7121b248f14SClaudio Fontana switch ((env->fpuc >> 8) & 3) { 7131b248f14SClaudio Fontana case 0: 7148da5f1dbSRichard Henderson rnd_prec = floatx80_precision_s; 7151b248f14SClaudio Fontana break; 7161b248f14SClaudio Fontana case 2: 7178da5f1dbSRichard Henderson rnd_prec = floatx80_precision_d; 7181b248f14SClaudio Fontana break; 7191b248f14SClaudio Fontana case 3: 7201b248f14SClaudio Fontana default: 7218da5f1dbSRichard Henderson rnd_prec = floatx80_precision_x; 7221b248f14SClaudio Fontana break; 7231b248f14SClaudio Fontana } 7248da5f1dbSRichard Henderson set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 7251b248f14SClaudio Fontana } 7261b248f14SClaudio Fontana 7271b248f14SClaudio Fontana void helper_fldcw(CPUX86State *env, uint32_t val) 7281b248f14SClaudio Fontana { 
7291b248f14SClaudio Fontana cpu_set_fpuc(env, val); 7301b248f14SClaudio Fontana } 7311b248f14SClaudio Fontana 7321b248f14SClaudio Fontana void helper_fclex(CPUX86State *env) 7331b248f14SClaudio Fontana { 7341b248f14SClaudio Fontana env->fpus &= 0x7f00; 7351b248f14SClaudio Fontana } 7361b248f14SClaudio Fontana 7371b248f14SClaudio Fontana void helper_fwait(CPUX86State *env) 7381b248f14SClaudio Fontana { 7391b248f14SClaudio Fontana if (env->fpus & FPUS_SE) { 7401b248f14SClaudio Fontana fpu_raise_exception(env, GETPC()); 7411b248f14SClaudio Fontana } 7421b248f14SClaudio Fontana } 7431b248f14SClaudio Fontana 744bbdda9b7SRichard Henderson static void do_fninit(CPUX86State *env) 7451b248f14SClaudio Fontana { 7461b248f14SClaudio Fontana env->fpus = 0; 7471b248f14SClaudio Fontana env->fpstt = 0; 74884abdd7dSZiqiao Kong env->fpcs = 0; 74984abdd7dSZiqiao Kong env->fpds = 0; 75084abdd7dSZiqiao Kong env->fpip = 0; 75184abdd7dSZiqiao Kong env->fpdp = 0; 7521b248f14SClaudio Fontana cpu_set_fpuc(env, 0x37f); 7531b248f14SClaudio Fontana env->fptags[0] = 1; 7541b248f14SClaudio Fontana env->fptags[1] = 1; 7551b248f14SClaudio Fontana env->fptags[2] = 1; 7561b248f14SClaudio Fontana env->fptags[3] = 1; 7571b248f14SClaudio Fontana env->fptags[4] = 1; 7581b248f14SClaudio Fontana env->fptags[5] = 1; 7591b248f14SClaudio Fontana env->fptags[6] = 1; 7601b248f14SClaudio Fontana env->fptags[7] = 1; 7611b248f14SClaudio Fontana } 7621b248f14SClaudio Fontana 763bbdda9b7SRichard Henderson void helper_fninit(CPUX86State *env) 764bbdda9b7SRichard Henderson { 765bbdda9b7SRichard Henderson do_fninit(env); 766bbdda9b7SRichard Henderson } 767bbdda9b7SRichard Henderson 7681b248f14SClaudio Fontana /* BCD ops */ 7691b248f14SClaudio Fontana 7701b248f14SClaudio Fontana void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 7711b248f14SClaudio Fontana { 7721b248f14SClaudio Fontana floatx80 tmp; 7731b248f14SClaudio Fontana uint64_t val; 7741b248f14SClaudio Fontana unsigned int v; 7751b248f14SClaudio Fontana 
int i; 7761b248f14SClaudio Fontana 7771b248f14SClaudio Fontana val = 0; 7781b248f14SClaudio Fontana for (i = 8; i >= 0; i--) { 7791b248f14SClaudio Fontana v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 7801b248f14SClaudio Fontana val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 7811b248f14SClaudio Fontana } 7821b248f14SClaudio Fontana tmp = int64_to_floatx80(val, &env->fp_status); 7831b248f14SClaudio Fontana if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 7841b248f14SClaudio Fontana tmp = floatx80_chs(tmp); 7851b248f14SClaudio Fontana } 7861b248f14SClaudio Fontana fpush(env); 7871b248f14SClaudio Fontana ST0 = tmp; 7881b248f14SClaudio Fontana } 7891b248f14SClaudio Fontana 7901b248f14SClaudio Fontana void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 7911b248f14SClaudio Fontana { 7921b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 7931b248f14SClaudio Fontana int v; 7941b248f14SClaudio Fontana target_ulong mem_ref, mem_end; 7951b248f14SClaudio Fontana int64_t val; 7961b248f14SClaudio Fontana CPU_LDoubleU temp; 7971b248f14SClaudio Fontana 7981b248f14SClaudio Fontana temp.d = ST0; 7991b248f14SClaudio Fontana 8001b248f14SClaudio Fontana val = floatx80_to_int64(ST0, &env->fp_status); 8011b248f14SClaudio Fontana mem_ref = ptr; 8021b248f14SClaudio Fontana if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 8031b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 8041b248f14SClaudio Fontana while (mem_ref < ptr + 7) { 8051b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 8061b248f14SClaudio Fontana } 8071b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 8081b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 8091b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 8101b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 8111b248f14SClaudio Fontana return; 8121b248f14SClaudio Fontana } 
8131b248f14SClaudio Fontana mem_end = mem_ref + 9; 8141b248f14SClaudio Fontana if (SIGND(temp)) { 8151b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 8161b248f14SClaudio Fontana val = -val; 8171b248f14SClaudio Fontana } else { 8181b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 8191b248f14SClaudio Fontana } 8201b248f14SClaudio Fontana while (mem_ref < mem_end) { 8211b248f14SClaudio Fontana if (val == 0) { 8221b248f14SClaudio Fontana break; 8231b248f14SClaudio Fontana } 8241b248f14SClaudio Fontana v = val % 100; 8251b248f14SClaudio Fontana val = val / 100; 8261b248f14SClaudio Fontana v = ((v / 10) << 4) | (v % 10); 8271b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 8281b248f14SClaudio Fontana } 8291b248f14SClaudio Fontana while (mem_ref < mem_end) { 8301b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 8311b248f14SClaudio Fontana } 8321b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 8331b248f14SClaudio Fontana } 8341b248f14SClaudio Fontana 8351b248f14SClaudio Fontana /* 128-bit significand of log(2). */ 8361b248f14SClaudio Fontana #define ln2_sig_high 0xb17217f7d1cf79abULL 8371b248f14SClaudio Fontana #define ln2_sig_low 0xc9e3b39803f2f6afULL 8381b248f14SClaudio Fontana 8391b248f14SClaudio Fontana /* 8401b248f14SClaudio Fontana * Polynomial coefficients for an approximation to (2^x - 1) / x, on 8411b248f14SClaudio Fontana * the interval [-1/64, 1/64]. 
 */
/*
 * helper_f2xm1 folds f2xm1_coeff_0_low into the low-order accumulator
 * together with coefficients 1-7, and adds f2xm1_coeff_0 separately in
 * 128-bit arithmetic.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)

/* One row of the lookup table used by helper_f2xm1. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};

/*
 * 65 rows, one per multiple of 1/32 from -1 to 1: row n has t close to
 * (n - 32) / 32.  Row 0 is exactly -1, row 32 is exactly zero and row 64
 * is exactly 1.  helper_f2xm1 computes the index as 32 plus the argument
 * scaled by 2^5 and converted to an integer.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

10631b248f14SClaudio Fontana void helper_f2xm1(CPUX86State *env) 10641b248f14SClaudio Fontana { 10651b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 10661b248f14SClaudio Fontana uint64_t sig = extractFloatx80Frac(ST0); 10671b248f14SClaudio Fontana int32_t exp = extractFloatx80Exp(ST0); 10681b248f14SClaudio Fontana bool sign = extractFloatx80Sign(ST0); 10691b248f14SClaudio Fontana 10701b248f14SClaudio Fontana if (floatx80_invalid_encoding(ST0)) { 10711b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 10721b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 10731b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 10741b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 10751b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 10761b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 10771b248f14SClaudio Fontana } 10781b248f14SClaudio Fontana } else if (exp > 0x3fff || 10791b248f14SClaudio Fontana (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 10801b248f14SClaudio Fontana /* Out of range for the instruction, treat as invalid. */ 10811b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 10821b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 10831b248f14SClaudio Fontana } else if (exp == 0x3fff) { 10841b248f14SClaudio Fontana /* Argument 1 or -1, exact result 1 or -0.5. */ 10851b248f14SClaudio Fontana if (sign) { 10861b248f14SClaudio Fontana ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 10871b248f14SClaudio Fontana } 10881b248f14SClaudio Fontana } else if (exp < 0x3fb0) { 10891b248f14SClaudio Fontana if (!floatx80_is_zero(ST0)) { 10901b248f14SClaudio Fontana /* 10911b248f14SClaudio Fontana * Multiplying the argument by an extra-precision version 10921b248f14SClaudio Fontana * of log(2) is sufficiently precise. 
Zero arguments are 10931b248f14SClaudio Fontana * returned unchanged. 10941b248f14SClaudio Fontana */ 10951b248f14SClaudio Fontana uint64_t sig0, sig1, sig2; 10961b248f14SClaudio Fontana if (exp == 0) { 10971b248f14SClaudio Fontana normalizeFloatx80Subnormal(sig, &exp, &sig); 10981b248f14SClaudio Fontana } 10991b248f14SClaudio Fontana mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 11001b248f14SClaudio Fontana &sig2); 11011b248f14SClaudio Fontana /* This result is inexact. */ 11021b248f14SClaudio Fontana sig1 |= 1; 11038da5f1dbSRichard Henderson ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 11048da5f1dbSRichard Henderson sign, exp, sig0, sig1, 11051b248f14SClaudio Fontana &env->fp_status); 11061b248f14SClaudio Fontana } 11071b248f14SClaudio Fontana } else { 11081b248f14SClaudio Fontana floatx80 tmp, y, accum; 11091b248f14SClaudio Fontana bool asign, bsign; 11101b248f14SClaudio Fontana int32_t n, aexp, bexp; 11111b248f14SClaudio Fontana uint64_t asig0, asig1, asig2, bsig0, bsig1; 11121b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 11138da5f1dbSRichard Henderson FloatX80RoundPrec save_prec = 11148da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision; 11151b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 11168da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 11171b248f14SClaudio Fontana 11181b248f14SClaudio Fontana /* Find the nearest multiple of 1/32 to the argument. 
*/ 11191b248f14SClaudio Fontana tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 11201b248f14SClaudio Fontana n = 32 + floatx80_to_int32(tmp, &env->fp_status); 11211b248f14SClaudio Fontana y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 11221b248f14SClaudio Fontana 11231b248f14SClaudio Fontana if (floatx80_is_zero(y)) { 11241b248f14SClaudio Fontana /* 11251b248f14SClaudio Fontana * Use the value of 2^t - 1 from the table, to avoid 11261b248f14SClaudio Fontana * needing to special-case zero as a result of 11271b248f14SClaudio Fontana * multiplication below. 11281b248f14SClaudio Fontana */ 11291b248f14SClaudio Fontana ST0 = f2xm1_table[n].t; 11301b248f14SClaudio Fontana set_float_exception_flags(float_flag_inexact, &env->fp_status); 11311b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 11321b248f14SClaudio Fontana } else { 11331b248f14SClaudio Fontana /* 11341b248f14SClaudio Fontana * Compute the lower parts of a polynomial expansion for 11351b248f14SClaudio Fontana * (2^y - 1) / y. 
11361b248f14SClaudio Fontana */ 11371b248f14SClaudio Fontana accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 11381b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 11391b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11401b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 11411b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11421b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 11431b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11441b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 11451b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11461b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 11471b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11481b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 11491b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 11501b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 11511b248f14SClaudio Fontana 11521b248f14SClaudio Fontana /* 11531b248f14SClaudio Fontana * The full polynomial expansion is f2xm1_coeff_0 + accum 11541b248f14SClaudio Fontana * (where accum has much lower magnitude, and so, in 11551b248f14SClaudio Fontana * particular, carry out of the addition is not possible). 11561b248f14SClaudio Fontana * (This expansion is only accurate to about 70 bits, not 11571b248f14SClaudio Fontana * 128 bits.) 
11581b248f14SClaudio Fontana */ 11591b248f14SClaudio Fontana aexp = extractFloatx80Exp(f2xm1_coeff_0); 11601b248f14SClaudio Fontana asign = extractFloatx80Sign(f2xm1_coeff_0); 11611b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0, 11621b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum), 11631b248f14SClaudio Fontana &asig0, &asig1); 11641b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 11651b248f14SClaudio Fontana bsig1 = 0; 11661b248f14SClaudio Fontana if (asign == extractFloatx80Sign(accum)) { 11671b248f14SClaudio Fontana add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 11681b248f14SClaudio Fontana } else { 11691b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 11701b248f14SClaudio Fontana } 11711b248f14SClaudio Fontana /* And thus compute an approximation to 2^y - 1. */ 11721b248f14SClaudio Fontana mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 11731b248f14SClaudio Fontana &asig0, &asig1, &asig2); 11741b248f14SClaudio Fontana aexp += extractFloatx80Exp(y) - 0x3ffe; 11751b248f14SClaudio Fontana asign ^= extractFloatx80Sign(y); 11761b248f14SClaudio Fontana if (n != 32) { 11771b248f14SClaudio Fontana /* 11781b248f14SClaudio Fontana * Multiply this by the precomputed value of 2^t and 11791b248f14SClaudio Fontana * add that of 2^t - 1. 
11801b248f14SClaudio Fontana */ 11811b248f14SClaudio Fontana mul128By64To192(asig0, asig1, 11821b248f14SClaudio Fontana extractFloatx80Frac(f2xm1_table[n].exp2), 11831b248f14SClaudio Fontana &asig0, &asig1, &asig2); 11841b248f14SClaudio Fontana aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 11851b248f14SClaudio Fontana bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 11861b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 11871b248f14SClaudio Fontana bsig1 = 0; 11881b248f14SClaudio Fontana if (bexp < aexp) { 11891b248f14SClaudio Fontana shift128RightJamming(bsig0, bsig1, aexp - bexp, 11901b248f14SClaudio Fontana &bsig0, &bsig1); 11911b248f14SClaudio Fontana } else if (aexp < bexp) { 11921b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, bexp - aexp, 11931b248f14SClaudio Fontana &asig0, &asig1); 11941b248f14SClaudio Fontana aexp = bexp; 11951b248f14SClaudio Fontana } 11961b248f14SClaudio Fontana /* The sign of 2^t - 1 is always that of the result. */ 11971b248f14SClaudio Fontana bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 11981b248f14SClaudio Fontana if (asign == bsign) { 11991b248f14SClaudio Fontana /* Avoid possible carry out of the addition. */ 12001b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, 1, 12011b248f14SClaudio Fontana &asig0, &asig1); 12021b248f14SClaudio Fontana shift128RightJamming(bsig0, bsig1, 1, 12031b248f14SClaudio Fontana &bsig0, &bsig1); 12041b248f14SClaudio Fontana ++aexp; 12051b248f14SClaudio Fontana add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 12061b248f14SClaudio Fontana } else { 12071b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 12081b248f14SClaudio Fontana asign = bsign; 12091b248f14SClaudio Fontana } 12101b248f14SClaudio Fontana } 12111b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 12121b248f14SClaudio Fontana /* This result is inexact. 
*/ 12131b248f14SClaudio Fontana asig1 |= 1; 12148da5f1dbSRichard Henderson ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 12158da5f1dbSRichard Henderson asign, aexp, asig0, asig1, 12161b248f14SClaudio Fontana &env->fp_status); 12171b248f14SClaudio Fontana } 12181b248f14SClaudio Fontana 12191b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 12201b248f14SClaudio Fontana } 12211b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 12221b248f14SClaudio Fontana } 12231b248f14SClaudio Fontana 12241b248f14SClaudio Fontana void helper_fptan(CPUX86State *env) 12251b248f14SClaudio Fontana { 12261b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 12271b248f14SClaudio Fontana 12281b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 12291b248f14SClaudio Fontana env->fpus |= 0x400; 12301b248f14SClaudio Fontana } else { 12311b248f14SClaudio Fontana fptemp = tan(fptemp); 12321b248f14SClaudio Fontana ST0 = double_to_floatx80(env, fptemp); 12331b248f14SClaudio Fontana fpush(env); 12341b248f14SClaudio Fontana ST0 = floatx80_one; 12351b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 12361b248f14SClaudio Fontana /* the above code is for |arg| < 2**52 only */ 12371b248f14SClaudio Fontana } 12381b248f14SClaudio Fontana } 12391b248f14SClaudio Fontana 12401b248f14SClaudio Fontana /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/
/*
 * Each constant is given as three pieces: a biased exponent (same bias as
 * EXPBIAS, so 0x3fff means a value in [1, 2)) and a 128-bit significand
 * split into its high and low 64-bit words.
 */
/* pi/4 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
/* pi/2: same significand as pi/4, exponent one higher. */
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
/* 3pi/4 */
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
/* pi: same significand as pi/4, exponent two higher. */
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
*/
/*
 * fpatan_coeff_k is the coefficient of z^(2k+1): helper_fpatan evaluates
 * coefficients 6..1 by Horner's rule in z^2 and then forms
 * z * (fpatan_coeff_0 + accum).
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};

/*
 * Indexed by n in helper_fpatan, where t = n/8 is the multiple of 1/8
 * nearest to the reduced argument; entry n supplies arctan(t) as a high
 * part plus a signed low correction.  The argument is in (0, 1], so n
 * ranges over 0..8.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN helper: compute the arctangent of ST1/ST0, choosing the quadrant
 * from the signs of both operands, store it in ST1 and pop the stack.
 *
 * NaNs and invalid encodings are dealt with first.  Operands whose ratio
 * is tiny and whose ST0 is positive are handled by a plain division.
 * Zeros, infinities and very large exponent gaps map to a multiple of
 * pi/4 carrying the sign of ST1.  Everything else goes through argument
 * reduction (x = t + y with t = n/8), a table lookup for arctan(t) and a
 * polynomial for arctan(z), all carried out on 128-bit significands.
 *
 * Exception flags are saved on entry and merged back on exit.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    /* Every branch leaves its result in ST1; ST0 is popped at the end. */
    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                 arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            /*
             * (sig - 1, all-ones) is the 128-bit significand (sig, 0)
             * minus one, i.e. a value just below the exact quotient.
             */
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                /* Both infinite: 3pi/4 or pi/4 depending on ST0's sign. */
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            /* ST1 dominates ST0: the result is pi/2 with ST1's sign. */
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            /*
             * Intermediate steps use round-to-nearest-even at full
             * extended precision; the guest's settings are restored
             * before the final rounding of the result.
             */
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /*
             * Put the operand of larger magnitude in the denominator;
             * adj_* records the constant the partial result is later
             * combined with (adj_exp == 0 means no adjustment).
             */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Step the estimated quotient down until the remainder is >= 0. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            /* Adding 3 to the exponent forms 8*x, which rounds to n. */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                /*
                 * Normalize n into a 64-bit significand; 0x403b is the
                 * exponent an integer with no leading zeros would have
                 * (0x403e), less 3 for the division by 8.
                 */
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        /* x < t: negate to get the magnitude of y. */
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero, so z is simply y. */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) contributes nothing. */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                /* Only the upper 128 bits (azsig0, azsig1) are used below. */
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold the table's low part into the 128-bit arctan(t). */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Align both terms to a common exponent one above the
                 * larger of the two, leaving a spare top bit for the sum.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.
*/ 17241b248f14SClaudio Fontana rsig1 |= 1; 17258da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 17261b248f14SClaudio Fontana rsig0, rsig1, &env->fp_status); 17271b248f14SClaudio Fontana } 17281b248f14SClaudio Fontana 17291b248f14SClaudio Fontana fpop(env); 17301b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 17311b248f14SClaudio Fontana } 17321b248f14SClaudio Fontana 17331b248f14SClaudio Fontana void helper_fxtract(CPUX86State *env) 17341b248f14SClaudio Fontana { 17351b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 17361b248f14SClaudio Fontana CPU_LDoubleU temp; 17371b248f14SClaudio Fontana 17381b248f14SClaudio Fontana temp.d = ST0; 17391b248f14SClaudio Fontana 17401b248f14SClaudio Fontana if (floatx80_is_zero(ST0)) { 17411b248f14SClaudio Fontana /* Easy way to generate -inf and raising division by 0 exception */ 17421b248f14SClaudio Fontana ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 17431b248f14SClaudio Fontana &env->fp_status); 17441b248f14SClaudio Fontana fpush(env); 17451b248f14SClaudio Fontana ST0 = temp.d; 17461b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0)) { 17471b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 17481b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 17491b248f14SClaudio Fontana fpush(env); 17501b248f14SClaudio Fontana ST0 = ST1; 17511b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 17521b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 17531b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 17541b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 17551b248f14SClaudio Fontana } 17561b248f14SClaudio Fontana fpush(env); 17571b248f14SClaudio Fontana ST0 = ST1; 17581b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST0)) { 17591b248f14SClaudio Fontana fpush(env); 17601b248f14SClaudio 
Fontana ST0 = ST1; 17611b248f14SClaudio Fontana ST1 = floatx80_infinity; 17621b248f14SClaudio Fontana } else { 17631b248f14SClaudio Fontana int expdif; 17641b248f14SClaudio Fontana 17651b248f14SClaudio Fontana if (EXPD(temp) == 0) { 17661b248f14SClaudio Fontana int shift = clz64(temp.l.lower); 17671b248f14SClaudio Fontana temp.l.lower <<= shift; 17681b248f14SClaudio Fontana expdif = 1 - EXPBIAS - shift; 17691b248f14SClaudio Fontana float_raise(float_flag_input_denormal, &env->fp_status); 17701b248f14SClaudio Fontana } else { 17711b248f14SClaudio Fontana expdif = EXPD(temp) - EXPBIAS; 17721b248f14SClaudio Fontana } 17731b248f14SClaudio Fontana /* DP exponent bias */ 17741b248f14SClaudio Fontana ST0 = int32_to_floatx80(expdif, &env->fp_status); 17751b248f14SClaudio Fontana fpush(env); 17761b248f14SClaudio Fontana BIASEXPONENT(temp); 17771b248f14SClaudio Fontana ST0 = temp.d; 17781b248f14SClaudio Fontana } 17791b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 17801b248f14SClaudio Fontana } 17811b248f14SClaudio Fontana 17821b248f14SClaudio Fontana static void helper_fprem_common(CPUX86State *env, bool mod) 17831b248f14SClaudio Fontana { 17841b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 17851b248f14SClaudio Fontana uint64_t quotient; 17861b248f14SClaudio Fontana CPU_LDoubleU temp0, temp1; 17871b248f14SClaudio Fontana int exp0, exp1, expdiff; 17881b248f14SClaudio Fontana 17891b248f14SClaudio Fontana temp0.d = ST0; 17901b248f14SClaudio Fontana temp1.d = ST1; 17911b248f14SClaudio Fontana exp0 = EXPD(temp0); 17921b248f14SClaudio Fontana exp1 = EXPD(temp1); 17931b248f14SClaudio Fontana 17941b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 17951b248f14SClaudio Fontana if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 17961b248f14SClaudio Fontana exp0 == 0x7fff || exp1 == 0x7fff || 17971b248f14SClaudio Fontana floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 17981b248f14SClaudio Fontana 
ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 17991b248f14SClaudio Fontana } else { 18001b248f14SClaudio Fontana if (exp0 == 0) { 18011b248f14SClaudio Fontana exp0 = 1 - clz64(temp0.l.lower); 18021b248f14SClaudio Fontana } 18031b248f14SClaudio Fontana if (exp1 == 0) { 18041b248f14SClaudio Fontana exp1 = 1 - clz64(temp1.l.lower); 18051b248f14SClaudio Fontana } 18061b248f14SClaudio Fontana expdiff = exp0 - exp1; 18071b248f14SClaudio Fontana if (expdiff < 64) { 18081b248f14SClaudio Fontana ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 18091b248f14SClaudio Fontana env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 18101b248f14SClaudio Fontana env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 18111b248f14SClaudio Fontana env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 18121b248f14SClaudio Fontana } else { 18131b248f14SClaudio Fontana /* 18141b248f14SClaudio Fontana * Partial remainder. This choice of how many bits to 18151b248f14SClaudio Fontana * process at once is specified in AMD instruction set 18161b248f14SClaudio Fontana * manuals, and empirically is followed by Intel 18171b248f14SClaudio Fontana * processors as well; it ensures that the final remainder 18181b248f14SClaudio Fontana * operation in a loop does produce the correct low three 18191b248f14SClaudio Fontana * bits of the quotient. AMD manuals specify that the 18201b248f14SClaudio Fontana * flags other than C2 are cleared, and empirically Intel 18211b248f14SClaudio Fontana * processors clear them as well. 
18221b248f14SClaudio Fontana */ 18231b248f14SClaudio Fontana int n = 32 + (expdiff % 32); 18241b248f14SClaudio Fontana temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 18251b248f14SClaudio Fontana ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 18261b248f14SClaudio Fontana env->fpus |= 0x400; /* C2 <-- 1 */ 18271b248f14SClaudio Fontana } 18281b248f14SClaudio Fontana } 18291b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 18301b248f14SClaudio Fontana } 18311b248f14SClaudio Fontana 18321b248f14SClaudio Fontana void helper_fprem1(CPUX86State *env) 18331b248f14SClaudio Fontana { 18341b248f14SClaudio Fontana helper_fprem_common(env, false); 18351b248f14SClaudio Fontana } 18361b248f14SClaudio Fontana 18371b248f14SClaudio Fontana void helper_fprem(CPUX86State *env) 18381b248f14SClaudio Fontana { 18391b248f14SClaudio Fontana helper_fprem_common(env, true); 18401b248f14SClaudio Fontana } 18411b248f14SClaudio Fontana 18421b248f14SClaudio Fontana /* 128-bit significand of log2(e). */ 18431b248f14SClaudio Fontana #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 18441b248f14SClaudio Fontana #define log2_e_sig_low 0xbe87fed0691d3e89ULL 18451b248f14SClaudio Fontana 18461b248f14SClaudio Fontana /* 18471b248f14SClaudio Fontana * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 18481b248f14SClaudio Fontana * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 18491b248f14SClaudio Fontana * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 18501b248f14SClaudio Fontana * interval [sqrt(2)/2, sqrt(2)]. 
18511b248f14SClaudio Fontana */ 18521b248f14SClaudio Fontana #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 18531b248f14SClaudio Fontana #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 18541b248f14SClaudio Fontana #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 18551b248f14SClaudio Fontana #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 18561b248f14SClaudio Fontana #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 18571b248f14SClaudio Fontana #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 18581b248f14SClaudio Fontana #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 18591b248f14SClaudio Fontana #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 18601b248f14SClaudio Fontana #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 18611b248f14SClaudio Fontana #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 18621b248f14SClaudio Fontana #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 18631b248f14SClaudio Fontana 18641b248f14SClaudio Fontana /* 18651b248f14SClaudio Fontana * Compute an approximation of log2(1+arg), where 1+arg is in the 18661b248f14SClaudio Fontana * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 18671b248f14SClaudio Fontana * function is called, rounding precision is set to 80 and the 18681b248f14SClaudio Fontana * round-to-nearest mode is in effect. arg must not be exactly zero, 18691b248f14SClaudio Fontana * and must not be so close to zero that underflow might occur. 
18701b248f14SClaudio Fontana */ 18711b248f14SClaudio Fontana static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 18721b248f14SClaudio Fontana uint64_t *sig0, uint64_t *sig1) 18731b248f14SClaudio Fontana { 18741b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(arg); 18751b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(arg); 18761b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(arg); 18771b248f14SClaudio Fontana bool asign; 18781b248f14SClaudio Fontana int32_t dexp, texp, aexp; 18791b248f14SClaudio Fontana uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 18801b248f14SClaudio Fontana uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 18811b248f14SClaudio Fontana uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 18821b248f14SClaudio Fontana floatx80 t2, accum; 18831b248f14SClaudio Fontana 18841b248f14SClaudio Fontana /* 18851b248f14SClaudio Fontana * Compute an approximation of arg/(2+arg), with extra precision, 18861b248f14SClaudio Fontana * as the argument to a polynomial approximation. The extra 18871b248f14SClaudio Fontana * precision is only needed for the first term of the 18881b248f14SClaudio Fontana * approximation, with subsequent terms being significantly 18891b248f14SClaudio Fontana * smaller; the approximation only uses odd exponents, and the 18901b248f14SClaudio Fontana * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
18911b248f14SClaudio Fontana */ 18921b248f14SClaudio Fontana if (arg0_sign) { 18931b248f14SClaudio Fontana dexp = 0x3fff; 18941b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 18951b248f14SClaudio Fontana sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 18961b248f14SClaudio Fontana } else { 18971b248f14SClaudio Fontana dexp = 0x4000; 18981b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 18991b248f14SClaudio Fontana dsig0 |= 0x8000000000000000ULL; 19001b248f14SClaudio Fontana } 19011b248f14SClaudio Fontana texp = arg0_exp - dexp + 0x3ffe; 19021b248f14SClaudio Fontana rsig0 = arg0_sig; 19031b248f14SClaudio Fontana rsig1 = 0; 19041b248f14SClaudio Fontana rsig2 = 0; 19051b248f14SClaudio Fontana if (dsig0 <= rsig0) { 19061b248f14SClaudio Fontana shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 19071b248f14SClaudio Fontana ++texp; 19081b248f14SClaudio Fontana } 19091b248f14SClaudio Fontana tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 19101b248f14SClaudio Fontana mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 19111b248f14SClaudio Fontana sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 19121b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2); 19131b248f14SClaudio Fontana while ((int64_t) rsig0 < 0) { 19141b248f14SClaudio Fontana --tsig0; 19151b248f14SClaudio Fontana add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 19161b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2); 19171b248f14SClaudio Fontana } 19181b248f14SClaudio Fontana tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 19191b248f14SClaudio Fontana /* 19201b248f14SClaudio Fontana * No need to correct any estimation error in tsig1; even with 19211b248f14SClaudio Fontana * such error, it is accurate enough. Now compute the square of 19221b248f14SClaudio Fontana * that approximation. 
19231b248f14SClaudio Fontana */ 19241b248f14SClaudio Fontana mul128To256(tsig0, tsig1, tsig0, tsig1, 19251b248f14SClaudio Fontana &t2sig0, &t2sig1, &t2sig2, &t2sig3); 19268da5f1dbSRichard Henderson t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 19278da5f1dbSRichard Henderson texp + texp - 0x3ffe, 19281b248f14SClaudio Fontana t2sig0, t2sig1, &env->fp_status); 19291b248f14SClaudio Fontana 19301b248f14SClaudio Fontana /* Compute the lower parts of the polynomial expansion. */ 19311b248f14SClaudio Fontana accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 19321b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 19331b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19341b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 19351b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19361b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 19371b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19381b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 19391b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19401b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 19411b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19421b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 19431b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19441b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 19451b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19461b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 19471b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 19481b248f14SClaudio Fontana accum = 
floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 19491b248f14SClaudio Fontana 19501b248f14SClaudio Fontana /* 19511b248f14SClaudio Fontana * The full polynomial expansion is fyl2x_coeff_0 + accum (where 19521b248f14SClaudio Fontana * accum has much lower magnitude, and so, in particular, carry 19531b248f14SClaudio Fontana * out of the addition is not possible), multiplied by t. (This 19541b248f14SClaudio Fontana * expansion is only accurate to about 70 bits, not 128 bits.) 19551b248f14SClaudio Fontana */ 19561b248f14SClaudio Fontana aexp = extractFloatx80Exp(fyl2x_coeff_0); 19571b248f14SClaudio Fontana asign = extractFloatx80Sign(fyl2x_coeff_0); 19581b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0, 19591b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum), 19601b248f14SClaudio Fontana &asig0, &asig1); 19611b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 19621b248f14SClaudio Fontana bsig1 = 0; 19631b248f14SClaudio Fontana if (asign == extractFloatx80Sign(accum)) { 19641b248f14SClaudio Fontana add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 19651b248f14SClaudio Fontana } else { 19661b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 19671b248f14SClaudio Fontana } 19681b248f14SClaudio Fontana /* Multiply by t to compute the required result. 
*/ 19691b248f14SClaudio Fontana mul128To256(asig0, asig1, tsig0, tsig1, 19701b248f14SClaudio Fontana &asig0, &asig1, &asig2, &asig3); 19711b248f14SClaudio Fontana aexp += texp - 0x3ffe; 19721b248f14SClaudio Fontana *exp = aexp; 19731b248f14SClaudio Fontana *sig0 = asig0; 19741b248f14SClaudio Fontana *sig1 = asig1; 19751b248f14SClaudio Fontana } 19761b248f14SClaudio Fontana 19771b248f14SClaudio Fontana void helper_fyl2xp1(CPUX86State *env) 19781b248f14SClaudio Fontana { 19791b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 19801b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(ST0); 19811b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(ST0); 19821b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(ST0); 19831b248f14SClaudio Fontana uint64_t arg1_sig = extractFloatx80Frac(ST1); 19841b248f14SClaudio Fontana int32_t arg1_exp = extractFloatx80Exp(ST1); 19851b248f14SClaudio Fontana bool arg1_sign = extractFloatx80Sign(ST1); 19861b248f14SClaudio Fontana 19871b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 19881b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 19891b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST0, &env->fp_status); 19901b248f14SClaudio Fontana } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 19911b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 19921b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST1, &env->fp_status); 19931b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0) || 19941b248f14SClaudio Fontana floatx80_invalid_encoding(ST1)) { 19951b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 19961b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 19971b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 19981b248f14SClaudio Fontana ST1 = ST0; 19991b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) { 20001b248f14SClaudio 
Fontana /* Pass this NaN through. */ 20011b248f14SClaudio Fontana } else if (arg0_exp > 0x3ffd || 20021b248f14SClaudio Fontana (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 20031b248f14SClaudio Fontana 0x95f619980c4336f7ULL : 20041b248f14SClaudio Fontana 0xd413cccfe7799211ULL))) { 20051b248f14SClaudio Fontana /* 20061b248f14SClaudio Fontana * Out of range for the instruction (ST0 must have absolute 20071b248f14SClaudio Fontana * value less than 1 - sqrt(2)/2 = 0.292..., according to 20081b248f14SClaudio Fontana * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 20091b248f14SClaudio Fontana * to sqrt(2) - 1, which we allow here), treat as invalid. 20101b248f14SClaudio Fontana */ 20111b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 20121b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 20131b248f14SClaudio Fontana } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 20141b248f14SClaudio Fontana arg1_exp == 0x7fff) { 20151b248f14SClaudio Fontana /* 20161b248f14SClaudio Fontana * One argument is zero, or multiplying by infinity; correct 20171b248f14SClaudio Fontana * result is exact and can be obtained by multiplying the 20181b248f14SClaudio Fontana * arguments. 20191b248f14SClaudio Fontana */ 20201b248f14SClaudio Fontana ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 20211b248f14SClaudio Fontana } else if (arg0_exp < 0x3fb0) { 20221b248f14SClaudio Fontana /* 20231b248f14SClaudio Fontana * Multiplying both arguments and an extra-precision version 20241b248f14SClaudio Fontana * of log2(e) is sufficiently precise. 
20251b248f14SClaudio Fontana */ 20261b248f14SClaudio Fontana uint64_t sig0, sig1, sig2; 20271b248f14SClaudio Fontana int32_t exp; 20281b248f14SClaudio Fontana if (arg0_exp == 0) { 20291b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 20301b248f14SClaudio Fontana } 20311b248f14SClaudio Fontana if (arg1_exp == 0) { 20321b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 20331b248f14SClaudio Fontana } 20341b248f14SClaudio Fontana mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 20351b248f14SClaudio Fontana &sig0, &sig1, &sig2); 20361b248f14SClaudio Fontana exp = arg0_exp + 1; 20371b248f14SClaudio Fontana mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 20381b248f14SClaudio Fontana exp += arg1_exp - 0x3ffe; 20391b248f14SClaudio Fontana /* This result is inexact. */ 20401b248f14SClaudio Fontana sig1 |= 1; 20418da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 20428da5f1dbSRichard Henderson arg0_sign ^ arg1_sign, exp, 20431b248f14SClaudio Fontana sig0, sig1, &env->fp_status); 20441b248f14SClaudio Fontana } else { 20451b248f14SClaudio Fontana int32_t aexp; 20461b248f14SClaudio Fontana uint64_t asig0, asig1, asig2; 20471b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 20488da5f1dbSRichard Henderson FloatX80RoundPrec save_prec = 20498da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision; 20501b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 20518da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 20521b248f14SClaudio Fontana 20531b248f14SClaudio Fontana helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 20541b248f14SClaudio Fontana /* 20551b248f14SClaudio Fontana * Multiply by the second argument to compute the required 20561b248f14SClaudio Fontana * result. 
20571b248f14SClaudio Fontana */ 20581b248f14SClaudio Fontana if (arg1_exp == 0) { 20591b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 20601b248f14SClaudio Fontana } 20611b248f14SClaudio Fontana mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 20621b248f14SClaudio Fontana aexp += arg1_exp - 0x3ffe; 20631b248f14SClaudio Fontana /* This result is inexact. */ 20641b248f14SClaudio Fontana asig1 |= 1; 20651b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 20668da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 20678da5f1dbSRichard Henderson arg0_sign ^ arg1_sign, aexp, 20681b248f14SClaudio Fontana asig0, asig1, &env->fp_status); 20691b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 20701b248f14SClaudio Fontana } 20711b248f14SClaudio Fontana fpop(env); 20721b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 20731b248f14SClaudio Fontana } 20741b248f14SClaudio Fontana 20751b248f14SClaudio Fontana void helper_fyl2x(CPUX86State *env) 20761b248f14SClaudio Fontana { 20771b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 20781b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(ST0); 20791b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(ST0); 20801b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(ST0); 20811b248f14SClaudio Fontana uint64_t arg1_sig = extractFloatx80Frac(ST1); 20821b248f14SClaudio Fontana int32_t arg1_exp = extractFloatx80Exp(ST1); 20831b248f14SClaudio Fontana bool arg1_sign = extractFloatx80Sign(ST1); 20841b248f14SClaudio Fontana 20851b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 20861b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 20871b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST0, &env->fp_status); 20881b248f14SClaudio Fontana } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 
20891b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 20901b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST1, &env->fp_status); 20911b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0) || 20921b248f14SClaudio Fontana floatx80_invalid_encoding(ST1)) { 20931b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 20941b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 20951b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 20961b248f14SClaudio Fontana ST1 = ST0; 20971b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) { 20981b248f14SClaudio Fontana /* Pass this NaN through. */ 20991b248f14SClaudio Fontana } else if (arg0_sign && !floatx80_is_zero(ST0)) { 21001b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 21011b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 21021b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST1)) { 21031b248f14SClaudio Fontana FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 21041b248f14SClaudio Fontana &env->fp_status); 21051b248f14SClaudio Fontana switch (cmp) { 21061b248f14SClaudio Fontana case float_relation_less: 21071b248f14SClaudio Fontana ST1 = floatx80_chs(ST1); 21081b248f14SClaudio Fontana break; 21091b248f14SClaudio Fontana case float_relation_greater: 21101b248f14SClaudio Fontana /* Result is infinity of the same sign as ST1. 
*/ 21111b248f14SClaudio Fontana break; 21121b248f14SClaudio Fontana default: 21131b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 21141b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 21151b248f14SClaudio Fontana break; 21161b248f14SClaudio Fontana } 21171b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST0)) { 21181b248f14SClaudio Fontana if (floatx80_is_zero(ST1)) { 21191b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 21201b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 21211b248f14SClaudio Fontana } else if (arg1_sign) { 21221b248f14SClaudio Fontana ST1 = floatx80_chs(ST0); 21231b248f14SClaudio Fontana } else { 21241b248f14SClaudio Fontana ST1 = ST0; 21251b248f14SClaudio Fontana } 21261b248f14SClaudio Fontana } else if (floatx80_is_zero(ST0)) { 21271b248f14SClaudio Fontana if (floatx80_is_zero(ST1)) { 21281b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 21291b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 21301b248f14SClaudio Fontana } else { 21311b248f14SClaudio Fontana /* Result is infinity with opposite sign to ST1. */ 21321b248f14SClaudio Fontana float_raise(float_flag_divbyzero, &env->fp_status); 21331b248f14SClaudio Fontana ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 21341b248f14SClaudio Fontana 0x8000000000000000ULL); 21351b248f14SClaudio Fontana } 21361b248f14SClaudio Fontana } else if (floatx80_is_zero(ST1)) { 21371b248f14SClaudio Fontana if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 21381b248f14SClaudio Fontana ST1 = floatx80_chs(ST1); 21391b248f14SClaudio Fontana } 21401b248f14SClaudio Fontana /* Otherwise, ST1 is already the correct result. 
*/ 21411b248f14SClaudio Fontana } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 21421b248f14SClaudio Fontana if (arg1_sign) { 21431b248f14SClaudio Fontana ST1 = floatx80_chs(floatx80_zero); 21441b248f14SClaudio Fontana } else { 21451b248f14SClaudio Fontana ST1 = floatx80_zero; 21461b248f14SClaudio Fontana } 21471b248f14SClaudio Fontana } else { 21481b248f14SClaudio Fontana int32_t int_exp; 21491b248f14SClaudio Fontana floatx80 arg0_m1; 21501b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 21518da5f1dbSRichard Henderson FloatX80RoundPrec save_prec = 21528da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision; 21531b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 21548da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 21551b248f14SClaudio Fontana 21561b248f14SClaudio Fontana if (arg0_exp == 0) { 21571b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 21581b248f14SClaudio Fontana } 21591b248f14SClaudio Fontana if (arg1_exp == 0) { 21601b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 21611b248f14SClaudio Fontana } 21621b248f14SClaudio Fontana int_exp = arg0_exp - 0x3fff; 21631b248f14SClaudio Fontana if (arg0_sig > 0xb504f333f9de6484ULL) { 21641b248f14SClaudio Fontana ++int_exp; 21651b248f14SClaudio Fontana } 21661b248f14SClaudio Fontana arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 21671b248f14SClaudio Fontana &env->fp_status), 21681b248f14SClaudio Fontana floatx80_one, &env->fp_status); 21691b248f14SClaudio Fontana if (floatx80_is_zero(arg0_m1)) { 21701b248f14SClaudio Fontana /* Exact power of 2; multiply by ST1. 
*/ 21711b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 21721b248f14SClaudio Fontana ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 21731b248f14SClaudio Fontana ST1, &env->fp_status); 21741b248f14SClaudio Fontana } else { 21751b248f14SClaudio Fontana bool asign = extractFloatx80Sign(arg0_m1); 21761b248f14SClaudio Fontana int32_t aexp; 21771b248f14SClaudio Fontana uint64_t asig0, asig1, asig2; 21781b248f14SClaudio Fontana helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 21791b248f14SClaudio Fontana if (int_exp != 0) { 21801b248f14SClaudio Fontana bool isign = (int_exp < 0); 21811b248f14SClaudio Fontana int32_t iexp; 21821b248f14SClaudio Fontana uint64_t isig; 21831b248f14SClaudio Fontana int shift; 21841b248f14SClaudio Fontana int_exp = isign ? -int_exp : int_exp; 21851b248f14SClaudio Fontana shift = clz32(int_exp) + 32; 21861b248f14SClaudio Fontana isig = int_exp; 21871b248f14SClaudio Fontana isig <<= shift; 21881b248f14SClaudio Fontana iexp = 0x403e - shift; 21891b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, iexp - aexp, 21901b248f14SClaudio Fontana &asig0, &asig1); 21911b248f14SClaudio Fontana if (asign == isign) { 21921b248f14SClaudio Fontana add128(isig, 0, asig0, asig1, &asig0, &asig1); 21931b248f14SClaudio Fontana } else { 21941b248f14SClaudio Fontana sub128(isig, 0, asig0, asig1, &asig0, &asig1); 21951b248f14SClaudio Fontana } 21961b248f14SClaudio Fontana aexp = iexp; 21971b248f14SClaudio Fontana asign = isign; 21981b248f14SClaudio Fontana } 21991b248f14SClaudio Fontana /* 22001b248f14SClaudio Fontana * Multiply by the second argument to compute the required 22011b248f14SClaudio Fontana * result. 
22021b248f14SClaudio Fontana */ 22031b248f14SClaudio Fontana /* arg1_sig/arg1_exp were already normalized before int_exp was computed. Do not normalize again here: a subnormal ST1 with exactly one leading zero bit normalizes to exponent 0, and a second pass over the already-normalized significand would rewrite that exponent to 1, doubling the result. */ 22041b248f14SClaudio Fontana 22051b248f14SClaudio Fontana 22061b248f14SClaudio Fontana mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 22071b248f14SClaudio Fontana aexp += arg1_exp - 0x3ffe; 22081b248f14SClaudio Fontana /* This result is inexact. */ 22091b248f14SClaudio Fontana asig1 |= 1; 22101b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; /* final rounding below uses the caller's rounding mode */ 22118da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 22128da5f1dbSRichard Henderson asign ^ arg1_sign, aexp, 22131b248f14SClaudio Fontana asig0, asig1, &env->fp_status); 22141b248f14SClaudio Fontana } 22151b248f14SClaudio Fontana 22161b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 22171b248f14SClaudio Fontana } 22181b248f14SClaudio Fontana fpop(env); 22191b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 22201b248f14SClaudio Fontana } 22211b248f14SClaudio Fontana 22221b248f14SClaudio Fontana void helper_fsqrt(CPUX86State *env) 22231b248f14SClaudio Fontana { 22241b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 22251b248f14SClaudio Fontana if (floatx80_is_neg(ST0)) { 22261b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 22271b248f14SClaudio Fontana env->fpus |= 0x400; 22281b248f14SClaudio Fontana } 22291b248f14SClaudio Fontana ST0 = floatx80_sqrt(ST0, &env->fp_status); 22301b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 22311b248f14SClaudio Fontana } 22321b248f14SClaudio Fontana 22331b248f14SClaudio Fontana void helper_fsincos(CPUX86State *env) 22341b248f14SClaudio Fontana { 22351b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 22361b248f14SClaudio Fontana 22371b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 22381b248f14SClaudio Fontana env->fpus |= 0x400; 
22391b248f14SClaudio Fontana } else { 22401b248f14SClaudio Fontana ST0 = double_to_floatx80(env, sin(fptemp)); 22411b248f14SClaudio Fontana fpush(env); 22421b248f14SClaudio Fontana ST0 = double_to_floatx80(env, cos(fptemp)); 22431b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 22441b248f14SClaudio Fontana /* the above code is for |arg| < 2**63 only */ 22451b248f14SClaudio Fontana } 22461b248f14SClaudio Fontana } 22471b248f14SClaudio Fontana 22481b248f14SClaudio Fontana void helper_frndint(CPUX86State *env) 22491b248f14SClaudio Fontana { 22501b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 22511b248f14SClaudio Fontana ST0 = floatx80_round_to_int(ST0, &env->fp_status); 22521b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 22531b248f14SClaudio Fontana } 22541b248f14SClaudio Fontana 22551b248f14SClaudio Fontana void helper_fscale(CPUX86State *env) 22561b248f14SClaudio Fontana { 22571b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 22581b248f14SClaudio Fontana if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 22591b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 22601b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 22611b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) { 22621b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 22631b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 22641b248f14SClaudio Fontana } 22651b248f14SClaudio Fontana ST0 = ST1; 22661b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 22671b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 22681b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 22691b248f14SClaudio Fontana } 22701b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST1) && 22711b248f14SClaudio Fontana !floatx80_invalid_encoding(ST0) && 22721b248f14SClaudio 
Fontana !floatx80_is_any_nan(ST0)) { 22731b248f14SClaudio Fontana if (floatx80_is_neg(ST1)) { 22741b248f14SClaudio Fontana if (floatx80_is_infinity(ST0)) { 22751b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 22761b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 22771b248f14SClaudio Fontana } else { 22781b248f14SClaudio Fontana ST0 = (floatx80_is_neg(ST0) ? 22791b248f14SClaudio Fontana floatx80_chs(floatx80_zero) : 22801b248f14SClaudio Fontana floatx80_zero); 22811b248f14SClaudio Fontana } 22821b248f14SClaudio Fontana } else { 22831b248f14SClaudio Fontana if (floatx80_is_zero(ST0)) { 22841b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 22851b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 22861b248f14SClaudio Fontana } else { 22871b248f14SClaudio Fontana ST0 = (floatx80_is_neg(ST0) ? 22881b248f14SClaudio Fontana floatx80_chs(floatx80_infinity) : 22891b248f14SClaudio Fontana floatx80_infinity); 22901b248f14SClaudio Fontana } 22911b248f14SClaudio Fontana } 22921b248f14SClaudio Fontana } else { 22931b248f14SClaudio Fontana int n; 22948da5f1dbSRichard Henderson FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 22951b248f14SClaudio Fontana uint8_t save_flags = get_float_exception_flags(&env->fp_status); 22961b248f14SClaudio Fontana set_float_exception_flags(0, &env->fp_status); 22971b248f14SClaudio Fontana n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 22981b248f14SClaudio Fontana set_float_exception_flags(save_flags, &env->fp_status); 22998da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 23001b248f14SClaudio Fontana ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 23011b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save; 23021b248f14SClaudio Fontana } 23031b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 23041b248f14SClaudio Fontana } 23051b248f14SClaudio Fontana 
23061b248f14SClaudio Fontana void helper_fsin(CPUX86State *env) 23071b248f14SClaudio Fontana { 23081b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 23091b248f14SClaudio Fontana 23101b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 23111b248f14SClaudio Fontana env->fpus |= 0x400; 23121b248f14SClaudio Fontana } else { 23131b248f14SClaudio Fontana ST0 = double_to_floatx80(env, sin(fptemp)); 23141b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 23151b248f14SClaudio Fontana /* the above code is for |arg| < 2**53 only */ 23161b248f14SClaudio Fontana } 23171b248f14SClaudio Fontana } 23181b248f14SClaudio Fontana 23191b248f14SClaudio Fontana void helper_fcos(CPUX86State *env) 23201b248f14SClaudio Fontana { 23211b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 23221b248f14SClaudio Fontana 23231b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 23241b248f14SClaudio Fontana env->fpus |= 0x400; 23251b248f14SClaudio Fontana } else { 23261b248f14SClaudio Fontana ST0 = double_to_floatx80(env, cos(fptemp)); 23271b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 23281b248f14SClaudio Fontana /* the above code is for |arg| < 2**63 only */ 23291b248f14SClaudio Fontana } 23301b248f14SClaudio Fontana } 23311b248f14SClaudio Fontana 23321b248f14SClaudio Fontana void helper_fxam_ST0(CPUX86State *env) 23331b248f14SClaudio Fontana { 23341b248f14SClaudio Fontana CPU_LDoubleU temp; 23351b248f14SClaudio Fontana int expdif; 23361b248f14SClaudio Fontana 23371b248f14SClaudio Fontana temp.d = ST0; 23381b248f14SClaudio Fontana 23391b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 23401b248f14SClaudio Fontana if (SIGND(temp)) { 23411b248f14SClaudio Fontana env->fpus |= 0x200; /* C1 <-- 1 */ 23421b248f14SClaudio Fontana } 23431b248f14SClaudio Fontana 23441b248f14SClaudio Fontana if (env->fptags[env->fpstt]) { 23451b248f14SClaudio Fontana env->fpus |= 0x4100; /* Empty */ 
23461b248f14SClaudio Fontana return; 23471b248f14SClaudio Fontana } 23481b248f14SClaudio Fontana 23491b248f14SClaudio Fontana expdif = EXPD(temp); 23501b248f14SClaudio Fontana if (expdif == MAXEXPD) { 23511b248f14SClaudio Fontana if (MANTD(temp) == 0x8000000000000000ULL) { 23521b248f14SClaudio Fontana env->fpus |= 0x500; /* Infinity */ 23531b248f14SClaudio Fontana } else if (MANTD(temp) & 0x8000000000000000ULL) { 23541b248f14SClaudio Fontana env->fpus |= 0x100; /* NaN */ 23551b248f14SClaudio Fontana } 23561b248f14SClaudio Fontana } else if (expdif == 0) { 23571b248f14SClaudio Fontana if (MANTD(temp) == 0) { 23581b248f14SClaudio Fontana env->fpus |= 0x4000; /* Zero */ 23591b248f14SClaudio Fontana } else { 23601b248f14SClaudio Fontana env->fpus |= 0x4400; /* Denormal */ 23611b248f14SClaudio Fontana } 23621b248f14SClaudio Fontana } else if (MANTD(temp) & 0x8000000000000000ULL) { 23631b248f14SClaudio Fontana env->fpus |= 0x400; 23641b248f14SClaudio Fontana } 23651b248f14SClaudio Fontana } 23661b248f14SClaudio Fontana 23671b248f14SClaudio Fontana static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 23681b248f14SClaudio Fontana uintptr_t retaddr) 23691b248f14SClaudio Fontana { 23701b248f14SClaudio Fontana int fpus, fptag, exp, i; 23711b248f14SClaudio Fontana uint64_t mant; 23721b248f14SClaudio Fontana CPU_LDoubleU tmp; 23731b248f14SClaudio Fontana 23741b248f14SClaudio Fontana fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 23751b248f14SClaudio Fontana fptag = 0; 23761b248f14SClaudio Fontana for (i = 7; i >= 0; i--) { 23771b248f14SClaudio Fontana fptag <<= 2; 23781b248f14SClaudio Fontana if (env->fptags[i]) { 23791b248f14SClaudio Fontana fptag |= 3; 23801b248f14SClaudio Fontana } else { 23811b248f14SClaudio Fontana tmp.d = env->fpregs[i].d; 23821b248f14SClaudio Fontana exp = EXPD(tmp); 23831b248f14SClaudio Fontana mant = MANTD(tmp); 23841b248f14SClaudio Fontana if (exp == 0 && mant == 0) { 23851b248f14SClaudio Fontana /* zero */ 
23861b248f14SClaudio Fontana fptag |= 1; 23871b248f14SClaudio Fontana } else if (exp == 0 || exp == MAXEXPD 23881b248f14SClaudio Fontana || (mant & (1LL << 63)) == 0) { 23891b248f14SClaudio Fontana /* NaNs, infinity, denormal */ 23901b248f14SClaudio Fontana fptag |= 2; 23911b248f14SClaudio Fontana } 23921b248f14SClaudio Fontana } 23931b248f14SClaudio Fontana } 23941b248f14SClaudio Fontana if (data32) { 23951b248f14SClaudio Fontana /* 32 bit */ 23961b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 23971b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 23981b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 239984abdd7dSZiqiao Kong cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 240084abdd7dSZiqiao Kong cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 240184abdd7dSZiqiao Kong cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 240284abdd7dSZiqiao Kong cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 24031b248f14SClaudio Fontana } else { 24041b248f14SClaudio Fontana /* 16 bit */ 24051b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 24061b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 24071b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 240884abdd7dSZiqiao Kong cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 240984abdd7dSZiqiao Kong cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 241084abdd7dSZiqiao Kong cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 241184abdd7dSZiqiao Kong cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 24121b248f14SClaudio Fontana } 24131b248f14SClaudio Fontana } 24141b248f14SClaudio Fontana 24151b248f14SClaudio Fontana void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 24161b248f14SClaudio Fontana { 24171b248f14SClaudio Fontana do_fstenv(env, ptr, data32, GETPC()); 24181b248f14SClaudio Fontana } 24191b248f14SClaudio Fontana 
24201b248f14SClaudio Fontana static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 24211b248f14SClaudio Fontana { 24221b248f14SClaudio Fontana env->fpstt = (fpus >> 11) & 7; 24231b248f14SClaudio Fontana env->fpus = fpus & ~0x3800 & ~FPUS_B; 24241b248f14SClaudio Fontana env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 24251b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY) 24261b248f14SClaudio Fontana if (!(env->fpus & FPUS_SE)) { 24271b248f14SClaudio Fontana /* 24281b248f14SClaudio Fontana * Here the processor deasserts FERR#; in response, the chipset deasserts 24291b248f14SClaudio Fontana * IGNNE#. 24301b248f14SClaudio Fontana */ 24311b248f14SClaudio Fontana cpu_clear_ignne(); 24321b248f14SClaudio Fontana } 24331b248f14SClaudio Fontana #endif 24341b248f14SClaudio Fontana } 24351b248f14SClaudio Fontana 24361b248f14SClaudio Fontana static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 24371b248f14SClaudio Fontana uintptr_t retaddr) 24381b248f14SClaudio Fontana { 24391b248f14SClaudio Fontana int i, fpus, fptag; 24401b248f14SClaudio Fontana 24411b248f14SClaudio Fontana if (data32) { 24421b248f14SClaudio Fontana cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 24431b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 24441b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 24451b248f14SClaudio Fontana } else { 24461b248f14SClaudio Fontana cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 24471b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 24481b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 24491b248f14SClaudio Fontana } 24501b248f14SClaudio Fontana cpu_set_fpus(env, fpus); 24511b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 24521b248f14SClaudio Fontana env->fptags[i] = ((fptag & 3) == 3); 24531b248f14SClaudio Fontana fptag >>= 2; 24541b248f14SClaudio Fontana } 24551b248f14SClaudio Fontana } 24561b248f14SClaudio Fontana 24571b248f14SClaudio 
/* TCG entry point for FLDENV: forwards to do_fldenv(). */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();

    do_fldenv(env, ptr, data32, ra);
}

/*
 * Store the environment followed by the eight stack registers, then
 * reinitialise the FPU.
 *
 * The register images start (14 << data32) bytes after @ptr and are
 * written in stack order, ST(0) first, 10 bytes apart.
 */
static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
                     uintptr_t retaddr)
{
    target_ulong reg_addr;
    int n;

    do_fstenv(env, ptr, data32, retaddr);

    reg_addr = ptr + ((target_ulong)14 << data32);
    for (n = 0; n < 8; n++, reg_addr += 10) {
        do_fstt(env, ST(n), reg_addr, retaddr);
    }

    do_fninit(env);
}

/* TCG entry point for FSAVE/FNSAVE: forwards to do_fsave(). */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();

    do_fsave(env, ptr, data32, ra);
}

/*
 * Load the environment and then the eight stack registers from the image
 * at @ptr.
 *
 * The environment is loaded first, so the ST(n) slots are resolved against
 * the top-of-stack value that came from the image.
 */
static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    target_ulong reg_addr;
    int n;

    do_fldenv(env, ptr, data32, retaddr);

    reg_addr = ptr + ((target_ulong)14 << data32);
    for (n = 0; n < 8; n++, reg_addr += 10) {
        floatx80 value = do_fldt(env, reg_addr, retaddr);

        ST(n) = value;
    }
}

/* TCG entry point for FRSTOR: forwards to do_frstor(). */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();

    do_frstor(env, ptr, data32, ra);
}

/* Byte offset of member X inside the XSAVE area. */
#define XO(X)  offsetof(X86XSaveArea, X)

/*
 * Write the x87 part of the legacy region of the XSAVE area at @ptr:
 * control word, status word (with top-of-stack merged into bits 13:11),
 * the one-bit-per-register tag byte, zeroed instruction/data pointers and
 * the eight stack registers in 16-byte slots.
 */
static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    const int status_word = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11);
    target_ulong reg_addr = ptr + XO(legacy.fpregs);
    int empty_mask = 0;
    int n;

    /* Collect the "empty" flags, one bit per register. */
    for (n = 0; n < 8; n++) {
        empty_mask |= env->fptags[n] << n;
    }

    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), status_word, ra);
    /* The stored tag byte is the complement of the empty flags. */
    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), empty_mask ^ 0xff, ra);

    /*
     * In 32-bit mode this is eip, sel, dp, sel.
     * In 64-bit mode this is rip, rdp.
     * But in either case we don't write actual data, just zeros.
     */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */

    for (n = 0; n < 8; n++, reg_addr += 16) {
        do_fstt(env, ST(n), reg_addr, ra);
    }
}

/*
 * Write MXCSR and the MXCSR mask into the legacy region at @ptr.
 * env->mxcsr is refreshed from the SSE softfloat status before it is
 * stored; the mask written is the constant 0x0000ffff.
 */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    const target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);
    const target_ulong mask_addr = ptr + XO(legacy.mxcsr_mask);

    update_mxcsr_from_sse_status(env);
    cpu_stl_data_ra(env, mxcsr_addr, env->mxcsr, ra);
    cpu_stl_data_ra(env, mask_addr, 0x0000ffff, ra);
}

/*
 * Write the low 128 bits of each XMM register into the legacy region at
 * @ptr.  Sixteen registers are stored when HF_CS64_MASK is set in hflags,
 * eight otherwise.
 */
static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    const int nb_xmm_regs = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong addr = ptr + XO(legacy.xmm_regs);
    int i;

    for (i = 0; i < nb_xmm_regs; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
    }
}

256389254431SPaolo Bonzini static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 256489254431SPaolo Bonzini { 256589254431SPaolo Bonzini int i, nb_xmm_regs; 256689254431SPaolo Bonzini 256789254431SPaolo Bonzini if (env->hflags & HF_CS64_MASK) { 256889254431SPaolo Bonzini nb_xmm_regs = 16; 256989254431SPaolo Bonzini } else { 257089254431SPaolo Bonzini nb_xmm_regs = 8; 257189254431SPaolo Bonzini } 257289254431SPaolo Bonzini 257389254431SPaolo Bonzini for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 257489254431SPaolo Bonzini cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 257589254431SPaolo Bonzini cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 257689254431SPaolo Bonzini } 257789254431SPaolo Bonzini } 257889254431SPaolo Bonzini 25791b248f14SClaudio Fontana static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 25801b248f14SClaudio Fontana { 25811b248f14SClaudio Fontana target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 25821b248f14SClaudio Fontana int i; 25831b248f14SClaudio Fontana 25841b248f14SClaudio Fontana for (i = 0; i < 4; i++, addr += 16) { 25851b248f14SClaudio Fontana cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 25861b248f14SClaudio Fontana cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 25871b248f14SClaudio Fontana } 25881b248f14SClaudio Fontana } 25891b248f14SClaudio Fontana 25901b248f14SClaudio Fontana static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 25911b248f14SClaudio Fontana { 25921b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 25931b248f14SClaudio Fontana env->bndcs_regs.cfgu, ra); 25941b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 25951b248f14SClaudio Fontana env->bndcs_regs.sts, ra); 25961b248f14SClaudio Fontana } 25971b248f14SClaudio Fontana 25981b248f14SClaudio Fontana static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 
25991b248f14SClaudio Fontana { 26001b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr, env->pkru, ra); 26011b248f14SClaudio Fontana } 26021b248f14SClaudio Fontana 26030ac2b197SRichard Henderson static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 26041b248f14SClaudio Fontana { 26051b248f14SClaudio Fontana /* The operand must be 16 byte aligned */ 26061b248f14SClaudio Fontana if (ptr & 0xf) { 26071b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 26081b248f14SClaudio Fontana } 26091b248f14SClaudio Fontana 26101b248f14SClaudio Fontana do_xsave_fpu(env, ptr, ra); 26111b248f14SClaudio Fontana 26121b248f14SClaudio Fontana if (env->cr[4] & CR4_OSFXSR_MASK) { 26131b248f14SClaudio Fontana do_xsave_mxcsr(env, ptr, ra); 26141b248f14SClaudio Fontana /* Fast FXSAVE leaves out the XMM registers */ 26151b248f14SClaudio Fontana if (!(env->efer & MSR_EFER_FFXSR) 26161b248f14SClaudio Fontana || (env->hflags & HF_CPL_MASK) 26171b248f14SClaudio Fontana || !(env->hflags & HF_LMA_MASK)) { 26181b248f14SClaudio Fontana do_xsave_sse(env, ptr, ra); 26191b248f14SClaudio Fontana } 26201b248f14SClaudio Fontana } 26211b248f14SClaudio Fontana } 26221b248f14SClaudio Fontana 26230ac2b197SRichard Henderson void helper_fxsave(CPUX86State *env, target_ulong ptr) 26240ac2b197SRichard Henderson { 26250ac2b197SRichard Henderson do_fxsave(env, ptr, GETPC()); 26260ac2b197SRichard Henderson } 26270ac2b197SRichard Henderson 26281b248f14SClaudio Fontana static uint64_t get_xinuse(CPUX86State *env) 26291b248f14SClaudio Fontana { 26301b248f14SClaudio Fontana uint64_t inuse = -1; 26311b248f14SClaudio Fontana 26321b248f14SClaudio Fontana /* For the most part, we don't track XINUSE. We could calculate it 26331b248f14SClaudio Fontana here for all components, but it's probably less work to simply 26341b248f14SClaudio Fontana indicate in use. That said, the state of BNDREGS is important 26351b248f14SClaudio Fontana enough to track in HFLAGS, so we might as well use that here. 
*/ 26361b248f14SClaudio Fontana if ((env->hflags & HF_MPX_IU_MASK) == 0) { 26371b248f14SClaudio Fontana inuse &= ~XSTATE_BNDREGS_MASK; 26381b248f14SClaudio Fontana } 26391b248f14SClaudio Fontana return inuse; 26401b248f14SClaudio Fontana } 26411b248f14SClaudio Fontana 26421b248f14SClaudio Fontana static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 26431b248f14SClaudio Fontana uint64_t inuse, uint64_t opt, uintptr_t ra) 26441b248f14SClaudio Fontana { 26451b248f14SClaudio Fontana uint64_t old_bv, new_bv; 26461b248f14SClaudio Fontana 26471b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 26481b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 26491b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, ra); 26501b248f14SClaudio Fontana } 26511b248f14SClaudio Fontana 26521b248f14SClaudio Fontana /* The operand must be 64 byte aligned. */ 26531b248f14SClaudio Fontana if (ptr & 63) { 26541b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 26551b248f14SClaudio Fontana } 26561b248f14SClaudio Fontana 26571b248f14SClaudio Fontana /* Never save anything not enabled by XCR0. */ 26581b248f14SClaudio Fontana rfbm &= env->xcr0; 26591b248f14SClaudio Fontana opt &= rfbm; 26601b248f14SClaudio Fontana 26611b248f14SClaudio Fontana if (opt & XSTATE_FP_MASK) { 26621b248f14SClaudio Fontana do_xsave_fpu(env, ptr, ra); 26631b248f14SClaudio Fontana } 26641b248f14SClaudio Fontana if (rfbm & XSTATE_SSE_MASK) { 26651b248f14SClaudio Fontana /* Note that saving MXCSR is not suppressed by XSAVEOPT. 
*/ 26661b248f14SClaudio Fontana do_xsave_mxcsr(env, ptr, ra); 26671b248f14SClaudio Fontana } 26681b248f14SClaudio Fontana if (opt & XSTATE_SSE_MASK) { 26691b248f14SClaudio Fontana do_xsave_sse(env, ptr, ra); 26701b248f14SClaudio Fontana } 267189254431SPaolo Bonzini if (opt & XSTATE_YMM_MASK) { 267289254431SPaolo Bonzini do_xsave_ymmh(env, ptr + XO(avx_state), ra); 267389254431SPaolo Bonzini } 26741b248f14SClaudio Fontana if (opt & XSTATE_BNDREGS_MASK) { 26751b248f14SClaudio Fontana do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 26761b248f14SClaudio Fontana } 26771b248f14SClaudio Fontana if (opt & XSTATE_BNDCSR_MASK) { 26781b248f14SClaudio Fontana do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 26791b248f14SClaudio Fontana } 26801b248f14SClaudio Fontana if (opt & XSTATE_PKRU_MASK) { 26811b248f14SClaudio Fontana do_xsave_pkru(env, ptr + XO(pkru_state), ra); 26821b248f14SClaudio Fontana } 26831b248f14SClaudio Fontana 26841b248f14SClaudio Fontana /* Update the XSTATE_BV field. */ 26851b248f14SClaudio Fontana old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 26861b248f14SClaudio Fontana new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 26871b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 26881b248f14SClaudio Fontana } 26891b248f14SClaudio Fontana 26901b248f14SClaudio Fontana void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 26911b248f14SClaudio Fontana { 26921b248f14SClaudio Fontana do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 26931b248f14SClaudio Fontana } 26941b248f14SClaudio Fontana 26951b248f14SClaudio Fontana void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 26961b248f14SClaudio Fontana { 26971b248f14SClaudio Fontana uint64_t inuse = get_xinuse(env); 26981b248f14SClaudio Fontana do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 26991b248f14SClaudio Fontana } 27001b248f14SClaudio Fontana 27011b248f14SClaudio Fontana static void do_xrstor_fpu(CPUX86State *env, 
target_ulong ptr, uintptr_t ra) 27021b248f14SClaudio Fontana { 27031b248f14SClaudio Fontana int i, fpuc, fpus, fptag; 27041b248f14SClaudio Fontana target_ulong addr; 27051b248f14SClaudio Fontana 27061b248f14SClaudio Fontana fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 27071b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 27081b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 27091b248f14SClaudio Fontana cpu_set_fpuc(env, fpuc); 27101b248f14SClaudio Fontana cpu_set_fpus(env, fpus); 27111b248f14SClaudio Fontana fptag ^= 0xff; 27121b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 27131b248f14SClaudio Fontana env->fptags[i] = ((fptag >> i) & 1); 27141b248f14SClaudio Fontana } 27151b248f14SClaudio Fontana 27161b248f14SClaudio Fontana addr = ptr + XO(legacy.fpregs); 27171b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2718e3a69234SRichard Henderson floatx80 tmp = do_fldt(env, addr, ra); 27191b248f14SClaudio Fontana ST(i) = tmp; 27201b248f14SClaudio Fontana addr += 16; 27211b248f14SClaudio Fontana } 27221b248f14SClaudio Fontana } 27231b248f14SClaudio Fontana 27241b248f14SClaudio Fontana static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 27251b248f14SClaudio Fontana { 27261b248f14SClaudio Fontana cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 27271b248f14SClaudio Fontana } 27281b248f14SClaudio Fontana 27291b248f14SClaudio Fontana static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 27301b248f14SClaudio Fontana { 27311b248f14SClaudio Fontana int i, nb_xmm_regs; 27321b248f14SClaudio Fontana target_ulong addr; 27331b248f14SClaudio Fontana 27341b248f14SClaudio Fontana if (env->hflags & HF_CS64_MASK) { 27351b248f14SClaudio Fontana nb_xmm_regs = 16; 27361b248f14SClaudio Fontana } else { 27371b248f14SClaudio Fontana nb_xmm_regs = 8; 27381b248f14SClaudio Fontana } 27391b248f14SClaudio Fontana 27401b248f14SClaudio Fontana addr = ptr + 
XO(legacy.xmm_regs); 27411b248f14SClaudio Fontana for (i = 0; i < nb_xmm_regs; i++) { 27421b248f14SClaudio Fontana env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 27431b248f14SClaudio Fontana env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 27441b248f14SClaudio Fontana addr += 16; 27451b248f14SClaudio Fontana } 27461b248f14SClaudio Fontana } 27471b248f14SClaudio Fontana 274889254431SPaolo Bonzini static void do_clear_sse(CPUX86State *env) 274989254431SPaolo Bonzini { 275089254431SPaolo Bonzini int i, nb_xmm_regs; 275189254431SPaolo Bonzini 275289254431SPaolo Bonzini if (env->hflags & HF_CS64_MASK) { 275389254431SPaolo Bonzini nb_xmm_regs = 16; 275489254431SPaolo Bonzini } else { 275589254431SPaolo Bonzini nb_xmm_regs = 8; 275689254431SPaolo Bonzini } 275789254431SPaolo Bonzini 275889254431SPaolo Bonzini for (i = 0; i < nb_xmm_regs; i++) { 275989254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(0) = 0; 276089254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(1) = 0; 276189254431SPaolo Bonzini } 276289254431SPaolo Bonzini } 276389254431SPaolo Bonzini 276489254431SPaolo Bonzini static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 276589254431SPaolo Bonzini { 276689254431SPaolo Bonzini int i, nb_xmm_regs; 276789254431SPaolo Bonzini 276889254431SPaolo Bonzini if (env->hflags & HF_CS64_MASK) { 276989254431SPaolo Bonzini nb_xmm_regs = 16; 277089254431SPaolo Bonzini } else { 277189254431SPaolo Bonzini nb_xmm_regs = 8; 277289254431SPaolo Bonzini } 277389254431SPaolo Bonzini 277489254431SPaolo Bonzini for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 277589254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 277689254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 277789254431SPaolo Bonzini } 277889254431SPaolo Bonzini } 277989254431SPaolo Bonzini 278089254431SPaolo Bonzini static void do_clear_ymmh(CPUX86State *env) 278189254431SPaolo Bonzini { 278289254431SPaolo Bonzini int i, 
nb_xmm_regs; 278389254431SPaolo Bonzini 278489254431SPaolo Bonzini if (env->hflags & HF_CS64_MASK) { 278589254431SPaolo Bonzini nb_xmm_regs = 16; 278689254431SPaolo Bonzini } else { 278789254431SPaolo Bonzini nb_xmm_regs = 8; 278889254431SPaolo Bonzini } 278989254431SPaolo Bonzini 279089254431SPaolo Bonzini for (i = 0; i < nb_xmm_regs; i++) { 279189254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(2) = 0; 279289254431SPaolo Bonzini env->xmm_regs[i].ZMM_Q(3) = 0; 279389254431SPaolo Bonzini } 279489254431SPaolo Bonzini } 279589254431SPaolo Bonzini 27961b248f14SClaudio Fontana static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 27971b248f14SClaudio Fontana { 27981b248f14SClaudio Fontana target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 27991b248f14SClaudio Fontana int i; 28001b248f14SClaudio Fontana 28011b248f14SClaudio Fontana for (i = 0; i < 4; i++, addr += 16) { 28021b248f14SClaudio Fontana env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 28031b248f14SClaudio Fontana env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 28041b248f14SClaudio Fontana } 28051b248f14SClaudio Fontana } 28061b248f14SClaudio Fontana 28071b248f14SClaudio Fontana static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 28081b248f14SClaudio Fontana { 28091b248f14SClaudio Fontana /* FIXME: Extend highest implemented bit of linear address. 
*/ 28101b248f14SClaudio Fontana env->bndcs_regs.cfgu 28111b248f14SClaudio Fontana = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 28121b248f14SClaudio Fontana env->bndcs_regs.sts 28131b248f14SClaudio Fontana = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 28141b248f14SClaudio Fontana } 28151b248f14SClaudio Fontana 28161b248f14SClaudio Fontana static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 28171b248f14SClaudio Fontana { 28181b248f14SClaudio Fontana env->pkru = cpu_ldq_data_ra(env, ptr, ra); 28191b248f14SClaudio Fontana } 28201b248f14SClaudio Fontana 28210ac2b197SRichard Henderson static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 28221b248f14SClaudio Fontana { 28231b248f14SClaudio Fontana /* The operand must be 16 byte aligned */ 28241b248f14SClaudio Fontana if (ptr & 0xf) { 28251b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 28261b248f14SClaudio Fontana } 28271b248f14SClaudio Fontana 28281b248f14SClaudio Fontana do_xrstor_fpu(env, ptr, ra); 28291b248f14SClaudio Fontana 28301b248f14SClaudio Fontana if (env->cr[4] & CR4_OSFXSR_MASK) { 28311b248f14SClaudio Fontana do_xrstor_mxcsr(env, ptr, ra); 28321b248f14SClaudio Fontana /* Fast FXRSTOR leaves out the XMM registers */ 28331b248f14SClaudio Fontana if (!(env->efer & MSR_EFER_FFXSR) 28341b248f14SClaudio Fontana || (env->hflags & HF_CPL_MASK) 28351b248f14SClaudio Fontana || !(env->hflags & HF_LMA_MASK)) { 28361b248f14SClaudio Fontana do_xrstor_sse(env, ptr, ra); 28371b248f14SClaudio Fontana } 28381b248f14SClaudio Fontana } 28391b248f14SClaudio Fontana } 28401b248f14SClaudio Fontana 28410ac2b197SRichard Henderson void helper_fxrstor(CPUX86State *env, target_ulong ptr) 28420ac2b197SRichard Henderson { 28430ac2b197SRichard Henderson do_fxrstor(env, ptr, GETPC()); 28440ac2b197SRichard Henderson } 28450ac2b197SRichard Henderson 28465d245678SPaolo Bonzini static void do_xrstor(CPUX86State *env, target_ulong ptr, 
uint64_t rfbm, uintptr_t ra) 28471b248f14SClaudio Fontana { 28481b248f14SClaudio Fontana uint64_t xstate_bv, xcomp_bv, reserve0; 28491b248f14SClaudio Fontana 28501b248f14SClaudio Fontana rfbm &= env->xcr0; 28511b248f14SClaudio Fontana 28521b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 28531b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 28541b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, ra); 28551b248f14SClaudio Fontana } 28561b248f14SClaudio Fontana 28571b248f14SClaudio Fontana /* The operand must be 64 byte aligned. */ 28581b248f14SClaudio Fontana if (ptr & 63) { 28591b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 28601b248f14SClaudio Fontana } 28611b248f14SClaudio Fontana 28621b248f14SClaudio Fontana xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 28631b248f14SClaudio Fontana 28641b248f14SClaudio Fontana if ((int64_t)xstate_bv < 0) { 28651b248f14SClaudio Fontana /* FIXME: Compact form. */ 28661b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 28671b248f14SClaudio Fontana } 28681b248f14SClaudio Fontana 28691b248f14SClaudio Fontana /* Standard form. */ 28701b248f14SClaudio Fontana 28711b248f14SClaudio Fontana /* The XSTATE_BV field must not set bits not present in XCR0. */ 28721b248f14SClaudio Fontana if (xstate_bv & ~env->xcr0) { 28731b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 28741b248f14SClaudio Fontana } 28751b248f14SClaudio Fontana 28761b248f14SClaudio Fontana /* The XCOMP_BV field must be zero. Note that, as of the April 2016 28771b248f14SClaudio Fontana revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 28781b248f14SClaudio Fontana describes only XCOMP_BV, but the description of the standard form 28791b248f14SClaudio Fontana of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 28801b248f14SClaudio Fontana includes the next 64-bit field. 
*/ 28811b248f14SClaudio Fontana xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 28821b248f14SClaudio Fontana reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 28831b248f14SClaudio Fontana if (xcomp_bv || reserve0) { 28841b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 28851b248f14SClaudio Fontana } 28861b248f14SClaudio Fontana 28871b248f14SClaudio Fontana if (rfbm & XSTATE_FP_MASK) { 28881b248f14SClaudio Fontana if (xstate_bv & XSTATE_FP_MASK) { 28891b248f14SClaudio Fontana do_xrstor_fpu(env, ptr, ra); 28901b248f14SClaudio Fontana } else { 2891bbdda9b7SRichard Henderson do_fninit(env); 28921b248f14SClaudio Fontana memset(env->fpregs, 0, sizeof(env->fpregs)); 28931b248f14SClaudio Fontana } 28941b248f14SClaudio Fontana } 28951b248f14SClaudio Fontana if (rfbm & XSTATE_SSE_MASK) { 28961b248f14SClaudio Fontana /* Note that the standard form of XRSTOR loads MXCSR from memory 28971b248f14SClaudio Fontana whether or not the XSTATE_BV bit is set. */ 28981b248f14SClaudio Fontana do_xrstor_mxcsr(env, ptr, ra); 28991b248f14SClaudio Fontana if (xstate_bv & XSTATE_SSE_MASK) { 29001b248f14SClaudio Fontana do_xrstor_sse(env, ptr, ra); 29011b248f14SClaudio Fontana } else { 290289254431SPaolo Bonzini do_clear_sse(env); 290389254431SPaolo Bonzini } 290489254431SPaolo Bonzini } 290589254431SPaolo Bonzini if (rfbm & XSTATE_YMM_MASK) { 290689254431SPaolo Bonzini if (xstate_bv & XSTATE_YMM_MASK) { 290789254431SPaolo Bonzini do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 290889254431SPaolo Bonzini } else { 290989254431SPaolo Bonzini do_clear_ymmh(env); 29101b248f14SClaudio Fontana } 29111b248f14SClaudio Fontana } 29121b248f14SClaudio Fontana if (rfbm & XSTATE_BNDREGS_MASK) { 29131b248f14SClaudio Fontana if (xstate_bv & XSTATE_BNDREGS_MASK) { 29141b248f14SClaudio Fontana do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 29151b248f14SClaudio Fontana env->hflags |= HF_MPX_IU_MASK; 29161b248f14SClaudio Fontana } else { 29171b248f14SClaudio 
Fontana memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 29181b248f14SClaudio Fontana env->hflags &= ~HF_MPX_IU_MASK; 29191b248f14SClaudio Fontana } 29201b248f14SClaudio Fontana } 29211b248f14SClaudio Fontana if (rfbm & XSTATE_BNDCSR_MASK) { 29221b248f14SClaudio Fontana if (xstate_bv & XSTATE_BNDCSR_MASK) { 29231b248f14SClaudio Fontana do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 29241b248f14SClaudio Fontana } else { 29251b248f14SClaudio Fontana memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 29261b248f14SClaudio Fontana } 29271b248f14SClaudio Fontana cpu_sync_bndcs_hflags(env); 29281b248f14SClaudio Fontana } 29291b248f14SClaudio Fontana if (rfbm & XSTATE_PKRU_MASK) { 29301b248f14SClaudio Fontana uint64_t old_pkru = env->pkru; 29311b248f14SClaudio Fontana if (xstate_bv & XSTATE_PKRU_MASK) { 29321b248f14SClaudio Fontana do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 29331b248f14SClaudio Fontana } else { 29341b248f14SClaudio Fontana env->pkru = 0; 29351b248f14SClaudio Fontana } 29361b248f14SClaudio Fontana if (env->pkru != old_pkru) { 29371b248f14SClaudio Fontana CPUState *cs = env_cpu(env); 29381b248f14SClaudio Fontana tlb_flush(cs); 29391b248f14SClaudio Fontana } 29401b248f14SClaudio Fontana } 29411b248f14SClaudio Fontana } 29421b248f14SClaudio Fontana 29431b248f14SClaudio Fontana #undef XO 29441b248f14SClaudio Fontana 29455d245678SPaolo Bonzini void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 29465d245678SPaolo Bonzini { 29475d245678SPaolo Bonzini do_xrstor(env, ptr, rfbm, GETPC()); 29485d245678SPaolo Bonzini } 29495d245678SPaolo Bonzini 29505d245678SPaolo Bonzini #if defined(CONFIG_USER_ONLY) 29515d245678SPaolo Bonzini void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 29525d245678SPaolo Bonzini { 29535d245678SPaolo Bonzini do_fsave(env, ptr, data32, 0); 29545d245678SPaolo Bonzini } 29555d245678SPaolo Bonzini 29565d245678SPaolo Bonzini void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 
29575d245678SPaolo Bonzini { 29585d245678SPaolo Bonzini do_frstor(env, ptr, data32, 0); 29595d245678SPaolo Bonzini } 29605d245678SPaolo Bonzini 29615d245678SPaolo Bonzini void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 29625d245678SPaolo Bonzini { 29635d245678SPaolo Bonzini do_fxsave(env, ptr, 0); 29645d245678SPaolo Bonzini } 29655d245678SPaolo Bonzini 29665d245678SPaolo Bonzini void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 29675d245678SPaolo Bonzini { 29685d245678SPaolo Bonzini do_fxrstor(env, ptr, 0); 29695d245678SPaolo Bonzini } 29705d245678SPaolo Bonzini 29715d245678SPaolo Bonzini void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 29725d245678SPaolo Bonzini { 29735d245678SPaolo Bonzini do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 29745d245678SPaolo Bonzini } 29755d245678SPaolo Bonzini 29765d245678SPaolo Bonzini void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 29775d245678SPaolo Bonzini { 29785d245678SPaolo Bonzini do_xrstor(env, ptr, -1, 0); 29795d245678SPaolo Bonzini } 29805d245678SPaolo Bonzini #endif 29815d245678SPaolo Bonzini 29821b248f14SClaudio Fontana uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 29831b248f14SClaudio Fontana { 29841b248f14SClaudio Fontana /* The OS must have enabled XSAVE. 
*/ 29851b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 29861b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 29871b248f14SClaudio Fontana } 29881b248f14SClaudio Fontana 29891b248f14SClaudio Fontana switch (ecx) { 29901b248f14SClaudio Fontana case 0: 29911b248f14SClaudio Fontana return env->xcr0; 29921b248f14SClaudio Fontana case 1: 29931b248f14SClaudio Fontana if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 29941b248f14SClaudio Fontana return env->xcr0 & get_xinuse(env); 29951b248f14SClaudio Fontana } 29961b248f14SClaudio Fontana break; 29971b248f14SClaudio Fontana } 29981b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, GETPC()); 29991b248f14SClaudio Fontana } 30001b248f14SClaudio Fontana 30011b248f14SClaudio Fontana void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 30021b248f14SClaudio Fontana { 30031b248f14SClaudio Fontana uint32_t dummy, ena_lo, ena_hi; 30041b248f14SClaudio Fontana uint64_t ena; 30051b248f14SClaudio Fontana 30061b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 30071b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 30081b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 30091b248f14SClaudio Fontana } 30101b248f14SClaudio Fontana 30111b248f14SClaudio Fontana /* Only XCR0 is defined at present; the FPU may not be disabled. */ 30121b248f14SClaudio Fontana if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 30131b248f14SClaudio Fontana goto do_gpf; 30141b248f14SClaudio Fontana } 30151b248f14SClaudio Fontana 30161b248f14SClaudio Fontana /* Disallow enabling unimplemented features. 
*/ 30171b248f14SClaudio Fontana cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 30181b248f14SClaudio Fontana ena = ((uint64_t)ena_hi << 32) | ena_lo; 30191b248f14SClaudio Fontana if (mask & ~ena) { 30201b248f14SClaudio Fontana goto do_gpf; 30211b248f14SClaudio Fontana } 30221b248f14SClaudio Fontana 30231b248f14SClaudio Fontana /* Disallow enabling only half of MPX. */ 30241b248f14SClaudio Fontana if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 30251b248f14SClaudio Fontana & XSTATE_BNDCSR_MASK) { 30261b248f14SClaudio Fontana goto do_gpf; 30271b248f14SClaudio Fontana } 30281b248f14SClaudio Fontana 30291b248f14SClaudio Fontana env->xcr0 = mask; 30301b248f14SClaudio Fontana cpu_sync_bndcs_hflags(env); 3031608db8dbSPaul Brook cpu_sync_avx_hflag(env); 30321b248f14SClaudio Fontana return; 30331b248f14SClaudio Fontana 30341b248f14SClaudio Fontana do_gpf: 30351b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, GETPC()); 30361b248f14SClaudio Fontana } 30371b248f14SClaudio Fontana 30381b248f14SClaudio Fontana /* MMX/SSE */ 30391b248f14SClaudio Fontana /* XXX: optimize by storing fptt and fptags in the static cpu state */ 30401b248f14SClaudio Fontana 30411b248f14SClaudio Fontana #define SSE_DAZ 0x0040 3042314d3effSPaolo Bonzini #define SSE_RC_SHIFT 13 3043314d3effSPaolo Bonzini #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 30441b248f14SClaudio Fontana #define SSE_FZ 0x8000 30451b248f14SClaudio Fontana 30461b248f14SClaudio Fontana void update_mxcsr_status(CPUX86State *env) 30471b248f14SClaudio Fontana { 30481b248f14SClaudio Fontana uint32_t mxcsr = env->mxcsr; 30491b248f14SClaudio Fontana int rnd_type; 30501b248f14SClaudio Fontana 30511b248f14SClaudio Fontana /* set rounding mode */ 3052314d3effSPaolo Bonzini rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3053314d3effSPaolo Bonzini set_x86_rounding_mode(rnd_type, &env->sse_status); 30541b248f14SClaudio Fontana 30551b248f14SClaudio Fontana /* Set exception flags. 
*/ 30561b248f14SClaudio Fontana set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 30571b248f14SClaudio Fontana (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 30581b248f14SClaudio Fontana (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 30591b248f14SClaudio Fontana (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 30601b248f14SClaudio Fontana (mxcsr & FPUS_PE ? float_flag_inexact : 0), 30611b248f14SClaudio Fontana &env->sse_status); 30621b248f14SClaudio Fontana 30631b248f14SClaudio Fontana /* set denormals are zero */ 30641b248f14SClaudio Fontana set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 30651b248f14SClaudio Fontana 30661b248f14SClaudio Fontana /* set flush to zero */ 30671b248f14SClaudio Fontana set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 30681b248f14SClaudio Fontana } 30691b248f14SClaudio Fontana 30701b248f14SClaudio Fontana void update_mxcsr_from_sse_status(CPUX86State *env) 30711b248f14SClaudio Fontana { 30721b248f14SClaudio Fontana uint8_t flags = get_float_exception_flags(&env->sse_status); 30731b248f14SClaudio Fontana /* 30741b248f14SClaudio Fontana * The MXCSR denormal flag has opposite semantics to 30751b248f14SClaudio Fontana * float_flag_input_denormal (the softfloat code sets that flag 30761b248f14SClaudio Fontana * only when flushing input denormals to zero, but SSE sets it 30771b248f14SClaudio Fontana * only when not flushing them to zero), so is not converted 30781b248f14SClaudio Fontana * here. 30791b248f14SClaudio Fontana */ 30801b248f14SClaudio Fontana env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 30811b248f14SClaudio Fontana (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 30821b248f14SClaudio Fontana (flags & float_flag_overflow ? FPUS_OE : 0) | 30831b248f14SClaudio Fontana (flags & float_flag_underflow ? FPUS_UE : 0) | 30841b248f14SClaudio Fontana (flags & float_flag_inexact ? FPUS_PE : 0) | 30851b248f14SClaudio Fontana (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : 30861b248f14SClaudio Fontana 0)); 30871b248f14SClaudio Fontana } 30881b248f14SClaudio Fontana 30891b248f14SClaudio Fontana void helper_update_mxcsr(CPUX86State *env) 30901b248f14SClaudio Fontana { 30911b248f14SClaudio Fontana update_mxcsr_from_sse_status(env); 30921b248f14SClaudio Fontana } 30931b248f14SClaudio Fontana 30941b248f14SClaudio Fontana void helper_ldmxcsr(CPUX86State *env, uint32_t val) 30951b248f14SClaudio Fontana { 30961b248f14SClaudio Fontana cpu_set_mxcsr(env, val); 30971b248f14SClaudio Fontana } 30981b248f14SClaudio Fontana 30991b248f14SClaudio Fontana void helper_enter_mmx(CPUX86State *env) 31001b248f14SClaudio Fontana { 31011b248f14SClaudio Fontana env->fpstt = 0; 31021b248f14SClaudio Fontana *(uint32_t *)(env->fptags) = 0; 31031b248f14SClaudio Fontana *(uint32_t *)(env->fptags + 4) = 0; 31041b248f14SClaudio Fontana } 31051b248f14SClaudio Fontana 31061b248f14SClaudio Fontana void helper_emms(CPUX86State *env) 31071b248f14SClaudio Fontana { 31081b248f14SClaudio Fontana /* set to empty state */ 31091b248f14SClaudio Fontana *(uint32_t *)(env->fptags) = 0x01010101; 31101b248f14SClaudio Fontana *(uint32_t *)(env->fptags + 4) = 0x01010101; 31111b248f14SClaudio Fontana } 31121b248f14SClaudio Fontana 31131b248f14SClaudio Fontana #define SHIFT 0 31141b248f14SClaudio Fontana #include "ops_sse.h" 31151b248f14SClaudio Fontana 31161b248f14SClaudio Fontana #define SHIFT 1 31171b248f14SClaudio Fontana #include "ops_sse.h" 3118b98f886cSPaolo Bonzini 3119b98f886cSPaolo Bonzini #define SHIFT 2 3120b98f886cSPaolo Bonzini #include "ops_sse.h" 3121