xref: /openbmc/qemu/target/i386/tcg/fpu_helper.c (revision 314d3eff66f41f39191aaca2e5f6e3dc81480c1b)
/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
191b248f14SClaudio Fontana 
201b248f14SClaudio Fontana #include "qemu/osdep.h"
211b248f14SClaudio Fontana #include <math.h>
221b248f14SClaudio Fontana #include "cpu.h"
2348e5c98aSDavid Edmondson #include "tcg-cpu.h"
241b248f14SClaudio Fontana #include "exec/helper-proto.h"
251b248f14SClaudio Fontana #include "fpu/softfloat.h"
261b248f14SClaudio Fontana #include "fpu/softfloat-macros.h"
27ed69e831SClaudio Fontana #include "helper-tcg.h"
281b248f14SClaudio Fontana 
/*
 * x87 register-file shorthands.  Each one expands against a variable
 * named "env" that must be in scope at the point of use.
 *
 *   FT0    - the env->ft0 operand slot
 *   ST(n)  - the register n slots away from the current top, modulo 8
 *   ST0    - the register at the current top (env->fpstt)
 *   ST1    - ST(1)
 */
#define FT0    (env->ft0)
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST0    (env->fpregs[env->fpstt].d)
#define ST1    ST(1)
34ed69e831SClaudio Fontana 
/*
 * Rounding-control field of the FPU control word (env->fpuc).  The field
 * is two bits wide starting at bit FPU_RC_SHIFT; the four mode constants
 * are already shifted into place so they compare directly against
 * (fpuc & FPU_RC_MASK).
 */
#define FPU_RC_SHIFT        10
#define FPU_RC_MASK         (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR         (0 << FPU_RC_SHIFT)   /* 0x000 */
#define FPU_RC_DOWN         (1 << FPU_RC_SHIFT)   /* 0x400 */
#define FPU_RC_UP           (2 << FPU_RC_SHIFT)   /* 0x800 */
#define FPU_RC_CHOP         (3 << FPU_RC_SHIFT)   /* 0xc00 */

/* 2^63 expressed as a double constant. */
#define MAXTAN 9223372036854775808.0
431b248f14SClaudio Fontana 
/*
 * Field accessors for x86 extended-precision ("long double") values held
 * in a union with an 'l' view, as CPU_LDoubleU provides: l.upper carries
 * the sign bit (0x8000) and the 15-bit exponent (0x7fff), l.lower carries
 * the 64-bit significand.
 *
 * Every macro argument is parenthesised so that any lvalue expression
 * (for example "*ptr") can be passed, and BIASEXPONENT is wrapped as a
 * single expression so it cannot be split by surrounding operators.
 * Note that BIASEXPONENT evaluates its argument twice.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       ((fp).l.upper & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with EXPBIAS, keeping the remaining bits. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
511b248f14SClaudio Fontana 
/*
 * Bits of the FPU status word (env->fpus).  The low six are the exception
 * flags that merge_exception_flags() derives from softfloat flags; SE and
 * B are set by fpu_set_exception() when an unmasked exception is pending.
 */
#define FPUS_IE 0x0001  /* invalid operation */
#define FPUS_DE 0x0002  /* denormal operand */
#define FPUS_ZE 0x0004  /* divide by zero */
#define FPUS_OE 0x0008  /* overflow */
#define FPUS_UE 0x0010  /* underflow */
#define FPUS_PE 0x0020  /* precision (inexact) */
#define FPUS_SF 0x0040  /* bit 6 */
#define FPUS_SE 0x0080  /* bit 7, summary of unmasked exceptions */
#define FPUS_B  0x8000  /* bit 15 */

/* Exception-mask bits of the control word; they line up with FPUS_IE..PE. */
#define FPUC_EM 0x3f
631b248f14SClaudio Fontana 
/*
 * Extended-precision constants, written as (sign/exponent, significand).
 * Several values come in pairs whose significands differ by one unit in
 * the last place; the "_d" / "_u" suffixed variants are the ones the
 * constant-load helpers pick for particular rounding-control settings.
 */
#define floatx80_lg2    make_floatx80(0x3ffd, 0x9a209a84fbcff799ULL)
#define floatx80_lg2_d  make_floatx80(0x3ffd, 0x9a209a84fbcff798ULL)
#define floatx80_l2e    make_floatx80(0x3fff, 0xb8aa3b295c17f0bcULL)
#define floatx80_l2e_d  make_floatx80(0x3fff, 0xb8aa3b295c17f0bbULL)
#define floatx80_l2t    make_floatx80(0x4000, 0xd49a784bcd1b8afeULL)
#define floatx80_l2t_u  make_floatx80(0x4000, 0xd49a784bcd1b8affULL)
#define floatx80_ln2_d  make_floatx80(0x3ffe, 0xb17217f7d1cf79abULL)
#define floatx80_pi_d   make_floatx80(0x4000, 0xc90fdaa22168c234ULL)
721b248f14SClaudio Fontana 
731b248f14SClaudio Fontana static inline void fpush(CPUX86State *env)
741b248f14SClaudio Fontana {
751b248f14SClaudio Fontana     env->fpstt = (env->fpstt - 1) & 7;
761b248f14SClaudio Fontana     env->fptags[env->fpstt] = 0; /* validate stack entry */
771b248f14SClaudio Fontana }
781b248f14SClaudio Fontana 
791b248f14SClaudio Fontana static inline void fpop(CPUX86State *env)
801b248f14SClaudio Fontana {
811b248f14SClaudio Fontana     env->fptags[env->fpstt] = 1; /* invalidate stack entry */
821b248f14SClaudio Fontana     env->fpstt = (env->fpstt + 1) & 7;
831b248f14SClaudio Fontana }
841b248f14SClaudio Fontana 
85e3a69234SRichard Henderson static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
861b248f14SClaudio Fontana {
871b248f14SClaudio Fontana     CPU_LDoubleU temp;
881b248f14SClaudio Fontana 
891b248f14SClaudio Fontana     temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
901b248f14SClaudio Fontana     temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
911b248f14SClaudio Fontana     return temp.d;
921b248f14SClaudio Fontana }
931b248f14SClaudio Fontana 
94e3a69234SRichard Henderson static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
951b248f14SClaudio Fontana                     uintptr_t retaddr)
961b248f14SClaudio Fontana {
971b248f14SClaudio Fontana     CPU_LDoubleU temp;
981b248f14SClaudio Fontana 
991b248f14SClaudio Fontana     temp.d = f;
1001b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
1011b248f14SClaudio Fontana     cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
1021b248f14SClaudio Fontana }
1031b248f14SClaudio Fontana 
/* x87 FPU helpers */
1051b248f14SClaudio Fontana 
1061b248f14SClaudio Fontana static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
1071b248f14SClaudio Fontana {
1081b248f14SClaudio Fontana     union {
1091b248f14SClaudio Fontana         float64 f64;
1101b248f14SClaudio Fontana         double d;
1111b248f14SClaudio Fontana     } u;
1121b248f14SClaudio Fontana 
1131b248f14SClaudio Fontana     u.f64 = floatx80_to_float64(a, &env->fp_status);
1141b248f14SClaudio Fontana     return u.d;
1151b248f14SClaudio Fontana }
1161b248f14SClaudio Fontana 
1171b248f14SClaudio Fontana static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
1181b248f14SClaudio Fontana {
1191b248f14SClaudio Fontana     union {
1201b248f14SClaudio Fontana         float64 f64;
1211b248f14SClaudio Fontana         double d;
1221b248f14SClaudio Fontana     } u;
1231b248f14SClaudio Fontana 
1241b248f14SClaudio Fontana     u.d = a;
1251b248f14SClaudio Fontana     return float64_to_floatx80(u.f64, &env->fp_status);
1261b248f14SClaudio Fontana }
1271b248f14SClaudio Fontana 
1281b248f14SClaudio Fontana static void fpu_set_exception(CPUX86State *env, int mask)
1291b248f14SClaudio Fontana {
1301b248f14SClaudio Fontana     env->fpus |= mask;
1311b248f14SClaudio Fontana     if (env->fpus & (~env->fpuc & FPUC_EM)) {
1321b248f14SClaudio Fontana         env->fpus |= FPUS_SE | FPUS_B;
1331b248f14SClaudio Fontana     }
1341b248f14SClaudio Fontana }
1351b248f14SClaudio Fontana 
1361b248f14SClaudio Fontana static inline uint8_t save_exception_flags(CPUX86State *env)
1371b248f14SClaudio Fontana {
1381b248f14SClaudio Fontana     uint8_t old_flags = get_float_exception_flags(&env->fp_status);
1391b248f14SClaudio Fontana     set_float_exception_flags(0, &env->fp_status);
1401b248f14SClaudio Fontana     return old_flags;
1411b248f14SClaudio Fontana }
1421b248f14SClaudio Fontana 
1431b248f14SClaudio Fontana static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
1441b248f14SClaudio Fontana {
1451b248f14SClaudio Fontana     uint8_t new_flags = get_float_exception_flags(&env->fp_status);
1461b248f14SClaudio Fontana     float_raise(old_flags, &env->fp_status);
1471b248f14SClaudio Fontana     fpu_set_exception(env,
1481b248f14SClaudio Fontana                       ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
1491b248f14SClaudio Fontana                        (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
1501b248f14SClaudio Fontana                        (new_flags & float_flag_overflow ? FPUS_OE : 0) |
1511b248f14SClaudio Fontana                        (new_flags & float_flag_underflow ? FPUS_UE : 0) |
1521b248f14SClaudio Fontana                        (new_flags & float_flag_inexact ? FPUS_PE : 0) |
1531b248f14SClaudio Fontana                        (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
1541b248f14SClaudio Fontana }
1551b248f14SClaudio Fontana 
1561b248f14SClaudio Fontana static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
1571b248f14SClaudio Fontana {
1581b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1591b248f14SClaudio Fontana     floatx80 ret = floatx80_div(a, b, &env->fp_status);
1601b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
1611b248f14SClaudio Fontana     return ret;
1621b248f14SClaudio Fontana }
1631b248f14SClaudio Fontana 
1641b248f14SClaudio Fontana static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
1651b248f14SClaudio Fontana {
1661b248f14SClaudio Fontana     if (env->cr[0] & CR0_NE_MASK) {
1671b248f14SClaudio Fontana         raise_exception_ra(env, EXCP10_COPR, retaddr);
1681b248f14SClaudio Fontana     }
1691b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY)
17083a3d9c7SClaudio Fontana     else {
17183a3d9c7SClaudio Fontana         fpu_check_raise_ferr_irq(env);
1721b248f14SClaudio Fontana     }
1731b248f14SClaudio Fontana #endif
1741b248f14SClaudio Fontana }
1751b248f14SClaudio Fontana 
1761b248f14SClaudio Fontana void helper_flds_FT0(CPUX86State *env, uint32_t val)
1771b248f14SClaudio Fontana {
1781b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1791b248f14SClaudio Fontana     union {
1801b248f14SClaudio Fontana         float32 f;
1811b248f14SClaudio Fontana         uint32_t i;
1821b248f14SClaudio Fontana     } u;
1831b248f14SClaudio Fontana 
1841b248f14SClaudio Fontana     u.i = val;
1851b248f14SClaudio Fontana     FT0 = float32_to_floatx80(u.f, &env->fp_status);
1861b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
1871b248f14SClaudio Fontana }
1881b248f14SClaudio Fontana 
1891b248f14SClaudio Fontana void helper_fldl_FT0(CPUX86State *env, uint64_t val)
1901b248f14SClaudio Fontana {
1911b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
1921b248f14SClaudio Fontana     union {
1931b248f14SClaudio Fontana         float64 f;
1941b248f14SClaudio Fontana         uint64_t i;
1951b248f14SClaudio Fontana     } u;
1961b248f14SClaudio Fontana 
1971b248f14SClaudio Fontana     u.i = val;
1981b248f14SClaudio Fontana     FT0 = float64_to_floatx80(u.f, &env->fp_status);
1991b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2001b248f14SClaudio Fontana }
2011b248f14SClaudio Fontana 
2021b248f14SClaudio Fontana void helper_fildl_FT0(CPUX86State *env, int32_t val)
2031b248f14SClaudio Fontana {
2041b248f14SClaudio Fontana     FT0 = int32_to_floatx80(val, &env->fp_status);
2051b248f14SClaudio Fontana }
2061b248f14SClaudio Fontana 
2071b248f14SClaudio Fontana void helper_flds_ST0(CPUX86State *env, uint32_t val)
2081b248f14SClaudio Fontana {
2091b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2101b248f14SClaudio Fontana     int new_fpstt;
2111b248f14SClaudio Fontana     union {
2121b248f14SClaudio Fontana         float32 f;
2131b248f14SClaudio Fontana         uint32_t i;
2141b248f14SClaudio Fontana     } u;
2151b248f14SClaudio Fontana 
2161b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2171b248f14SClaudio Fontana     u.i = val;
2181b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
2191b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2201b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
2211b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2221b248f14SClaudio Fontana }
2231b248f14SClaudio Fontana 
2241b248f14SClaudio Fontana void helper_fldl_ST0(CPUX86State *env, uint64_t val)
2251b248f14SClaudio Fontana {
2261b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2271b248f14SClaudio Fontana     int new_fpstt;
2281b248f14SClaudio Fontana     union {
2291b248f14SClaudio Fontana         float64 f;
2301b248f14SClaudio Fontana         uint64_t i;
2311b248f14SClaudio Fontana     } u;
2321b248f14SClaudio Fontana 
2331b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2341b248f14SClaudio Fontana     u.i = val;
2351b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
2361b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2371b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
2381b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2391b248f14SClaudio Fontana }
2401b248f14SClaudio Fontana 
241276de33fSAlex Bennée static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
242276de33fSAlex Bennée {
243276de33fSAlex Bennée     FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
244276de33fSAlex Bennée     set_floatx80_rounding_precision(floatx80_precision_x, st);
245276de33fSAlex Bennée     return old;
246276de33fSAlex Bennée }
247276de33fSAlex Bennée 
2481b248f14SClaudio Fontana void helper_fildl_ST0(CPUX86State *env, int32_t val)
2491b248f14SClaudio Fontana {
2501b248f14SClaudio Fontana     int new_fpstt;
251276de33fSAlex Bennée     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
2521b248f14SClaudio Fontana 
2531b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2541b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
2551b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2561b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
257276de33fSAlex Bennée 
258276de33fSAlex Bennée     set_floatx80_rounding_precision(old, &env->fp_status);
2591b248f14SClaudio Fontana }
2601b248f14SClaudio Fontana 
2611b248f14SClaudio Fontana void helper_fildll_ST0(CPUX86State *env, int64_t val)
2621b248f14SClaudio Fontana {
2631b248f14SClaudio Fontana     int new_fpstt;
264276de33fSAlex Bennée     FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);
2651b248f14SClaudio Fontana 
2661b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
2671b248f14SClaudio Fontana     env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
2681b248f14SClaudio Fontana     env->fpstt = new_fpstt;
2691b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
270276de33fSAlex Bennée 
271276de33fSAlex Bennée     set_floatx80_rounding_precision(old, &env->fp_status);
2721b248f14SClaudio Fontana }
2731b248f14SClaudio Fontana 
2741b248f14SClaudio Fontana uint32_t helper_fsts_ST0(CPUX86State *env)
2751b248f14SClaudio Fontana {
2761b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2771b248f14SClaudio Fontana     union {
2781b248f14SClaudio Fontana         float32 f;
2791b248f14SClaudio Fontana         uint32_t i;
2801b248f14SClaudio Fontana     } u;
2811b248f14SClaudio Fontana 
2821b248f14SClaudio Fontana     u.f = floatx80_to_float32(ST0, &env->fp_status);
2831b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2841b248f14SClaudio Fontana     return u.i;
2851b248f14SClaudio Fontana }
2861b248f14SClaudio Fontana 
2871b248f14SClaudio Fontana uint64_t helper_fstl_ST0(CPUX86State *env)
2881b248f14SClaudio Fontana {
2891b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
2901b248f14SClaudio Fontana     union {
2911b248f14SClaudio Fontana         float64 f;
2921b248f14SClaudio Fontana         uint64_t i;
2931b248f14SClaudio Fontana     } u;
2941b248f14SClaudio Fontana 
2951b248f14SClaudio Fontana     u.f = floatx80_to_float64(ST0, &env->fp_status);
2961b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
2971b248f14SClaudio Fontana     return u.i;
2981b248f14SClaudio Fontana }
2991b248f14SClaudio Fontana 
3001b248f14SClaudio Fontana int32_t helper_fist_ST0(CPUX86State *env)
3011b248f14SClaudio Fontana {
3021b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3031b248f14SClaudio Fontana     int32_t val;
3041b248f14SClaudio Fontana 
3051b248f14SClaudio Fontana     val = floatx80_to_int32(ST0, &env->fp_status);
3061b248f14SClaudio Fontana     if (val != (int16_t)val) {
3071b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
3081b248f14SClaudio Fontana         val = -32768;
3091b248f14SClaudio Fontana     }
3101b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3111b248f14SClaudio Fontana     return val;
3121b248f14SClaudio Fontana }
3131b248f14SClaudio Fontana 
3141b248f14SClaudio Fontana int32_t helper_fistl_ST0(CPUX86State *env)
3151b248f14SClaudio Fontana {
3161b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3171b248f14SClaudio Fontana     int32_t val;
3181b248f14SClaudio Fontana 
3191b248f14SClaudio Fontana     val = floatx80_to_int32(ST0, &env->fp_status);
3201b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3211b248f14SClaudio Fontana         val = 0x80000000;
3221b248f14SClaudio Fontana     }
3231b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3241b248f14SClaudio Fontana     return val;
3251b248f14SClaudio Fontana }
3261b248f14SClaudio Fontana 
3271b248f14SClaudio Fontana int64_t helper_fistll_ST0(CPUX86State *env)
3281b248f14SClaudio Fontana {
3291b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3301b248f14SClaudio Fontana     int64_t val;
3311b248f14SClaudio Fontana 
3321b248f14SClaudio Fontana     val = floatx80_to_int64(ST0, &env->fp_status);
3331b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3341b248f14SClaudio Fontana         val = 0x8000000000000000ULL;
3351b248f14SClaudio Fontana     }
3361b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3371b248f14SClaudio Fontana     return val;
3381b248f14SClaudio Fontana }
3391b248f14SClaudio Fontana 
3401b248f14SClaudio Fontana int32_t helper_fistt_ST0(CPUX86State *env)
3411b248f14SClaudio Fontana {
3421b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3431b248f14SClaudio Fontana     int32_t val;
3441b248f14SClaudio Fontana 
3451b248f14SClaudio Fontana     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
3461b248f14SClaudio Fontana     if (val != (int16_t)val) {
3471b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
3481b248f14SClaudio Fontana         val = -32768;
3491b248f14SClaudio Fontana     }
3501b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3511b248f14SClaudio Fontana     return val;
3521b248f14SClaudio Fontana }
3531b248f14SClaudio Fontana 
3541b248f14SClaudio Fontana int32_t helper_fisttl_ST0(CPUX86State *env)
3551b248f14SClaudio Fontana {
3561b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3571b248f14SClaudio Fontana     int32_t val;
3581b248f14SClaudio Fontana 
3591b248f14SClaudio Fontana     val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
3601b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3611b248f14SClaudio Fontana         val = 0x80000000;
3621b248f14SClaudio Fontana     }
3631b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3641b248f14SClaudio Fontana     return val;
3651b248f14SClaudio Fontana }
3661b248f14SClaudio Fontana 
3671b248f14SClaudio Fontana int64_t helper_fisttll_ST0(CPUX86State *env)
3681b248f14SClaudio Fontana {
3691b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
3701b248f14SClaudio Fontana     int64_t val;
3711b248f14SClaudio Fontana 
3721b248f14SClaudio Fontana     val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
3731b248f14SClaudio Fontana     if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
3741b248f14SClaudio Fontana         val = 0x8000000000000000ULL;
3751b248f14SClaudio Fontana     }
3761b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
3771b248f14SClaudio Fontana     return val;
3781b248f14SClaudio Fontana }
3791b248f14SClaudio Fontana 
3801b248f14SClaudio Fontana void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
3811b248f14SClaudio Fontana {
3821b248f14SClaudio Fontana     int new_fpstt;
3831b248f14SClaudio Fontana 
3841b248f14SClaudio Fontana     new_fpstt = (env->fpstt - 1) & 7;
385e3a69234SRichard Henderson     env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
3861b248f14SClaudio Fontana     env->fpstt = new_fpstt;
3871b248f14SClaudio Fontana     env->fptags[new_fpstt] = 0; /* validate stack entry */
3881b248f14SClaudio Fontana }
3891b248f14SClaudio Fontana 
3901b248f14SClaudio Fontana void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
3911b248f14SClaudio Fontana {
392e3a69234SRichard Henderson     do_fstt(env, ST0, ptr, GETPC());
3931b248f14SClaudio Fontana }
3941b248f14SClaudio Fontana 
3951b248f14SClaudio Fontana void helper_fpush(CPUX86State *env)
3961b248f14SClaudio Fontana {
3971b248f14SClaudio Fontana     fpush(env);
3981b248f14SClaudio Fontana }
3991b248f14SClaudio Fontana 
4001b248f14SClaudio Fontana void helper_fpop(CPUX86State *env)
4011b248f14SClaudio Fontana {
4021b248f14SClaudio Fontana     fpop(env);
4031b248f14SClaudio Fontana }
4041b248f14SClaudio Fontana 
4051b248f14SClaudio Fontana void helper_fdecstp(CPUX86State *env)
4061b248f14SClaudio Fontana {
4071b248f14SClaudio Fontana     env->fpstt = (env->fpstt - 1) & 7;
4081b248f14SClaudio Fontana     env->fpus &= ~0x4700;
4091b248f14SClaudio Fontana }
4101b248f14SClaudio Fontana 
4111b248f14SClaudio Fontana void helper_fincstp(CPUX86State *env)
4121b248f14SClaudio Fontana {
4131b248f14SClaudio Fontana     env->fpstt = (env->fpstt + 1) & 7;
4141b248f14SClaudio Fontana     env->fpus &= ~0x4700;
4151b248f14SClaudio Fontana }
4161b248f14SClaudio Fontana 
/* FPU move */
4181b248f14SClaudio Fontana 
4191b248f14SClaudio Fontana void helper_ffree_STN(CPUX86State *env, int st_index)
4201b248f14SClaudio Fontana {
4211b248f14SClaudio Fontana     env->fptags[(env->fpstt + st_index) & 7] = 1;
4221b248f14SClaudio Fontana }
4231b248f14SClaudio Fontana 
4241b248f14SClaudio Fontana void helper_fmov_ST0_FT0(CPUX86State *env)
4251b248f14SClaudio Fontana {
4261b248f14SClaudio Fontana     ST0 = FT0;
4271b248f14SClaudio Fontana }
4281b248f14SClaudio Fontana 
4291b248f14SClaudio Fontana void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
4301b248f14SClaudio Fontana {
4311b248f14SClaudio Fontana     FT0 = ST(st_index);
4321b248f14SClaudio Fontana }
4331b248f14SClaudio Fontana 
4341b248f14SClaudio Fontana void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
4351b248f14SClaudio Fontana {
4361b248f14SClaudio Fontana     ST0 = ST(st_index);
4371b248f14SClaudio Fontana }
4381b248f14SClaudio Fontana 
4391b248f14SClaudio Fontana void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
4401b248f14SClaudio Fontana {
4411b248f14SClaudio Fontana     ST(st_index) = ST0;
4421b248f14SClaudio Fontana }
4431b248f14SClaudio Fontana 
4441b248f14SClaudio Fontana void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
4451b248f14SClaudio Fontana {
4461b248f14SClaudio Fontana     floatx80 tmp;
4471b248f14SClaudio Fontana 
4481b248f14SClaudio Fontana     tmp = ST(st_index);
4491b248f14SClaudio Fontana     ST(st_index) = ST0;
4501b248f14SClaudio Fontana     ST0 = tmp;
4511b248f14SClaudio Fontana }
4521b248f14SClaudio Fontana 
/* FPU operations */
4541b248f14SClaudio Fontana 
/*
 * Status-word condition bits for a comparison result, indexed by the
 * FloatRelation value plus one.
 * NOTE(review): presumably the order is less, equal, greater, unordered;
 * confirm against softfloat's FloatRelation enum.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
4561b248f14SClaudio Fontana 
4571b248f14SClaudio Fontana void helper_fcom_ST0_FT0(CPUX86State *env)
4581b248f14SClaudio Fontana {
4591b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4601b248f14SClaudio Fontana     FloatRelation ret;
4611b248f14SClaudio Fontana 
4621b248f14SClaudio Fontana     ret = floatx80_compare(ST0, FT0, &env->fp_status);
4631b248f14SClaudio Fontana     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
4641b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4651b248f14SClaudio Fontana }
4661b248f14SClaudio Fontana 
4671b248f14SClaudio Fontana void helper_fucom_ST0_FT0(CPUX86State *env)
4681b248f14SClaudio Fontana {
4691b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4701b248f14SClaudio Fontana     FloatRelation ret;
4711b248f14SClaudio Fontana 
4721b248f14SClaudio Fontana     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
4731b248f14SClaudio Fontana     env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
4741b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4751b248f14SClaudio Fontana }
4761b248f14SClaudio Fontana 
4771b248f14SClaudio Fontana static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
4781b248f14SClaudio Fontana 
4791b248f14SClaudio Fontana void helper_fcomi_ST0_FT0(CPUX86State *env)
4801b248f14SClaudio Fontana {
4811b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4821b248f14SClaudio Fontana     int eflags;
4831b248f14SClaudio Fontana     FloatRelation ret;
4841b248f14SClaudio Fontana 
4851b248f14SClaudio Fontana     ret = floatx80_compare(ST0, FT0, &env->fp_status);
4861b248f14SClaudio Fontana     eflags = cpu_cc_compute_all(env, CC_OP);
4871b248f14SClaudio Fontana     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
4881b248f14SClaudio Fontana     CC_SRC = eflags;
4891b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
4901b248f14SClaudio Fontana }
4911b248f14SClaudio Fontana 
4921b248f14SClaudio Fontana void helper_fucomi_ST0_FT0(CPUX86State *env)
4931b248f14SClaudio Fontana {
4941b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
4951b248f14SClaudio Fontana     int eflags;
4961b248f14SClaudio Fontana     FloatRelation ret;
4971b248f14SClaudio Fontana 
4981b248f14SClaudio Fontana     ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
4991b248f14SClaudio Fontana     eflags = cpu_cc_compute_all(env, CC_OP);
5001b248f14SClaudio Fontana     eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
5011b248f14SClaudio Fontana     CC_SRC = eflags;
5021b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5031b248f14SClaudio Fontana }
5041b248f14SClaudio Fontana 
5051b248f14SClaudio Fontana void helper_fadd_ST0_FT0(CPUX86State *env)
5061b248f14SClaudio Fontana {
5071b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5081b248f14SClaudio Fontana     ST0 = floatx80_add(ST0, FT0, &env->fp_status);
5091b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5101b248f14SClaudio Fontana }
5111b248f14SClaudio Fontana 
5121b248f14SClaudio Fontana void helper_fmul_ST0_FT0(CPUX86State *env)
5131b248f14SClaudio Fontana {
5141b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5151b248f14SClaudio Fontana     ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
5161b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5171b248f14SClaudio Fontana }
5181b248f14SClaudio Fontana 
5191b248f14SClaudio Fontana void helper_fsub_ST0_FT0(CPUX86State *env)
5201b248f14SClaudio Fontana {
5211b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5221b248f14SClaudio Fontana     ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
5231b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5241b248f14SClaudio Fontana }
5251b248f14SClaudio Fontana 
5261b248f14SClaudio Fontana void helper_fsubr_ST0_FT0(CPUX86State *env)
5271b248f14SClaudio Fontana {
5281b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5291b248f14SClaudio Fontana     ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
5301b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5311b248f14SClaudio Fontana }
5321b248f14SClaudio Fontana 
5331b248f14SClaudio Fontana void helper_fdiv_ST0_FT0(CPUX86State *env)
5341b248f14SClaudio Fontana {
5351b248f14SClaudio Fontana     ST0 = helper_fdiv(env, ST0, FT0);
5361b248f14SClaudio Fontana }
5371b248f14SClaudio Fontana 
5381b248f14SClaudio Fontana void helper_fdivr_ST0_FT0(CPUX86State *env)
5391b248f14SClaudio Fontana {
5401b248f14SClaudio Fontana     ST0 = helper_fdiv(env, FT0, ST0);
5411b248f14SClaudio Fontana }
5421b248f14SClaudio Fontana 
/* fp operations between STN and ST0 */
5441b248f14SClaudio Fontana 
5451b248f14SClaudio Fontana void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
5461b248f14SClaudio Fontana {
5471b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5481b248f14SClaudio Fontana     ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
5491b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5501b248f14SClaudio Fontana }
5511b248f14SClaudio Fontana 
5521b248f14SClaudio Fontana void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
5531b248f14SClaudio Fontana {
5541b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5551b248f14SClaudio Fontana     ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
5561b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5571b248f14SClaudio Fontana }
5581b248f14SClaudio Fontana 
5591b248f14SClaudio Fontana void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
5601b248f14SClaudio Fontana {
5611b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5621b248f14SClaudio Fontana     ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
5631b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5641b248f14SClaudio Fontana }
5651b248f14SClaudio Fontana 
5661b248f14SClaudio Fontana void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
5671b248f14SClaudio Fontana {
5681b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
5691b248f14SClaudio Fontana     ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
5701b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
5711b248f14SClaudio Fontana }
5721b248f14SClaudio Fontana 
5731b248f14SClaudio Fontana void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
5741b248f14SClaudio Fontana {
5751b248f14SClaudio Fontana     floatx80 *p;
5761b248f14SClaudio Fontana 
5771b248f14SClaudio Fontana     p = &ST(st_index);
5781b248f14SClaudio Fontana     *p = helper_fdiv(env, *p, ST0);
5791b248f14SClaudio Fontana }
5801b248f14SClaudio Fontana 
5811b248f14SClaudio Fontana void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
5821b248f14SClaudio Fontana {
5831b248f14SClaudio Fontana     floatx80 *p;
5841b248f14SClaudio Fontana 
5851b248f14SClaudio Fontana     p = &ST(st_index);
5861b248f14SClaudio Fontana     *p = helper_fdiv(env, ST0, *p);
5871b248f14SClaudio Fontana }
5881b248f14SClaudio Fontana 
/* misc FPU operations */
/* Replace ST0 with floatx80_chs(ST0) (sign change); no status is passed. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
5941b248f14SClaudio Fontana 
/* Replace ST0 with floatx80_abs(ST0) (absolute value); no status is passed. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
5991b248f14SClaudio Fontana 
/*
 * Overwrite ST0 with the constant floatx80_one.  The stack top is not
 * moved here (presumably the caller has already pushed).
 */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
6041b248f14SClaudio Fontana 
6051b248f14SClaudio Fontana void helper_fldl2t_ST0(CPUX86State *env)
6061b248f14SClaudio Fontana {
6071b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6081b248f14SClaudio Fontana     case FPU_RC_UP:
6091b248f14SClaudio Fontana         ST0 = floatx80_l2t_u;
6101b248f14SClaudio Fontana         break;
6111b248f14SClaudio Fontana     default:
6121b248f14SClaudio Fontana         ST0 = floatx80_l2t;
6131b248f14SClaudio Fontana         break;
6141b248f14SClaudio Fontana     }
6151b248f14SClaudio Fontana }
6161b248f14SClaudio Fontana 
6171b248f14SClaudio Fontana void helper_fldl2e_ST0(CPUX86State *env)
6181b248f14SClaudio Fontana {
6191b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6201b248f14SClaudio Fontana     case FPU_RC_DOWN:
6211b248f14SClaudio Fontana     case FPU_RC_CHOP:
6221b248f14SClaudio Fontana         ST0 = floatx80_l2e_d;
6231b248f14SClaudio Fontana         break;
6241b248f14SClaudio Fontana     default:
6251b248f14SClaudio Fontana         ST0 = floatx80_l2e;
6261b248f14SClaudio Fontana         break;
6271b248f14SClaudio Fontana     }
6281b248f14SClaudio Fontana }
6291b248f14SClaudio Fontana 
6301b248f14SClaudio Fontana void helper_fldpi_ST0(CPUX86State *env)
6311b248f14SClaudio Fontana {
6321b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6331b248f14SClaudio Fontana     case FPU_RC_DOWN:
6341b248f14SClaudio Fontana     case FPU_RC_CHOP:
6351b248f14SClaudio Fontana         ST0 = floatx80_pi_d;
6361b248f14SClaudio Fontana         break;
6371b248f14SClaudio Fontana     default:
6381b248f14SClaudio Fontana         ST0 = floatx80_pi;
6391b248f14SClaudio Fontana         break;
6401b248f14SClaudio Fontana     }
6411b248f14SClaudio Fontana }
6421b248f14SClaudio Fontana 
6431b248f14SClaudio Fontana void helper_fldlg2_ST0(CPUX86State *env)
6441b248f14SClaudio Fontana {
6451b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6461b248f14SClaudio Fontana     case FPU_RC_DOWN:
6471b248f14SClaudio Fontana     case FPU_RC_CHOP:
6481b248f14SClaudio Fontana         ST0 = floatx80_lg2_d;
6491b248f14SClaudio Fontana         break;
6501b248f14SClaudio Fontana     default:
6511b248f14SClaudio Fontana         ST0 = floatx80_lg2;
6521b248f14SClaudio Fontana         break;
6531b248f14SClaudio Fontana     }
6541b248f14SClaudio Fontana }
6551b248f14SClaudio Fontana 
6561b248f14SClaudio Fontana void helper_fldln2_ST0(CPUX86State *env)
6571b248f14SClaudio Fontana {
6581b248f14SClaudio Fontana     switch (env->fpuc & FPU_RC_MASK) {
6591b248f14SClaudio Fontana     case FPU_RC_DOWN:
6601b248f14SClaudio Fontana     case FPU_RC_CHOP:
6611b248f14SClaudio Fontana         ST0 = floatx80_ln2_d;
6621b248f14SClaudio Fontana         break;
6631b248f14SClaudio Fontana     default:
6641b248f14SClaudio Fontana         ST0 = floatx80_ln2;
6651b248f14SClaudio Fontana         break;
6661b248f14SClaudio Fontana     }
6671b248f14SClaudio Fontana }
6681b248f14SClaudio Fontana 
/*
 * Overwrite ST0 with the constant floatx80_zero.  The stack top is not
 * moved here (presumably the caller has already pushed).
 */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
6731b248f14SClaudio Fontana 
/* Set the FT0 temporary (env->ft0) to the constant floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
6781b248f14SClaudio Fontana 
6791b248f14SClaudio Fontana uint32_t helper_fnstsw(CPUX86State *env)
6801b248f14SClaudio Fontana {
6811b248f14SClaudio Fontana     return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
6821b248f14SClaudio Fontana }
6831b248f14SClaudio Fontana 
/* Return the current FPU control word, env->fpuc, unmodified. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
6881b248f14SClaudio Fontana 
689*314d3effSPaolo Bonzini static void set_x86_rounding_mode(unsigned mode, float_status *status)
690*314d3effSPaolo Bonzini {
691*314d3effSPaolo Bonzini     static FloatRoundMode x86_round_mode[4] = {
692*314d3effSPaolo Bonzini         float_round_nearest_even,
693*314d3effSPaolo Bonzini         float_round_down,
694*314d3effSPaolo Bonzini         float_round_up,
695*314d3effSPaolo Bonzini         float_round_to_zero
696*314d3effSPaolo Bonzini     };
697*314d3effSPaolo Bonzini     assert(mode < ARRAY_SIZE(x86_round_mode));
698*314d3effSPaolo Bonzini     set_float_rounding_mode(x86_round_mode[mode], status);
699*314d3effSPaolo Bonzini }
700*314d3effSPaolo Bonzini 
7011b248f14SClaudio Fontana void update_fp_status(CPUX86State *env)
7021b248f14SClaudio Fontana {
703*314d3effSPaolo Bonzini     int rnd_mode;
7048da5f1dbSRichard Henderson     FloatX80RoundPrec rnd_prec;
7051b248f14SClaudio Fontana 
7061b248f14SClaudio Fontana     /* set rounding mode */
707*314d3effSPaolo Bonzini     rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
708*314d3effSPaolo Bonzini     set_x86_rounding_mode(rnd_mode, &env->fp_status);
7098da5f1dbSRichard Henderson 
7101b248f14SClaudio Fontana     switch ((env->fpuc >> 8) & 3) {
7111b248f14SClaudio Fontana     case 0:
7128da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_s;
7131b248f14SClaudio Fontana         break;
7141b248f14SClaudio Fontana     case 2:
7158da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_d;
7161b248f14SClaudio Fontana         break;
7171b248f14SClaudio Fontana     case 3:
7181b248f14SClaudio Fontana     default:
7198da5f1dbSRichard Henderson         rnd_prec = floatx80_precision_x;
7201b248f14SClaudio Fontana         break;
7211b248f14SClaudio Fontana     }
7228da5f1dbSRichard Henderson     set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
7231b248f14SClaudio Fontana }
7241b248f14SClaudio Fontana 
/* Install @val as the FPU control word by delegating to cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
7291b248f14SClaudio Fontana 
/*
 * Clear status-word bits 0..7 and bit 15, keeping bits 8..14 (mask 0x7f00).
 * In the x87 status-word layout the cleared bits are the exception flags,
 * stack fault, error summary and busy bits.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}
7341b248f14SClaudio Fontana 
/*
 * If FPUS_SE is set in the status word, call fpu_raise_exception() with
 * this helper's return address (GETPC()); otherwise do nothing.
 */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}
7411b248f14SClaudio Fontana 
742bbdda9b7SRichard Henderson static void do_fninit(CPUX86State *env)
7431b248f14SClaudio Fontana {
7441b248f14SClaudio Fontana     env->fpus = 0;
7451b248f14SClaudio Fontana     env->fpstt = 0;
74684abdd7dSZiqiao Kong     env->fpcs = 0;
74784abdd7dSZiqiao Kong     env->fpds = 0;
74884abdd7dSZiqiao Kong     env->fpip = 0;
74984abdd7dSZiqiao Kong     env->fpdp = 0;
7501b248f14SClaudio Fontana     cpu_set_fpuc(env, 0x37f);
7511b248f14SClaudio Fontana     env->fptags[0] = 1;
7521b248f14SClaudio Fontana     env->fptags[1] = 1;
7531b248f14SClaudio Fontana     env->fptags[2] = 1;
7541b248f14SClaudio Fontana     env->fptags[3] = 1;
7551b248f14SClaudio Fontana     env->fptags[4] = 1;
7561b248f14SClaudio Fontana     env->fptags[5] = 1;
7571b248f14SClaudio Fontana     env->fptags[6] = 1;
7581b248f14SClaudio Fontana     env->fptags[7] = 1;
7591b248f14SClaudio Fontana }
7601b248f14SClaudio Fontana 
761bbdda9b7SRichard Henderson void helper_fninit(CPUX86State *env)
762bbdda9b7SRichard Henderson {
763bbdda9b7SRichard Henderson     do_fninit(env);
764bbdda9b7SRichard Henderson }
765bbdda9b7SRichard Henderson 
7661b248f14SClaudio Fontana /* BCD ops */
7671b248f14SClaudio Fontana 
7681b248f14SClaudio Fontana void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
7691b248f14SClaudio Fontana {
7701b248f14SClaudio Fontana     floatx80 tmp;
7711b248f14SClaudio Fontana     uint64_t val;
7721b248f14SClaudio Fontana     unsigned int v;
7731b248f14SClaudio Fontana     int i;
7741b248f14SClaudio Fontana 
7751b248f14SClaudio Fontana     val = 0;
7761b248f14SClaudio Fontana     for (i = 8; i >= 0; i--) {
7771b248f14SClaudio Fontana         v = cpu_ldub_data_ra(env, ptr + i, GETPC());
7781b248f14SClaudio Fontana         val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
7791b248f14SClaudio Fontana     }
7801b248f14SClaudio Fontana     tmp = int64_to_floatx80(val, &env->fp_status);
7811b248f14SClaudio Fontana     if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
7821b248f14SClaudio Fontana         tmp = floatx80_chs(tmp);
7831b248f14SClaudio Fontana     }
7841b248f14SClaudio Fontana     fpush(env);
7851b248f14SClaudio Fontana     ST0 = tmp;
7861b248f14SClaudio Fontana }
7871b248f14SClaudio Fontana 
7881b248f14SClaudio Fontana void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
7891b248f14SClaudio Fontana {
7901b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
7911b248f14SClaudio Fontana     int v;
7921b248f14SClaudio Fontana     target_ulong mem_ref, mem_end;
7931b248f14SClaudio Fontana     int64_t val;
7941b248f14SClaudio Fontana     CPU_LDoubleU temp;
7951b248f14SClaudio Fontana 
7961b248f14SClaudio Fontana     temp.d = ST0;
7971b248f14SClaudio Fontana 
7981b248f14SClaudio Fontana     val = floatx80_to_int64(ST0, &env->fp_status);
7991b248f14SClaudio Fontana     mem_ref = ptr;
8001b248f14SClaudio Fontana     if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
8011b248f14SClaudio Fontana         set_float_exception_flags(float_flag_invalid, &env->fp_status);
8021b248f14SClaudio Fontana         while (mem_ref < ptr + 7) {
8031b248f14SClaudio Fontana             cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
8041b248f14SClaudio Fontana         }
8051b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
8061b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
8071b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
8081b248f14SClaudio Fontana         merge_exception_flags(env, old_flags);
8091b248f14SClaudio Fontana         return;
8101b248f14SClaudio Fontana     }
8111b248f14SClaudio Fontana     mem_end = mem_ref + 9;
8121b248f14SClaudio Fontana     if (SIGND(temp)) {
8131b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
8141b248f14SClaudio Fontana         val = -val;
8151b248f14SClaudio Fontana     } else {
8161b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
8171b248f14SClaudio Fontana     }
8181b248f14SClaudio Fontana     while (mem_ref < mem_end) {
8191b248f14SClaudio Fontana         if (val == 0) {
8201b248f14SClaudio Fontana             break;
8211b248f14SClaudio Fontana         }
8221b248f14SClaudio Fontana         v = val % 100;
8231b248f14SClaudio Fontana         val = val / 100;
8241b248f14SClaudio Fontana         v = ((v / 10) << 4) | (v % 10);
8251b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_ref++, v, GETPC());
8261b248f14SClaudio Fontana     }
8271b248f14SClaudio Fontana     while (mem_ref < mem_end) {
8281b248f14SClaudio Fontana         cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
8291b248f14SClaudio Fontana     }
8301b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
8311b248f14SClaudio Fontana }
8321b248f14SClaudio Fontana 
/*
 * 128-bit significand of log(2), split into its upper and lower 64-bit
 * halves.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
8361b248f14SClaudio Fontana 
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_N is the coefficient of x^N.  The constant term carries the
 * same leading significand as log(2) rounded to 64 bits;
 * f2xm1_coeff_0_low is a much smaller companion value (presumably the
 * low-order part of the constant term, for extra precision).
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
8501b248f14SClaudio Fontana 
/* One row of the f2xm1 lookup table: a sample point t with 2^t and 2^t - 1. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};
8621b248f14SClaudio Fontana 
/*
 * Table of { t, 2^t, 2^t - 1 } triples in increasing order of t.
 *
 * There are 65 rows: the first has t = -1 (2^t = 0.5, 2^t - 1 = -0.5),
 * row 32 has t = 0 and the last has t = 1 (2^t = 2, 2^t - 1 = 1).  The
 * rows in between sit very close to successive multiples of 1/32, each t
 * being perturbed slightly as described in struct f2xm1_data.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
10601b248f14SClaudio Fontana 
10611b248f14SClaudio Fontana void helper_f2xm1(CPUX86State *env)
10621b248f14SClaudio Fontana {
10631b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
10641b248f14SClaudio Fontana     uint64_t sig = extractFloatx80Frac(ST0);
10651b248f14SClaudio Fontana     int32_t exp = extractFloatx80Exp(ST0);
10661b248f14SClaudio Fontana     bool sign = extractFloatx80Sign(ST0);
10671b248f14SClaudio Fontana 
10681b248f14SClaudio Fontana     if (floatx80_invalid_encoding(ST0)) {
10691b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
10701b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
10711b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
10721b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
10731b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
10741b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
10751b248f14SClaudio Fontana         }
10761b248f14SClaudio Fontana     } else if (exp > 0x3fff ||
10771b248f14SClaudio Fontana                (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
10781b248f14SClaudio Fontana         /* Out of range for the instruction, treat as invalid.  */
10791b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
10801b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
10811b248f14SClaudio Fontana     } else if (exp == 0x3fff) {
10821b248f14SClaudio Fontana         /* Argument 1 or -1, exact result 1 or -0.5.  */
10831b248f14SClaudio Fontana         if (sign) {
10841b248f14SClaudio Fontana             ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
10851b248f14SClaudio Fontana         }
10861b248f14SClaudio Fontana     } else if (exp < 0x3fb0) {
10871b248f14SClaudio Fontana         if (!floatx80_is_zero(ST0)) {
10881b248f14SClaudio Fontana             /*
10891b248f14SClaudio Fontana              * Multiplying the argument by an extra-precision version
10901b248f14SClaudio Fontana              * of log(2) is sufficiently precise.  Zero arguments are
10911b248f14SClaudio Fontana              * returned unchanged.
10921b248f14SClaudio Fontana              */
10931b248f14SClaudio Fontana             uint64_t sig0, sig1, sig2;
10941b248f14SClaudio Fontana             if (exp == 0) {
10951b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(sig, &exp, &sig);
10961b248f14SClaudio Fontana             }
10971b248f14SClaudio Fontana             mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
10981b248f14SClaudio Fontana                             &sig2);
10991b248f14SClaudio Fontana             /* This result is inexact.  */
11001b248f14SClaudio Fontana             sig1 |= 1;
11018da5f1dbSRichard Henderson             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
11028da5f1dbSRichard Henderson                                                 sign, exp, sig0, sig1,
11031b248f14SClaudio Fontana                                                 &env->fp_status);
11041b248f14SClaudio Fontana         }
11051b248f14SClaudio Fontana     } else {
11061b248f14SClaudio Fontana         floatx80 tmp, y, accum;
11071b248f14SClaudio Fontana         bool asign, bsign;
11081b248f14SClaudio Fontana         int32_t n, aexp, bexp;
11091b248f14SClaudio Fontana         uint64_t asig0, asig1, asig2, bsig0, bsig1;
11101b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
11118da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
11128da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
11131b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
11148da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
11151b248f14SClaudio Fontana 
11161b248f14SClaudio Fontana         /* Find the nearest multiple of 1/32 to the argument.  */
11171b248f14SClaudio Fontana         tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
11181b248f14SClaudio Fontana         n = 32 + floatx80_to_int32(tmp, &env->fp_status);
11191b248f14SClaudio Fontana         y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);
11201b248f14SClaudio Fontana 
11211b248f14SClaudio Fontana         if (floatx80_is_zero(y)) {
11221b248f14SClaudio Fontana             /*
11231b248f14SClaudio Fontana              * Use the value of 2^t - 1 from the table, to avoid
11241b248f14SClaudio Fontana              * needing to special-case zero as a result of
11251b248f14SClaudio Fontana              * multiplication below.
11261b248f14SClaudio Fontana              */
11271b248f14SClaudio Fontana             ST0 = f2xm1_table[n].t;
11281b248f14SClaudio Fontana             set_float_exception_flags(float_flag_inexact, &env->fp_status);
11291b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
11301b248f14SClaudio Fontana         } else {
11311b248f14SClaudio Fontana             /*
11321b248f14SClaudio Fontana              * Compute the lower parts of a polynomial expansion for
11331b248f14SClaudio Fontana              * (2^y - 1) / y.
11341b248f14SClaudio Fontana              */
11351b248f14SClaudio Fontana             accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
11361b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
11371b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11381b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
11391b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11401b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
11411b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11421b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
11431b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11441b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
11451b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11461b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
11471b248f14SClaudio Fontana             accum = floatx80_mul(accum, y, &env->fp_status);
11481b248f14SClaudio Fontana             accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);
11491b248f14SClaudio Fontana 
11501b248f14SClaudio Fontana             /*
11511b248f14SClaudio Fontana              * The full polynomial expansion is f2xm1_coeff_0 + accum
11521b248f14SClaudio Fontana              * (where accum has much lower magnitude, and so, in
11531b248f14SClaudio Fontana              * particular, carry out of the addition is not possible).
11541b248f14SClaudio Fontana              * (This expansion is only accurate to about 70 bits, not
11551b248f14SClaudio Fontana              * 128 bits.)
11561b248f14SClaudio Fontana              */
11571b248f14SClaudio Fontana             aexp = extractFloatx80Exp(f2xm1_coeff_0);
11581b248f14SClaudio Fontana             asign = extractFloatx80Sign(f2xm1_coeff_0);
11591b248f14SClaudio Fontana             shift128RightJamming(extractFloatx80Frac(accum), 0,
11601b248f14SClaudio Fontana                                  aexp - extractFloatx80Exp(accum),
11611b248f14SClaudio Fontana                                  &asig0, &asig1);
11621b248f14SClaudio Fontana             bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
11631b248f14SClaudio Fontana             bsig1 = 0;
11641b248f14SClaudio Fontana             if (asign == extractFloatx80Sign(accum)) {
11651b248f14SClaudio Fontana                 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
11661b248f14SClaudio Fontana             } else {
11671b248f14SClaudio Fontana                 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
11681b248f14SClaudio Fontana             }
11691b248f14SClaudio Fontana             /* And thus compute an approximation to 2^y - 1.  */
11701b248f14SClaudio Fontana             mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
11711b248f14SClaudio Fontana                             &asig0, &asig1, &asig2);
11721b248f14SClaudio Fontana             aexp += extractFloatx80Exp(y) - 0x3ffe;
11731b248f14SClaudio Fontana             asign ^= extractFloatx80Sign(y);
11741b248f14SClaudio Fontana             if (n != 32) {
11751b248f14SClaudio Fontana                 /*
11761b248f14SClaudio Fontana                  * Multiply this by the precomputed value of 2^t and
11771b248f14SClaudio Fontana                  * add that of 2^t - 1.
11781b248f14SClaudio Fontana                  */
11791b248f14SClaudio Fontana                 mul128By64To192(asig0, asig1,
11801b248f14SClaudio Fontana                                 extractFloatx80Frac(f2xm1_table[n].exp2),
11811b248f14SClaudio Fontana                                 &asig0, &asig1, &asig2);
11821b248f14SClaudio Fontana                 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
11831b248f14SClaudio Fontana                 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
11841b248f14SClaudio Fontana                 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
11851b248f14SClaudio Fontana                 bsig1 = 0;
11861b248f14SClaudio Fontana                 if (bexp < aexp) {
11871b248f14SClaudio Fontana                     shift128RightJamming(bsig0, bsig1, aexp - bexp,
11881b248f14SClaudio Fontana                                          &bsig0, &bsig1);
11891b248f14SClaudio Fontana                 } else if (aexp < bexp) {
11901b248f14SClaudio Fontana                     shift128RightJamming(asig0, asig1, bexp - aexp,
11911b248f14SClaudio Fontana                                          &asig0, &asig1);
11921b248f14SClaudio Fontana                     aexp = bexp;
11931b248f14SClaudio Fontana                 }
11941b248f14SClaudio Fontana                 /* The sign of 2^t - 1 is always that of the result.  */
11951b248f14SClaudio Fontana                 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
11961b248f14SClaudio Fontana                 if (asign == bsign) {
11971b248f14SClaudio Fontana                     /* Avoid possible carry out of the addition.  */
11981b248f14SClaudio Fontana                     shift128RightJamming(asig0, asig1, 1,
11991b248f14SClaudio Fontana                                          &asig0, &asig1);
12001b248f14SClaudio Fontana                     shift128RightJamming(bsig0, bsig1, 1,
12011b248f14SClaudio Fontana                                          &bsig0, &bsig1);
12021b248f14SClaudio Fontana                     ++aexp;
12031b248f14SClaudio Fontana                     add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
12041b248f14SClaudio Fontana                 } else {
12051b248f14SClaudio Fontana                     sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
12061b248f14SClaudio Fontana                     asign = bsign;
12071b248f14SClaudio Fontana                 }
12081b248f14SClaudio Fontana             }
12091b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
12101b248f14SClaudio Fontana             /* This result is inexact.  */
12111b248f14SClaudio Fontana             asig1 |= 1;
12128da5f1dbSRichard Henderson             ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
12138da5f1dbSRichard Henderson                                                 asign, aexp, asig0, asig1,
12141b248f14SClaudio Fontana                                                 &env->fp_status);
12151b248f14SClaudio Fontana         }
12161b248f14SClaudio Fontana 
12171b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
12181b248f14SClaudio Fontana     }
12191b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
12201b248f14SClaudio Fontana }
12211b248f14SClaudio Fontana 
12221b248f14SClaudio Fontana void helper_fptan(CPUX86State *env)
12231b248f14SClaudio Fontana {
12241b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
12251b248f14SClaudio Fontana 
12261b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
12271b248f14SClaudio Fontana         env->fpus |= 0x400;
12281b248f14SClaudio Fontana     } else {
12291b248f14SClaudio Fontana         fptemp = tan(fptemp);
12301b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, fptemp);
12311b248f14SClaudio Fontana         fpush(env);
12321b248f14SClaudio Fontana         ST0 = floatx80_one;
12331b248f14SClaudio Fontana         env->fpus &= ~0x400; /* C2 <-- 0 */
12341b248f14SClaudio Fontana         /* the above code is for |arg| < 2**52 only */
12351b248f14SClaudio Fontana     }
12361b248f14SClaudio Fontana }
12371b248f14SClaudio Fontana 
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is given as a biased exponent (the same encoding as the
 * first argument of make_floatx80, where 0x3fff with significand
 * 0x8000000000000000 is 1.0) plus a 128-bit significand split into a
 * high and a low 64-bit half.  pi/4, pi/2 and pi share one significand
 * and differ only in their exponent; 3pi/4 has its own significand.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL
12511b248f14SClaudio Fontana 
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * The signs alternate and the leading values are close to the Taylor
 * coefficients 1, -1/3, 1/5, -1/7, ...  NOTE(review): presumably
 * fpatan_coeff_k multiplies x^(2k+1); the evaluation code is not next
 * to these definitions, so confirm against the user in helper_fpatan.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
12671b248f14SClaudio Fontana 
12681b248f14SClaudio Fontana struct fpatan_data {
12691b248f14SClaudio Fontana     /* High and low parts of atan(x).  */
12701b248f14SClaudio Fontana     floatx80 atan_high, atan_low;
12711b248f14SClaudio Fontana };
12721b248f14SClaudio Fontana 
/*
 * Precomputed atan values as high/low floatx80 pairs, nine entries.
 * The first entry is atan(0) = 0 and the last has the significand of
 * pi/4 = atan(1) (rounded up, with a negative low part to compensate).
 * NOTE(review): presumably entry n holds atan(n/8), matching the
 * "t = n/8" split used by helper_fpatan -- confirm at the point of use.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
12931b248f14SClaudio Fontana 
12941b248f14SClaudio Fontana void helper_fpatan(CPUX86State *env)
12951b248f14SClaudio Fontana {
12961b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
12971b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
12981b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
12991b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
13001b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
13011b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
13021b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
13031b248f14SClaudio Fontana 
13041b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
13051b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13061b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
13071b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
13081b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13091b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
13101b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
13111b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
13121b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
13131b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
13141b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
13151b248f14SClaudio Fontana         ST1 = ST0;
13161b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
13171b248f14SClaudio Fontana         /* Pass this NaN through.  */
13181b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST1) && !arg0_sign) {
13191b248f14SClaudio Fontana         /* Pass this zero through.  */
13201b248f14SClaudio Fontana     } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
13211b248f14SClaudio Fontana                  arg0_exp - arg1_exp >= 80) &&
13221b248f14SClaudio Fontana                !arg0_sign) {
13231b248f14SClaudio Fontana         /*
13241b248f14SClaudio Fontana          * Dividing ST1 by ST0 gives the correct result up to
13251b248f14SClaudio Fontana          * rounding, and avoids spurious underflow exceptions that
13261b248f14SClaudio Fontana          * might result from passing some small values through the
13271b248f14SClaudio Fontana          * polynomial approximation, but if a finite nonzero result of
13281b248f14SClaudio Fontana          * division is exact, the result of fpatan is still inexact
13291b248f14SClaudio Fontana          * (and underflowing where appropriate).
13301b248f14SClaudio Fontana          */
13318da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
13328da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
13338da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
13341b248f14SClaudio Fontana         ST1 = floatx80_div(ST1, ST0, &env->fp_status);
13351b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
13361b248f14SClaudio Fontana         if (!floatx80_is_zero(ST1) &&
13371b248f14SClaudio Fontana             !(get_float_exception_flags(&env->fp_status) &
13381b248f14SClaudio Fontana               float_flag_inexact)) {
13391b248f14SClaudio Fontana             /*
13401b248f14SClaudio Fontana              * The mathematical result is very slightly closer to zero
13411b248f14SClaudio Fontana              * than this exact result.  Round a value with the
13421b248f14SClaudio Fontana              * significand adjusted accordingly to get the correct
13431b248f14SClaudio Fontana              * exceptions, and possibly an adjusted result depending
13441b248f14SClaudio Fontana              * on the rounding mode.
13451b248f14SClaudio Fontana              */
13461b248f14SClaudio Fontana             uint64_t sig = extractFloatx80Frac(ST1);
13471b248f14SClaudio Fontana             int32_t exp = extractFloatx80Exp(ST1);
13481b248f14SClaudio Fontana             bool sign = extractFloatx80Sign(ST1);
13491b248f14SClaudio Fontana             if (exp == 0) {
13501b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(sig, &exp, &sig);
13511b248f14SClaudio Fontana             }
13528da5f1dbSRichard Henderson             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
13538da5f1dbSRichard Henderson                                                 sign, exp, sig - 1,
13541b248f14SClaudio Fontana                                                 -1, &env->fp_status);
13551b248f14SClaudio Fontana         }
13561b248f14SClaudio Fontana     } else {
13571b248f14SClaudio Fontana         /* The result is inexact.  */
13581b248f14SClaudio Fontana         bool rsign = arg1_sign;
13591b248f14SClaudio Fontana         int32_t rexp;
13601b248f14SClaudio Fontana         uint64_t rsig0, rsig1;
13611b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
13621b248f14SClaudio Fontana             /*
13631b248f14SClaudio Fontana              * ST0 is negative.  The result is pi with the sign of
13641b248f14SClaudio Fontana              * ST1.
13651b248f14SClaudio Fontana              */
13661b248f14SClaudio Fontana             rexp = pi_exp;
13671b248f14SClaudio Fontana             rsig0 = pi_sig_high;
13681b248f14SClaudio Fontana             rsig1 = pi_sig_low;
13691b248f14SClaudio Fontana         } else if (floatx80_is_infinity(ST1)) {
13701b248f14SClaudio Fontana             if (floatx80_is_infinity(ST0)) {
13711b248f14SClaudio Fontana                 if (arg0_sign) {
13721b248f14SClaudio Fontana                     rexp = pi_34_exp;
13731b248f14SClaudio Fontana                     rsig0 = pi_34_sig_high;
13741b248f14SClaudio Fontana                     rsig1 = pi_34_sig_low;
13751b248f14SClaudio Fontana                 } else {
13761b248f14SClaudio Fontana                     rexp = pi_4_exp;
13771b248f14SClaudio Fontana                     rsig0 = pi_4_sig_high;
13781b248f14SClaudio Fontana                     rsig1 = pi_4_sig_low;
13791b248f14SClaudio Fontana                 }
13801b248f14SClaudio Fontana             } else {
13811b248f14SClaudio Fontana                 rexp = pi_2_exp;
13821b248f14SClaudio Fontana                 rsig0 = pi_2_sig_high;
13831b248f14SClaudio Fontana                 rsig1 = pi_2_sig_low;
13841b248f14SClaudio Fontana             }
13851b248f14SClaudio Fontana         } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
13861b248f14SClaudio Fontana             rexp = pi_2_exp;
13871b248f14SClaudio Fontana             rsig0 = pi_2_sig_high;
13881b248f14SClaudio Fontana             rsig1 = pi_2_sig_low;
13891b248f14SClaudio Fontana         } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
13901b248f14SClaudio Fontana             /* ST0 is negative.  */
13911b248f14SClaudio Fontana             rexp = pi_exp;
13921b248f14SClaudio Fontana             rsig0 = pi_sig_high;
13931b248f14SClaudio Fontana             rsig1 = pi_sig_low;
13941b248f14SClaudio Fontana         } else {
13951b248f14SClaudio Fontana             /*
13961b248f14SClaudio Fontana              * ST0 and ST1 are finite, nonzero and with exponents not
13971b248f14SClaudio Fontana              * too far apart.
13981b248f14SClaudio Fontana              */
13991b248f14SClaudio Fontana             int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
14001b248f14SClaudio Fontana             int32_t azexp, axexp;
14011b248f14SClaudio Fontana             bool adj_sub, ysign, zsign;
14021b248f14SClaudio Fontana             uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
14031b248f14SClaudio Fontana             uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
14041b248f14SClaudio Fontana             uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
14051b248f14SClaudio Fontana             uint64_t azsig0, azsig1;
14061b248f14SClaudio Fontana             uint64_t azsig2, azsig3, axsig0, axsig1;
14071b248f14SClaudio Fontana             floatx80 x8;
14081b248f14SClaudio Fontana             FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
14098da5f1dbSRichard Henderson             FloatX80RoundPrec save_prec =
14108da5f1dbSRichard Henderson                 env->fp_status.floatx80_rounding_precision;
14111b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = float_round_nearest_even;
14128da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
14131b248f14SClaudio Fontana 
14141b248f14SClaudio Fontana             if (arg0_exp == 0) {
14151b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
14161b248f14SClaudio Fontana             }
14171b248f14SClaudio Fontana             if (arg1_exp == 0) {
14181b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
14191b248f14SClaudio Fontana             }
14201b248f14SClaudio Fontana             if (arg0_exp > arg1_exp ||
14211b248f14SClaudio Fontana                 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
14221b248f14SClaudio Fontana                 /* Work with abs(ST1) / abs(ST0).  */
14231b248f14SClaudio Fontana                 num_exp = arg1_exp;
14241b248f14SClaudio Fontana                 num_sig = arg1_sig;
14251b248f14SClaudio Fontana                 den_exp = arg0_exp;
14261b248f14SClaudio Fontana                 den_sig = arg0_sig;
14271b248f14SClaudio Fontana                 if (arg0_sign) {
14281b248f14SClaudio Fontana                     /* The result is subtracted from pi.  */
14291b248f14SClaudio Fontana                     adj_exp = pi_exp;
14301b248f14SClaudio Fontana                     adj_sig0 = pi_sig_high;
14311b248f14SClaudio Fontana                     adj_sig1 = pi_sig_low;
14321b248f14SClaudio Fontana                     adj_sub = true;
14331b248f14SClaudio Fontana                 } else {
14341b248f14SClaudio Fontana                     /* The result is used as-is.  */
14351b248f14SClaudio Fontana                     adj_exp = 0;
14361b248f14SClaudio Fontana                     adj_sig0 = 0;
14371b248f14SClaudio Fontana                     adj_sig1 = 0;
14381b248f14SClaudio Fontana                     adj_sub = false;
14391b248f14SClaudio Fontana                 }
14401b248f14SClaudio Fontana             } else {
14411b248f14SClaudio Fontana                 /* Work with abs(ST0) / abs(ST1).  */
14421b248f14SClaudio Fontana                 num_exp = arg0_exp;
14431b248f14SClaudio Fontana                 num_sig = arg0_sig;
14441b248f14SClaudio Fontana                 den_exp = arg1_exp;
14451b248f14SClaudio Fontana                 den_sig = arg1_sig;
14461b248f14SClaudio Fontana                 /* The result is added to or subtracted from pi/2.  */
14471b248f14SClaudio Fontana                 adj_exp = pi_2_exp;
14481b248f14SClaudio Fontana                 adj_sig0 = pi_2_sig_high;
14491b248f14SClaudio Fontana                 adj_sig1 = pi_2_sig_low;
14501b248f14SClaudio Fontana                 adj_sub = !arg0_sign;
14511b248f14SClaudio Fontana             }
14521b248f14SClaudio Fontana 
14531b248f14SClaudio Fontana             /*
14541b248f14SClaudio Fontana              * Compute x = num/den, where 0 < x <= 1 and x is not too
14551b248f14SClaudio Fontana              * small.
14561b248f14SClaudio Fontana              */
14571b248f14SClaudio Fontana             xexp = num_exp - den_exp + 0x3ffe;
14581b248f14SClaudio Fontana             remsig0 = num_sig;
14591b248f14SClaudio Fontana             remsig1 = 0;
14601b248f14SClaudio Fontana             if (den_sig <= remsig0) {
14611b248f14SClaudio Fontana                 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
14621b248f14SClaudio Fontana                 ++xexp;
14631b248f14SClaudio Fontana             }
14641b248f14SClaudio Fontana             xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
14651b248f14SClaudio Fontana             mul64To128(den_sig, xsig0, &msig0, &msig1);
14661b248f14SClaudio Fontana             sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
14671b248f14SClaudio Fontana             while ((int64_t) remsig0 < 0) {
14681b248f14SClaudio Fontana                 --xsig0;
14691b248f14SClaudio Fontana                 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
14701b248f14SClaudio Fontana             }
14711b248f14SClaudio Fontana             xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
14721b248f14SClaudio Fontana             /*
14731b248f14SClaudio Fontana              * No need to correct any estimation error in xsig1; even
14741b248f14SClaudio Fontana              * with such error, it is accurate enough.
14751b248f14SClaudio Fontana              */
14761b248f14SClaudio Fontana 
14771b248f14SClaudio Fontana             /*
14781b248f14SClaudio Fontana              * Split x as x = t + y, where t = n/8 is the nearest
14791b248f14SClaudio Fontana              * multiple of 1/8 to x.
14801b248f14SClaudio Fontana              */
14818da5f1dbSRichard Henderson             x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
14828da5f1dbSRichard Henderson                                                false, xexp + 3, xsig0,
14831b248f14SClaudio Fontana                                                xsig1, &env->fp_status);
14841b248f14SClaudio Fontana             n = floatx80_to_int32(x8, &env->fp_status);
14851b248f14SClaudio Fontana             if (n == 0) {
14861b248f14SClaudio Fontana                 ysign = false;
14871b248f14SClaudio Fontana                 yexp = xexp;
14881b248f14SClaudio Fontana                 ysig0 = xsig0;
14891b248f14SClaudio Fontana                 ysig1 = xsig1;
14901b248f14SClaudio Fontana                 texp = 0;
14911b248f14SClaudio Fontana                 tsig = 0;
14921b248f14SClaudio Fontana             } else {
14931b248f14SClaudio Fontana                 int shift = clz32(n) + 32;
14941b248f14SClaudio Fontana                 texp = 0x403b - shift;
14951b248f14SClaudio Fontana                 tsig = n;
14961b248f14SClaudio Fontana                 tsig <<= shift;
14971b248f14SClaudio Fontana                 if (texp == xexp) {
14981b248f14SClaudio Fontana                     sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
14991b248f14SClaudio Fontana                     if ((int64_t) ysig0 >= 0) {
15001b248f14SClaudio Fontana                         ysign = false;
15011b248f14SClaudio Fontana                         if (ysig0 == 0) {
15021b248f14SClaudio Fontana                             if (ysig1 == 0) {
15031b248f14SClaudio Fontana                                 yexp = 0;
15041b248f14SClaudio Fontana                             } else {
15051b248f14SClaudio Fontana                                 shift = clz64(ysig1) + 64;
15061b248f14SClaudio Fontana                                 yexp = xexp - shift;
15071b248f14SClaudio Fontana                                 shift128Left(ysig0, ysig1, shift,
15081b248f14SClaudio Fontana                                              &ysig0, &ysig1);
15091b248f14SClaudio Fontana                             }
15101b248f14SClaudio Fontana                         } else {
15111b248f14SClaudio Fontana                             shift = clz64(ysig0);
15121b248f14SClaudio Fontana                             yexp = xexp - shift;
15131b248f14SClaudio Fontana                             shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15141b248f14SClaudio Fontana                         }
15151b248f14SClaudio Fontana                     } else {
15161b248f14SClaudio Fontana                         ysign = true;
15171b248f14SClaudio Fontana                         sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
15181b248f14SClaudio Fontana                         if (ysig0 == 0) {
15191b248f14SClaudio Fontana                             shift = clz64(ysig1) + 64;
15201b248f14SClaudio Fontana                         } else {
15211b248f14SClaudio Fontana                             shift = clz64(ysig0);
15221b248f14SClaudio Fontana                         }
15231b248f14SClaudio Fontana                         yexp = xexp - shift;
15241b248f14SClaudio Fontana                         shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15251b248f14SClaudio Fontana                     }
15261b248f14SClaudio Fontana                 } else {
15271b248f14SClaudio Fontana                     /*
15281b248f14SClaudio Fontana                      * t's exponent must be greater than x's because t
15291b248f14SClaudio Fontana                      * is positive and the nearest multiple of 1/8 to
15301b248f14SClaudio Fontana                      * x, and if x has a greater exponent, the power
15311b248f14SClaudio Fontana                      * of 2 with that exponent is also a multiple of
15321b248f14SClaudio Fontana                      * 1/8.
15331b248f14SClaudio Fontana                      */
15341b248f14SClaudio Fontana                     uint64_t usig0, usig1;
15351b248f14SClaudio Fontana                     shift128RightJamming(xsig0, xsig1, texp - xexp,
15361b248f14SClaudio Fontana                                          &usig0, &usig1);
15371b248f14SClaudio Fontana                     ysign = true;
15381b248f14SClaudio Fontana                     sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
15391b248f14SClaudio Fontana                     if (ysig0 == 0) {
15401b248f14SClaudio Fontana                         shift = clz64(ysig1) + 64;
15411b248f14SClaudio Fontana                     } else {
15421b248f14SClaudio Fontana                         shift = clz64(ysig0);
15431b248f14SClaudio Fontana                     }
15441b248f14SClaudio Fontana                     yexp = texp - shift;
15451b248f14SClaudio Fontana                     shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15461b248f14SClaudio Fontana                 }
15471b248f14SClaudio Fontana             }
15481b248f14SClaudio Fontana 
15491b248f14SClaudio Fontana             /*
15501b248f14SClaudio Fontana              * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
15511b248f14SClaudio Fontana              * arctan(z).
15521b248f14SClaudio Fontana              */
15531b248f14SClaudio Fontana             zsign = ysign;
15541b248f14SClaudio Fontana             if (texp == 0 || yexp == 0) {
15551b248f14SClaudio Fontana                 zexp = yexp;
15561b248f14SClaudio Fontana                 zsig0 = ysig0;
15571b248f14SClaudio Fontana                 zsig1 = ysig1;
15581b248f14SClaudio Fontana             } else {
15591b248f14SClaudio Fontana                 /*
15601b248f14SClaudio Fontana                  * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
15611b248f14SClaudio Fontana                  */
15621b248f14SClaudio Fontana                 int32_t dexp = texp + xexp - 0x3ffe;
15631b248f14SClaudio Fontana                 uint64_t dsig0, dsig1, dsig2;
15641b248f14SClaudio Fontana                 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
15651b248f14SClaudio Fontana                 /*
15661b248f14SClaudio Fontana                  * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
15671b248f14SClaudio Fontana                  * bit).  Add 1 to produce the denominator 1+tx.
15681b248f14SClaudio Fontana                  */
15691b248f14SClaudio Fontana                 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
15701b248f14SClaudio Fontana                                      &dsig0, &dsig1);
15711b248f14SClaudio Fontana                 dsig0 |= 0x8000000000000000ULL;
15721b248f14SClaudio Fontana                 zexp = yexp - 1;
15731b248f14SClaudio Fontana                 remsig0 = ysig0;
15741b248f14SClaudio Fontana                 remsig1 = ysig1;
15751b248f14SClaudio Fontana                 remsig2 = 0;
15761b248f14SClaudio Fontana                 if (dsig0 <= remsig0) {
15771b248f14SClaudio Fontana                     shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
15781b248f14SClaudio Fontana                     ++zexp;
15791b248f14SClaudio Fontana                 }
15801b248f14SClaudio Fontana                 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
15811b248f14SClaudio Fontana                 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
15821b248f14SClaudio Fontana                 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
15831b248f14SClaudio Fontana                        &remsig0, &remsig1, &remsig2);
15841b248f14SClaudio Fontana                 while ((int64_t) remsig0 < 0) {
15851b248f14SClaudio Fontana                     --zsig0;
15861b248f14SClaudio Fontana                     add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
15871b248f14SClaudio Fontana                            &remsig0, &remsig1, &remsig2);
15881b248f14SClaudio Fontana                 }
15891b248f14SClaudio Fontana                 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
15901b248f14SClaudio Fontana                 /* No need to correct any estimation error in zsig1.  */
15911b248f14SClaudio Fontana             }
15921b248f14SClaudio Fontana 
15931b248f14SClaudio Fontana             if (zexp == 0) {
15941b248f14SClaudio Fontana                 azexp = 0;
15951b248f14SClaudio Fontana                 azsig0 = 0;
15961b248f14SClaudio Fontana                 azsig1 = 0;
15971b248f14SClaudio Fontana             } else {
15981b248f14SClaudio Fontana                 floatx80 z2, accum;
15991b248f14SClaudio Fontana                 uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
16001b248f14SClaudio Fontana                 /* Compute z^2.  */
16011b248f14SClaudio Fontana                 mul128To256(zsig0, zsig1, zsig0, zsig1,
16021b248f14SClaudio Fontana                             &z2sig0, &z2sig1, &z2sig2, &z2sig3);
16038da5f1dbSRichard Henderson                 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
16041b248f14SClaudio Fontana                                                    zexp + zexp - 0x3ffe,
16051b248f14SClaudio Fontana                                                    z2sig0, z2sig1,
16061b248f14SClaudio Fontana                                                    &env->fp_status);
16071b248f14SClaudio Fontana 
16081b248f14SClaudio Fontana                 /* Compute the lower parts of the polynomial expansion.  */
16091b248f14SClaudio Fontana                 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
16101b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
16111b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16121b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
16131b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16141b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
16151b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16161b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
16171b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16181b248f14SClaudio Fontana                 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
16191b248f14SClaudio Fontana                 accum = floatx80_mul(accum, z2, &env->fp_status);
16201b248f14SClaudio Fontana 
16211b248f14SClaudio Fontana                 /*
16221b248f14SClaudio Fontana                  * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
16231b248f14SClaudio Fontana                  * fpatan_coeff_0 is 1, and accum is negative and much smaller.
16241b248f14SClaudio Fontana                  */
16251b248f14SClaudio Fontana                 aexp = extractFloatx80Exp(fpatan_coeff_0);
16261b248f14SClaudio Fontana                 shift128RightJamming(extractFloatx80Frac(accum), 0,
16271b248f14SClaudio Fontana                                      aexp - extractFloatx80Exp(accum),
16281b248f14SClaudio Fontana                                      &asig0, &asig1);
16291b248f14SClaudio Fontana                 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
16301b248f14SClaudio Fontana                        &asig0, &asig1);
16311b248f14SClaudio Fontana                 /* Multiply by z to compute arctan(z).  */
16321b248f14SClaudio Fontana                 azexp = aexp + zexp - 0x3ffe;
16331b248f14SClaudio Fontana                 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
16341b248f14SClaudio Fontana                             &azsig2, &azsig3);
16351b248f14SClaudio Fontana             }
16361b248f14SClaudio Fontana 
16371b248f14SClaudio Fontana             /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
16381b248f14SClaudio Fontana             if (texp == 0) {
16391b248f14SClaudio Fontana                 /* z is positive.  */
16401b248f14SClaudio Fontana                 axexp = azexp;
16411b248f14SClaudio Fontana                 axsig0 = azsig0;
16421b248f14SClaudio Fontana                 axsig1 = azsig1;
16431b248f14SClaudio Fontana             } else {
16441b248f14SClaudio Fontana                 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
16451b248f14SClaudio Fontana                 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
16461b248f14SClaudio Fontana                 uint64_t low_sig0 =
16471b248f14SClaudio Fontana                     extractFloatx80Frac(fpatan_table[n].atan_low);
16481b248f14SClaudio Fontana                 uint64_t low_sig1 = 0;
16491b248f14SClaudio Fontana                 axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
16501b248f14SClaudio Fontana                 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
16511b248f14SClaudio Fontana                 axsig1 = 0;
16521b248f14SClaudio Fontana                 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
16531b248f14SClaudio Fontana                                      &low_sig0, &low_sig1);
16541b248f14SClaudio Fontana                 if (low_sign) {
16551b248f14SClaudio Fontana                     sub128(axsig0, axsig1, low_sig0, low_sig1,
16561b248f14SClaudio Fontana                            &axsig0, &axsig1);
16571b248f14SClaudio Fontana                 } else {
16581b248f14SClaudio Fontana                     add128(axsig0, axsig1, low_sig0, low_sig1,
16591b248f14SClaudio Fontana                            &axsig0, &axsig1);
16601b248f14SClaudio Fontana                 }
16611b248f14SClaudio Fontana                 if (azexp >= axexp) {
16621b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
16631b248f14SClaudio Fontana                                          &axsig0, &axsig1);
16641b248f14SClaudio Fontana                     axexp = azexp + 1;
16651b248f14SClaudio Fontana                     shift128RightJamming(azsig0, azsig1, 1,
16661b248f14SClaudio Fontana                                          &azsig0, &azsig1);
16671b248f14SClaudio Fontana                 } else {
16681b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, 1,
16691b248f14SClaudio Fontana                                          &axsig0, &axsig1);
16701b248f14SClaudio Fontana                     shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
16711b248f14SClaudio Fontana                                          &azsig0, &azsig1);
16721b248f14SClaudio Fontana                     ++axexp;
16731b248f14SClaudio Fontana                 }
16741b248f14SClaudio Fontana                 if (zsign) {
16751b248f14SClaudio Fontana                     sub128(axsig0, axsig1, azsig0, azsig1,
16761b248f14SClaudio Fontana                            &axsig0, &axsig1);
16771b248f14SClaudio Fontana                 } else {
16781b248f14SClaudio Fontana                     add128(axsig0, axsig1, azsig0, azsig1,
16791b248f14SClaudio Fontana                            &axsig0, &axsig1);
16801b248f14SClaudio Fontana                 }
16811b248f14SClaudio Fontana             }
16821b248f14SClaudio Fontana 
16831b248f14SClaudio Fontana             if (adj_exp == 0) {
16841b248f14SClaudio Fontana                 rexp = axexp;
16851b248f14SClaudio Fontana                 rsig0 = axsig0;
16861b248f14SClaudio Fontana                 rsig1 = axsig1;
16871b248f14SClaudio Fontana             } else {
16881b248f14SClaudio Fontana                 /*
16891b248f14SClaudio Fontana                  * Add or subtract arctan(x) (exponent axexp,
16901b248f14SClaudio Fontana                  * significand axsig0 and axsig1, positive, not
16911b248f14SClaudio Fontana                  * necessarily normalized) to the number given by
16921b248f14SClaudio Fontana                  * adj_exp, adj_sig0 and adj_sig1, according to
16931b248f14SClaudio Fontana                  * adj_sub.
16941b248f14SClaudio Fontana                  */
16951b248f14SClaudio Fontana                 if (adj_exp >= axexp) {
16961b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
16971b248f14SClaudio Fontana                                          &axsig0, &axsig1);
16981b248f14SClaudio Fontana                     rexp = adj_exp + 1;
16991b248f14SClaudio Fontana                     shift128RightJamming(adj_sig0, adj_sig1, 1,
17001b248f14SClaudio Fontana                                          &adj_sig0, &adj_sig1);
17011b248f14SClaudio Fontana                 } else {
17021b248f14SClaudio Fontana                     shift128RightJamming(axsig0, axsig1, 1,
17031b248f14SClaudio Fontana                                          &axsig0, &axsig1);
17041b248f14SClaudio Fontana                     shift128RightJamming(adj_sig0, adj_sig1,
17051b248f14SClaudio Fontana                                          axexp - adj_exp + 1,
17061b248f14SClaudio Fontana                                          &adj_sig0, &adj_sig1);
17071b248f14SClaudio Fontana                     rexp = axexp + 1;
17081b248f14SClaudio Fontana                 }
17091b248f14SClaudio Fontana                 if (adj_sub) {
17101b248f14SClaudio Fontana                     sub128(adj_sig0, adj_sig1, axsig0, axsig1,
17111b248f14SClaudio Fontana                            &rsig0, &rsig1);
17121b248f14SClaudio Fontana                 } else {
17131b248f14SClaudio Fontana                     add128(adj_sig0, adj_sig1, axsig0, axsig1,
17141b248f14SClaudio Fontana                            &rsig0, &rsig1);
17151b248f14SClaudio Fontana                 }
17161b248f14SClaudio Fontana             }
17171b248f14SClaudio Fontana 
17181b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
17191b248f14SClaudio Fontana             env->fp_status.floatx80_rounding_precision = save_prec;
17201b248f14SClaudio Fontana         }
17211b248f14SClaudio Fontana         /* This result is inexact.  */
17221b248f14SClaudio Fontana         rsig1 |= 1;
17238da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
17241b248f14SClaudio Fontana                                             rsig0, rsig1, &env->fp_status);
17251b248f14SClaudio Fontana     }
17261b248f14SClaudio Fontana 
17271b248f14SClaudio Fontana     fpop(env);
17281b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
17291b248f14SClaudio Fontana }
17301b248f14SClaudio Fontana 
17311b248f14SClaudio Fontana void helper_fxtract(CPUX86State *env)
17321b248f14SClaudio Fontana {
17331b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
17341b248f14SClaudio Fontana     CPU_LDoubleU temp;
17351b248f14SClaudio Fontana 
17361b248f14SClaudio Fontana     temp.d = ST0;
17371b248f14SClaudio Fontana 
17381b248f14SClaudio Fontana     if (floatx80_is_zero(ST0)) {
17391b248f14SClaudio Fontana         /* Easy way to generate -inf and raising division by 0 exception */
17401b248f14SClaudio Fontana         ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
17411b248f14SClaudio Fontana                            &env->fp_status);
17421b248f14SClaudio Fontana         fpush(env);
17431b248f14SClaudio Fontana         ST0 = temp.d;
17441b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0)) {
17451b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
17461b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
17471b248f14SClaudio Fontana         fpush(env);
17481b248f14SClaudio Fontana         ST0 = ST1;
17491b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
17501b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
17511b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
17521b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
17531b248f14SClaudio Fontana         }
17541b248f14SClaudio Fontana         fpush(env);
17551b248f14SClaudio Fontana         ST0 = ST1;
17561b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST0)) {
17571b248f14SClaudio Fontana         fpush(env);
17581b248f14SClaudio Fontana         ST0 = ST1;
17591b248f14SClaudio Fontana         ST1 = floatx80_infinity;
17601b248f14SClaudio Fontana     } else {
17611b248f14SClaudio Fontana         int expdif;
17621b248f14SClaudio Fontana 
17631b248f14SClaudio Fontana         if (EXPD(temp) == 0) {
17641b248f14SClaudio Fontana             int shift = clz64(temp.l.lower);
17651b248f14SClaudio Fontana             temp.l.lower <<= shift;
17661b248f14SClaudio Fontana             expdif = 1 - EXPBIAS - shift;
17671b248f14SClaudio Fontana             float_raise(float_flag_input_denormal, &env->fp_status);
17681b248f14SClaudio Fontana         } else {
17691b248f14SClaudio Fontana             expdif = EXPD(temp) - EXPBIAS;
17701b248f14SClaudio Fontana         }
17711b248f14SClaudio Fontana         /* DP exponent bias */
17721b248f14SClaudio Fontana         ST0 = int32_to_floatx80(expdif, &env->fp_status);
17731b248f14SClaudio Fontana         fpush(env);
17741b248f14SClaudio Fontana         BIASEXPONENT(temp);
17751b248f14SClaudio Fontana         ST0 = temp.d;
17761b248f14SClaudio Fontana     }
17771b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
17781b248f14SClaudio Fontana }
17791b248f14SClaudio Fontana 
17801b248f14SClaudio Fontana static void helper_fprem_common(CPUX86State *env, bool mod)
17811b248f14SClaudio Fontana {
17821b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
17831b248f14SClaudio Fontana     uint64_t quotient;
17841b248f14SClaudio Fontana     CPU_LDoubleU temp0, temp1;
17851b248f14SClaudio Fontana     int exp0, exp1, expdiff;
17861b248f14SClaudio Fontana 
17871b248f14SClaudio Fontana     temp0.d = ST0;
17881b248f14SClaudio Fontana     temp1.d = ST1;
17891b248f14SClaudio Fontana     exp0 = EXPD(temp0);
17901b248f14SClaudio Fontana     exp1 = EXPD(temp1);
17911b248f14SClaudio Fontana 
17921b248f14SClaudio Fontana     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
17931b248f14SClaudio Fontana     if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
17941b248f14SClaudio Fontana         exp0 == 0x7fff || exp1 == 0x7fff ||
17951b248f14SClaudio Fontana         floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
17961b248f14SClaudio Fontana         ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
17971b248f14SClaudio Fontana     } else {
17981b248f14SClaudio Fontana         if (exp0 == 0) {
17991b248f14SClaudio Fontana             exp0 = 1 - clz64(temp0.l.lower);
18001b248f14SClaudio Fontana         }
18011b248f14SClaudio Fontana         if (exp1 == 0) {
18021b248f14SClaudio Fontana             exp1 = 1 - clz64(temp1.l.lower);
18031b248f14SClaudio Fontana         }
18041b248f14SClaudio Fontana         expdiff = exp0 - exp1;
18051b248f14SClaudio Fontana         if (expdiff < 64) {
18061b248f14SClaudio Fontana             ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
18071b248f14SClaudio Fontana             env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
18081b248f14SClaudio Fontana             env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
18091b248f14SClaudio Fontana             env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
18101b248f14SClaudio Fontana         } else {
18111b248f14SClaudio Fontana             /*
18121b248f14SClaudio Fontana              * Partial remainder.  This choice of how many bits to
18131b248f14SClaudio Fontana              * process at once is specified in AMD instruction set
18141b248f14SClaudio Fontana              * manuals, and empirically is followed by Intel
18151b248f14SClaudio Fontana              * processors as well; it ensures that the final remainder
18161b248f14SClaudio Fontana              * operation in a loop does produce the correct low three
18171b248f14SClaudio Fontana              * bits of the quotient.  AMD manuals specify that the
18181b248f14SClaudio Fontana              * flags other than C2 are cleared, and empirically Intel
18191b248f14SClaudio Fontana              * processors clear them as well.
18201b248f14SClaudio Fontana              */
18211b248f14SClaudio Fontana             int n = 32 + (expdiff % 32);
18221b248f14SClaudio Fontana             temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
18231b248f14SClaudio Fontana             ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
18241b248f14SClaudio Fontana             env->fpus |= 0x400;  /* C2 <-- 1 */
18251b248f14SClaudio Fontana         }
18261b248f14SClaudio Fontana     }
18271b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
18281b248f14SClaudio Fontana }
18291b248f14SClaudio Fontana 
/*
 * FPREM1 entry point (going by the helper name): run the shared remainder
 * code with mod == false.  The flag is passed on unchanged to
 * floatx80_modrem().
 * NOTE(review): mod == false is expected to select the IEEE
 * (round-to-nearest quotient) remainder -- confirm against
 * floatx80_modrem().
 */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}
18341b248f14SClaudio Fontana 
/*
 * FPREM entry point (going by the helper name): run the shared remainder
 * code with mod == true.  The flag is passed on unchanged to
 * floatx80_modrem().
 * NOTE(review): mod == true is expected to select the truncating
 * (round-toward-zero quotient) remainder -- confirm against
 * floatx80_modrem().
 */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
18391b248f14SClaudio Fontana 
/*
 * 128-bit significand of log2(e), split into its high and low 64-bit
 * halves.
 */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * helper_fyl2x_common() evaluates the polynomial by Horner's rule in
 * x^2, starting from fyl2x_coeff_9 and working down to fyl2x_coeff_1.
 * The leading coefficient is stored as the pair fyl2x_coeff_0 +
 * fyl2x_coeff_0_low: the small low part (sign bit set) is folded into
 * the floatx80 accumulator, and fyl2x_coeff_0 is then combined with
 * that accumulator in 128-bit arithmetic.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
18611b248f14SClaudio Fontana 
18621b248f14SClaudio Fontana /*
18631b248f14SClaudio Fontana  * Compute an approximation of log2(1+arg), where 1+arg is in the
18641b248f14SClaudio Fontana  * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
18651b248f14SClaudio Fontana  * function is called, rounding precision is set to 80 and the
18661b248f14SClaudio Fontana  * round-to-nearest mode is in effect.  arg must not be exactly zero,
18671b248f14SClaudio Fontana  * and must not be so close to zero that underflow might occur.
18681b248f14SClaudio Fontana  */
18691b248f14SClaudio Fontana static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
18701b248f14SClaudio Fontana                                 uint64_t *sig0, uint64_t *sig1)
18711b248f14SClaudio Fontana {
18721b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(arg);
18731b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(arg);
18741b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(arg);
18751b248f14SClaudio Fontana     bool asign;
18761b248f14SClaudio Fontana     int32_t dexp, texp, aexp;
18771b248f14SClaudio Fontana     uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
18781b248f14SClaudio Fontana     uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
18791b248f14SClaudio Fontana     uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
18801b248f14SClaudio Fontana     floatx80 t2, accum;
18811b248f14SClaudio Fontana 
18821b248f14SClaudio Fontana     /*
18831b248f14SClaudio Fontana      * Compute an approximation of arg/(2+arg), with extra precision,
18841b248f14SClaudio Fontana      * as the argument to a polynomial approximation.  The extra
18851b248f14SClaudio Fontana      * precision is only needed for the first term of the
18861b248f14SClaudio Fontana      * approximation, with subsequent terms being significantly
18871b248f14SClaudio Fontana      * smaller; the approximation only uses odd exponents, and the
18881b248f14SClaudio Fontana      * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
18891b248f14SClaudio Fontana      */
18901b248f14SClaudio Fontana     if (arg0_sign) {
18911b248f14SClaudio Fontana         dexp = 0x3fff;
18921b248f14SClaudio Fontana         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
18931b248f14SClaudio Fontana         sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
18941b248f14SClaudio Fontana     } else {
18951b248f14SClaudio Fontana         dexp = 0x4000;
18961b248f14SClaudio Fontana         shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
18971b248f14SClaudio Fontana         dsig0 |= 0x8000000000000000ULL;
18981b248f14SClaudio Fontana     }
18991b248f14SClaudio Fontana     texp = arg0_exp - dexp + 0x3ffe;
19001b248f14SClaudio Fontana     rsig0 = arg0_sig;
19011b248f14SClaudio Fontana     rsig1 = 0;
19021b248f14SClaudio Fontana     rsig2 = 0;
19031b248f14SClaudio Fontana     if (dsig0 <= rsig0) {
19041b248f14SClaudio Fontana         shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
19051b248f14SClaudio Fontana         ++texp;
19061b248f14SClaudio Fontana     }
19071b248f14SClaudio Fontana     tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
19081b248f14SClaudio Fontana     mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
19091b248f14SClaudio Fontana     sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
19101b248f14SClaudio Fontana            &rsig0, &rsig1, &rsig2);
19111b248f14SClaudio Fontana     while ((int64_t) rsig0 < 0) {
19121b248f14SClaudio Fontana         --tsig0;
19131b248f14SClaudio Fontana         add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
19141b248f14SClaudio Fontana                &rsig0, &rsig1, &rsig2);
19151b248f14SClaudio Fontana     }
19161b248f14SClaudio Fontana     tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
19171b248f14SClaudio Fontana     /*
19181b248f14SClaudio Fontana      * No need to correct any estimation error in tsig1; even with
19191b248f14SClaudio Fontana      * such error, it is accurate enough.  Now compute the square of
19201b248f14SClaudio Fontana      * that approximation.
19211b248f14SClaudio Fontana      */
19221b248f14SClaudio Fontana     mul128To256(tsig0, tsig1, tsig0, tsig1,
19231b248f14SClaudio Fontana                 &t2sig0, &t2sig1, &t2sig2, &t2sig3);
19248da5f1dbSRichard Henderson     t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
19258da5f1dbSRichard Henderson                                        texp + texp - 0x3ffe,
19261b248f14SClaudio Fontana                                        t2sig0, t2sig1, &env->fp_status);
19271b248f14SClaudio Fontana 
19281b248f14SClaudio Fontana     /* Compute the lower parts of the polynomial expansion.  */
19291b248f14SClaudio Fontana     accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
19301b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
19311b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19321b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
19331b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19341b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
19351b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19361b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
19371b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19381b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
19391b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19401b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
19411b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19421b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
19431b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19441b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
19451b248f14SClaudio Fontana     accum = floatx80_mul(accum, t2, &env->fp_status);
19461b248f14SClaudio Fontana     accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
19471b248f14SClaudio Fontana 
19481b248f14SClaudio Fontana     /*
19491b248f14SClaudio Fontana      * The full polynomial expansion is fyl2x_coeff_0 + accum (where
19501b248f14SClaudio Fontana      * accum has much lower magnitude, and so, in particular, carry
19511b248f14SClaudio Fontana      * out of the addition is not possible), multiplied by t.  (This
19521b248f14SClaudio Fontana      * expansion is only accurate to about 70 bits, not 128 bits.)
19531b248f14SClaudio Fontana      */
19541b248f14SClaudio Fontana     aexp = extractFloatx80Exp(fyl2x_coeff_0);
19551b248f14SClaudio Fontana     asign = extractFloatx80Sign(fyl2x_coeff_0);
19561b248f14SClaudio Fontana     shift128RightJamming(extractFloatx80Frac(accum), 0,
19571b248f14SClaudio Fontana                          aexp - extractFloatx80Exp(accum),
19581b248f14SClaudio Fontana                          &asig0, &asig1);
19591b248f14SClaudio Fontana     bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
19601b248f14SClaudio Fontana     bsig1 = 0;
19611b248f14SClaudio Fontana     if (asign == extractFloatx80Sign(accum)) {
19621b248f14SClaudio Fontana         add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
19631b248f14SClaudio Fontana     } else {
19641b248f14SClaudio Fontana         sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
19651b248f14SClaudio Fontana     }
19661b248f14SClaudio Fontana     /* Multiply by t to compute the required result.  */
19671b248f14SClaudio Fontana     mul128To256(asig0, asig1, tsig0, tsig1,
19681b248f14SClaudio Fontana                 &asig0, &asig1, &asig2, &asig3);
19691b248f14SClaudio Fontana     aexp += texp - 0x3ffe;
19701b248f14SClaudio Fontana     *exp = aexp;
19711b248f14SClaudio Fontana     *sig0 = asig0;
19721b248f14SClaudio Fontana     *sig1 = asig1;
19731b248f14SClaudio Fontana }
19741b248f14SClaudio Fontana 
19751b248f14SClaudio Fontana void helper_fyl2xp1(CPUX86State *env)
19761b248f14SClaudio Fontana {
19771b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
19781b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
19791b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
19801b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
19811b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
19821b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
19831b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
19841b248f14SClaudio Fontana 
19851b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
19861b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
19871b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
19881b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
19891b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
19901b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
19911b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
19921b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
19931b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
19941b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
19951b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
19961b248f14SClaudio Fontana         ST1 = ST0;
19971b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
19981b248f14SClaudio Fontana         /* Pass this NaN through.  */
19991b248f14SClaudio Fontana     } else if (arg0_exp > 0x3ffd ||
20001b248f14SClaudio Fontana                (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
20011b248f14SClaudio Fontana                                                   0x95f619980c4336f7ULL :
20021b248f14SClaudio Fontana                                                   0xd413cccfe7799211ULL))) {
20031b248f14SClaudio Fontana         /*
20041b248f14SClaudio Fontana          * Out of range for the instruction (ST0 must have absolute
20051b248f14SClaudio Fontana          * value less than 1 - sqrt(2)/2 = 0.292..., according to
20061b248f14SClaudio Fontana          * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
20071b248f14SClaudio Fontana          * to sqrt(2) - 1, which we allow here), treat as invalid.
20081b248f14SClaudio Fontana          */
20091b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20101b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
20111b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
20121b248f14SClaudio Fontana                arg1_exp == 0x7fff) {
20131b248f14SClaudio Fontana         /*
20141b248f14SClaudio Fontana          * One argument is zero, or multiplying by infinity; correct
20151b248f14SClaudio Fontana          * result is exact and can be obtained by multiplying the
20161b248f14SClaudio Fontana          * arguments.
20171b248f14SClaudio Fontana          */
20181b248f14SClaudio Fontana         ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
20191b248f14SClaudio Fontana     } else if (arg0_exp < 0x3fb0) {
20201b248f14SClaudio Fontana         /*
20211b248f14SClaudio Fontana          * Multiplying both arguments and an extra-precision version
20221b248f14SClaudio Fontana          * of log2(e) is sufficiently precise.
20231b248f14SClaudio Fontana          */
20241b248f14SClaudio Fontana         uint64_t sig0, sig1, sig2;
20251b248f14SClaudio Fontana         int32_t exp;
20261b248f14SClaudio Fontana         if (arg0_exp == 0) {
20271b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
20281b248f14SClaudio Fontana         }
20291b248f14SClaudio Fontana         if (arg1_exp == 0) {
20301b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
20311b248f14SClaudio Fontana         }
20321b248f14SClaudio Fontana         mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
20331b248f14SClaudio Fontana                         &sig0, &sig1, &sig2);
20341b248f14SClaudio Fontana         exp = arg0_exp + 1;
20351b248f14SClaudio Fontana         mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
20361b248f14SClaudio Fontana         exp += arg1_exp - 0x3ffe;
20371b248f14SClaudio Fontana         /* This result is inexact.  */
20381b248f14SClaudio Fontana         sig1 |= 1;
20398da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
20408da5f1dbSRichard Henderson                                             arg0_sign ^ arg1_sign, exp,
20411b248f14SClaudio Fontana                                             sig0, sig1, &env->fp_status);
20421b248f14SClaudio Fontana     } else {
20431b248f14SClaudio Fontana         int32_t aexp;
20441b248f14SClaudio Fontana         uint64_t asig0, asig1, asig2;
20451b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
20468da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
20478da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
20481b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
20498da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
20501b248f14SClaudio Fontana 
20511b248f14SClaudio Fontana         helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
20521b248f14SClaudio Fontana         /*
20531b248f14SClaudio Fontana          * Multiply by the second argument to compute the required
20541b248f14SClaudio Fontana          * result.
20551b248f14SClaudio Fontana          */
20561b248f14SClaudio Fontana         if (arg1_exp == 0) {
20571b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
20581b248f14SClaudio Fontana         }
20591b248f14SClaudio Fontana         mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
20601b248f14SClaudio Fontana         aexp += arg1_exp - 0x3ffe;
20611b248f14SClaudio Fontana         /* This result is inexact.  */
20621b248f14SClaudio Fontana         asig1 |= 1;
20631b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = save_mode;
20648da5f1dbSRichard Henderson         ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
20658da5f1dbSRichard Henderson                                             arg0_sign ^ arg1_sign, aexp,
20661b248f14SClaudio Fontana                                             asig0, asig1, &env->fp_status);
20671b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
20681b248f14SClaudio Fontana     }
20691b248f14SClaudio Fontana     fpop(env);
20701b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
20711b248f14SClaudio Fontana }
20721b248f14SClaudio Fontana 
20731b248f14SClaudio Fontana void helper_fyl2x(CPUX86State *env)
20741b248f14SClaudio Fontana {
20751b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
20761b248f14SClaudio Fontana     uint64_t arg0_sig = extractFloatx80Frac(ST0);
20771b248f14SClaudio Fontana     int32_t arg0_exp = extractFloatx80Exp(ST0);
20781b248f14SClaudio Fontana     bool arg0_sign = extractFloatx80Sign(ST0);
20791b248f14SClaudio Fontana     uint64_t arg1_sig = extractFloatx80Frac(ST1);
20801b248f14SClaudio Fontana     int32_t arg1_exp = extractFloatx80Exp(ST1);
20811b248f14SClaudio Fontana     bool arg1_sign = extractFloatx80Sign(ST1);
20821b248f14SClaudio Fontana 
20831b248f14SClaudio Fontana     if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
20841b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20851b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST0, &env->fp_status);
20861b248f14SClaudio Fontana     } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
20871b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20881b248f14SClaudio Fontana         ST1 = floatx80_silence_nan(ST1, &env->fp_status);
20891b248f14SClaudio Fontana     } else if (floatx80_invalid_encoding(ST0) ||
20901b248f14SClaudio Fontana                floatx80_invalid_encoding(ST1)) {
20911b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20921b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
20931b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST0)) {
20941b248f14SClaudio Fontana         ST1 = ST0;
20951b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
20961b248f14SClaudio Fontana         /* Pass this NaN through.  */
20971b248f14SClaudio Fontana     } else if (arg0_sign && !floatx80_is_zero(ST0)) {
20981b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
20991b248f14SClaudio Fontana         ST1 = floatx80_default_nan(&env->fp_status);
21001b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST1)) {
21011b248f14SClaudio Fontana         FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
21021b248f14SClaudio Fontana                                              &env->fp_status);
21031b248f14SClaudio Fontana         switch (cmp) {
21041b248f14SClaudio Fontana         case float_relation_less:
21051b248f14SClaudio Fontana             ST1 = floatx80_chs(ST1);
21061b248f14SClaudio Fontana             break;
21071b248f14SClaudio Fontana         case float_relation_greater:
21081b248f14SClaudio Fontana             /* Result is infinity of the same sign as ST1.  */
21091b248f14SClaudio Fontana             break;
21101b248f14SClaudio Fontana         default:
21111b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21121b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21131b248f14SClaudio Fontana             break;
21141b248f14SClaudio Fontana         }
21151b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST0)) {
21161b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
21171b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21181b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21191b248f14SClaudio Fontana         } else if (arg1_sign) {
21201b248f14SClaudio Fontana             ST1 = floatx80_chs(ST0);
21211b248f14SClaudio Fontana         } else {
21221b248f14SClaudio Fontana             ST1 = ST0;
21231b248f14SClaudio Fontana         }
21241b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST0)) {
21251b248f14SClaudio Fontana         if (floatx80_is_zero(ST1)) {
21261b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
21271b248f14SClaudio Fontana             ST1 = floatx80_default_nan(&env->fp_status);
21281b248f14SClaudio Fontana         } else {
21291b248f14SClaudio Fontana             /* Result is infinity with opposite sign to ST1.  */
21301b248f14SClaudio Fontana             float_raise(float_flag_divbyzero, &env->fp_status);
21311b248f14SClaudio Fontana             ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
21321b248f14SClaudio Fontana                                 0x8000000000000000ULL);
21331b248f14SClaudio Fontana         }
21341b248f14SClaudio Fontana     } else if (floatx80_is_zero(ST1)) {
21351b248f14SClaudio Fontana         if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
21361b248f14SClaudio Fontana             ST1 = floatx80_chs(ST1);
21371b248f14SClaudio Fontana         }
21381b248f14SClaudio Fontana         /* Otherwise, ST1 is already the correct result.  */
21391b248f14SClaudio Fontana     } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
21401b248f14SClaudio Fontana         if (arg1_sign) {
21411b248f14SClaudio Fontana             ST1 = floatx80_chs(floatx80_zero);
21421b248f14SClaudio Fontana         } else {
21431b248f14SClaudio Fontana             ST1 = floatx80_zero;
21441b248f14SClaudio Fontana         }
21451b248f14SClaudio Fontana     } else {
21461b248f14SClaudio Fontana         int32_t int_exp;
21471b248f14SClaudio Fontana         floatx80 arg0_m1;
21481b248f14SClaudio Fontana         FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
21498da5f1dbSRichard Henderson         FloatX80RoundPrec save_prec =
21508da5f1dbSRichard Henderson             env->fp_status.floatx80_rounding_precision;
21511b248f14SClaudio Fontana         env->fp_status.float_rounding_mode = float_round_nearest_even;
21528da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
21531b248f14SClaudio Fontana 
21541b248f14SClaudio Fontana         if (arg0_exp == 0) {
21551b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
21561b248f14SClaudio Fontana         }
21571b248f14SClaudio Fontana         if (arg1_exp == 0) {
21581b248f14SClaudio Fontana             normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
21591b248f14SClaudio Fontana         }
21601b248f14SClaudio Fontana         int_exp = arg0_exp - 0x3fff;
21611b248f14SClaudio Fontana         if (arg0_sig > 0xb504f333f9de6484ULL) {
21621b248f14SClaudio Fontana             ++int_exp;
21631b248f14SClaudio Fontana         }
21641b248f14SClaudio Fontana         arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
21651b248f14SClaudio Fontana                                                &env->fp_status),
21661b248f14SClaudio Fontana                                floatx80_one, &env->fp_status);
21671b248f14SClaudio Fontana         if (floatx80_is_zero(arg0_m1)) {
21681b248f14SClaudio Fontana             /* Exact power of 2; multiply by ST1.  */
21691b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
21701b248f14SClaudio Fontana             ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
21711b248f14SClaudio Fontana                                ST1, &env->fp_status);
21721b248f14SClaudio Fontana         } else {
21731b248f14SClaudio Fontana             bool asign = extractFloatx80Sign(arg0_m1);
21741b248f14SClaudio Fontana             int32_t aexp;
21751b248f14SClaudio Fontana             uint64_t asig0, asig1, asig2;
21761b248f14SClaudio Fontana             helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
21771b248f14SClaudio Fontana             if (int_exp != 0) {
21781b248f14SClaudio Fontana                 bool isign = (int_exp < 0);
21791b248f14SClaudio Fontana                 int32_t iexp;
21801b248f14SClaudio Fontana                 uint64_t isig;
21811b248f14SClaudio Fontana                 int shift;
21821b248f14SClaudio Fontana                 int_exp = isign ? -int_exp : int_exp;
21831b248f14SClaudio Fontana                 shift = clz32(int_exp) + 32;
21841b248f14SClaudio Fontana                 isig = int_exp;
21851b248f14SClaudio Fontana                 isig <<= shift;
21861b248f14SClaudio Fontana                 iexp = 0x403e - shift;
21871b248f14SClaudio Fontana                 shift128RightJamming(asig0, asig1, iexp - aexp,
21881b248f14SClaudio Fontana                                      &asig0, &asig1);
21891b248f14SClaudio Fontana                 if (asign == isign) {
21901b248f14SClaudio Fontana                     add128(isig, 0, asig0, asig1, &asig0, &asig1);
21911b248f14SClaudio Fontana                 } else {
21921b248f14SClaudio Fontana                     sub128(isig, 0, asig0, asig1, &asig0, &asig1);
21931b248f14SClaudio Fontana                 }
21941b248f14SClaudio Fontana                 aexp = iexp;
21951b248f14SClaudio Fontana                 asign = isign;
21961b248f14SClaudio Fontana             }
21971b248f14SClaudio Fontana             /*
21981b248f14SClaudio Fontana              * Multiply by the second argument to compute the required
21991b248f14SClaudio Fontana              * result.
22001b248f14SClaudio Fontana              */
22011b248f14SClaudio Fontana             if (arg1_exp == 0) {
22021b248f14SClaudio Fontana                 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
22031b248f14SClaudio Fontana             }
22041b248f14SClaudio Fontana             mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
22051b248f14SClaudio Fontana             aexp += arg1_exp - 0x3ffe;
22061b248f14SClaudio Fontana             /* This result is inexact.  */
22071b248f14SClaudio Fontana             asig1 |= 1;
22081b248f14SClaudio Fontana             env->fp_status.float_rounding_mode = save_mode;
22098da5f1dbSRichard Henderson             ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
22108da5f1dbSRichard Henderson                                                 asign ^ arg1_sign, aexp,
22111b248f14SClaudio Fontana                                                 asig0, asig1, &env->fp_status);
22121b248f14SClaudio Fontana         }
22131b248f14SClaudio Fontana 
22141b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save_prec;
22151b248f14SClaudio Fontana     }
22161b248f14SClaudio Fontana     fpop(env);
22171b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22181b248f14SClaudio Fontana }
22191b248f14SClaudio Fontana 
22201b248f14SClaudio Fontana void helper_fsqrt(CPUX86State *env)
22211b248f14SClaudio Fontana {
22221b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22231b248f14SClaudio Fontana     if (floatx80_is_neg(ST0)) {
22241b248f14SClaudio Fontana         env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
22251b248f14SClaudio Fontana         env->fpus |= 0x400;
22261b248f14SClaudio Fontana     }
22271b248f14SClaudio Fontana     ST0 = floatx80_sqrt(ST0, &env->fp_status);
22281b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22291b248f14SClaudio Fontana }
22301b248f14SClaudio Fontana 
22311b248f14SClaudio Fontana void helper_fsincos(CPUX86State *env)
22321b248f14SClaudio Fontana {
22331b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
22341b248f14SClaudio Fontana 
22351b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
22361b248f14SClaudio Fontana         env->fpus |= 0x400;
22371b248f14SClaudio Fontana     } else {
22381b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, sin(fptemp));
22391b248f14SClaudio Fontana         fpush(env);
22401b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, cos(fptemp));
22411b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
22421b248f14SClaudio Fontana         /* the above code is for |arg| < 2**63 only */
22431b248f14SClaudio Fontana     }
22441b248f14SClaudio Fontana }
22451b248f14SClaudio Fontana 
22461b248f14SClaudio Fontana void helper_frndint(CPUX86State *env)
22471b248f14SClaudio Fontana {
22481b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22491b248f14SClaudio Fontana     ST0 = floatx80_round_to_int(ST0, &env->fp_status);
22501b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
22511b248f14SClaudio Fontana }
22521b248f14SClaudio Fontana 
22531b248f14SClaudio Fontana void helper_fscale(CPUX86State *env)
22541b248f14SClaudio Fontana {
22551b248f14SClaudio Fontana     uint8_t old_flags = save_exception_flags(env);
22561b248f14SClaudio Fontana     if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
22571b248f14SClaudio Fontana         float_raise(float_flag_invalid, &env->fp_status);
22581b248f14SClaudio Fontana         ST0 = floatx80_default_nan(&env->fp_status);
22591b248f14SClaudio Fontana     } else if (floatx80_is_any_nan(ST1)) {
22601b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
22611b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
22621b248f14SClaudio Fontana         }
22631b248f14SClaudio Fontana         ST0 = ST1;
22641b248f14SClaudio Fontana         if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
22651b248f14SClaudio Fontana             float_raise(float_flag_invalid, &env->fp_status);
22661b248f14SClaudio Fontana             ST0 = floatx80_silence_nan(ST0, &env->fp_status);
22671b248f14SClaudio Fontana         }
22681b248f14SClaudio Fontana     } else if (floatx80_is_infinity(ST1) &&
22691b248f14SClaudio Fontana                !floatx80_invalid_encoding(ST0) &&
22701b248f14SClaudio Fontana                !floatx80_is_any_nan(ST0)) {
22711b248f14SClaudio Fontana         if (floatx80_is_neg(ST1)) {
22721b248f14SClaudio Fontana             if (floatx80_is_infinity(ST0)) {
22731b248f14SClaudio Fontana                 float_raise(float_flag_invalid, &env->fp_status);
22741b248f14SClaudio Fontana                 ST0 = floatx80_default_nan(&env->fp_status);
22751b248f14SClaudio Fontana             } else {
22761b248f14SClaudio Fontana                 ST0 = (floatx80_is_neg(ST0) ?
22771b248f14SClaudio Fontana                        floatx80_chs(floatx80_zero) :
22781b248f14SClaudio Fontana                        floatx80_zero);
22791b248f14SClaudio Fontana             }
22801b248f14SClaudio Fontana         } else {
22811b248f14SClaudio Fontana             if (floatx80_is_zero(ST0)) {
22821b248f14SClaudio Fontana                 float_raise(float_flag_invalid, &env->fp_status);
22831b248f14SClaudio Fontana                 ST0 = floatx80_default_nan(&env->fp_status);
22841b248f14SClaudio Fontana             } else {
22851b248f14SClaudio Fontana                 ST0 = (floatx80_is_neg(ST0) ?
22861b248f14SClaudio Fontana                        floatx80_chs(floatx80_infinity) :
22871b248f14SClaudio Fontana                        floatx80_infinity);
22881b248f14SClaudio Fontana             }
22891b248f14SClaudio Fontana         }
22901b248f14SClaudio Fontana     } else {
22911b248f14SClaudio Fontana         int n;
22928da5f1dbSRichard Henderson         FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
22931b248f14SClaudio Fontana         uint8_t save_flags = get_float_exception_flags(&env->fp_status);
22941b248f14SClaudio Fontana         set_float_exception_flags(0, &env->fp_status);
22951b248f14SClaudio Fontana         n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
22961b248f14SClaudio Fontana         set_float_exception_flags(save_flags, &env->fp_status);
22978da5f1dbSRichard Henderson         env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
22981b248f14SClaudio Fontana         ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
22991b248f14SClaudio Fontana         env->fp_status.floatx80_rounding_precision = save;
23001b248f14SClaudio Fontana     }
23011b248f14SClaudio Fontana     merge_exception_flags(env, old_flags);
23021b248f14SClaudio Fontana }
23031b248f14SClaudio Fontana 
23041b248f14SClaudio Fontana void helper_fsin(CPUX86State *env)
23051b248f14SClaudio Fontana {
23061b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
23071b248f14SClaudio Fontana 
23081b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
23091b248f14SClaudio Fontana         env->fpus |= 0x400;
23101b248f14SClaudio Fontana     } else {
23111b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, sin(fptemp));
23121b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
23131b248f14SClaudio Fontana         /* the above code is for |arg| < 2**53 only */
23141b248f14SClaudio Fontana     }
23151b248f14SClaudio Fontana }
23161b248f14SClaudio Fontana 
23171b248f14SClaudio Fontana void helper_fcos(CPUX86State *env)
23181b248f14SClaudio Fontana {
23191b248f14SClaudio Fontana     double fptemp = floatx80_to_double(env, ST0);
23201b248f14SClaudio Fontana 
23211b248f14SClaudio Fontana     if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
23221b248f14SClaudio Fontana         env->fpus |= 0x400;
23231b248f14SClaudio Fontana     } else {
23241b248f14SClaudio Fontana         ST0 = double_to_floatx80(env, cos(fptemp));
23251b248f14SClaudio Fontana         env->fpus &= ~0x400;  /* C2 <-- 0 */
23261b248f14SClaudio Fontana         /* the above code is for |arg| < 2**63 only */
23271b248f14SClaudio Fontana     }
23281b248f14SClaudio Fontana }
23291b248f14SClaudio Fontana 
23301b248f14SClaudio Fontana void helper_fxam_ST0(CPUX86State *env)
23311b248f14SClaudio Fontana {
23321b248f14SClaudio Fontana     CPU_LDoubleU temp;
23331b248f14SClaudio Fontana     int expdif;
23341b248f14SClaudio Fontana 
23351b248f14SClaudio Fontana     temp.d = ST0;
23361b248f14SClaudio Fontana 
23371b248f14SClaudio Fontana     env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
23381b248f14SClaudio Fontana     if (SIGND(temp)) {
23391b248f14SClaudio Fontana         env->fpus |= 0x200; /* C1 <-- 1 */
23401b248f14SClaudio Fontana     }
23411b248f14SClaudio Fontana 
23421b248f14SClaudio Fontana     if (env->fptags[env->fpstt]) {
23431b248f14SClaudio Fontana         env->fpus |= 0x4100; /* Empty */
23441b248f14SClaudio Fontana         return;
23451b248f14SClaudio Fontana     }
23461b248f14SClaudio Fontana 
23471b248f14SClaudio Fontana     expdif = EXPD(temp);
23481b248f14SClaudio Fontana     if (expdif == MAXEXPD) {
23491b248f14SClaudio Fontana         if (MANTD(temp) == 0x8000000000000000ULL) {
23501b248f14SClaudio Fontana             env->fpus |= 0x500; /* Infinity */
23511b248f14SClaudio Fontana         } else if (MANTD(temp) & 0x8000000000000000ULL) {
23521b248f14SClaudio Fontana             env->fpus |= 0x100; /* NaN */
23531b248f14SClaudio Fontana         }
23541b248f14SClaudio Fontana     } else if (expdif == 0) {
23551b248f14SClaudio Fontana         if (MANTD(temp) == 0) {
23561b248f14SClaudio Fontana             env->fpus |=  0x4000; /* Zero */
23571b248f14SClaudio Fontana         } else {
23581b248f14SClaudio Fontana             env->fpus |= 0x4400; /* Denormal */
23591b248f14SClaudio Fontana         }
23601b248f14SClaudio Fontana     } else if (MANTD(temp) & 0x8000000000000000ULL) {
23611b248f14SClaudio Fontana         env->fpus |= 0x400;
23621b248f14SClaudio Fontana     }
23631b248f14SClaudio Fontana }
23641b248f14SClaudio Fontana 
23651b248f14SClaudio Fontana static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
23661b248f14SClaudio Fontana                       uintptr_t retaddr)
23671b248f14SClaudio Fontana {
23681b248f14SClaudio Fontana     int fpus, fptag, exp, i;
23691b248f14SClaudio Fontana     uint64_t mant;
23701b248f14SClaudio Fontana     CPU_LDoubleU tmp;
23711b248f14SClaudio Fontana 
23721b248f14SClaudio Fontana     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
23731b248f14SClaudio Fontana     fptag = 0;
23741b248f14SClaudio Fontana     for (i = 7; i >= 0; i--) {
23751b248f14SClaudio Fontana         fptag <<= 2;
23761b248f14SClaudio Fontana         if (env->fptags[i]) {
23771b248f14SClaudio Fontana             fptag |= 3;
23781b248f14SClaudio Fontana         } else {
23791b248f14SClaudio Fontana             tmp.d = env->fpregs[i].d;
23801b248f14SClaudio Fontana             exp = EXPD(tmp);
23811b248f14SClaudio Fontana             mant = MANTD(tmp);
23821b248f14SClaudio Fontana             if (exp == 0 && mant == 0) {
23831b248f14SClaudio Fontana                 /* zero */
23841b248f14SClaudio Fontana                 fptag |= 1;
23851b248f14SClaudio Fontana             } else if (exp == 0 || exp == MAXEXPD
23861b248f14SClaudio Fontana                        || (mant & (1LL << 63)) == 0) {
23871b248f14SClaudio Fontana                 /* NaNs, infinity, denormal */
23881b248f14SClaudio Fontana                 fptag |= 2;
23891b248f14SClaudio Fontana             }
23901b248f14SClaudio Fontana         }
23911b248f14SClaudio Fontana     }
23921b248f14SClaudio Fontana     if (data32) {
23931b248f14SClaudio Fontana         /* 32 bit */
23941b248f14SClaudio Fontana         cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
23951b248f14SClaudio Fontana         cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
23961b248f14SClaudio Fontana         cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
239784abdd7dSZiqiao Kong         cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
239884abdd7dSZiqiao Kong         cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
239984abdd7dSZiqiao Kong         cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
240084abdd7dSZiqiao Kong         cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
24011b248f14SClaudio Fontana     } else {
24021b248f14SClaudio Fontana         /* 16 bit */
24031b248f14SClaudio Fontana         cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
24041b248f14SClaudio Fontana         cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
24051b248f14SClaudio Fontana         cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
240684abdd7dSZiqiao Kong         cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
240784abdd7dSZiqiao Kong         cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
240884abdd7dSZiqiao Kong         cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
240984abdd7dSZiqiao Kong         cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
24101b248f14SClaudio Fontana     }
24111b248f14SClaudio Fontana }
24121b248f14SClaudio Fontana 
24131b248f14SClaudio Fontana void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
24141b248f14SClaudio Fontana {
24151b248f14SClaudio Fontana     do_fstenv(env, ptr, data32, GETPC());
24161b248f14SClaudio Fontana }
24171b248f14SClaudio Fontana 
24181b248f14SClaudio Fontana static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
24191b248f14SClaudio Fontana {
24201b248f14SClaudio Fontana     env->fpstt = (fpus >> 11) & 7;
24211b248f14SClaudio Fontana     env->fpus = fpus & ~0x3800 & ~FPUS_B;
24221b248f14SClaudio Fontana     env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
24231b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY)
24241b248f14SClaudio Fontana     if (!(env->fpus & FPUS_SE)) {
24251b248f14SClaudio Fontana         /*
24261b248f14SClaudio Fontana          * Here the processor deasserts FERR#; in response, the chipset deasserts
24271b248f14SClaudio Fontana          * IGNNE#.
24281b248f14SClaudio Fontana          */
24291b248f14SClaudio Fontana         cpu_clear_ignne();
24301b248f14SClaudio Fontana     }
24311b248f14SClaudio Fontana #endif
24321b248f14SClaudio Fontana }
24331b248f14SClaudio Fontana 
24341b248f14SClaudio Fontana static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
24351b248f14SClaudio Fontana                       uintptr_t retaddr)
24361b248f14SClaudio Fontana {
24371b248f14SClaudio Fontana     int i, fpus, fptag;
24381b248f14SClaudio Fontana 
24391b248f14SClaudio Fontana     if (data32) {
24401b248f14SClaudio Fontana         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
24411b248f14SClaudio Fontana         fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
24421b248f14SClaudio Fontana         fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
24431b248f14SClaudio Fontana     } else {
24441b248f14SClaudio Fontana         cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
24451b248f14SClaudio Fontana         fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
24461b248f14SClaudio Fontana         fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
24471b248f14SClaudio Fontana     }
24481b248f14SClaudio Fontana     cpu_set_fpus(env, fpus);
24491b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
24501b248f14SClaudio Fontana         env->fptags[i] = ((fptag & 3) == 3);
24511b248f14SClaudio Fontana         fptag >>= 2;
24521b248f14SClaudio Fontana     }
24531b248f14SClaudio Fontana }
24541b248f14SClaudio Fontana 
24551b248f14SClaudio Fontana void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
24561b248f14SClaudio Fontana {
24571b248f14SClaudio Fontana     do_fldenv(env, ptr, data32, GETPC());
24581b248f14SClaudio Fontana }
24591b248f14SClaudio Fontana 
24600ac2b197SRichard Henderson static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
24610ac2b197SRichard Henderson                      uintptr_t retaddr)
24621b248f14SClaudio Fontana {
24631b248f14SClaudio Fontana     floatx80 tmp;
24641b248f14SClaudio Fontana     int i;
24651b248f14SClaudio Fontana 
24660ac2b197SRichard Henderson     do_fstenv(env, ptr, data32, retaddr);
24671b248f14SClaudio Fontana 
24680cbc1359SRichard Henderson     ptr += (target_ulong)14 << data32;
24691b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
24701b248f14SClaudio Fontana         tmp = ST(i);
24710ac2b197SRichard Henderson         do_fstt(env, tmp, ptr, retaddr);
24721b248f14SClaudio Fontana         ptr += 10;
24731b248f14SClaudio Fontana     }
24741b248f14SClaudio Fontana 
2475bbdda9b7SRichard Henderson     do_fninit(env);
24761b248f14SClaudio Fontana }
24771b248f14SClaudio Fontana 
24780ac2b197SRichard Henderson void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
24790ac2b197SRichard Henderson {
24800ac2b197SRichard Henderson     do_fsave(env, ptr, data32, GETPC());
24810ac2b197SRichard Henderson }
24820ac2b197SRichard Henderson 
24830ac2b197SRichard Henderson static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
24840ac2b197SRichard Henderson                       uintptr_t retaddr)
24851b248f14SClaudio Fontana {
24861b248f14SClaudio Fontana     floatx80 tmp;
24871b248f14SClaudio Fontana     int i;
24881b248f14SClaudio Fontana 
24890ac2b197SRichard Henderson     do_fldenv(env, ptr, data32, retaddr);
24900cbc1359SRichard Henderson     ptr += (target_ulong)14 << data32;
24911b248f14SClaudio Fontana 
24921b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
24930ac2b197SRichard Henderson         tmp = do_fldt(env, ptr, retaddr);
24941b248f14SClaudio Fontana         ST(i) = tmp;
24951b248f14SClaudio Fontana         ptr += 10;
24961b248f14SClaudio Fontana     }
24971b248f14SClaudio Fontana }
24981b248f14SClaudio Fontana 
24990ac2b197SRichard Henderson void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
25000ac2b197SRichard Henderson {
25010ac2b197SRichard Henderson     do_frstor(env, ptr, data32, GETPC());
25020ac2b197SRichard Henderson }
25030ac2b197SRichard Henderson 
/* Byte offset of member FIELD within X86XSaveArea. */
#define XO(FIELD)  offsetof(X86XSaveArea, FIELD)
25051b248f14SClaudio Fontana 
25061b248f14SClaudio Fontana static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25071b248f14SClaudio Fontana {
25081b248f14SClaudio Fontana     int fpus, fptag, i;
25091b248f14SClaudio Fontana     target_ulong addr;
25101b248f14SClaudio Fontana 
25111b248f14SClaudio Fontana     fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
25121b248f14SClaudio Fontana     fptag = 0;
25131b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
25141b248f14SClaudio Fontana         fptag |= (env->fptags[i] << i);
25151b248f14SClaudio Fontana     }
25161b248f14SClaudio Fontana 
25171b248f14SClaudio Fontana     cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
25181b248f14SClaudio Fontana     cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
25191b248f14SClaudio Fontana     cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
25201b248f14SClaudio Fontana 
25211b248f14SClaudio Fontana     /* In 32-bit mode this is eip, sel, dp, sel.
25221b248f14SClaudio Fontana        In 64-bit mode this is rip, rdp.
25231b248f14SClaudio Fontana        But in either case we don't write actual data, just zeros.  */
25241b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
25251b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
25261b248f14SClaudio Fontana 
25271b248f14SClaudio Fontana     addr = ptr + XO(legacy.fpregs);
25281b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
25291b248f14SClaudio Fontana         floatx80 tmp = ST(i);
2530e3a69234SRichard Henderson         do_fstt(env, tmp, addr, ra);
25311b248f14SClaudio Fontana         addr += 16;
25321b248f14SClaudio Fontana     }
25331b248f14SClaudio Fontana }
25341b248f14SClaudio Fontana 
25351b248f14SClaudio Fontana static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25361b248f14SClaudio Fontana {
25371b248f14SClaudio Fontana     update_mxcsr_from_sse_status(env);
25381b248f14SClaudio Fontana     cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
25391b248f14SClaudio Fontana     cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
25401b248f14SClaudio Fontana }
25411b248f14SClaudio Fontana 
25421b248f14SClaudio Fontana static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25431b248f14SClaudio Fontana {
25441b248f14SClaudio Fontana     int i, nb_xmm_regs;
25451b248f14SClaudio Fontana     target_ulong addr;
25461b248f14SClaudio Fontana 
25471b248f14SClaudio Fontana     if (env->hflags & HF_CS64_MASK) {
25481b248f14SClaudio Fontana         nb_xmm_regs = 16;
25491b248f14SClaudio Fontana     } else {
25501b248f14SClaudio Fontana         nb_xmm_regs = 8;
25511b248f14SClaudio Fontana     }
25521b248f14SClaudio Fontana 
25531b248f14SClaudio Fontana     addr = ptr + XO(legacy.xmm_regs);
25541b248f14SClaudio Fontana     for (i = 0; i < nb_xmm_regs; i++) {
25551b248f14SClaudio Fontana         cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
25561b248f14SClaudio Fontana         cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
25571b248f14SClaudio Fontana         addr += 16;
25581b248f14SClaudio Fontana     }
25591b248f14SClaudio Fontana }
25601b248f14SClaudio Fontana 
256189254431SPaolo Bonzini static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
256289254431SPaolo Bonzini {
256389254431SPaolo Bonzini     int i, nb_xmm_regs;
256489254431SPaolo Bonzini 
256589254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
256689254431SPaolo Bonzini         nb_xmm_regs = 16;
256789254431SPaolo Bonzini     } else {
256889254431SPaolo Bonzini         nb_xmm_regs = 8;
256989254431SPaolo Bonzini     }
257089254431SPaolo Bonzini 
257189254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
257289254431SPaolo Bonzini         cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra);
257389254431SPaolo Bonzini         cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra);
257489254431SPaolo Bonzini     }
257589254431SPaolo Bonzini }
257689254431SPaolo Bonzini 
25771b248f14SClaudio Fontana static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25781b248f14SClaudio Fontana {
25791b248f14SClaudio Fontana     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
25801b248f14SClaudio Fontana     int i;
25811b248f14SClaudio Fontana 
25821b248f14SClaudio Fontana     for (i = 0; i < 4; i++, addr += 16) {
25831b248f14SClaudio Fontana         cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
25841b248f14SClaudio Fontana         cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
25851b248f14SClaudio Fontana     }
25861b248f14SClaudio Fontana }
25871b248f14SClaudio Fontana 
25881b248f14SClaudio Fontana static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25891b248f14SClaudio Fontana {
25901b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
25911b248f14SClaudio Fontana                     env->bndcs_regs.cfgu, ra);
25921b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
25931b248f14SClaudio Fontana                     env->bndcs_regs.sts, ra);
25941b248f14SClaudio Fontana }
25951b248f14SClaudio Fontana 
25961b248f14SClaudio Fontana static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
25971b248f14SClaudio Fontana {
25981b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr, env->pkru, ra);
25991b248f14SClaudio Fontana }
26001b248f14SClaudio Fontana 
26010ac2b197SRichard Henderson static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
26021b248f14SClaudio Fontana {
26031b248f14SClaudio Fontana     /* The operand must be 16 byte aligned */
26041b248f14SClaudio Fontana     if (ptr & 0xf) {
26051b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
26061b248f14SClaudio Fontana     }
26071b248f14SClaudio Fontana 
26081b248f14SClaudio Fontana     do_xsave_fpu(env, ptr, ra);
26091b248f14SClaudio Fontana 
26101b248f14SClaudio Fontana     if (env->cr[4] & CR4_OSFXSR_MASK) {
26111b248f14SClaudio Fontana         do_xsave_mxcsr(env, ptr, ra);
26121b248f14SClaudio Fontana         /* Fast FXSAVE leaves out the XMM registers */
26131b248f14SClaudio Fontana         if (!(env->efer & MSR_EFER_FFXSR)
26141b248f14SClaudio Fontana             || (env->hflags & HF_CPL_MASK)
26151b248f14SClaudio Fontana             || !(env->hflags & HF_LMA_MASK)) {
26161b248f14SClaudio Fontana             do_xsave_sse(env, ptr, ra);
26171b248f14SClaudio Fontana         }
26181b248f14SClaudio Fontana     }
26191b248f14SClaudio Fontana }
26201b248f14SClaudio Fontana 
26210ac2b197SRichard Henderson void helper_fxsave(CPUX86State *env, target_ulong ptr)
26220ac2b197SRichard Henderson {
26230ac2b197SRichard Henderson     do_fxsave(env, ptr, GETPC());
26240ac2b197SRichard Henderson }
26250ac2b197SRichard Henderson 
26261b248f14SClaudio Fontana static uint64_t get_xinuse(CPUX86State *env)
26271b248f14SClaudio Fontana {
26281b248f14SClaudio Fontana     uint64_t inuse = -1;
26291b248f14SClaudio Fontana 
26301b248f14SClaudio Fontana     /* For the most part, we don't track XINUSE.  We could calculate it
26311b248f14SClaudio Fontana        here for all components, but it's probably less work to simply
26321b248f14SClaudio Fontana        indicate in use.  That said, the state of BNDREGS is important
26331b248f14SClaudio Fontana        enough to track in HFLAGS, so we might as well use that here.  */
26341b248f14SClaudio Fontana     if ((env->hflags & HF_MPX_IU_MASK) == 0) {
26351b248f14SClaudio Fontana        inuse &= ~XSTATE_BNDREGS_MASK;
26361b248f14SClaudio Fontana     }
26371b248f14SClaudio Fontana     return inuse;
26381b248f14SClaudio Fontana }
26391b248f14SClaudio Fontana 
26401b248f14SClaudio Fontana static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
26411b248f14SClaudio Fontana                      uint64_t inuse, uint64_t opt, uintptr_t ra)
26421b248f14SClaudio Fontana {
26431b248f14SClaudio Fontana     uint64_t old_bv, new_bv;
26441b248f14SClaudio Fontana 
26451b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
26461b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
26471b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, ra);
26481b248f14SClaudio Fontana     }
26491b248f14SClaudio Fontana 
26501b248f14SClaudio Fontana     /* The operand must be 64 byte aligned.  */
26511b248f14SClaudio Fontana     if (ptr & 63) {
26521b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
26531b248f14SClaudio Fontana     }
26541b248f14SClaudio Fontana 
26551b248f14SClaudio Fontana     /* Never save anything not enabled by XCR0.  */
26561b248f14SClaudio Fontana     rfbm &= env->xcr0;
26571b248f14SClaudio Fontana     opt &= rfbm;
26581b248f14SClaudio Fontana 
26591b248f14SClaudio Fontana     if (opt & XSTATE_FP_MASK) {
26601b248f14SClaudio Fontana         do_xsave_fpu(env, ptr, ra);
26611b248f14SClaudio Fontana     }
26621b248f14SClaudio Fontana     if (rfbm & XSTATE_SSE_MASK) {
26631b248f14SClaudio Fontana         /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
26641b248f14SClaudio Fontana         do_xsave_mxcsr(env, ptr, ra);
26651b248f14SClaudio Fontana     }
26661b248f14SClaudio Fontana     if (opt & XSTATE_SSE_MASK) {
26671b248f14SClaudio Fontana         do_xsave_sse(env, ptr, ra);
26681b248f14SClaudio Fontana     }
266989254431SPaolo Bonzini     if (opt & XSTATE_YMM_MASK) {
267089254431SPaolo Bonzini         do_xsave_ymmh(env, ptr + XO(avx_state), ra);
267189254431SPaolo Bonzini     }
26721b248f14SClaudio Fontana     if (opt & XSTATE_BNDREGS_MASK) {
26731b248f14SClaudio Fontana         do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
26741b248f14SClaudio Fontana     }
26751b248f14SClaudio Fontana     if (opt & XSTATE_BNDCSR_MASK) {
26761b248f14SClaudio Fontana         do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
26771b248f14SClaudio Fontana     }
26781b248f14SClaudio Fontana     if (opt & XSTATE_PKRU_MASK) {
26791b248f14SClaudio Fontana         do_xsave_pkru(env, ptr + XO(pkru_state), ra);
26801b248f14SClaudio Fontana     }
26811b248f14SClaudio Fontana 
26821b248f14SClaudio Fontana     /* Update the XSTATE_BV field.  */
26831b248f14SClaudio Fontana     old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
26841b248f14SClaudio Fontana     new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
26851b248f14SClaudio Fontana     cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
26861b248f14SClaudio Fontana }
26871b248f14SClaudio Fontana 
26881b248f14SClaudio Fontana void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
26891b248f14SClaudio Fontana {
26901b248f14SClaudio Fontana     do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
26911b248f14SClaudio Fontana }
26921b248f14SClaudio Fontana 
26931b248f14SClaudio Fontana void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
26941b248f14SClaudio Fontana {
26951b248f14SClaudio Fontana     uint64_t inuse = get_xinuse(env);
26961b248f14SClaudio Fontana     do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
26971b248f14SClaudio Fontana }
26981b248f14SClaudio Fontana 
26991b248f14SClaudio Fontana static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
27001b248f14SClaudio Fontana {
27011b248f14SClaudio Fontana     int i, fpuc, fpus, fptag;
27021b248f14SClaudio Fontana     target_ulong addr;
27031b248f14SClaudio Fontana 
27041b248f14SClaudio Fontana     fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
27051b248f14SClaudio Fontana     fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
27061b248f14SClaudio Fontana     fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
27071b248f14SClaudio Fontana     cpu_set_fpuc(env, fpuc);
27081b248f14SClaudio Fontana     cpu_set_fpus(env, fpus);
27091b248f14SClaudio Fontana     fptag ^= 0xff;
27101b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
27111b248f14SClaudio Fontana         env->fptags[i] = ((fptag >> i) & 1);
27121b248f14SClaudio Fontana     }
27131b248f14SClaudio Fontana 
27141b248f14SClaudio Fontana     addr = ptr + XO(legacy.fpregs);
27151b248f14SClaudio Fontana     for (i = 0; i < 8; i++) {
2716e3a69234SRichard Henderson         floatx80 tmp = do_fldt(env, addr, ra);
27171b248f14SClaudio Fontana         ST(i) = tmp;
27181b248f14SClaudio Fontana         addr += 16;
27191b248f14SClaudio Fontana     }
27201b248f14SClaudio Fontana }
27211b248f14SClaudio Fontana 
27221b248f14SClaudio Fontana static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
27231b248f14SClaudio Fontana {
27241b248f14SClaudio Fontana     cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
27251b248f14SClaudio Fontana }
27261b248f14SClaudio Fontana 
27271b248f14SClaudio Fontana static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
27281b248f14SClaudio Fontana {
27291b248f14SClaudio Fontana     int i, nb_xmm_regs;
27301b248f14SClaudio Fontana     target_ulong addr;
27311b248f14SClaudio Fontana 
27321b248f14SClaudio Fontana     if (env->hflags & HF_CS64_MASK) {
27331b248f14SClaudio Fontana         nb_xmm_regs = 16;
27341b248f14SClaudio Fontana     } else {
27351b248f14SClaudio Fontana         nb_xmm_regs = 8;
27361b248f14SClaudio Fontana     }
27371b248f14SClaudio Fontana 
27381b248f14SClaudio Fontana     addr = ptr + XO(legacy.xmm_regs);
27391b248f14SClaudio Fontana     for (i = 0; i < nb_xmm_regs; i++) {
27401b248f14SClaudio Fontana         env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
27411b248f14SClaudio Fontana         env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
27421b248f14SClaudio Fontana         addr += 16;
27431b248f14SClaudio Fontana     }
27441b248f14SClaudio Fontana }
27451b248f14SClaudio Fontana 
274689254431SPaolo Bonzini static void do_clear_sse(CPUX86State *env)
274789254431SPaolo Bonzini {
274889254431SPaolo Bonzini     int i, nb_xmm_regs;
274989254431SPaolo Bonzini 
275089254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
275189254431SPaolo Bonzini         nb_xmm_regs = 16;
275289254431SPaolo Bonzini     } else {
275389254431SPaolo Bonzini         nb_xmm_regs = 8;
275489254431SPaolo Bonzini     }
275589254431SPaolo Bonzini 
275689254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++) {
275789254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(0) = 0;
275889254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(1) = 0;
275989254431SPaolo Bonzini     }
276089254431SPaolo Bonzini }
276189254431SPaolo Bonzini 
276289254431SPaolo Bonzini static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
276389254431SPaolo Bonzini {
276489254431SPaolo Bonzini     int i, nb_xmm_regs;
276589254431SPaolo Bonzini 
276689254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
276789254431SPaolo Bonzini         nb_xmm_regs = 16;
276889254431SPaolo Bonzini     } else {
276989254431SPaolo Bonzini         nb_xmm_regs = 8;
277089254431SPaolo Bonzini     }
277189254431SPaolo Bonzini 
277289254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
277389254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
277489254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
277589254431SPaolo Bonzini     }
277689254431SPaolo Bonzini }
277789254431SPaolo Bonzini 
277889254431SPaolo Bonzini static void do_clear_ymmh(CPUX86State *env)
277989254431SPaolo Bonzini {
278089254431SPaolo Bonzini     int i, nb_xmm_regs;
278189254431SPaolo Bonzini 
278289254431SPaolo Bonzini     if (env->hflags & HF_CS64_MASK) {
278389254431SPaolo Bonzini         nb_xmm_regs = 16;
278489254431SPaolo Bonzini     } else {
278589254431SPaolo Bonzini         nb_xmm_regs = 8;
278689254431SPaolo Bonzini     }
278789254431SPaolo Bonzini 
278889254431SPaolo Bonzini     for (i = 0; i < nb_xmm_regs; i++) {
278989254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(2) = 0;
279089254431SPaolo Bonzini         env->xmm_regs[i].ZMM_Q(3) = 0;
279189254431SPaolo Bonzini     }
279289254431SPaolo Bonzini }
279389254431SPaolo Bonzini 
27941b248f14SClaudio Fontana static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
27951b248f14SClaudio Fontana {
27961b248f14SClaudio Fontana     target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
27971b248f14SClaudio Fontana     int i;
27981b248f14SClaudio Fontana 
27991b248f14SClaudio Fontana     for (i = 0; i < 4; i++, addr += 16) {
28001b248f14SClaudio Fontana         env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
28011b248f14SClaudio Fontana         env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
28021b248f14SClaudio Fontana     }
28031b248f14SClaudio Fontana }
28041b248f14SClaudio Fontana 
28051b248f14SClaudio Fontana static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
28061b248f14SClaudio Fontana {
28071b248f14SClaudio Fontana     /* FIXME: Extend highest implemented bit of linear address.  */
28081b248f14SClaudio Fontana     env->bndcs_regs.cfgu
28091b248f14SClaudio Fontana         = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
28101b248f14SClaudio Fontana     env->bndcs_regs.sts
28111b248f14SClaudio Fontana         = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
28121b248f14SClaudio Fontana }
28131b248f14SClaudio Fontana 
28141b248f14SClaudio Fontana static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
28151b248f14SClaudio Fontana {
28161b248f14SClaudio Fontana     env->pkru = cpu_ldq_data_ra(env, ptr, ra);
28171b248f14SClaudio Fontana }
28181b248f14SClaudio Fontana 
28190ac2b197SRichard Henderson static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
28201b248f14SClaudio Fontana {
28211b248f14SClaudio Fontana     /* The operand must be 16 byte aligned */
28221b248f14SClaudio Fontana     if (ptr & 0xf) {
28231b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
28241b248f14SClaudio Fontana     }
28251b248f14SClaudio Fontana 
28261b248f14SClaudio Fontana     do_xrstor_fpu(env, ptr, ra);
28271b248f14SClaudio Fontana 
28281b248f14SClaudio Fontana     if (env->cr[4] & CR4_OSFXSR_MASK) {
28291b248f14SClaudio Fontana         do_xrstor_mxcsr(env, ptr, ra);
28301b248f14SClaudio Fontana         /* Fast FXRSTOR leaves out the XMM registers */
28311b248f14SClaudio Fontana         if (!(env->efer & MSR_EFER_FFXSR)
28321b248f14SClaudio Fontana             || (env->hflags & HF_CPL_MASK)
28331b248f14SClaudio Fontana             || !(env->hflags & HF_LMA_MASK)) {
28341b248f14SClaudio Fontana             do_xrstor_sse(env, ptr, ra);
28351b248f14SClaudio Fontana         }
28361b248f14SClaudio Fontana     }
28371b248f14SClaudio Fontana }
28381b248f14SClaudio Fontana 
28390ac2b197SRichard Henderson void helper_fxrstor(CPUX86State *env, target_ulong ptr)
28400ac2b197SRichard Henderson {
28410ac2b197SRichard Henderson     do_fxrstor(env, ptr, GETPC());
28420ac2b197SRichard Henderson }
28430ac2b197SRichard Henderson 
28445d245678SPaolo Bonzini static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
28451b248f14SClaudio Fontana {
28461b248f14SClaudio Fontana     uint64_t xstate_bv, xcomp_bv, reserve0;
28471b248f14SClaudio Fontana 
28481b248f14SClaudio Fontana     rfbm &= env->xcr0;
28491b248f14SClaudio Fontana 
28501b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
28511b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
28521b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, ra);
28531b248f14SClaudio Fontana     }
28541b248f14SClaudio Fontana 
28551b248f14SClaudio Fontana     /* The operand must be 64 byte aligned.  */
28561b248f14SClaudio Fontana     if (ptr & 63) {
28571b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
28581b248f14SClaudio Fontana     }
28591b248f14SClaudio Fontana 
28601b248f14SClaudio Fontana     xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
28611b248f14SClaudio Fontana 
28621b248f14SClaudio Fontana     if ((int64_t)xstate_bv < 0) {
28631b248f14SClaudio Fontana         /* FIXME: Compact form.  */
28641b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
28651b248f14SClaudio Fontana     }
28661b248f14SClaudio Fontana 
28671b248f14SClaudio Fontana     /* Standard form.  */
28681b248f14SClaudio Fontana 
28691b248f14SClaudio Fontana     /* The XSTATE_BV field must not set bits not present in XCR0.  */
28701b248f14SClaudio Fontana     if (xstate_bv & ~env->xcr0) {
28711b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
28721b248f14SClaudio Fontana     }
28731b248f14SClaudio Fontana 
28741b248f14SClaudio Fontana     /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
28751b248f14SClaudio Fontana        revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
28761b248f14SClaudio Fontana        describes only XCOMP_BV, but the description of the standard form
28771b248f14SClaudio Fontana        of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
28781b248f14SClaudio Fontana        includes the next 64-bit field.  */
28791b248f14SClaudio Fontana     xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
28801b248f14SClaudio Fontana     reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
28811b248f14SClaudio Fontana     if (xcomp_bv || reserve0) {
28821b248f14SClaudio Fontana         raise_exception_ra(env, EXCP0D_GPF, ra);
28831b248f14SClaudio Fontana     }
28841b248f14SClaudio Fontana 
28851b248f14SClaudio Fontana     if (rfbm & XSTATE_FP_MASK) {
28861b248f14SClaudio Fontana         if (xstate_bv & XSTATE_FP_MASK) {
28871b248f14SClaudio Fontana             do_xrstor_fpu(env, ptr, ra);
28881b248f14SClaudio Fontana         } else {
2889bbdda9b7SRichard Henderson             do_fninit(env);
28901b248f14SClaudio Fontana             memset(env->fpregs, 0, sizeof(env->fpregs));
28911b248f14SClaudio Fontana         }
28921b248f14SClaudio Fontana     }
28931b248f14SClaudio Fontana     if (rfbm & XSTATE_SSE_MASK) {
28941b248f14SClaudio Fontana         /* Note that the standard form of XRSTOR loads MXCSR from memory
28951b248f14SClaudio Fontana            whether or not the XSTATE_BV bit is set.  */
28961b248f14SClaudio Fontana         do_xrstor_mxcsr(env, ptr, ra);
28971b248f14SClaudio Fontana         if (xstate_bv & XSTATE_SSE_MASK) {
28981b248f14SClaudio Fontana             do_xrstor_sse(env, ptr, ra);
28991b248f14SClaudio Fontana         } else {
290089254431SPaolo Bonzini             do_clear_sse(env);
290189254431SPaolo Bonzini         }
290289254431SPaolo Bonzini     }
290389254431SPaolo Bonzini     if (rfbm & XSTATE_YMM_MASK) {
290489254431SPaolo Bonzini         if (xstate_bv & XSTATE_YMM_MASK) {
290589254431SPaolo Bonzini             do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
290689254431SPaolo Bonzini         } else {
290789254431SPaolo Bonzini             do_clear_ymmh(env);
29081b248f14SClaudio Fontana         }
29091b248f14SClaudio Fontana     }
29101b248f14SClaudio Fontana     if (rfbm & XSTATE_BNDREGS_MASK) {
29111b248f14SClaudio Fontana         if (xstate_bv & XSTATE_BNDREGS_MASK) {
29121b248f14SClaudio Fontana             do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
29131b248f14SClaudio Fontana             env->hflags |= HF_MPX_IU_MASK;
29141b248f14SClaudio Fontana         } else {
29151b248f14SClaudio Fontana             memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
29161b248f14SClaudio Fontana             env->hflags &= ~HF_MPX_IU_MASK;
29171b248f14SClaudio Fontana         }
29181b248f14SClaudio Fontana     }
29191b248f14SClaudio Fontana     if (rfbm & XSTATE_BNDCSR_MASK) {
29201b248f14SClaudio Fontana         if (xstate_bv & XSTATE_BNDCSR_MASK) {
29211b248f14SClaudio Fontana             do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
29221b248f14SClaudio Fontana         } else {
29231b248f14SClaudio Fontana             memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
29241b248f14SClaudio Fontana         }
29251b248f14SClaudio Fontana         cpu_sync_bndcs_hflags(env);
29261b248f14SClaudio Fontana     }
29271b248f14SClaudio Fontana     if (rfbm & XSTATE_PKRU_MASK) {
29281b248f14SClaudio Fontana         uint64_t old_pkru = env->pkru;
29291b248f14SClaudio Fontana         if (xstate_bv & XSTATE_PKRU_MASK) {
29301b248f14SClaudio Fontana             do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
29311b248f14SClaudio Fontana         } else {
29321b248f14SClaudio Fontana             env->pkru = 0;
29331b248f14SClaudio Fontana         }
29341b248f14SClaudio Fontana         if (env->pkru != old_pkru) {
29351b248f14SClaudio Fontana             CPUState *cs = env_cpu(env);
29361b248f14SClaudio Fontana             tlb_flush(cs);
29371b248f14SClaudio Fontana         }
29381b248f14SClaudio Fontana     }
29391b248f14SClaudio Fontana }
29401b248f14SClaudio Fontana 
29411b248f14SClaudio Fontana #undef XO
29421b248f14SClaudio Fontana 
29435d245678SPaolo Bonzini void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
29445d245678SPaolo Bonzini {
29455d245678SPaolo Bonzini     do_xrstor(env, ptr, rfbm, GETPC());
29465d245678SPaolo Bonzini }
29475d245678SPaolo Bonzini 
29485d245678SPaolo Bonzini #if defined(CONFIG_USER_ONLY)
29495d245678SPaolo Bonzini void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
29505d245678SPaolo Bonzini {
29515d245678SPaolo Bonzini     do_fsave(env, ptr, data32, 0);
29525d245678SPaolo Bonzini }
29535d245678SPaolo Bonzini 
29545d245678SPaolo Bonzini void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
29555d245678SPaolo Bonzini {
29565d245678SPaolo Bonzini     do_frstor(env, ptr, data32, 0);
29575d245678SPaolo Bonzini }
29585d245678SPaolo Bonzini 
29595d245678SPaolo Bonzini void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
29605d245678SPaolo Bonzini {
29615d245678SPaolo Bonzini     do_fxsave(env, ptr, 0);
29625d245678SPaolo Bonzini }
29635d245678SPaolo Bonzini 
29645d245678SPaolo Bonzini void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
29655d245678SPaolo Bonzini {
29665d245678SPaolo Bonzini     do_fxrstor(env, ptr, 0);
29675d245678SPaolo Bonzini }
29685d245678SPaolo Bonzini 
29695d245678SPaolo Bonzini void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
29705d245678SPaolo Bonzini {
29715d245678SPaolo Bonzini     do_xsave(env, ptr, -1, get_xinuse(env), -1, 0);
29725d245678SPaolo Bonzini }
29735d245678SPaolo Bonzini 
29745d245678SPaolo Bonzini void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr)
29755d245678SPaolo Bonzini {
29765d245678SPaolo Bonzini     do_xrstor(env, ptr, -1, 0);
29775d245678SPaolo Bonzini }
29785d245678SPaolo Bonzini #endif
29795d245678SPaolo Bonzini 
29801b248f14SClaudio Fontana uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
29811b248f14SClaudio Fontana {
29821b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
29831b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
29841b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
29851b248f14SClaudio Fontana     }
29861b248f14SClaudio Fontana 
29871b248f14SClaudio Fontana     switch (ecx) {
29881b248f14SClaudio Fontana     case 0:
29891b248f14SClaudio Fontana         return env->xcr0;
29901b248f14SClaudio Fontana     case 1:
29911b248f14SClaudio Fontana         if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
29921b248f14SClaudio Fontana             return env->xcr0 & get_xinuse(env);
29931b248f14SClaudio Fontana         }
29941b248f14SClaudio Fontana         break;
29951b248f14SClaudio Fontana     }
29961b248f14SClaudio Fontana     raise_exception_ra(env, EXCP0D_GPF, GETPC());
29971b248f14SClaudio Fontana }
29981b248f14SClaudio Fontana 
29991b248f14SClaudio Fontana void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
30001b248f14SClaudio Fontana {
30011b248f14SClaudio Fontana     uint32_t dummy, ena_lo, ena_hi;
30021b248f14SClaudio Fontana     uint64_t ena;
30031b248f14SClaudio Fontana 
30041b248f14SClaudio Fontana     /* The OS must have enabled XSAVE.  */
30051b248f14SClaudio Fontana     if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
30061b248f14SClaudio Fontana         raise_exception_ra(env, EXCP06_ILLOP, GETPC());
30071b248f14SClaudio Fontana     }
30081b248f14SClaudio Fontana 
30091b248f14SClaudio Fontana     /* Only XCR0 is defined at present; the FPU may not be disabled.  */
30101b248f14SClaudio Fontana     if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
30111b248f14SClaudio Fontana         goto do_gpf;
30121b248f14SClaudio Fontana     }
30131b248f14SClaudio Fontana 
30141b248f14SClaudio Fontana     /* Disallow enabling unimplemented features.  */
30151b248f14SClaudio Fontana     cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
30161b248f14SClaudio Fontana     ena = ((uint64_t)ena_hi << 32) | ena_lo;
30171b248f14SClaudio Fontana     if (mask & ~ena) {
30181b248f14SClaudio Fontana         goto do_gpf;
30191b248f14SClaudio Fontana     }
30201b248f14SClaudio Fontana 
30211b248f14SClaudio Fontana     /* Disallow enabling only half of MPX.  */
30221b248f14SClaudio Fontana     if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
30231b248f14SClaudio Fontana         & XSTATE_BNDCSR_MASK) {
30241b248f14SClaudio Fontana         goto do_gpf;
30251b248f14SClaudio Fontana     }
30261b248f14SClaudio Fontana 
30271b248f14SClaudio Fontana     env->xcr0 = mask;
30281b248f14SClaudio Fontana     cpu_sync_bndcs_hflags(env);
3029608db8dbSPaul Brook     cpu_sync_avx_hflag(env);
30301b248f14SClaudio Fontana     return;
30311b248f14SClaudio Fontana 
30321b248f14SClaudio Fontana  do_gpf:
30331b248f14SClaudio Fontana     raise_exception_ra(env, EXCP0D_GPF, GETPC());
30341b248f14SClaudio Fontana }
30351b248f14SClaudio Fontana 
30361b248f14SClaudio Fontana /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
30381b248f14SClaudio Fontana 
/* MXCSR control bits consumed by update_mxcsr_status(). */
/* Denormals-are-zero control (bit 6). */
#define SSE_DAZ             0x0040
/* Rounding-control field: two bits starting at bit 13. */
#define SSE_RC_SHIFT        13
#define SSE_RC_MASK         (3 << SSE_RC_SHIFT)
/* Flush-to-zero control (bit 15). */
#define SSE_FZ              0x8000
30431b248f14SClaudio Fontana 
30441b248f14SClaudio Fontana void update_mxcsr_status(CPUX86State *env)
30451b248f14SClaudio Fontana {
30461b248f14SClaudio Fontana     uint32_t mxcsr = env->mxcsr;
30471b248f14SClaudio Fontana     int rnd_type;
30481b248f14SClaudio Fontana 
30491b248f14SClaudio Fontana     /* set rounding mode */
3050*314d3effSPaolo Bonzini     rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
3051*314d3effSPaolo Bonzini     set_x86_rounding_mode(rnd_type, &env->sse_status);
30521b248f14SClaudio Fontana 
30531b248f14SClaudio Fontana     /* Set exception flags.  */
30541b248f14SClaudio Fontana     set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
30551b248f14SClaudio Fontana                               (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
30561b248f14SClaudio Fontana                               (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
30571b248f14SClaudio Fontana                               (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
30581b248f14SClaudio Fontana                               (mxcsr & FPUS_PE ? float_flag_inexact : 0),
30591b248f14SClaudio Fontana                               &env->sse_status);
30601b248f14SClaudio Fontana 
30611b248f14SClaudio Fontana     /* set denormals are zero */
30621b248f14SClaudio Fontana     set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
30631b248f14SClaudio Fontana 
30641b248f14SClaudio Fontana     /* set flush to zero */
30651b248f14SClaudio Fontana     set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
30661b248f14SClaudio Fontana }
30671b248f14SClaudio Fontana 
30681b248f14SClaudio Fontana void update_mxcsr_from_sse_status(CPUX86State *env)
30691b248f14SClaudio Fontana {
30701b248f14SClaudio Fontana     uint8_t flags = get_float_exception_flags(&env->sse_status);
30711b248f14SClaudio Fontana     /*
30721b248f14SClaudio Fontana      * The MXCSR denormal flag has opposite semantics to
30731b248f14SClaudio Fontana      * float_flag_input_denormal (the softfloat code sets that flag
30741b248f14SClaudio Fontana      * only when flushing input denormals to zero, but SSE sets it
30751b248f14SClaudio Fontana      * only when not flushing them to zero), so is not converted
30761b248f14SClaudio Fontana      * here.
30771b248f14SClaudio Fontana      */
30781b248f14SClaudio Fontana     env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
30791b248f14SClaudio Fontana                    (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
30801b248f14SClaudio Fontana                    (flags & float_flag_overflow ? FPUS_OE : 0) |
30811b248f14SClaudio Fontana                    (flags & float_flag_underflow ? FPUS_UE : 0) |
30821b248f14SClaudio Fontana                    (flags & float_flag_inexact ? FPUS_PE : 0) |
30831b248f14SClaudio Fontana                    (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
30841b248f14SClaudio Fontana                     0));
30851b248f14SClaudio Fontana }
30861b248f14SClaudio Fontana 
30871b248f14SClaudio Fontana void helper_update_mxcsr(CPUX86State *env)
30881b248f14SClaudio Fontana {
30891b248f14SClaudio Fontana     update_mxcsr_from_sse_status(env);
30901b248f14SClaudio Fontana }
30911b248f14SClaudio Fontana 
30921b248f14SClaudio Fontana void helper_ldmxcsr(CPUX86State *env, uint32_t val)
30931b248f14SClaudio Fontana {
30941b248f14SClaudio Fontana     cpu_set_mxcsr(env, val);
30951b248f14SClaudio Fontana }
30961b248f14SClaudio Fontana 
30971b248f14SClaudio Fontana void helper_enter_mmx(CPUX86State *env)
30981b248f14SClaudio Fontana {
30991b248f14SClaudio Fontana     env->fpstt = 0;
31001b248f14SClaudio Fontana     *(uint32_t *)(env->fptags) = 0;
31011b248f14SClaudio Fontana     *(uint32_t *)(env->fptags + 4) = 0;
31021b248f14SClaudio Fontana }
31031b248f14SClaudio Fontana 
31041b248f14SClaudio Fontana void helper_emms(CPUX86State *env)
31051b248f14SClaudio Fontana {
31061b248f14SClaudio Fontana     /* set to empty state */
31071b248f14SClaudio Fontana     *(uint32_t *)(env->fptags) = 0x01010101;
31081b248f14SClaudio Fontana     *(uint32_t *)(env->fptags + 4) = 0x01010101;
31091b248f14SClaudio Fontana }
31101b248f14SClaudio Fontana 
/*
 * Instantiate the helpers in ops_sse.h three times, once for each value
 * of SHIFT (0, 1 and 2).
 * NOTE(review): presumably SHIFT selects the register width (MMX, XMM,
 * YMM) and ops_sse.h #undefs SHIFT before returning -- confirm in
 * ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"

#define SHIFT 2
#include "ops_sse.h"

3119