/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
191b248f14SClaudio Fontana
201b248f14SClaudio Fontana #include "qemu/osdep.h"
211b248f14SClaudio Fontana #include <math.h>
221b248f14SClaudio Fontana #include "cpu.h"
2348e5c98aSDavid Edmondson #include "tcg-cpu.h"
247e17a524SPhilippe Mathieu-Daudé #include "exec/exec-all.h"
2509b07f28SPhilippe Mathieu-Daudé #include "exec/cpu_ldst.h"
261b248f14SClaudio Fontana #include "exec/helper-proto.h"
271b248f14SClaudio Fontana #include "fpu/softfloat.h"
281b248f14SClaudio Fontana #include "fpu/softfloat-macros.h"
29ed69e831SClaudio Fontana #include "helper-tcg.h"
30d3e8b648SRichard Henderson #include "access.h"
311b248f14SClaudio Fontana
/*
 * float macros: shorthand for the x87 temporary (FT0) and the register
 * stack.  They expand to expressions on a variable named 'env', so they
 * can only be used where such a CPUX86State pointer is in scope.
 * ST(n) is the n-th register counted from the top of stack, modulo 8.
 */
#define FT0    (env->ft0)
#define ST0    (env->fpregs[env->fpstt].d)
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1    ST(1)

/*
 * Two-bit rounding-control field and its four encodings
 * (nearest / down / up / chop, going by the names).
 * NOTE(review): the code that applies these is outside this part of
 * the file.
 */
#define FPU_RC_SHIFT 10
#define FPU_RC_MASK  (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR  0x000
#define FPU_RC_DOWN  0x400
#define FPU_RC_UP    0x800
#define FPU_RC_CHOP  0xc00

#define MAXTAN 9223372036854775808.0 /* 2^63 */

/*
 * the following deal with x86 long double-precision numbers
 *
 * The accessors take any lvalue expression that has '.l.upper' (sign
 * bit 15 plus 15-bit biased exponent) and '.l.lower' (64-bit mantissa)
 * members.  The argument is parenthesised so that expressions such as
 * '*ptr' or 'array[i]' expand correctly.  BIASEXPONENT() overwrites the
 * exponent field with EXPBIAS while leaving the sign bit alone.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)         ((fp).l.upper & 0x7fff)
#define SIGND(fp)        ((fp).l.upper & 0x8000)
#define MANTD(fp)        ((fp).l.lower)
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/*
 * Bits of the x87 status word (env->fpus).  The first six are the
 * exception flags that merge_exception_flags() derives from the
 * softfloat flags; FPUS_SE and FPUS_B are set by fpu_set_exception()
 * when an unmasked exception is pending.
 */
#define FPUS_IE (1 << 0)  /* from float_flag_invalid */
#define FPUS_DE (1 << 1)  /* from float_flag_input_denormal */
#define FPUS_ZE (1 << 2)  /* from float_flag_divbyzero */
#define FPUS_OE (1 << 3)  /* from float_flag_overflow */
#define FPUS_UE (1 << 4)  /* from float_flag_underflow */
#define FPUS_PE (1 << 5)  /* from float_flag_inexact */
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/*
 * Exception-mask bits of the control word (env->fpuc): a clear bit
 * means the FPUS_* exception at the same position is unmasked.
 */
#define FPUC_EM 0x3f

/*
 * floatx80 constants, given as (sign/exponent, 64-bit mantissa):
 *   lg2 ~ 0.30103 (log10(2)),  l2e ~ 1.44270 (log2(e)),
 *   l2t ~ 3.32193 (log2(10)),  ln2 ~ 0.69315,  pi ~ 3.14159.
 * A '_d' / '_u' variant is the neighbouring value one unit in the last
 * place lower / higher than the unsuffixed constant.
 * NOTE(review): presumably selected according to the rounding mode;
 * the users are outside this part of the file.
 */
#define floatx80_lg2   make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e   make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t   make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d  make_floatx80(0x4000, 0xc90fdaa22168c234LL)
751b248f14SClaudio Fontana
/*
 * Make room for a new top-of-stack register: move the top-of-stack
 * index down by one (modulo 8) and tag the new top as valid.  The
 * register contents are not written here.
 */
static inline void fpush(CPUX86State *env)
{
    int top = (env->fpstt - 1) & 7;

    env->fpstt = top;
    env->fptags[top] = 0; /* validate stack entry */
}
811b248f14SClaudio Fontana
/*
 * Drop the top-of-stack register: tag the current top as invalid, then
 * move the top-of-stack index up by one (modulo 8).
 */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt++;
    env->fpstt &= 7;
}
871b248f14SClaudio Fontana
do_fldt(X86Access * ac,target_ulong ptr)88d3e8b648SRichard Henderson static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
891b248f14SClaudio Fontana {
901b248f14SClaudio Fontana CPU_LDoubleU temp;
911b248f14SClaudio Fontana
92d3e8b648SRichard Henderson temp.l.lower = access_ldq(ac, ptr);
93d3e8b648SRichard Henderson temp.l.upper = access_ldw(ac, ptr + 8);
941b248f14SClaudio Fontana return temp.d;
951b248f14SClaudio Fontana }
961b248f14SClaudio Fontana
/*
 * Write the floatx80 @f as a 10-byte image through the prepared access
 * @ac: the 64-bit low part at @ptr, then the 16-bit high part at
 * @ptr + 8.
 */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU raw = { .d = f };

    access_stq(ac, ptr, raw.l.lower);
    access_stw(ac, ptr + 8, raw.l.upper);
}
1051b248f14SClaudio Fontana
1061b248f14SClaudio Fontana /* x87 FPU helpers */
1071b248f14SClaudio Fontana
/*
 * Convert @a to float64 with softfloat (using env->fp_status) and hand
 * the resulting bit pattern back as a host 'double'.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } pun = { .f64 = floatx80_to_float64(a, &env->fp_status) };

    return pun.d;
}
1181b248f14SClaudio Fontana
double_to_floatx80(CPUX86State * env,double a)1191b248f14SClaudio Fontana static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
1201b248f14SClaudio Fontana {
1211b248f14SClaudio Fontana union {
1221b248f14SClaudio Fontana float64 f64;
1231b248f14SClaudio Fontana double d;
1241b248f14SClaudio Fontana } u;
1251b248f14SClaudio Fontana
1261b248f14SClaudio Fontana u.d = a;
1271b248f14SClaudio Fontana return float64_to_floatx80(u.f64, &env->fp_status);
1281b248f14SClaudio Fontana }
1291b248f14SClaudio Fontana
/*
 * OR the status-word bits in @mask into env->fpus.  Afterwards, if any
 * exception bit set in env->fpus (not only the ones just added) has its
 * mask bit clear in env->fpuc, also set FPUS_SE and FPUS_B.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    const int unmasked = ~env->fpuc & FPUC_EM;

    env->fpus |= mask;
    if ((env->fpus & unmasked) != 0) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}
1371b248f14SClaudio Fontana
/*
 * Set up the fields of the three float_status words (x87, MMX/3DNow!,
 * SSE) that never change at run time, giving them x86 behaviour.
 *
 * This must be called at CPU reset, because the float_status words
 * live in the part of the CPU state struct that is zeroed on reset.
 * Fields that the guest can change are instead set by the code path
 * that writes the corresponding register, e.g. cpu_set_fpuc().
 */
void cpu_init_fp_statuses(CPUX86State *env)
{
    float_status *x87 = &env->fp_status;
    float_status *mmx = &env->mmx_status;
    float_status *sse = &env->sse_status;

    /*
     * x87 rules for propagating NaN operands:
     *   SNaN with QNaN      => the QNaN
     *   SNaN with SNaN      => the larger significand, silenced
     *   QNaN with QNaN      => the larger significand
     *   SNaN with non-NaN   => the SNaN, silenced
     *   QNaN with non-NaN   => the QNaN
     *
     * When the choice comes down to significands and they are equal,
     * the NaN with the positive sign bit (if any) is returned.
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, x87);

    /*
     * TODO: the two settings below are incorrect.  The x86 Software
     * Developer's Manual vol 1 section 4.8.3.5 "Operating on SNaNs and
     * QNaNs" restricts the "larger significand" behaviour to x87 FPU
     * operations.  SSE must always return the first NaN, which is
     * float_2nan_prop_ab.
     *
     * mmx_status is only used for the AMD 3DNow! instructions.  The
     * "3DNow! Technology Manual" documents them as not supporting NaNs
     * or infinities as inputs and calls the result of passing two NaNs
     * "undefined", so any choice is acceptable there.  (Strictly, some
     * behaviour for these "unsupported" NaN and Inf values, such as
     * "NaN * 0 == 0", is not implemented correctly.)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, mmx);
    set_float_2nan_prop_rule(float_2nan_prop_x87, sse);
}
177*62d39b28SPeter Maydell
/*
 * Return the softfloat exception flags currently held in
 * env->fp_status and reset them to zero, so that the flags raised by
 * the next operation can be seen in isolation.  Pair with
 * merge_exception_flags().
 */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    float_status *st = &env->fp_status;
    uint8_t pending = get_float_exception_flags(st);

    set_float_exception_flags(0, st);
    return pending;
}
1841b248f14SClaudio Fontana
/*
 * Counterpart of save_exception_flags(): read the softfloat flags that
 * are set in env->fp_status now, re-raise @old_flags on top of them,
 * and report the freshly read flags to the x87 status word through
 * fpu_set_exception() using the matching FPUS_* bits.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t raised = get_float_exception_flags(&env->fp_status);
    int fpus_bits = 0;

    float_raise(old_flags, &env->fp_status);

    if (raised & float_flag_invalid) {
        fpus_bits |= FPUS_IE;
    }
    if (raised & float_flag_divbyzero) {
        fpus_bits |= FPUS_ZE;
    }
    if (raised & float_flag_overflow) {
        fpus_bits |= FPUS_OE;
    }
    if (raised & float_flag_underflow) {
        fpus_bits |= FPUS_UE;
    }
    if (raised & float_flag_inexact) {
        fpus_bits |= FPUS_PE;
    }
    if (raised & float_flag_input_denormal) {
        fpus_bits |= FPUS_DE;
    }

    /* Called even when nothing was raised, exactly as a zero mask. */
    fpu_set_exception(env, fpus_bits);
}
1971b248f14SClaudio Fontana
helper_fdiv(CPUX86State * env,floatx80 a,floatx80 b)1981b248f14SClaudio Fontana static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
1991b248f14SClaudio Fontana {
2001b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env);
2011b248f14SClaudio Fontana floatx80 ret = floatx80_div(a, b, &env->fp_status);
2021b248f14SClaudio Fontana merge_exception_flags(env, old_flags);
2031b248f14SClaudio Fontana return ret;
2041b248f14SClaudio Fontana }
2051b248f14SClaudio Fontana
/*
 * Deliver a pending x87 error.
 *
 * With CR0_NE_MASK set in env->cr[0], raise EXCP10_COPR using @retaddr
 * as the return address.  Otherwise, in system emulation builds, call
 * fpu_check_raise_ferr_irq(); in CONFIG_USER_ONLY builds nothing is
 * done in that case.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    const bool ne_set = (env->cr[0] & CR0_NE_MASK) != 0;

    if (ne_set) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#ifndef CONFIG_USER_ONLY
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}
2171b248f14SClaudio Fontana
/*
 * Load FT0 from the float32 whose bit pattern is @val, widening it to
 * floatx80.  Exceptions raised by the conversion are folded into the
 * x87 status word.
 */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };

    FT0 = float32_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved);
}
2301b248f14SClaudio Fontana
/*
 * Load FT0 from the float64 whose bit pattern is @val, widening it to
 * floatx80.  Exceptions raised by the conversion are folded into the
 * x87 status word.
 */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };

    FT0 = float64_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved);
}
2431b248f14SClaudio Fontana
/*
 * Load FT0 with the signed 32-bit integer @val converted to floatx80.
 *
 * The conversion is done at full extended precision, whatever rounding
 * precision the guest has currently selected in env->fp_status, so the
 * integer is represented exactly; the guest's precision is put back
 * before returning.  This matches what helper_fildl_ST0() and
 * helper_fildll_ST0() do for integer loads onto the stack; without it
 * a reduced precision setting would round the integer operand before
 * the arithmetic that consumes FT0 even starts.
 */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    float_status *st = &env->fp_status;
    FloatX80RoundPrec guest_prec = get_floatx80_rounding_precision(st);

    set_floatx80_rounding_precision(floatx80_precision_x, st);
    FT0 = int32_to_floatx80(val, st);
    set_floatx80_rounding_precision(guest_prec, st);
}
2481b248f14SClaudio Fontana
/*
 * Push the float32 whose bit pattern is @val onto the x87 stack,
 * widened to floatx80.  The new top of stack is tagged valid and any
 * exceptions raised by the conversion are folded into the x87 status
 * word.
 */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };
    floatx80 widened = float32_to_floatx80(bits.f, &env->fp_status);

    fpush(env);
    ST0 = widened;
    merge_exception_flags(env, saved);
}
2651b248f14SClaudio Fontana
/*
 * Push the float64 whose bit pattern is @val onto the x87 stack,
 * widened to floatx80.  The new top of stack is tagged valid and any
 * exceptions raised by the conversion are folded into the x87 status
 * word.
 */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };
    floatx80 widened = float64_to_floatx80(bits.f, &env->fp_status);

    fpush(env);
    ST0 = widened;
    merge_exception_flags(env, saved);
}
2821b248f14SClaudio Fontana
tmp_maximise_precision(float_status * st)283276de33fSAlex Bennée static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
284276de33fSAlex Bennée {
285276de33fSAlex Bennée FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
286276de33fSAlex Bennée set_floatx80_rounding_precision(floatx80_precision_x, st);
287276de33fSAlex Bennée return old;
288276de33fSAlex Bennée }
289276de33fSAlex Bennée
/*
 * Push the signed 32-bit integer @val onto the x87 stack as a floatx80.
 * The conversion runs with the rounding precision temporarily raised
 * to floatx80_precision_x; the previous precision is restored
 * afterwards.  The new top of stack is tagged valid.
 */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    float_status *st = &env->fp_status;
    FloatX80RoundPrec saved_prec = tmp_maximise_precision(st);
    floatx80 loaded = int32_to_floatx80(val, st);

    set_floatx80_rounding_precision(saved_prec, st);

    fpush(env);
    ST0 = loaded;
}
3021b248f14SClaudio Fontana
/*
 * Push the signed 64-bit integer @val onto the x87 stack as a floatx80.
 * The conversion runs with the rounding precision temporarily raised
 * to floatx80_precision_x; the previous precision is restored
 * afterwards.  The new top of stack is tagged valid.
 */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    float_status *st = &env->fp_status;
    FloatX80RoundPrec saved_prec = tmp_maximise_precision(st);
    floatx80 loaded = int64_to_floatx80(val, st);

    set_floatx80_rounding_precision(saved_prec, st);

    fpush(env);
    ST0 = loaded;
}
3151b248f14SClaudio Fontana
/*
 * Narrow ST0 to float32 and return its bit pattern.  Exceptions raised
 * by the conversion are folded into the x87 status word.  The stack is
 * not popped here.
 */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } out = { .f = floatx80_to_float32(ST0, &env->fp_status) };

    merge_exception_flags(env, saved);
    return out.i;
}
3281b248f14SClaudio Fontana
/*
 * Narrow ST0 to float64 and return its bit pattern.  Exceptions raised
 * by the conversion are folded into the x87 status word.  The stack is
 * not popped here.
 */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } out = { .f = floatx80_to_float64(ST0, &env->fp_status) };

    merge_exception_flags(env, saved);
    return out.i;
}
3411b248f14SClaudio Fontana
/*
 * Convert ST0 to an integer that must fit in int16_t, using
 * floatx80_to_int32().
 *
 * If the 32-bit result is outside the int16_t range, the softfloat
 * flags are set to exactly float_flag_invalid (replacing whatever the
 * conversion itself raised) and -32768 is returned.  The resulting
 * flags are then folded into the x87 status word.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = INT16_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
3551b248f14SClaudio Fontana
/*
 * Convert ST0 to a signed 32-bit integer using floatx80_to_int32().
 * If the conversion raised float_flag_invalid the result is forced to
 * 0x80000000 (INT32_MIN).  Exceptions are folded into the x87 status
 * word.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if ((raised & float_flag_invalid) != 0) {
        result = INT32_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
3681b248f14SClaudio Fontana
/*
 * Convert ST0 to a signed 64-bit integer using floatx80_to_int64().
 * If the conversion raised float_flag_invalid the result is forced to
 * 0x8000000000000000 (INT64_MIN).  Exceptions are folded into the x87
 * status word.
 */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int64_t result = floatx80_to_int64(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if ((raised & float_flag_invalid) != 0) {
        result = INT64_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
3811b248f14SClaudio Fontana
/*
 * Truncating variant of helper_fist_ST0(): convert ST0 with
 * floatx80_to_int32_round_to_zero() to an integer that must fit in
 * int16_t.
 *
 * If the 32-bit result is outside the int16_t range, the softfloat
 * flags are set to exactly float_flag_invalid (replacing whatever the
 * conversion itself raised) and -32768 is returned.  The resulting
 * flags are then folded into the x87 status word.
 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = INT16_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
3951b248f14SClaudio Fontana
/*
 * Convert ST0 to a signed 32-bit integer with
 * floatx80_to_int32_round_to_zero().  If the conversion raised
 * float_flag_invalid the result is forced to 0x80000000 (INT32_MIN).
 * Exceptions are folded into the x87 status word.
 */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if ((raised & float_flag_invalid) != 0) {
        result = INT32_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
4081b248f14SClaudio Fontana
/*
 * Convert ST0 to a signed 64-bit integer with
 * floatx80_to_int64_round_to_zero().  If the conversion raised
 * float_flag_invalid the result is forced to 0x8000000000000000
 * (INT64_MIN).  Exceptions are folded into the x87 status word.
 */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int64_t result = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if ((raised & float_flag_invalid) != 0) {
        result = INT64_MIN;
    }
    merge_exception_flags(env, saved);
    return result;
}
4211b248f14SClaudio Fontana
/*
 * Push the 10-byte floatx80 stored at guest address @ptr onto the x87
 * stack.  The whole 10-byte load is prepared up front with
 * access_prepare(); the value is read before any stack state is
 * changed, and the new top of stack is tagged valid.
 */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 loaded;

    /* GETPC() has to be evaluated in the helper itself. */
    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
    loaded = do_fldt(&ac, ptr);

    fpush(env);
    ST0 = loaded;
}
4341b248f14SClaudio Fontana
/*
 * Store ST0 as a 10-byte floatx80 image at guest address @ptr.  The
 * whole 10-byte store is prepared with access_prepare() before any
 * byte is written.  The stack is not popped here.
 */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 top;

    /* GETPC() has to be evaluated in the helper itself. */
    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    top = ST0;
    do_fstt(&ac, ptr, top);
}
4421b248f14SClaudio Fontana
/*
 * Externally visible wrapper around fpush(): step the top of stack
 * down by one and tag the new top as valid.
 */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
4471b248f14SClaudio Fontana
/*
 * Externally visible wrapper around fpop(): tag the current top of
 * stack as invalid and step the top of stack up by one.
 */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
4521b248f14SClaudio Fontana
/*
 * Decrement the top-of-stack index (modulo 8) without touching any
 * register contents or tags, and clear status-word bits 8, 9, 10 and
 * 14 (mask 0x4700) -- the condition-code bits in the x87 status-word
 * layout.
 */
void helper_fdecstp(CPUX86State *env)
{
    env->fpus &= ~(0x4000 | 0x0400 | 0x0200 | 0x0100);
    env->fpstt = (env->fpstt - 1) & 7;
}
4581b248f14SClaudio Fontana
/*
 * Increment the top-of-stack index (modulo 8) without touching any
 * register contents or tags, and clear status-word bits 8, 9, 10 and
 * 14 (mask 0x4700) -- the condition-code bits in the x87 status-word
 * layout.
 */
void helper_fincstp(CPUX86State *env)
{
    env->fpus &= ~(0x4000 | 0x0400 | 0x0200 | 0x0100);
    env->fpstt = (env->fpstt + 1) & 7;
}
4641b248f14SClaudio Fontana
4651b248f14SClaudio Fontana /* FPU move */
4661b248f14SClaudio Fontana
/*
 * Tag ST(@st_index) as invalid (tag value 1).  The register contents
 * and the top-of-stack index are left alone.
 */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    int slot = (env->fpstt + st_index) & 7;

    env->fptags[slot] = 1;
}
4711b248f14SClaudio Fontana
/* Copy the temporary FT0 into ST0.  Tags and status are not touched. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    floatx80 value = FT0;

    ST0 = value;
}
4761b248f14SClaudio Fontana
/*
 * Copy ST(@st_index) into the temporary FT0.  Tags and status are not
 * touched.
 */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    const floatx80 *src = &ST(st_index);

    FT0 = *src;
}
4811b248f14SClaudio Fontana
/* Copy ST(@st_index) into ST0.  Tags and status are not touched. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    const floatx80 *src = &ST(st_index);

    ST0 = *src;
}
4861b248f14SClaudio Fontana
/* Copy ST0 into ST(@st_index).  Tags and status are not touched. */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *dst = &ST(st_index);

    *dst = ST0;
}
4911b248f14SClaudio Fontana
/*
 * Swap the contents of ST0 and ST(@st_index).  Tags and status are not
 * touched.  With @st_index == 0 both names refer to the same register
 * and the value is unchanged.
 */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 *top = &ST0;
    floatx80 *other = &ST(st_index);
    floatx80 held = *other;

    *other = *top;
    *top = held;
}
5001b248f14SClaudio Fontana
5011b248f14SClaudio Fontana /* FPU operations */
5021b248f14SClaudio Fontana
/*
 * Status-word bits to set for each comparison outcome, indexed by the
 * FloatRelation result plus one.
 * NOTE(review): the order less / equal / greater / unordered assumes
 * FloatRelation runs from -1 to 2 in that order; the enum is defined in
 * the softfloat headers, not here.
 */
static const int fcom_ccval[4] = {
    1 << 8,                            /* 0x0100 */
    1 << 14,                           /* 0x4000 */
    0,                                 /* 0x0000 */
    (1 << 14) | (1 << 10) | (1 << 8),  /* 0x4500 */
};
5041b248f14SClaudio Fontana
/*
 * Compare ST0 with FT0 using floatx80_compare() and store the outcome
 * in the status word: the bits in mask 0x4500 are replaced by the
 * fcom_ccval[] entry for the result.  Exceptions raised by the
 * comparison are folded into the x87 status word afterwards.
 */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);

    env->fpus &= ~0x4500;
    env->fpus |= fcom_ccval[rel + 1];
    merge_exception_flags(env, saved);
}
5141b248f14SClaudio Fontana
/*
 * Same as helper_fcom_ST0_FT0() but the comparison is done with
 * floatx80_compare_quiet(): the bits in mask 0x4500 of the status word
 * are replaced by the fcom_ccval[] entry for the result, and
 * exceptions raised by the comparison are folded into the x87 status
 * word.
 */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);

    env->fpus &= ~0x4500;
    env->fpus |= fcom_ccval[rel + 1];
    merge_exception_flags(env, saved);
}
5241b248f14SClaudio Fontana
5251b248f14SClaudio Fontana static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
5261b248f14SClaudio Fontana
/*
 * Compare ST0 with FT0 using floatx80_compare() and report the outcome
 * in the CPU flags: the current flags are computed with
 * cpu_cc_compute_all(), CC_Z/CC_P/CC_C are replaced by the
 * fcomi_ccval[] entry for the result, and the flags state is switched
 * to CC_OP_EFLAGS.  Exceptions raised by the comparison are folded
 * into the x87 status word afterwards.
 */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);
    /* Must be read before CC_SRC / CC_OP are overwritten below. */
    int kept = cpu_cc_compute_all(env);

    kept &= ~(CC_Z | CC_P | CC_C);
    CC_SRC = kept | fcomi_ccval[rel + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved);
}
5391b248f14SClaudio Fontana
/*
 * Same as helper_fcomi_ST0_FT0() but the comparison is done with
 * floatx80_compare_quiet(): the current flags are computed with
 * cpu_cc_compute_all(), CC_Z/CC_P/CC_C are replaced by the
 * fcomi_ccval[] entry for the result, and the flags state is switched
 * to CC_OP_EFLAGS.  Exceptions raised by the comparison are folded
 * into the x87 status word afterwards.
 */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    /* Must be read before CC_SRC / CC_OP are overwritten below. */
    int kept = cpu_cc_compute_all(env);

    kept &= ~(CC_Z | CC_P | CC_C);
    CC_SRC = kept | fcomi_ccval[rel + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved);
}
5521b248f14SClaudio Fontana
/*
 * ST0 = ST0 + FT0, with exceptions raised by the addition folded into
 * the x87 status word.
 */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 sum = floatx80_add(ST0, FT0, &env->fp_status);

    ST0 = sum;
    merge_exception_flags(env, saved);
}
5591b248f14SClaudio Fontana
/*
 * ST0 = ST0 * FT0, with exceptions raised by the multiplication folded
 * into the x87 status word.
 */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 product = floatx80_mul(ST0, FT0, &env->fp_status);

    ST0 = product;
    merge_exception_flags(env, saved);
}
5661b248f14SClaudio Fontana
/*
 * ST0 = ST0 - FT0, with exceptions raised by the subtraction folded
 * into the x87 status word.
 */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 difference = floatx80_sub(ST0, FT0, &env->fp_status);

    ST0 = difference;
    merge_exception_flags(env, saved);
}
5731b248f14SClaudio Fontana
/*
 * Reversed subtraction: ST0 <- FT0 - ST0 (note the swapped operands
 * compared with helper_fsub_ST0_FT0).  Exception flags are saved first
 * and merged back once the difference is stored.
 */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 difference = floatx80_sub(FT0, ST0, &env->fp_status);

    ST0 = difference;
    merge_exception_flags(env, saved_flags);
}
5801b248f14SClaudio Fontana
/*
 * ST0 <- ST0 / FT0.  The division itself is delegated to helper_fdiv(),
 * so no flag handling is done in this wrapper.
 */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    floatx80 quotient = helper_fdiv(env, ST0, FT0);

    ST0 = quotient;
}
5851b248f14SClaudio Fontana
/*
 * Reversed division: ST0 <- FT0 / ST0.  The division itself is delegated
 * to helper_fdiv(), so no flag handling is done in this wrapper.
 */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    floatx80 quotient = helper_fdiv(env, FT0, ST0);

    ST0 = quotient;
}
5901b248f14SClaudio Fontana
5911b248f14SClaudio Fontana /* fp operations between STN and ST0 */
5921b248f14SClaudio Fontana
/*
 * ST(st_index) <- ST(st_index) + ST0.
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 *
 * Exception flags are saved first and merged back after the store.
 */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_add(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
5991b248f14SClaudio Fontana
/*
 * ST(st_index) <- ST(st_index) * ST0.
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 *
 * Exception flags are saved first and merged back after the store.
 */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_mul(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
6061b248f14SClaudio Fontana
/*
 * ST(st_index) <- ST(st_index) - ST0.
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 *
 * Exception flags are saved first and merged back after the store.
 */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
6131b248f14SClaudio Fontana
/*
 * Reversed subtraction: ST(st_index) <- ST0 - ST(st_index).
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 *
 * Exception flags are saved first and merged back after the store.
 */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(ST0, *dst, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
6201b248f14SClaudio Fontana
/*
 * ST(st_index) <- ST(st_index) / ST0, with the division delegated to
 * helper_fdiv().
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 */
void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    /* Resolve the destination slot once, before calling helper_fdiv(). */
    floatx80 *slot = &ST(st_index);
    floatx80 dividend = *slot;

    *slot = helper_fdiv(env, dividend, ST0);
}
6281b248f14SClaudio Fontana
/*
 * Reversed division: ST(st_index) <- ST0 / ST(st_index), with the
 * division delegated to helper_fdiv().
 *
 * @st_index: stack-relative register number handed to the ST() macro.
 */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    /* Resolve the destination slot once, before calling helper_fdiv(). */
    floatx80 *slot = &ST(st_index);
    floatx80 divisor = *slot;

    *slot = helper_fdiv(env, ST0, divisor);
}
6361b248f14SClaudio Fontana
6371b248f14SClaudio Fontana /* misc FPU operations */
/*
 * FCHS: replace ST0 with floatx80_chs(ST0), i.e. flip its sign.
 * No exception flags are saved or merged here.
 */
void helper_fchs_ST0(CPUX86State *env)
{
    floatx80 negated = floatx80_chs(ST0);

    ST0 = negated;
}
6421b248f14SClaudio Fontana
/*
 * FABS: replace ST0 with floatx80_abs(ST0), i.e. its absolute value.
 * No exception flags are saved or merged here.
 */
void helper_fabs_ST0(CPUX86State *env)
{
    floatx80 magnitude = floatx80_abs(ST0);

    ST0 = magnitude;
}
6471b248f14SClaudio Fontana
/*
 * FLD1: overwrite ST0 with the floatx80_one constant.  The stack top is
 * not moved by this helper.
 */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
6521b248f14SClaudio Fontana
/*
 * FLDL2T: overwrite ST0 with the log2(10) constant.
 *
 * The rounding-control field of the FPU control word selects the
 * variant: floatx80_l2t_u when rounding up, floatx80_l2t for every other
 * mode.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_UP) {
        ST0 = floatx80_l2t_u;
    } else {
        ST0 = floatx80_l2t;
    }
}
6641b248f14SClaudio Fontana
/*
 * FLDL2E: overwrite ST0 with the log2(e) constant.
 *
 * The rounding-control field of the FPU control word selects the
 * variant: floatx80_l2e_d when rounding down or chopping, floatx80_l2e
 * for every other mode.
 */
void helper_fldl2e_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_l2e_d;
    } else {
        ST0 = floatx80_l2e;
    }
}
6771b248f14SClaudio Fontana
/*
 * FLDPI: overwrite ST0 with the pi constant.
 *
 * The rounding-control field of the FPU control word selects the
 * variant: floatx80_pi_d when rounding down or chopping, floatx80_pi for
 * every other mode.
 */
void helper_fldpi_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_pi_d;
    } else {
        ST0 = floatx80_pi;
    }
}
6901b248f14SClaudio Fontana
/*
 * FLDLG2: overwrite ST0 with the log10(2) constant.
 *
 * The rounding-control field of the FPU control word selects the
 * variant: floatx80_lg2_d when rounding down or chopping, floatx80_lg2
 * for every other mode.
 */
void helper_fldlg2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_lg2_d;
    } else {
        ST0 = floatx80_lg2;
    }
}
7031b248f14SClaudio Fontana
/*
 * FLDLN2: overwrite ST0 with the ln(2) constant.
 *
 * The rounding-control field of the FPU control word selects the
 * variant: floatx80_ln2_d when rounding down or chopping, floatx80_ln2
 * for every other mode.
 */
void helper_fldln2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_ln2_d;
    } else {
        ST0 = floatx80_ln2;
    }
}
7161b248f14SClaudio Fontana
/*
 * FLDZ: overwrite ST0 with the floatx80_zero constant.  The stack top is
 * not moved by this helper.
 */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
7211b248f14SClaudio Fontana
/* Overwrite the FT0 temporary (env->ft0) with the floatx80_zero constant. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
7261b248f14SClaudio Fontana
/*
 * FNSTSW: build the architectural status word.
 *
 * Bits 11-13 (mask 0x3800) of env->fpus are discarded and replaced with
 * the low three bits of the current stack-top index, env->fpstt.
 *
 * Returns the assembled status word.
 */
uint32_t helper_fnstsw(CPUX86State *env)
{
    uint32_t top_field = (env->fpstt & 0x7) << 11;
    uint32_t other_bits = env->fpus & ~0x3800;

    return other_bits | top_field;
}
7311b248f14SClaudio Fontana
/* FNSTCW: return the FPU control word exactly as held in env->fpuc. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    uint32_t control_word = env->fpuc;

    return control_word;
}
7361b248f14SClaudio Fontana
/*
 * Translate a 2-bit x86 rounding-control value into the matching
 * softfloat rounding mode and install it in @status.
 *
 * @mode:   rounding-control value; must be below 4 (asserted).
 *          0 = nearest-even, 1 = down, 2 = up, 3 = toward zero.
 * @status: softfloat status block to update.
 */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    /* Read-only lookup table: keep it const so it cannot be clobbered. */
    static const FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };

    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}
748314d3effSPaolo Bonzini
/*
 * Push the rounding settings held in the FPU control word (env->fpuc)
 * into env->fp_status.
 *
 * The rounding-control field selects the rounding mode.  Bits 8-9 select
 * the floatx80 rounding precision: 0 -> single, 2 -> double, and any
 * other value (1 or 3) -> extended.
 */
void update_fp_status(CPUX86State *env)
{
    unsigned rc_field = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    unsigned pc_field = (env->fpuc >> 8) & 3;
    FloatX80RoundPrec precision;

    /* set rounding mode */
    set_x86_rounding_mode(rc_field, &env->fp_status);

    /* set rounding precision */
    if (pc_field == 0) {
        precision = floatx80_precision_s;
    } else if (pc_field == 2) {
        precision = floatx80_precision_d;
    } else {
        precision = floatx80_precision_x;
    }
    set_floatx80_rounding_precision(precision, &env->fp_status);
}
7721b248f14SClaudio Fontana
/*
 * FLDCW: install @val as the new FPU control word.  All the work is done
 * by cpu_set_fpuc().
 */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
7771b248f14SClaudio Fontana
/*
 * FNCLEX: clear bits 0-7 and bit 15 of the status word, keeping only
 * bits 8-14 (mask 0x7f00).
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus = env->fpus & 0x7f00;
}
7821b248f14SClaudio Fontana
/*
 * FWAIT: if FPUS_SE is set in the status word, raise the FPU exception
 * through fpu_raise_exception(); otherwise do nothing.
 */
void helper_fwait(CPUX86State *env)
{
    if (!(env->fpus & FPUS_SE)) {
        return;
    }
    /* GETPC() is evaluated here, directly in the helper. */
    fpu_raise_exception(env, GETPC());
}
7891b248f14SClaudio Fontana
/*
 * Reset the x87 state: zero the status word, the stack-top index and the
 * fpcs/fpds/fpip/fpdp fields, load control word 0x37f through
 * cpu_set_fpuc(), and set all eight fptags entries to 1.
 */
static void do_fninit(CPUX86State *env)
{
    int reg;

    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);

    /* NOTE(review): tag value 1 presumably means "empty" - confirm in cpu.h */
    for (reg = 0; reg < 8; reg++) {
        env->fptags[reg] = 1;
    }
}
8081b248f14SClaudio Fontana
/* FNINIT: TCG helper entry point; the reset itself is done by do_fninit(). */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
813bbdda9b7SRichard Henderson
8141b248f14SClaudio Fontana /* BCD ops */
8151b248f14SClaudio Fontana
/*
 * FBLD: read a 10-byte packed-BCD operand at @ptr, convert it to
 * floatx80 and push it onto the FPU stack.
 *
 * Bytes 0-8 each hold two decimal digits (high nibble = tens, low nibble
 * = units), least significant byte first.  Bit 7 of byte 9 is the sign.
 * All ten bytes are read before fpush(), so the stack is only touched
 * once the loads are done.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    uint64_t magnitude = 0;
    floatx80 result;
    int byte_idx;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Walk from the most significant digit pair down to the least. */
    for (byte_idx = 8; byte_idx >= 0; byte_idx--) {
        unsigned int packed = access_ldb(&ac, ptr + byte_idx);
        unsigned int tens = packed >> 4;
        unsigned int units = packed & 0xf;

        magnitude = (magnitude * 100) + (tens * 10) + units;
    }

    result = int64_to_floatx80(magnitude, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        result = floatx80_chs(result);
    }
    fpush(env);
    ST0 = result;
}
8381b248f14SClaudio Fontana
/*
 * FBSTP store step: convert ST0 to an integer and write it to @ptr as a
 * 10-byte packed-BCD value.
 *
 * If the integer does not fit in 18 decimal digits, float_flag_invalid is
 * set and a fixed pattern is stored instead: seven zero bytes followed
 * by 0xc0, 0xff, 0xff.  Otherwise byte 9 receives the sign (0x80 when
 * the sign bit of ST0 is set, 0x00 otherwise) and bytes 0-8 receive the
 * digit pairs of the magnitude, least significant pair first, padded
 * with zeros.
 *
 * Exception flags are saved on entry and merged on both exit paths.
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    const int64_t bcd_limit = 1000000000000000000LL;
    uint8_t saved_flags = save_exception_flags(env);
    target_ulong cursor = ptr;
    target_ulong sign_addr;
    CPU_LDoubleU st0_bits;
    int64_t magnitude;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    st0_bits.d = ST0;

    magnitude = floatx80_to_int64(ST0, &env->fp_status);
    if (magnitude >= bcd_limit || magnitude <= -bcd_limit) {
        /* Not representable in 18 digits: store the fixed pattern. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        for (; cursor < ptr + 7; cursor++) {
            access_stb(&ac, cursor, 0);
        }
        access_stb(&ac, cursor++, 0xc0);
        access_stb(&ac, cursor++, 0xff);
        access_stb(&ac, cursor++, 0xff);
        merge_exception_flags(env, saved_flags);
        return;
    }

    /* The sign byte is taken from ST0 itself, not from the integer. */
    sign_addr = cursor + 9;
    if (SIGND(st0_bits)) {
        access_stb(&ac, sign_addr, 0x80);
        magnitude = -magnitude;
    } else {
        access_stb(&ac, sign_addr, 0x00);
    }

    /* Emit two decimal digits per byte until the value is exhausted. */
    for (; cursor < sign_addr && magnitude != 0; cursor++) {
        int pair = magnitude % 100;

        magnitude = magnitude / 100;
        access_stb(&ac, cursor, ((pair / 10) << 4) | (pair % 10));
    }

    /* Zero-fill whatever digit bytes remain. */
    for (; cursor < sign_addr; cursor++) {
        access_stb(&ac, cursor, 0);
    }
    merge_exception_flags(env, saved_flags);
}
8851b248f14SClaudio Fontana
/*
 * 128-bit significand of log(2), split into two 64-bit halves:
 * ln2_sig_high is the most significant half and ln2_sig_low the least
 * significant half.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL
8891b248f14SClaudio Fontana
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_N is the floatx80 coefficient of x^N.  The constant term
 * comes with a second, much smaller part, f2xm1_coeff_0_low: it is
 * negative and its exponent is 0x42 below that of f2xm1_coeff_0.
 * NOTE(review): presumably the two are summed to carry the constant term
 * in extra precision - confirm against the code that uses them.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
9031b248f14SClaudio Fontana
/*
 * One row of the f2xm1 lookup table: a sample point t together with
 * precomputed values of 2^t and 2^t - 1 at that point.
 */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
9151b248f14SClaudio Fontana
9161b248f14SClaudio Fontana static const struct f2xm1_data f2xm1_table[65] = {
9171b248f14SClaudio Fontana { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
9181b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
9191b248f14SClaudio Fontana make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
9201b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
9211b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
9221b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
9231b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
9241b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
9251b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
9261b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
9271b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
9281b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
9291b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
9301b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
9311b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
9321b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
9331b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
9341b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
9351b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
9361b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
9371b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
9381b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
9391b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
9401b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
9411b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
9421b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
9431b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
9441b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
9451b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
9461b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
9471b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
9481b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
9491b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
9501b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
9511b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
9521b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
9531b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
9541b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
9551b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
9561b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
9571b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
9581b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
9591b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
9601b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
9611b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
9621b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
9631b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
9641b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
9651b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x800000000000227dULL),
9661b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
9671b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
9681b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
9691b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
9701b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
9711b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
9721b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
9731b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
9741b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
9751b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
9761b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
9771b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
9781b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
9791b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
9801b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
9811b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
9821b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
9831b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
9841b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
9851b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
9861b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
9871b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
9881b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
9891b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
9901b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
9911b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
9921b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
9931b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
9941b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
9951b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
9961b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
9971b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
9981b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
9991b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
10001b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
10011b248f14SClaudio Fontana { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
10021b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
10031b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
10041b248f14SClaudio Fontana { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
10051b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
10061b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
10071b248f14SClaudio Fontana { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
10081b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
10091b248f14SClaudio Fontana make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
10101b248f14SClaudio Fontana { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
10111b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
10121b248f14SClaudio Fontana make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
10131b248f14SClaudio Fontana { floatx80_zero_init,
10141b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8000000000000000ULL),
10151b248f14SClaudio Fontana floatx80_zero_init },
10161b248f14SClaudio Fontana { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
10171b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
10181b248f14SClaudio Fontana make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
10191b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
10201b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
10211b248f14SClaudio Fontana make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
10221b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
10231b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
10241b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
10251b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
10261b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
10271b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
10281b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
10291b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
10301b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
10311b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
10321b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
10331b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
10341b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
10351b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
10361b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
10371b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
10381b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
10391b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
10401b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
10411b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
10421b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
10431b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
10441b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
10451b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
10461b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
10471b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
10481b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
10491b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
10501b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
10511b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
10521b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
10531b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
10541b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
10551b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
10561b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
10571b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
10581b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
10591b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
10601b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
10611b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
10621b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
10631b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
10641b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
10651b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
10661b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
10671b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
10681b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
10691b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
10701b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
10711b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
10721b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
10731b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
10741b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
10751b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
10761b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
10771b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
10781b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
10791b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
10801b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
10811b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
10821b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
10831b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
10841b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
10851b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
10861b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
10871b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
10881b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
10891b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
10901b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
10911b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
10921b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
10931b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
10941b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
10951b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
10961b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
10971b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
10981b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
10991b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
11001b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
11011b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
11021b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
11031b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
11041b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
11051b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
11061b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
11071b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
11081b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
11091b248f14SClaudio Fontana { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
11101b248f14SClaudio Fontana make_floatx80_init(0x4000, 0x8000000000000000ULL),
11111b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
11121b248f14SClaudio Fontana };
11131b248f14SClaudio Fontana
/*
 * Helper for the x87 F2XM1 instruction: replace ST0 with an approximation
 * to 2^ST0 - 1.
 *
 * Cases handled, in order:
 *  - invalid encodings and out-of-range arguments (|ST0| > 1) raise the
 *    invalid flag and produce the default NaN;
 *  - NaNs are passed through (signaling NaNs are silenced and raise invalid);
 *  - +1 is left unchanged (2^1 - 1 == 1) and -1 becomes -0.5, both exact;
 *  - arguments with exponent below 0x3fb0 are multiplied by an
 *    extra-precision log(2); zeros are returned unchanged;
 *  - everything else is split as ST0 = t + y, with t the nearest multiple
 *    of 1/32, and evaluated from f2xm1_table plus a polynomial in y.
 *
 * Exception flags present on entry are saved first and merged back in at
 * the end.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaNs pass through untouched. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact. */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        /*
         * Intermediate arithmetic runs in round-to-nearest-even at full
         * extended precision; the guest's settings are restored before
         * the final result is rounded (mode) and on exit (precision).
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.  This must be the exp2m1 field:
             * the t field equals the argument itself here, so using it
             * would return ST0 unchanged.
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            /*
             * n == 32 skips the table adjustment; presumably that entry
             * is t == 0 (the table start is not visible here).
             */
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align both operands to the larger exponent. */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact. */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}
12741b248f14SClaudio Fontana
/*
 * Helper for the x87 FPTAN instruction.
 *
 * ST0 is converted to a host double.  If its magnitude exceeds MAXTAN,
 * C2 (bit 0x400 of the status word) is set and nothing else changes.
 * Otherwise ST0 is replaced by its tangent, 1.0 is pushed, and C2 is
 * cleared.
 */
void helper_fptan(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        /* Operand out of range: flag it through C2 and stop. */
        env->fpus |= 0x400;
        return;
    }

    /* NOTE: this computation is only meant for |arg| < 2**52. */
    ST0 = double_to_floatx80(env, tan(arg));
    /*
     * Store the tangent before pushing; the ST0 written next is
     * presumably the new stack top (fpush is defined elsewhere).
     */
    fpush(env);
    ST0 = floatx80_one;
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
12901b248f14SClaudio Fontana
/*
 * Values of pi/4, pi/2, 3pi/4 and pi with 128-bit precision.  Each value
 * is given as an exponent (*_exp) plus the high and low 64-bit halves of
 * its significand (*_sig_high, *_sig_low).  pi/4, pi/2 and pi share one
 * significand and differ only in the exponent.
 */

/* pi/4 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

/* pi/2 */
#define pi_2_exp        0x3fff
#define pi_2_sig_high   0xc90fdaa22168c234ULL
#define pi_2_sig_low    0xc4c6628b80dc1cd1ULL

/* 3pi/4 */
#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

/* pi */
#define pi_exp          0x4000
#define pi_sig_high     0xc90fdaa22168c234ULL
#define pi_sig_low      0xc4c6628b80dc1cd1ULL
13041b248f14SClaudio Fontana
/*
 * Coefficients of a polynomial approximation to atan(x) on the interval
 * [-1/16, 1/16].  Only odd powers of x appear in the approximation.
 * Other approximations in this file carry a separate low part for their
 * leading coefficient; none is needed here, because the leading
 * coefficient of this minimax approximation is very close to exactly 1,
 * so the result is already sufficiently accurate.
 */
#define fpatan_coeff_0  make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1  make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2  make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3  make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4  make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5  make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6  make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
13201b248f14SClaudio Fontana
13211b248f14SClaudio Fontana struct fpatan_data {
13221b248f14SClaudio Fontana /* High and low parts of atan(x). */
13231b248f14SClaudio Fontana floatx80 atan_high, atan_low;
13241b248f14SClaudio Fontana };
13251b248f14SClaudio Fontana
13261b248f14SClaudio Fontana static const struct fpatan_data fpatan_table[9] = {
13271b248f14SClaudio Fontana { floatx80_zero_init,
13281b248f14SClaudio Fontana floatx80_zero_init },
13291b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
13301b248f14SClaudio Fontana make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
13311b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
13321b248f14SClaudio Fontana make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
13331b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
13341b248f14SClaudio Fontana make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
13351b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
13361b248f14SClaudio Fontana make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
13371b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
13381b248f14SClaudio Fontana make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
13391b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
13401b248f14SClaudio Fontana make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
13411b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
13421b248f14SClaudio Fontana make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
13431b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
13441b248f14SClaudio Fontana make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
13451b248f14SClaudio Fontana };
13461b248f14SClaudio Fontana
helper_fpatan(CPUX86State * env)13471b248f14SClaudio Fontana void helper_fpatan(CPUX86State *env)
13481b248f14SClaudio Fontana {
13491b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env);
13501b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(ST0);
13511b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(ST0);
13521b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(ST0);
13531b248f14SClaudio Fontana uint64_t arg1_sig = extractFloatx80Frac(ST1);
13541b248f14SClaudio Fontana int32_t arg1_exp = extractFloatx80Exp(ST1);
13551b248f14SClaudio Fontana bool arg1_sign = extractFloatx80Sign(ST1);
13561b248f14SClaudio Fontana
13571b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
13581b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status);
13591b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST0, &env->fp_status);
13601b248f14SClaudio Fontana } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
13611b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status);
13621b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST1, &env->fp_status);
13631b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0) ||
13641b248f14SClaudio Fontana floatx80_invalid_encoding(ST1)) {
13651b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status);
13661b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status);
13671b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) {
13681b248f14SClaudio Fontana ST1 = ST0;
13691b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) {
13701b248f14SClaudio Fontana /* Pass this NaN through. */
13711b248f14SClaudio Fontana } else if (floatx80_is_zero(ST1) && !arg0_sign) {
13721b248f14SClaudio Fontana /* Pass this zero through. */
13731b248f14SClaudio Fontana } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
13741b248f14SClaudio Fontana arg0_exp - arg1_exp >= 80) &&
13751b248f14SClaudio Fontana !arg0_sign) {
13761b248f14SClaudio Fontana /*
13771b248f14SClaudio Fontana * Dividing ST1 by ST0 gives the correct result up to
13781b248f14SClaudio Fontana * rounding, and avoids spurious underflow exceptions that
13791b248f14SClaudio Fontana * might result from passing some small values through the
13801b248f14SClaudio Fontana * polynomial approximation, but if a finite nonzero result of
13811b248f14SClaudio Fontana * division is exact, the result of fpatan is still inexact
13821b248f14SClaudio Fontana * (and underflowing where appropriate).
13831b248f14SClaudio Fontana */
13848da5f1dbSRichard Henderson FloatX80RoundPrec save_prec =
13858da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision;
13868da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
13871b248f14SClaudio Fontana ST1 = floatx80_div(ST1, ST0, &env->fp_status);
13881b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec;
13891b248f14SClaudio Fontana if (!floatx80_is_zero(ST1) &&
13901b248f14SClaudio Fontana !(get_float_exception_flags(&env->fp_status) &
13911b248f14SClaudio Fontana float_flag_inexact)) {
13921b248f14SClaudio Fontana /*
13931b248f14SClaudio Fontana * The mathematical result is very slightly closer to zero
13941b248f14SClaudio Fontana * than this exact result. Round a value with the
13951b248f14SClaudio Fontana * significand adjusted accordingly to get the correct
13961b248f14SClaudio Fontana * exceptions, and possibly an adjusted result depending
13971b248f14SClaudio Fontana * on the rounding mode.
13981b248f14SClaudio Fontana */
13991b248f14SClaudio Fontana uint64_t sig = extractFloatx80Frac(ST1);
14001b248f14SClaudio Fontana int32_t exp = extractFloatx80Exp(ST1);
14011b248f14SClaudio Fontana bool sign = extractFloatx80Sign(ST1);
14021b248f14SClaudio Fontana if (exp == 0) {
14031b248f14SClaudio Fontana normalizeFloatx80Subnormal(sig, &exp, &sig);
14041b248f14SClaudio Fontana }
14058da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
14068da5f1dbSRichard Henderson sign, exp, sig - 1,
14071b248f14SClaudio Fontana -1, &env->fp_status);
14081b248f14SClaudio Fontana }
14091b248f14SClaudio Fontana } else {
14101b248f14SClaudio Fontana /* The result is inexact. */
14111b248f14SClaudio Fontana bool rsign = arg1_sign;
14121b248f14SClaudio Fontana int32_t rexp;
14131b248f14SClaudio Fontana uint64_t rsig0, rsig1;
14141b248f14SClaudio Fontana if (floatx80_is_zero(ST1)) {
14151b248f14SClaudio Fontana /*
14161b248f14SClaudio Fontana * ST0 is negative. The result is pi with the sign of
14171b248f14SClaudio Fontana * ST1.
14181b248f14SClaudio Fontana */
14191b248f14SClaudio Fontana rexp = pi_exp;
14201b248f14SClaudio Fontana rsig0 = pi_sig_high;
14211b248f14SClaudio Fontana rsig1 = pi_sig_low;
14221b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST1)) {
14231b248f14SClaudio Fontana if (floatx80_is_infinity(ST0)) {
14241b248f14SClaudio Fontana if (arg0_sign) {
14251b248f14SClaudio Fontana rexp = pi_34_exp;
14261b248f14SClaudio Fontana rsig0 = pi_34_sig_high;
14271b248f14SClaudio Fontana rsig1 = pi_34_sig_low;
14281b248f14SClaudio Fontana } else {
14291b248f14SClaudio Fontana rexp = pi_4_exp;
14301b248f14SClaudio Fontana rsig0 = pi_4_sig_high;
14311b248f14SClaudio Fontana rsig1 = pi_4_sig_low;
14321b248f14SClaudio Fontana }
14331b248f14SClaudio Fontana } else {
14341b248f14SClaudio Fontana rexp = pi_2_exp;
14351b248f14SClaudio Fontana rsig0 = pi_2_sig_high;
14361b248f14SClaudio Fontana rsig1 = pi_2_sig_low;
14371b248f14SClaudio Fontana }
14381b248f14SClaudio Fontana } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
14391b248f14SClaudio Fontana rexp = pi_2_exp;
14401b248f14SClaudio Fontana rsig0 = pi_2_sig_high;
14411b248f14SClaudio Fontana rsig1 = pi_2_sig_low;
14421b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
14431b248f14SClaudio Fontana /* ST0 is negative. */
14441b248f14SClaudio Fontana rexp = pi_exp;
14451b248f14SClaudio Fontana rsig0 = pi_sig_high;
14461b248f14SClaudio Fontana rsig1 = pi_sig_low;
14471b248f14SClaudio Fontana } else {
14481b248f14SClaudio Fontana /*
14491b248f14SClaudio Fontana * ST0 and ST1 are finite, nonzero and with exponents not
14501b248f14SClaudio Fontana * too far apart.
14511b248f14SClaudio Fontana */
14521b248f14SClaudio Fontana int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
14531b248f14SClaudio Fontana int32_t azexp, axexp;
14541b248f14SClaudio Fontana bool adj_sub, ysign, zsign;
14551b248f14SClaudio Fontana uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
14561b248f14SClaudio Fontana uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
14571b248f14SClaudio Fontana uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
14581b248f14SClaudio Fontana uint64_t azsig0, azsig1;
14591b248f14SClaudio Fontana uint64_t azsig2, azsig3, axsig0, axsig1;
14601b248f14SClaudio Fontana floatx80 x8;
14611b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
14628da5f1dbSRichard Henderson FloatX80RoundPrec save_prec =
14638da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision;
14641b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even;
14658da5f1dbSRichard Henderson env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
14661b248f14SClaudio Fontana
14671b248f14SClaudio Fontana if (arg0_exp == 0) {
14681b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
14691b248f14SClaudio Fontana }
14701b248f14SClaudio Fontana if (arg1_exp == 0) {
14711b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
14721b248f14SClaudio Fontana }
14731b248f14SClaudio Fontana if (arg0_exp > arg1_exp ||
14741b248f14SClaudio Fontana (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
14751b248f14SClaudio Fontana /* Work with abs(ST1) / abs(ST0). */
14761b248f14SClaudio Fontana num_exp = arg1_exp;
14771b248f14SClaudio Fontana num_sig = arg1_sig;
14781b248f14SClaudio Fontana den_exp = arg0_exp;
14791b248f14SClaudio Fontana den_sig = arg0_sig;
14801b248f14SClaudio Fontana if (arg0_sign) {
14811b248f14SClaudio Fontana /* The result is subtracted from pi. */
14821b248f14SClaudio Fontana adj_exp = pi_exp;
14831b248f14SClaudio Fontana adj_sig0 = pi_sig_high;
14841b248f14SClaudio Fontana adj_sig1 = pi_sig_low;
14851b248f14SClaudio Fontana adj_sub = true;
14861b248f14SClaudio Fontana } else {
14871b248f14SClaudio Fontana /* The result is used as-is. */
14881b248f14SClaudio Fontana adj_exp = 0;
14891b248f14SClaudio Fontana adj_sig0 = 0;
14901b248f14SClaudio Fontana adj_sig1 = 0;
14911b248f14SClaudio Fontana adj_sub = false;
14921b248f14SClaudio Fontana }
14931b248f14SClaudio Fontana } else {
14941b248f14SClaudio Fontana /* Work with abs(ST0) / abs(ST1). */
14951b248f14SClaudio Fontana num_exp = arg0_exp;
14961b248f14SClaudio Fontana num_sig = arg0_sig;
14971b248f14SClaudio Fontana den_exp = arg1_exp;
14981b248f14SClaudio Fontana den_sig = arg1_sig;
14991b248f14SClaudio Fontana /* The result is added to or subtracted from pi/2. */
15001b248f14SClaudio Fontana adj_exp = pi_2_exp;
15011b248f14SClaudio Fontana adj_sig0 = pi_2_sig_high;
15021b248f14SClaudio Fontana adj_sig1 = pi_2_sig_low;
15031b248f14SClaudio Fontana adj_sub = !arg0_sign;
15041b248f14SClaudio Fontana }
15051b248f14SClaudio Fontana
15061b248f14SClaudio Fontana /*
15071b248f14SClaudio Fontana * Compute x = num/den, where 0 < x <= 1 and x is not too
15081b248f14SClaudio Fontana * small.
15091b248f14SClaudio Fontana */
15101b248f14SClaudio Fontana xexp = num_exp - den_exp + 0x3ffe;
15111b248f14SClaudio Fontana remsig0 = num_sig;
15121b248f14SClaudio Fontana remsig1 = 0;
15131b248f14SClaudio Fontana if (den_sig <= remsig0) {
15141b248f14SClaudio Fontana shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
15151b248f14SClaudio Fontana ++xexp;
15161b248f14SClaudio Fontana }
15171b248f14SClaudio Fontana xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
15181b248f14SClaudio Fontana mul64To128(den_sig, xsig0, &msig0, &msig1);
15191b248f14SClaudio Fontana sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
15201b248f14SClaudio Fontana while ((int64_t) remsig0 < 0) {
15211b248f14SClaudio Fontana --xsig0;
15221b248f14SClaudio Fontana add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
15231b248f14SClaudio Fontana }
15241b248f14SClaudio Fontana xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
15251b248f14SClaudio Fontana /*
15261b248f14SClaudio Fontana * No need to correct any estimation error in xsig1; even
15271b248f14SClaudio Fontana * with such error, it is accurate enough.
15281b248f14SClaudio Fontana */
15291b248f14SClaudio Fontana
15301b248f14SClaudio Fontana /*
15311b248f14SClaudio Fontana * Split x as x = t + y, where t = n/8 is the nearest
15321b248f14SClaudio Fontana * multiple of 1/8 to x.
15331b248f14SClaudio Fontana */
15348da5f1dbSRichard Henderson x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
15358da5f1dbSRichard Henderson false, xexp + 3, xsig0,
15361b248f14SClaudio Fontana xsig1, &env->fp_status);
15371b248f14SClaudio Fontana n = floatx80_to_int32(x8, &env->fp_status);
15381b248f14SClaudio Fontana if (n == 0) {
15391b248f14SClaudio Fontana ysign = false;
15401b248f14SClaudio Fontana yexp = xexp;
15411b248f14SClaudio Fontana ysig0 = xsig0;
15421b248f14SClaudio Fontana ysig1 = xsig1;
15431b248f14SClaudio Fontana texp = 0;
15441b248f14SClaudio Fontana tsig = 0;
15451b248f14SClaudio Fontana } else {
15461b248f14SClaudio Fontana int shift = clz32(n) + 32;
15471b248f14SClaudio Fontana texp = 0x403b - shift;
15481b248f14SClaudio Fontana tsig = n;
15491b248f14SClaudio Fontana tsig <<= shift;
15501b248f14SClaudio Fontana if (texp == xexp) {
15511b248f14SClaudio Fontana sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
15521b248f14SClaudio Fontana if ((int64_t) ysig0 >= 0) {
15531b248f14SClaudio Fontana ysign = false;
15541b248f14SClaudio Fontana if (ysig0 == 0) {
15551b248f14SClaudio Fontana if (ysig1 == 0) {
15561b248f14SClaudio Fontana yexp = 0;
15571b248f14SClaudio Fontana } else {
15581b248f14SClaudio Fontana shift = clz64(ysig1) + 64;
15591b248f14SClaudio Fontana yexp = xexp - shift;
15601b248f14SClaudio Fontana shift128Left(ysig0, ysig1, shift,
15611b248f14SClaudio Fontana &ysig0, &ysig1);
15621b248f14SClaudio Fontana }
15631b248f14SClaudio Fontana } else {
15641b248f14SClaudio Fontana shift = clz64(ysig0);
15651b248f14SClaudio Fontana yexp = xexp - shift;
15661b248f14SClaudio Fontana shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15671b248f14SClaudio Fontana }
15681b248f14SClaudio Fontana } else {
15691b248f14SClaudio Fontana ysign = true;
15701b248f14SClaudio Fontana sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
15711b248f14SClaudio Fontana if (ysig0 == 0) {
15721b248f14SClaudio Fontana shift = clz64(ysig1) + 64;
15731b248f14SClaudio Fontana } else {
15741b248f14SClaudio Fontana shift = clz64(ysig0);
15751b248f14SClaudio Fontana }
15761b248f14SClaudio Fontana yexp = xexp - shift;
15771b248f14SClaudio Fontana shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15781b248f14SClaudio Fontana }
15791b248f14SClaudio Fontana } else {
15801b248f14SClaudio Fontana /*
15811b248f14SClaudio Fontana * t's exponent must be greater than x's because t
15821b248f14SClaudio Fontana * is positive and the nearest multiple of 1/8 to
15831b248f14SClaudio Fontana * x, and if x has a greater exponent, the power
15841b248f14SClaudio Fontana * of 2 with that exponent is also a multiple of
15851b248f14SClaudio Fontana * 1/8.
15861b248f14SClaudio Fontana */
15871b248f14SClaudio Fontana uint64_t usig0, usig1;
15881b248f14SClaudio Fontana shift128RightJamming(xsig0, xsig1, texp - xexp,
15891b248f14SClaudio Fontana &usig0, &usig1);
15901b248f14SClaudio Fontana ysign = true;
15911b248f14SClaudio Fontana sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
15921b248f14SClaudio Fontana if (ysig0 == 0) {
15931b248f14SClaudio Fontana shift = clz64(ysig1) + 64;
15941b248f14SClaudio Fontana } else {
15951b248f14SClaudio Fontana shift = clz64(ysig0);
15961b248f14SClaudio Fontana }
15971b248f14SClaudio Fontana yexp = texp - shift;
15981b248f14SClaudio Fontana shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
15991b248f14SClaudio Fontana }
16001b248f14SClaudio Fontana }
16011b248f14SClaudio Fontana
16021b248f14SClaudio Fontana /*
16031b248f14SClaudio Fontana * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
16041b248f14SClaudio Fontana * arctan(z).
16051b248f14SClaudio Fontana */
16061b248f14SClaudio Fontana zsign = ysign;
16071b248f14SClaudio Fontana if (texp == 0 || yexp == 0) {
16081b248f14SClaudio Fontana zexp = yexp;
16091b248f14SClaudio Fontana zsig0 = ysig0;
16101b248f14SClaudio Fontana zsig1 = ysig1;
16111b248f14SClaudio Fontana } else {
16121b248f14SClaudio Fontana /*
16131b248f14SClaudio Fontana * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
16141b248f14SClaudio Fontana */
16151b248f14SClaudio Fontana int32_t dexp = texp + xexp - 0x3ffe;
16161b248f14SClaudio Fontana uint64_t dsig0, dsig1, dsig2;
16171b248f14SClaudio Fontana mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
16181b248f14SClaudio Fontana /*
16191b248f14SClaudio Fontana * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
16201b248f14SClaudio Fontana * bit). Add 1 to produce the denominator 1+tx.
16211b248f14SClaudio Fontana */
16221b248f14SClaudio Fontana shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
16231b248f14SClaudio Fontana &dsig0, &dsig1);
16241b248f14SClaudio Fontana dsig0 |= 0x8000000000000000ULL;
16251b248f14SClaudio Fontana zexp = yexp - 1;
16261b248f14SClaudio Fontana remsig0 = ysig0;
16271b248f14SClaudio Fontana remsig1 = ysig1;
16281b248f14SClaudio Fontana remsig2 = 0;
16291b248f14SClaudio Fontana if (dsig0 <= remsig0) {
16301b248f14SClaudio Fontana shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
16311b248f14SClaudio Fontana ++zexp;
16321b248f14SClaudio Fontana }
16331b248f14SClaudio Fontana zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
16341b248f14SClaudio Fontana mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
16351b248f14SClaudio Fontana sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
16361b248f14SClaudio Fontana &remsig0, &remsig1, &remsig2);
16371b248f14SClaudio Fontana while ((int64_t) remsig0 < 0) {
16381b248f14SClaudio Fontana --zsig0;
16391b248f14SClaudio Fontana add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
16401b248f14SClaudio Fontana &remsig0, &remsig1, &remsig2);
16411b248f14SClaudio Fontana }
16421b248f14SClaudio Fontana zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
16431b248f14SClaudio Fontana /* No need to correct any estimation error in zsig1. */
16441b248f14SClaudio Fontana }
16451b248f14SClaudio Fontana
16461b248f14SClaudio Fontana if (zexp == 0) {
16471b248f14SClaudio Fontana azexp = 0;
16481b248f14SClaudio Fontana azsig0 = 0;
16491b248f14SClaudio Fontana azsig1 = 0;
16501b248f14SClaudio Fontana } else {
16511b248f14SClaudio Fontana floatx80 z2, accum;
16521b248f14SClaudio Fontana uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
16531b248f14SClaudio Fontana /* Compute z^2. */
16541b248f14SClaudio Fontana mul128To256(zsig0, zsig1, zsig0, zsig1,
16551b248f14SClaudio Fontana &z2sig0, &z2sig1, &z2sig2, &z2sig3);
16568da5f1dbSRichard Henderson z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
16571b248f14SClaudio Fontana zexp + zexp - 0x3ffe,
16581b248f14SClaudio Fontana z2sig0, z2sig1,
16591b248f14SClaudio Fontana &env->fp_status);
16601b248f14SClaudio Fontana
16611b248f14SClaudio Fontana /* Compute the lower parts of the polynomial expansion. */
16621b248f14SClaudio Fontana accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
16631b248f14SClaudio Fontana accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
16641b248f14SClaudio Fontana accum = floatx80_mul(accum, z2, &env->fp_status);
16651b248f14SClaudio Fontana accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
16661b248f14SClaudio Fontana accum = floatx80_mul(accum, z2, &env->fp_status);
16671b248f14SClaudio Fontana accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
16681b248f14SClaudio Fontana accum = floatx80_mul(accum, z2, &env->fp_status);
16691b248f14SClaudio Fontana accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
16701b248f14SClaudio Fontana accum = floatx80_mul(accum, z2, &env->fp_status);
16711b248f14SClaudio Fontana accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
16721b248f14SClaudio Fontana accum = floatx80_mul(accum, z2, &env->fp_status);
16731b248f14SClaudio Fontana
16741b248f14SClaudio Fontana /*
16751b248f14SClaudio Fontana * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
16761b248f14SClaudio Fontana * fpatan_coeff_0 is 1, and accum is negative and much smaller.
16771b248f14SClaudio Fontana */
16781b248f14SClaudio Fontana aexp = extractFloatx80Exp(fpatan_coeff_0);
16791b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0,
16801b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum),
16811b248f14SClaudio Fontana &asig0, &asig1);
16821b248f14SClaudio Fontana sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
16831b248f14SClaudio Fontana &asig0, &asig1);
16841b248f14SClaudio Fontana /* Multiply by z to compute arctan(z). */
16851b248f14SClaudio Fontana azexp = aexp + zexp - 0x3ffe;
16861b248f14SClaudio Fontana mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
16871b248f14SClaudio Fontana &azsig2, &azsig3);
16881b248f14SClaudio Fontana }
16891b248f14SClaudio Fontana
16901b248f14SClaudio Fontana /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
16911b248f14SClaudio Fontana if (texp == 0) {
16921b248f14SClaudio Fontana /* z is positive. */
16931b248f14SClaudio Fontana axexp = azexp;
16941b248f14SClaudio Fontana axsig0 = azsig0;
16951b248f14SClaudio Fontana axsig1 = azsig1;
16961b248f14SClaudio Fontana } else {
16971b248f14SClaudio Fontana bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
16981b248f14SClaudio Fontana int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
16991b248f14SClaudio Fontana uint64_t low_sig0 =
17001b248f14SClaudio Fontana extractFloatx80Frac(fpatan_table[n].atan_low);
17011b248f14SClaudio Fontana uint64_t low_sig1 = 0;
17021b248f14SClaudio Fontana axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
17031b248f14SClaudio Fontana axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
17041b248f14SClaudio Fontana axsig1 = 0;
17051b248f14SClaudio Fontana shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
17061b248f14SClaudio Fontana &low_sig0, &low_sig1);
17071b248f14SClaudio Fontana if (low_sign) {
17081b248f14SClaudio Fontana sub128(axsig0, axsig1, low_sig0, low_sig1,
17091b248f14SClaudio Fontana &axsig0, &axsig1);
17101b248f14SClaudio Fontana } else {
17111b248f14SClaudio Fontana add128(axsig0, axsig1, low_sig0, low_sig1,
17121b248f14SClaudio Fontana &axsig0, &axsig1);
17131b248f14SClaudio Fontana }
17141b248f14SClaudio Fontana if (azexp >= axexp) {
17151b248f14SClaudio Fontana shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
17161b248f14SClaudio Fontana &axsig0, &axsig1);
17171b248f14SClaudio Fontana axexp = azexp + 1;
17181b248f14SClaudio Fontana shift128RightJamming(azsig0, azsig1, 1,
17191b248f14SClaudio Fontana &azsig0, &azsig1);
17201b248f14SClaudio Fontana } else {
17211b248f14SClaudio Fontana shift128RightJamming(axsig0, axsig1, 1,
17221b248f14SClaudio Fontana &axsig0, &axsig1);
17231b248f14SClaudio Fontana shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
17241b248f14SClaudio Fontana &azsig0, &azsig1);
17251b248f14SClaudio Fontana ++axexp;
17261b248f14SClaudio Fontana }
17271b248f14SClaudio Fontana if (zsign) {
17281b248f14SClaudio Fontana sub128(axsig0, axsig1, azsig0, azsig1,
17291b248f14SClaudio Fontana &axsig0, &axsig1);
17301b248f14SClaudio Fontana } else {
17311b248f14SClaudio Fontana add128(axsig0, axsig1, azsig0, azsig1,
17321b248f14SClaudio Fontana &axsig0, &axsig1);
17331b248f14SClaudio Fontana }
17341b248f14SClaudio Fontana }
17351b248f14SClaudio Fontana
17361b248f14SClaudio Fontana if (adj_exp == 0) {
17371b248f14SClaudio Fontana rexp = axexp;
17381b248f14SClaudio Fontana rsig0 = axsig0;
17391b248f14SClaudio Fontana rsig1 = axsig1;
17401b248f14SClaudio Fontana } else {
17411b248f14SClaudio Fontana /*
17421b248f14SClaudio Fontana * Add or subtract arctan(x) (exponent axexp,
17431b248f14SClaudio Fontana * significand axsig0 and axsig1, positive, not
17441b248f14SClaudio Fontana * necessarily normalized) to the number given by
17451b248f14SClaudio Fontana * adj_exp, adj_sig0 and adj_sig1, according to
17461b248f14SClaudio Fontana * adj_sub.
17471b248f14SClaudio Fontana */
17481b248f14SClaudio Fontana if (adj_exp >= axexp) {
17491b248f14SClaudio Fontana shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
17501b248f14SClaudio Fontana &axsig0, &axsig1);
17511b248f14SClaudio Fontana rexp = adj_exp + 1;
17521b248f14SClaudio Fontana shift128RightJamming(adj_sig0, adj_sig1, 1,
17531b248f14SClaudio Fontana &adj_sig0, &adj_sig1);
17541b248f14SClaudio Fontana } else {
17551b248f14SClaudio Fontana shift128RightJamming(axsig0, axsig1, 1,
17561b248f14SClaudio Fontana &axsig0, &axsig1);
17571b248f14SClaudio Fontana shift128RightJamming(adj_sig0, adj_sig1,
17581b248f14SClaudio Fontana axexp - adj_exp + 1,
17591b248f14SClaudio Fontana &adj_sig0, &adj_sig1);
17601b248f14SClaudio Fontana rexp = axexp + 1;
17611b248f14SClaudio Fontana }
17621b248f14SClaudio Fontana if (adj_sub) {
17631b248f14SClaudio Fontana sub128(adj_sig0, adj_sig1, axsig0, axsig1,
17641b248f14SClaudio Fontana &rsig0, &rsig1);
17651b248f14SClaudio Fontana } else {
17661b248f14SClaudio Fontana add128(adj_sig0, adj_sig1, axsig0, axsig1,
17671b248f14SClaudio Fontana &rsig0, &rsig1);
17681b248f14SClaudio Fontana }
17691b248f14SClaudio Fontana }
17701b248f14SClaudio Fontana
17711b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode;
17721b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec;
17731b248f14SClaudio Fontana }
17741b248f14SClaudio Fontana /* This result is inexact. */
17751b248f14SClaudio Fontana rsig1 |= 1;
17768da5f1dbSRichard Henderson ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
17771b248f14SClaudio Fontana rsig0, rsig1, &env->fp_status);
17781b248f14SClaudio Fontana }
17791b248f14SClaudio Fontana
17801b248f14SClaudio Fontana fpop(env);
17811b248f14SClaudio Fontana merge_exception_flags(env, old_flags);
17821b248f14SClaudio Fontana }
17831b248f14SClaudio Fontana
/*
 * FXTRACT helper: split the value in ST0 into an exponent part and a
 * significand part.
 *
 * In every path the current ST0 slot is written first, then fpush() is
 * called and the new ST0 is written; the code relies on the slot written
 * before the push being reachable as ST1 afterwards.  Summary of the
 * (value left in the old slot, value in the new ST0) pairs:
 *
 *   zero             -> (-1/0, i.e. -inf with divide-by-zero raised; zero)
 *   invalid encoding -> (default NaN, default NaN), invalid raised
 *   NaN              -> (quieted NaN, quieted NaN), invalid raised only
 *                       for a signaling NaN
 *   infinity         -> (floatx80_infinity, original infinity)
 *   finite non-zero  -> (unbiased exponent as floatx80, original value
 *                       after BIASEXPONENT())
 *
 * Exception flags raised while executing are folded back with
 * merge_exception_flags() using the flags saved on entry.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        /* The significand part is the original (signed) zero. */
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        /* Duplicate the default NaN into the new top of stack. */
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        /* Both results are the (quieted) NaN. */
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        /* New ST0 keeps the input infinity; the exponent part is infinity. */
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /*
             * Zero exponent field with a non-zero significand: shift the
             * significand up until its top bit is set and account for the
             * shift in the reported exponent.
             */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        /*
         * BIASEXPONENT() is defined elsewhere in this file; presumably it
         * resets the exponent field to the bias so that only the
         * significand information remains -- confirm against the macro.
         */
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
18321b248f14SClaudio Fontana
/*
 * Shared implementation of the FPREM and FPREM1 helpers: replace ST0 with
 * a (possibly partial) remainder of ST0 divided by ST1.
 *
 * @env: CPU state; ST0/ST1, env->fpus and env->fp_status are used.
 * @mod: forwarded unchanged to floatx80_modrem(); helper_fprem() passes
 *       true and helper_fprem1() passes false.
 *
 * C3..C0 in env->fpus are cleared first.  When the full remainder is
 * computed in one step, the low three quotient bits are reported in
 * C0/C3/C1.  When the exponent difference is 64 or more only a partial
 * reduction is done and C2 is set instead.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        /*
         * Zero, maximum-exponent or invalid operands: take the result
         * straight from floatx80_modrem(); the quotient is not reported
         * in the condition codes on this path.
         */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /*
         * For a zero exponent field, derive an effective exponent from
         * the position of the leading significand bit.
         */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            /* Scale the divisor up so that only n bits are reduced now. */
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}
18821b248f14SClaudio Fontana
/*
 * FPREM1 helper: thin wrapper around helper_fprem_common() with
 * mod == false.
 */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}
18871b248f14SClaudio Fontana
/*
 * FPREM helper: thin wrapper around helper_fprem_common() with
 * mod == true.
 */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
18921b248f14SClaudio Fontana
/* 128-bit significand of log2(e), split into high and low 64-bit halves. */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * fyl2x_coeff_0_low is a small correction that is added to the
 * accumulated higher-order terms before they are combined with
 * fyl2x_coeff_0.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
19141b248f14SClaudio Fontana
19151b248f14SClaudio Fontana /*
19161b248f14SClaudio Fontana * Compute an approximation of log2(1+arg), where 1+arg is in the
19171b248f14SClaudio Fontana * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this
19181b248f14SClaudio Fontana * function is called, rounding precision is set to 80 and the
19191b248f14SClaudio Fontana * round-to-nearest mode is in effect. arg must not be exactly zero,
19201b248f14SClaudio Fontana * and must not be so close to zero that underflow might occur.
19211b248f14SClaudio Fontana */
helper_fyl2x_common(CPUX86State * env,floatx80 arg,int32_t * exp,uint64_t * sig0,uint64_t * sig1)19221b248f14SClaudio Fontana static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
19231b248f14SClaudio Fontana uint64_t *sig0, uint64_t *sig1)
19241b248f14SClaudio Fontana {
19251b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(arg);
19261b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(arg);
19271b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(arg);
19281b248f14SClaudio Fontana bool asign;
19291b248f14SClaudio Fontana int32_t dexp, texp, aexp;
19301b248f14SClaudio Fontana uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
19311b248f14SClaudio Fontana uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
19321b248f14SClaudio Fontana uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
19331b248f14SClaudio Fontana floatx80 t2, accum;
19341b248f14SClaudio Fontana
19351b248f14SClaudio Fontana /*
19361b248f14SClaudio Fontana * Compute an approximation of arg/(2+arg), with extra precision,
19371b248f14SClaudio Fontana * as the argument to a polynomial approximation. The extra
19381b248f14SClaudio Fontana * precision is only needed for the first term of the
19391b248f14SClaudio Fontana * approximation, with subsequent terms being significantly
19401b248f14SClaudio Fontana * smaller; the approximation only uses odd exponents, and the
19411b248f14SClaudio Fontana * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
19421b248f14SClaudio Fontana */
19431b248f14SClaudio Fontana if (arg0_sign) {
19441b248f14SClaudio Fontana dexp = 0x3fff;
19451b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
19461b248f14SClaudio Fontana sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
19471b248f14SClaudio Fontana } else {
19481b248f14SClaudio Fontana dexp = 0x4000;
19491b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
19501b248f14SClaudio Fontana dsig0 |= 0x8000000000000000ULL;
19511b248f14SClaudio Fontana }
19521b248f14SClaudio Fontana texp = arg0_exp - dexp + 0x3ffe;
19531b248f14SClaudio Fontana rsig0 = arg0_sig;
19541b248f14SClaudio Fontana rsig1 = 0;
19551b248f14SClaudio Fontana rsig2 = 0;
19561b248f14SClaudio Fontana if (dsig0 <= rsig0) {
19571b248f14SClaudio Fontana shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
19581b248f14SClaudio Fontana ++texp;
19591b248f14SClaudio Fontana }
19601b248f14SClaudio Fontana tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
19611b248f14SClaudio Fontana mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
19621b248f14SClaudio Fontana sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
19631b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2);
19641b248f14SClaudio Fontana while ((int64_t) rsig0 < 0) {
19651b248f14SClaudio Fontana --tsig0;
19661b248f14SClaudio Fontana add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
19671b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2);
19681b248f14SClaudio Fontana }
19691b248f14SClaudio Fontana tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
19701b248f14SClaudio Fontana /*
19711b248f14SClaudio Fontana * No need to correct any estimation error in tsig1; even with
19721b248f14SClaudio Fontana * such error, it is accurate enough. Now compute the square of
19731b248f14SClaudio Fontana * that approximation.
19741b248f14SClaudio Fontana */
19751b248f14SClaudio Fontana mul128To256(tsig0, tsig1, tsig0, tsig1,
19761b248f14SClaudio Fontana &t2sig0, &t2sig1, &t2sig2, &t2sig3);
19778da5f1dbSRichard Henderson t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
19788da5f1dbSRichard Henderson texp + texp - 0x3ffe,
19791b248f14SClaudio Fontana t2sig0, t2sig1, &env->fp_status);
19801b248f14SClaudio Fontana
19811b248f14SClaudio Fontana /* Compute the lower parts of the polynomial expansion. */
19821b248f14SClaudio Fontana accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
19831b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
19841b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19851b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
19861b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19871b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
19881b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19891b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
19901b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19911b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
19921b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19931b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
19941b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19951b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
19961b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19971b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
19981b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status);
19991b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
20001b248f14SClaudio Fontana
20011b248f14SClaudio Fontana /*
20021b248f14SClaudio Fontana * The full polynomial expansion is fyl2x_coeff_0 + accum (where
20031b248f14SClaudio Fontana * accum has much lower magnitude, and so, in particular, carry
20041b248f14SClaudio Fontana * out of the addition is not possible), multiplied by t. (This
20051b248f14SClaudio Fontana * expansion is only accurate to about 70 bits, not 128 bits.)
20061b248f14SClaudio Fontana */
20071b248f14SClaudio Fontana aexp = extractFloatx80Exp(fyl2x_coeff_0);
20081b248f14SClaudio Fontana asign = extractFloatx80Sign(fyl2x_coeff_0);
20091b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0,
20101b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum),
20111b248f14SClaudio Fontana &asig0, &asig1);
20121b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
20131b248f14SClaudio Fontana bsig1 = 0;
20141b248f14SClaudio Fontana if (asign == extractFloatx80Sign(accum)) {
20151b248f14SClaudio Fontana add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
20161b248f14SClaudio Fontana } else {
20171b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
20181b248f14SClaudio Fontana }
20191b248f14SClaudio Fontana /* Multiply by t to compute the required result. */
20201b248f14SClaudio Fontana mul128To256(asig0, asig1, tsig0, tsig1,
20211b248f14SClaudio Fontana &asig0, &asig1, &asig2, &asig3);
20221b248f14SClaudio Fontana aexp += texp - 0x3ffe;
20231b248f14SClaudio Fontana *exp = aexp;
20241b248f14SClaudio Fontana *sig0 = asig0;
20251b248f14SClaudio Fontana *sig1 = asig1;
20261b248f14SClaudio Fontana }
20271b248f14SClaudio Fontana
/*
 * FYL2XP1 helper: compute ST1 * log2(ST0 + 1), store the result in ST1
 * and pop the FPU stack (fpop() is called on every path).
 *
 * The checks are ordered as follows:
 *   - signaling NaN in either operand: invalid raised, quieted NaN result
 *     (ST0's NaN takes priority over ST1's);
 *   - invalid encoding in either operand: invalid raised, default NaN;
 *   - quiet NaN: passed through (ST0's takes priority);
 *   - ST0 outside the supported range: invalid raised, default NaN;
 *   - a zero operand or an ST1 with maximum exponent: plain product
 *     ST0 * ST1;
 *   - very small |ST0|: ST0 * ST1 * log2(e) computed with extra precision;
 *   - otherwise: helper_fyl2x_common() followed by a multiply by ST1.
 *
 * Exception flags raised here are folded back with
 * merge_exception_flags() using the flags saved on entry.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        /* Bring operands with a zero exponent field into normalized form. */
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        /*
         * helper_fyl2x_common() requires round-to-nearest and 80-bit
         * precision, so switch to them temporarily.
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        /*
         * The rounding mode is restored before the final rounding step;
         * the rounding precision is restored only after it.
         */
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
21251b248f14SClaudio Fontana
/*
 * FYL2X: compute ST1 * log2(ST0), store the result in ST1 and pop the
 * register stack.
 *
 * Special operands are resolved first, in this order: signaling NaNs,
 * invalid encodings, quiet NaNs, negative nonzero ST0, infinite ST1,
 * infinite ST0, zero ST0, zero ST1 and ST0 == 1.  Every other input goes
 * through the arithmetic path, which runs with round-to-nearest-even and
 * full extended precision internally and only applies the guest rounding
 * mode to the final operation.
 *
 * Exception flags raised here are folded into the x87 status word by
 * merge_exception_flags().
 */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* Logarithm of a negative number. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            /* log2(ST0) is negative: flip the sign of the infinity. */
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            /* ST0 == 1: infinity times zero. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; the zero takes the sign of ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split ST0 into 2^int_exp times a value close to 1.  The constant
         * is the 64-bit significand of sqrt(2); significands above it are
         * paired with the next power of two so the scaled value stays
         * near 1.
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            /*
             * NOTE(review): helper_fyl2x_common is defined outside this
             * block; presumably it yields log2(1 + arg0_m1) as a 128-bit
             * significand (asig0:asig1) with exponent aexp -- confirm.
             */
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /*
                 * Add the integer part of the logarithm: isig/iexp is
                 * |int_exp| as a normalized 64-bit significand and biased
                 * exponent, isign its sign.
                 */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             *
             * arg1_sig/arg1_exp were already normalized above and must not
             * be normalized again: a subnormal ST1 whose significand has
             * bit 62 set normalizes to exponent 0, and a second pass would
             * take that 0 for "still subnormal" and reset the exponent to
             * 1, doubling the result.
             */
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact. */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
22721b248f14SClaudio Fontana
/*
 * FSQRT: replace ST0 with its square root.
 *
 * For a negative operand the condition codes C3..C0 are cleared and C2 is
 * set before the softfloat square root is taken.
 */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    if (floatx80_is_neg(ST0)) {
        /* (C3,C2,C1,C0) <-- 0100 */
        env->fpus = (env->fpus & ~0x4700) | 0x400;
    }

    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
22831b248f14SClaudio Fontana
/*
 * FSINCOS: replace ST0 with sin(ST0) and push cos(ST0) on top of it.
 *
 * The computation goes through the host's double-precision sin()/cos().
 * If the operand is outside [-MAXTAN, MAXTAN] the stack is left untouched
 * and C2 is set instead.
 */
void helper_fsincos(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    /* the code below is for |arg| < 2**63 only */
    ST0 = double_to_floatx80(env, sin(arg));
    fpush(env);
    ST0 = double_to_floatx80(env, cos(arg));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
22981b248f14SClaudio Fontana
/*
 * FRNDINT: round ST0 to an integral value using the rounding settings
 * currently held in env->fp_status.
 */
void helper_frndint(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    ST0 = floatx80_round_to_int(ST0, &env->fp_status);

    merge_exception_flags(env, saved_flags);
}
23051b248f14SClaudio Fontana
/*
 * FSCALE: scale ST0 by 2 raised to ST1 truncated toward zero.
 *
 * Handled in order: invalid encodings, NaN in ST1, infinite ST1 with a
 * non-NaN ST0, and finally the ordinary case, where ST1 is converted to
 * an int32 and applied with floatx80_scalbn() at full extended precision.
 */
void helper_fscale(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        /* A signaling NaN in either operand raises invalid. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        bool scale_down = floatx80_is_neg(ST1);
        bool undefined;

        /* inf * 2^-inf and 0 * 2^+inf have no meaningful result. */
        if (scale_down) {
            undefined = floatx80_is_infinity(ST0);
        } else {
            undefined = floatx80_is_zero(ST0);
        }

        if (undefined) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_default_nan(&env->fp_status);
        } else if (scale_down) {
            /* Zero carrying the sign of ST0. */
            if (floatx80_is_neg(ST0)) {
                ST0 = floatx80_chs(floatx80_zero);
            } else {
                ST0 = floatx80_zero;
            }
        } else {
            /* Infinity carrying the sign of ST0. */
            if (floatx80_is_neg(ST0)) {
                ST0 = floatx80_chs(floatx80_infinity);
            } else {
                ST0 = floatx80_infinity;
            }
        }
    } else {
        FloatX80RoundPrec prev_prec =
            env->fp_status.floatx80_rounding_precision;
        uint8_t prev_flags = get_float_exception_flags(&env->fp_status);
        int scale;

        /*
         * Exceptions raised by the integer conversion are discarded: the
         * flags are zeroed around it and then put back.
         */
        set_float_exception_flags(0, &env->fp_status);
        scale = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(prev_flags, &env->fp_status);

        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, scale, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = prev_prec;
    }

    merge_exception_flags(env, saved_flags);
}
23561b248f14SClaudio Fontana
/*
 * FSIN: replace ST0 with sin(ST0), computed with the host's
 * double-precision sin().
 *
 * If the operand is outside [-MAXTAN, MAXTAN], ST0 is left unchanged and
 * C2 is set instead.
 */
void helper_fsin(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    ST0 = double_to_floatx80(env, sin(arg));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
23691b248f14SClaudio Fontana
/*
 * FCOS: replace ST0 with cos(ST0), computed with the host's
 * double-precision cos().
 *
 * If the operand is outside [-MAXTAN, MAXTAN], ST0 is left unchanged and
 * C2 is set instead.
 */
void helper_fcos(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    /* the code below is for |arg| < 2**63 only */
    ST0 = double_to_floatx80(env, cos(arg));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
23821b248f14SClaudio Fontana
/*
 * FXAM: classify ST0 and report the class in condition codes C3, C2 and
 * C0; C1 receives the sign bit.
 *
 * The sign is reported even for an empty register.  A value that matches
 * none of the tests below (integer bit clear with a nonzero exponent)
 * leaves C3, C2 and C0 all clear.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    const uint64_t integer_bit = 0x8000000000000000ULL;
    CPU_LDoubleU reg;
    uint64_t mant;
    int exp;

    reg.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(reg)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    exp = EXPD(reg);
    mant = MANTD(reg);

    if (exp == MAXEXPD) {
        if (mant == integer_bit) {
            env->fpus |= 0x500; /* Infinity */
        } else if (mant & integer_bit) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (exp == 0) {
        /* Zero or Denormal, depending on the significand. */
        env->fpus |= (mant == 0) ? 0x4000 : 0x4400;
    } else if (mant & integer_bit) {
        env->fpus |= 0x400; /* C2 only: finite with the integer bit set */
    }
}
24171b248f14SClaudio Fontana
/*
 * Store the x87 environment (control word, status word, tag word and the
 * instruction/data pointer fields) to guest memory at @ptr.
 *
 * @ac:     prepared access descriptor covering the destination
 * @ptr:    guest address of the environment image
 * @data32: nonzero selects the layout with 4-byte slots (28 bytes),
 *          zero the layout with 2-byte slots (14 bytes)
 *
 * The status word is stored with the current top-of-stack index placed in
 * bits 11..13.  The tag word is rebuilt from the register contents, two
 * bits per physical register with register 0 in the low bits:
 * 3 = empty, 1 = zero, 2 = NaN/infinity/denormal (or integer bit clear),
 * 0 = anything else.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    /* Walk from register 7 down so register 0 lands in the low bits. */
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       /*
                        * Unsigned constant: "1LL << 63" would shift into
                        * the sign bit of a signed type, which is undefined
                        * behaviour in ISO C.
                        */
                       || (mant & (1ULL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
        access_stl(ac, ptr + 24, env->fpds); /* fpos */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}
24651b248f14SClaudio Fontana
/*
 * FSTENV/FNSTENV helper: store the x87 environment at @ptr.
 * The image is 14 bytes when @data32 is 0 and 28 bytes when it is 1.
 */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int size = 14 << data32;
    X86Access ac;

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_fstenv(&ac, ptr, data32);
}
24731b248f14SClaudio Fontana
/*
 * Install a new x87 status word.
 *
 * Bits 11..13 of @fpus become the top-of-stack index (env->fpstt) and are
 * not kept in env->fpus.  The busy bit is not taken from @fpus; it is
 * derived from the exception-summary bit.  In system emulation, a status
 * word without the summary bit also clears IGNNE.
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    uint16_t status = fpus & ~0x3800 & ~FPUS_B;

    if (status & FPUS_SE) {
        status |= FPUS_B;
    }

    env->fpstt = (fpus >> 11) & 7;
    env->fpus = status;

#if !defined(CONFIG_USER_ONLY)
    if (!(status & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}
24891b248f14SClaudio Fontana
/*
 * Load the x87 control, status and tag words from the environment image
 * at @ptr.  Each word is read as 16 bits; @data32 only selects the slot
 * spacing (2 or 4 bytes).
 *
 * Only the "empty" tag value (3) is kept: env->fptags[i] becomes 1 for an
 * empty register and 0 otherwise.
 */
static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int status, tags;

    cpu_set_fpuc(env, access_ldw(ac, ptr));
    status = access_ldw(ac, ptr + (2 << data32));
    tags = access_ldw(ac, ptr + (4 << data32));

    cpu_set_fpus(env, status);

    for (int reg = 0; reg < 8; reg++) {
        env->fptags[reg] = (((tags >> (2 * reg)) & 3) == 3);
    }
}
25051b248f14SClaudio Fontana
/*
 * FLDENV helper: load the x87 environment from guest memory at @ptr.
 * The image is 14 bytes when @data32 is 0 and 28 bytes when it is 1.
 *
 * The instruction only reads memory, so the access is prepared as a load;
 * preparing it as a store would fault on a readable but write-protected
 * page.
 */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC());
    do_fldenv(&ac, ptr, data32);
}
25131b248f14SClaudio Fontana
/*
 * Store the full x87 state at @ptr: the environment image first, then
 * ST(0)..ST(7) as eight consecutive 10-byte values.  The FPU is
 * reinitialised afterwards via do_fninit().
 */
static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong reg_addr = ptr + (14 << data32);

    do_fstenv(ac, ptr, data32);

    for (int i = 0; i < 8; i++, reg_addr += 10) {
        do_fstt(ac, reg_addr, ST(i));
    }

    do_fninit(env);
}
25291b248f14SClaudio Fontana
/*
 * FSAVE/FNSAVE helper.  The memory image is the environment
 * (14 << data32 bytes) followed by 80 bytes of register data.
 */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int env_size = 14 << data32;
    X86Access ac;

    access_prepare(&ac, env, ptr, env_size + 80, MMU_DATA_STORE, ra);
    do_fsave(&ac, ptr, data32);
}
25380ac2b197SRichard Henderson
/*
 * Reload the full x87 state from @ptr: the environment image first, then
 * eight consecutive 10-byte values into ST(0)..ST(7).  The registers are
 * addressed relative to the top-of-stack that the environment load has
 * just installed.
 */
static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong reg_addr = ptr + (14 << data32);

    do_fldenv(ac, ptr, data32);

    for (int i = 0; i < 8; i++, reg_addr += 10) {
        ST(i) = do_fldt(ac, reg_addr);
    }
}
25521b248f14SClaudio Fontana
/*
 * FRSTOR helper.  The memory image is the environment
 * (14 << data32 bytes) followed by 80 bytes of register data.
 */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int env_size = 14 << data32;
    X86Access ac;

    access_prepare(&ac, env, ptr, env_size + 80, MMU_DATA_LOAD, ra);
    do_frstor(&ac, ptr, data32);
}
25610ac2b197SRichard Henderson
/* Byte offset of member FIELD within X86XSaveArea. */
#define XO(FIELD) offsetof(X86XSaveArea, FIELD)
25631b248f14SClaudio Fontana
/*
 * Write the x87 part of the legacy save area at @ptr: control word,
 * status word (with the top-of-stack index in bits 11..13), the abridged
 * tag word, zeroed instruction/data pointers, and ST(0)..ST(7) in 16-byte
 * slots.
 */
static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong reg_base = ptr + XO(legacy.fpregs);
    int status = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    int empty_mask = 0;

    /* One bit per physical register, set when the register is empty. */
    for (int reg = 0; reg < 8; reg++) {
        empty_mask |= (env->fptags[reg] << reg);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), status);
    /* The stored tag word is the inverse of the empty mask. */
    access_stw(ac, ptr + XO(legacy.ftw), empty_mask ^ 0xff);

    /*
     * In 32-bit mode this is eip, sel, dp, sel.
     * In 64-bit mode this is rip, rdp.
     * But in either case we don't write actual data, just zeros.
     */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    for (int i = 0; i < 8; i++) {
        do_fstt(ac, reg_base + 16 * i, ST(i));
    }
}
25941b248f14SClaudio Fontana
/*
 * Write MXCSR and MXCSR_MASK into the legacy save area at @ptr.  MXCSR is
 * first refreshed via update_mxcsr_from_sse_status(); the mask is the
 * constant 0x0000ffff.
 */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);
    target_ulong mask_addr = ptr + XO(legacy.mxcsr_mask);

    update_mxcsr_from_sse_status(env);

    access_stl(ac, mxcsr_addr, env->mxcsr);
    access_stl(ac, mask_addr, 0x0000ffff);
}
26031b248f14SClaudio Fontana
/*
 * Write the low 128 bits of each XMM register into the legacy save area
 * at @ptr: 16 registers when HF_CS64_MASK is set in hflags, 8 otherwise.
 */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong base = ptr + XO(legacy.xmm_regs);

    for (int reg = 0; reg < count; reg++) {
        target_ulong slot = base + 16 * reg;

        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(0));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(1));
    }
}
26231b248f14SClaudio Fontana
/*
 * Write quadwords 2 and 3 (bits 128..255) of each vector register to
 * consecutive 16-byte slots starting at @ptr: 16 registers when
 * HF_CS64_MASK is set in hflags, 8 otherwise.
 */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;

    for (int reg = 0; reg < count; reg++) {
        target_ulong slot = ptr + 16 * reg;

        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(2));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(3));
    }
}
264089254431SPaolo Bonzini
/*
 * Write the four bound registers (lower bound, then upper bound, 8 bytes
 * each) into the XSaveBNDREG component located at @ptr.
 */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong base = ptr + offsetof(XSaveBNDREG, bnd_regs);

    for (int reg = 0; reg < 4; reg++) {
        target_ulong slot = base + 16 * reg;

        access_stq(ac, slot, env->bnd_regs[reg].lb);
        access_stq(ac, slot + 8, env->bnd_regs[reg].ub);
    }
}
26521b248f14SClaudio Fontana
/*
 * Write the bounds configuration (cfgu) and status (sts) registers into
 * the XSaveBNDCSR component located at @ptr.
 */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    access_stq(ac, cfgu_addr, env->bndcs_regs.cfgu);
    access_stq(ac, sts_addr, env->bndcs_regs.sts);
}
26621b248f14SClaudio Fontana
/* Write env->pkru as a 64-bit value at @ptr. */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr, env->pkru);
}
26671b248f14SClaudio Fontana
/*
 * Write the FXSAVE image at @ptr.
 *
 * The x87 state is always written.  MXCSR is written only when
 * CR4.OSFXSR is set, and the XMM registers additionally require that the
 * "fast FXSAVE" conditions (EFER.FFXSR set, CPL 0, long mode active) are
 * not all met.
 */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast_fxsave;

    do_xsave_fpu(ac, ptr);

    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }
    do_xsave_mxcsr(ac, ptr);

    /* Fast FXSAVE leaves out the XMM registers */
    fast_fxsave = (env->efer & MSR_EFER_FFXSR)
                  && !(env->hflags & HF_CPL_MASK)
                  && (env->hflags & HF_LMA_MASK);
    if (!fast_fxsave) {
        do_xsave_sse(ac, ptr);
    }
}
26831b248f14SClaudio Fontana
/*
 * FXSAVE helper: check the operand alignment, then write the legacy save
 * area at @ptr.
 */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t ra = GETPC();
    X86Access ac;

    /*
     * The operand must be 16 byte aligned.
     * NOTE(review): raise_exception_ra() is presumably noreturn -- the
     * code below relies on not being reached for a misaligned pointer.
     */
    if ((ptr & 0xf) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}
26980ac2b197SRichard Henderson
get_xinuse(CPUX86State * env)26991b248f14SClaudio Fontana static uint64_t get_xinuse(CPUX86State *env)
27001b248f14SClaudio Fontana {
27011b248f14SClaudio Fontana uint64_t inuse = -1;
27021b248f14SClaudio Fontana
27031b248f14SClaudio Fontana /* For the most part, we don't track XINUSE. We could calculate it
27041b248f14SClaudio Fontana here for all components, but it's probably less work to simply
27051b248f14SClaudio Fontana indicate in use. That said, the state of BNDREGS is important
27061b248f14SClaudio Fontana enough to track in HFLAGS, so we might as well use that here. */
27071b248f14SClaudio Fontana if ((env->hflags & HF_MPX_IU_MASK) == 0) {
27081b248f14SClaudio Fontana inuse &= ~XSTATE_BNDREGS_MASK;
27091b248f14SClaudio Fontana }
27101b248f14SClaudio Fontana return inuse;
27111b248f14SClaudio Fontana }
27121b248f14SClaudio Fontana
/*
 * Write the state components selected by @opt into the XSAVE area at
 * @ptr, then update the header's XSTATE_BV field.
 *
 * @rfbm:  requested-feature bitmap; selects which XSTATE_BV bits are
 *         rewritten and whether MXCSR is stored
 * @inuse: in-use bitmap providing the new values of those bits
 * @opt:   components whose data is actually written
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    /* Extended components, in the order they are written. */
    static const struct {
        uint64_t mask;
        size_t offset;
        void (*save)(X86Access *ac, target_ulong ptr);
    } ext_components[] = {
        { XSTATE_YMM_MASK,     XO(avx_state),    do_xsave_ymmh },
        { XSTATE_BNDREGS_MASK, XO(bndreg_state), do_xsave_bndregs },
        { XSTATE_BNDCSR_MASK,  XO(bndcsr_state), do_xsave_bndcsr },
        { XSTATE_PKRU_MASK,    XO(pkru_state),   do_xsave_pkru },
    };
    const target_ulong bv_addr = ptr + XO(header.xstate_bv);
    uint64_t bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
    if (rfbm & XSTATE_SSE_MASK) {
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }

    for (size_t i = 0;
         i < sizeof(ext_components) / sizeof(ext_components[0]); i++) {
        if (opt & ext_components[i].mask) {
            ext_components[i].save(ac, ptr + ext_components[i].offset);
        }
    }

    /* Update the XSTATE_BV field: bits outside RFBM keep their value. */
    bv = access_ldq(ac, bv_addr);
    bv = (bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, bv_addr, bv);
}
27461b248f14SClaudio Fontana
/*
 * Common precondition checks for the XSAVE/XRSTOR helpers.
 * Raises #UD when CR4.OSXSAVE is clear, then #GP when @ptr is not
 * 64-byte aligned; @ra is the return address passed to the raise.
 */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* XSAVE must have been enabled by the OS. */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The memory operand must sit on a 64-byte boundary. */
    if ((ptr & 63) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}
2759a8f68831SRichard Henderson
/*
 * Shared body of the XSAVE and XSAVEOPT helpers.
 *
 * Runs the common checks, limits @rfbm to XCR0 and @opt to the limited
 * @rfbm, prepares a store access sized by xsave_area_size() for the
 * resulting @opt, and then performs the save.
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access access;
    uint64_t enabled_rfbm, enabled_opt;

    do_xsave_chk(env, ptr, ra);

    /* Nothing that XCR0 leaves disabled is ever saved. */
    enabled_rfbm = rfbm & env->xcr0;
    enabled_opt = opt & enabled_rfbm;

    access_prepare(&access, env, ptr, xsave_area_size(enabled_opt, false),
                   MMU_DATA_STORE, ra);
    do_xsave_access(&access, ptr, enabled_rfbm, inuse, enabled_opt);
}
27761b248f14SClaudio Fontana
/*
 * TCG helper behind XSAVE: every component requested in @rfbm is written
 * (the "opt" set passed to do_xsave() is @rfbm itself).
 */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t retaddr = GETPC();
    const uint64_t inuse = get_xinuse(env);

    do_xsave(env, ptr, rfbm, inuse, rfbm, retaddr);
}
27811b248f14SClaudio Fontana
/*
 * TCG helper behind XSAVEOPT: the XINUSE bitmap is passed to do_xsave()
 * both as the in-use set and as the set of components to write.
 */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t retaddr = GETPC();
    const uint64_t in_use = get_xinuse(env);

    do_xsave(env, ptr, rfbm, in_use, in_use, retaddr);
}
27871b248f14SClaudio Fontana
/*
 * Load the x87 state (control word, status word, tags and the eight
 * data registers) from the legacy region of the save area at @ptr.
 */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    /* Must be called "env": the ST() macro expects that name in scope. */
    CPUX86State *env = ac->env;
    int control, status, tags, slot;
    target_ulong src;

    control = access_ldw(ac, ptr + XO(legacy.fcw));
    status = access_ldw(ac, ptr + XO(legacy.fsw));
    tags = access_ldw(ac, ptr + XO(legacy.ftw));
    cpu_set_fpuc(env, control);
    cpu_set_fpus(env, status);

    /* Each stored tag bit is inverted before being kept in fptags[]. */
    tags ^= 0xff;
    for (slot = 0; slot < 8; slot++) {
        env->fptags[slot] = (tags >> slot) & 1;
    }

    /* The register images are laid out 16 bytes apart. */
    src = ptr + XO(legacy.fpregs);
    for (slot = 0; slot < 8; slot++, src += 16) {
        floatx80 value = do_fldt(ac, src);
        ST(slot) = value;
    }
}
28131b248f14SClaudio Fontana
/* Load MXCSR from the legacy region at @ptr and install it in the CPU. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    cpu_set_mxcsr(ac->env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}
28191b248f14SClaudio Fontana
/*
 * Load quadwords 0 and 1 of each XMM register from the legacy region at
 * @ptr: 16 registers when HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong src = ptr + XO(legacy.xmm_regs);
    int reg;

    for (reg = 0; reg < count; reg++, src += 16) {
        env->xmm_regs[reg].ZMM_Q(0) = access_ldq(ac, src);
        env->xmm_regs[reg].ZMM_Q(1) = access_ldq(ac, src + 8);
    }
}
28391b248f14SClaudio Fontana
/*
 * Zero quadwords 0 and 1 of each XMM register: 16 registers when
 * HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_clear_sse(CPUX86State *env)
{
    const int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int reg;

    for (reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(0) = 0;
        env->xmm_regs[reg].ZMM_Q(1) = 0;
    }
}
285589254431SPaolo Bonzini
/*
 * Load quadwords 2 and 3 of each vector register from consecutive
 * 16-byte slots starting at @ptr: 16 registers when HF_CS64_MASK is set
 * in hflags, otherwise 8.
 */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong src = ptr;
    int reg;

    for (reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(2) = access_ldq(ac, src);
        env->xmm_regs[reg].ZMM_Q(3) = access_ldq(ac, src + 8);
        src += 16;
    }
}
287289254431SPaolo Bonzini
/*
 * Zero quadwords 2 and 3 of each vector register: 16 registers when
 * HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_clear_ymmh(CPUX86State *env)
{
    const int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int reg;

    for (reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(2) = 0;
        env->xmm_regs[reg].ZMM_Q(3) = 0;
    }
}
288889254431SPaolo Bonzini
/*
 * Load the four bound registers from the XSaveBNDREG image at @ptr.
 * Each register occupies 16 bytes: the lower bound, then the upper bound.
 */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong src = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int reg = 0;

    while (reg < 4) {
        env->bnd_regs[reg].lb = access_ldq(ac, src);
        env->bnd_regs[reg].ub = access_ldq(ac, src + 8);
        reg++;
        src += 16;
    }
}
29001b248f14SClaudio Fontana
/* Load the cfgu and sts fields of the XSaveBNDCSR image at @ptr. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    uint64_t cfgu, sts;

    /* FIXME: Extend highest implemented bit of linear address. */
    cfgu = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
    env->bndcs_regs.cfgu = cfgu;

    sts = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
    env->bndcs_regs.sts = sts;
}
29111b248f14SClaudio Fontana
/* Load PKRU from the 8 bytes at @ptr. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    env->pkru = access_ldq(ac, ptr);
}
29161b248f14SClaudio Fontana
/*
 * Restore state from the legacy save area at @ptr.
 *
 * The x87 state is always loaded.  MXCSR is loaded only when CR4.OSFXSR
 * is set, and the XMM registers are loaded as well unless the "fast
 * FXRSTOR" conditions hold.
 */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast_fxrstor;

    do_xrstor_fpu(ac, ptr);

    if ((env->cr[4] & CR4_OSFXSR_MASK) == 0) {
        return;
    }
    do_xrstor_mxcsr(ac, ptr);

    /*
     * Fast FXRSTOR leaves out the XMM registers.  It applies only when
     * EFER.FFXSR is set, no HF_CPL_MASK bit is set, and HF_LMA_MASK is set.
     */
    fast_fxrstor = (env->efer & MSR_EFER_FFXSR)
                   && !(env->hflags & HF_CPL_MASK)
                   && (env->hflags & HF_LMA_MASK);
    if (!fast_fxrstor) {
        do_xrstor_sse(ac, ptr);
    }
}
29321b248f14SClaudio Fontana
/*
 * TCG helper behind FXRSTOR: hand the legacy save area at guest address
 * @ptr to do_fxrstor().  A @ptr that is not a multiple of 16 raises #GP.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t retaddr = GETPC();
    X86Access access;

    /* Reject operands that are not 16-byte aligned. */
    if ((ptr & 0xf) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Set up a load access spanning the whole legacy area. */
    access_prepare(&access, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, retaddr);
    do_fxrstor(&access, ptr);
}
29471b248f14SClaudio Fontana
/*
 * Read the XSAVE header at @ptr and decide whether XRSTOR may proceed.
 *
 * The XSTATE_BV value read is always stored in *@pxsbv, even when the
 * header is rejected.  Returns true when the header is acceptable.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    const uint64_t bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    const uint64_t comp = access_ldq(ac, ptr + XO(header.xcomp_bv));
    const uint64_t rsvd = access_ldq(ac, ptr + XO(header.reserve0));

    *pxsbv = bv;

    /*
     * XCOMP_BV bit 63 selects the compact form, which is not supported
     * and must therefore be rejected.  What remains is the standard form,
     * where bytes 23:8 -- XCOMP_BV plus the following 64-bit field --
     * have to be zero.
     */
    if ((comp | rsvd) != 0) {
        return false;
    }

    /* XSTATE_BV may only name components that are present in XCR0. */
    return (bv & ~ac->env->xcr0) == 0;
}
29711b248f14SClaudio Fontana
/*
 * Restore the state components selected by @rfbm from the XSAVE area at
 * @ptr.  For each selected component, a set bit in @xstate_bv means
 * "load it from memory" and a clear bit means "put it in its initial
 * state".  Components not selected by @rfbm are left untouched.
 */
static void do_xrstor(X86Access *ac, target_ulong ptr,
                      uint64_t rfbm, uint64_t xstate_bv)
{
    CPUX86State *env = ac->env;

    if ((rfbm & XSTATE_FP_MASK) != 0) {
        if ((xstate_bv & XSTATE_FP_MASK) == 0) {
            do_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        } else {
            do_xrstor_fpu(ac, ptr);
        }
    }

    if ((rfbm & XSTATE_SSE_MASK) != 0) {
        /*
         * The standard form of XRSTOR loads MXCSR from memory whether
         * or not the XSTATE_BV bit is set.
         */
        do_xrstor_mxcsr(ac, ptr);
        if ((xstate_bv & XSTATE_SSE_MASK) == 0) {
            do_clear_sse(env);
        } else {
            do_xrstor_sse(ac, ptr);
        }
    }

    if ((rfbm & XSTATE_YMM_MASK) != 0) {
        if ((xstate_bv & XSTATE_YMM_MASK) == 0) {
            do_clear_ymmh(env);
        } else {
            do_xrstor_ymmh(ac, ptr + XO(avx_state));
        }
    }

    if ((rfbm & XSTATE_BNDREGS_MASK) != 0) {
        /* HF_MPX_IU_MASK is set or cleared along with the registers. */
        if ((xstate_bv & XSTATE_BNDREGS_MASK) == 0) {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        } else {
            do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
            env->hflags |= HF_MPX_IU_MASK;
        }
    }

    if ((rfbm & XSTATE_BNDCSR_MASK) != 0) {
        if ((xstate_bv & XSTATE_BNDCSR_MASK) == 0) {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        } else {
            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
        }
        cpu_sync_bndcs_hflags(env);
    }

    if ((rfbm & XSTATE_PKRU_MASK) != 0) {
        const uint64_t previous_pkru = env->pkru;

        if ((xstate_bv & XSTATE_PKRU_MASK) == 0) {
            env->pkru = 0;
        } else {
            do_xrstor_pkru(ac, ptr + XO(pkru_state));
        }
        /* Flush the TLB only when PKRU actually changed. */
        if (previous_pkru != env->pkru) {
            tlb_flush(env_cpu(env));
        }
    }
}
30321b248f14SClaudio Fontana
30331b248f14SClaudio Fontana #undef XO
30341b248f14SClaudio Fontana
/*
 * TCG helper behind XRSTOR.
 *
 * After the common checks, only the legacy area plus header is prepared
 * so that the header can be validated (#GP on failure).  If the
 * components to be loaded need more than that, the access is prepared
 * again at the larger size before restoring.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t retaddr = GETPC();
    unsigned header_end, needed;
    uint64_t xstate_bv;
    X86Access access;

    do_xsave_chk(env, ptr, retaddr);

    /* Start with the minimum that lets the header be validated. */
    header_end = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
    access_prepare(&access, env, ptr, header_end, MMU_DATA_LOAD, retaddr);
    if (!valid_xrstor_header(&access, &xstate_bv, ptr)) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    rfbm &= env->xcr0;
    needed = xsave_area_size(rfbm & xstate_bv, false);
    if (needed > header_end) {
        /* TODO: See if existing page probe has covered extra size. */
        access_prepare(&access, env, ptr, needed, MMU_DATA_LOAD, retaddr);
    }

    do_xrstor(&access, ptr, rfbm, xstate_bv);
}
30605d245678SPaolo Bonzini
30615d245678SPaolo Bonzini #if defined(CONFIG_USER_ONLY)
/*
 * User-only entry point: run do_fsave() against the host buffer @host of
 * @len bytes.  The image size is fixed at 4 * 7 + 8 * 10 bytes and must
 * fit in @len.
 */
void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
    /* Access descriptor aimed at host memory; unnamed members are zero. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = 4 * 7 + 8 * 10,
    };

    assert(ac.size <= len);
    /* Offset 0: addresses are relative to the start of @host. */
    do_fsave(&ac, 0, true);
}
30735d245678SPaolo Bonzini
/*
 * User-only entry point: run do_frstor() against the host buffer @host
 * of @len bytes.  The image size is fixed at 4 * 7 + 8 * 10 bytes and
 * must fit in @len.
 */
void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
    /* Access descriptor aimed at host memory; unnamed members are zero. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = 4 * 7 + 8 * 10,
    };

    assert(ac.size <= len);
    /* Offset 0: addresses are relative to the start of @host. */
    do_frstor(&ac, 0, true);
}
30855d245678SPaolo Bonzini
/*
 * User-only entry point: run do_fxsave() against the host buffer @host
 * of @len bytes, which must hold at least sizeof(X86LegacyXSaveArea).
 */
void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
    /* Access descriptor aimed at host memory; unnamed members are zero. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);
    /* Offset 0: addresses are relative to the start of @host. */
    do_fxsave(&ac, 0);
}
30975d245678SPaolo Bonzini
/*
 * User-only entry point: run do_fxrstor() against the host buffer @host
 * of @len bytes, which must hold at least sizeof(X86LegacyXSaveArea).
 */
void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
    /* Access descriptor aimed at host memory; unnamed members are zero. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);
    /* Offset 0: addresses are relative to the start of @host. */
    do_fxrstor(&ac, 0);
}
31095d245678SPaolo Bonzini
/*
 * User-only entry point: save the components in @rfbm into the host
 * buffer @host of @len bytes.  @rfbm must be a subset of XCR0 and the
 * resulting area must fit in @len; both are asserted.
 */
void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    /* Access descriptor aimed at host memory; the size is filled in below. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
    };

    /*
     * The only caller is user-level signal handling, which is expected
     * to have restricted @rfbm correctly already.
     */
    assert((rfbm & ~env->xcr0) == 0);

    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    /* Every requested component is written (opt == rfbm). */
    do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
}
31265d245678SPaolo Bonzini
/*
 * User-only entry point: restore the components in @rfbm from the host
 * buffer @host of @len bytes.  @rfbm must be a subset of XCR0 and the
 * area must fit in @len; both are asserted.
 *
 * Returns false, without restoring anything, when the header in the
 * buffer fails validation; returns true after a successful restore.
 */
bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    /* Access descriptor aimed at host memory; the size is filled in below. */
    X86Access ac = {
        .env = env,
        .haddr1 = host,
    };
    uint64_t xstate_bv;
    bool header_ok;

    /*
     * The only caller is user-level signal handling, which is expected
     * to have restricted @rfbm correctly already.
     */
    assert((rfbm & ~env->xcr0) == 0);

    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    header_ok = valid_xrstor_header(&ac, &xstate_bv, 0);
    if (header_ok) {
        do_xrstor(&ac, 0, rfbm, xstate_bv);
    }
    return header_ok;
}
31495d245678SPaolo Bonzini #endif
31505d245678SPaolo Bonzini
/*
 * TCG helper behind XGETBV.
 *
 * Raises #UD when CR4.OSXSAVE is clear.  @ecx == 0 returns XCR0.
 * @ecx == 1 returns XCR0 masked by the XINUSE bitmap, but only when the
 * CPUID_XSAVE_XGETBV1 feature is present.  Anything else raises #GP.
 */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    const uintptr_t retaddr = GETPC();

    /* XSAVE must have been enabled by the OS. */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, retaddr);
    }

    if (ecx == 0) {
        return env->xcr0;
    }
    if (ecx == 1 && (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1)) {
        return env->xcr0 & get_xinuse(env);
    }

    raise_exception_ra(env, EXCP0D_GPF, retaddr);
}
31691b248f14SClaudio Fontana
/*
 * TCG helper behind XSETBV: validate @mask and install it as XCR0.
 *
 * Raises #UD when CR4.OSXSAVE is clear.  Raises #GP when @ecx is not 0,
 * when @mask drops the FP bit, enables YMM without SSE, names a feature
 * not reported by CPUID leaf 0x0d, or enables only one of the two MPX
 * components.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    const uintptr_t retaddr = GETPC();
    uint32_t unused, supported_lo, supported_hi;
    uint64_t supported, bndregs_as_bndcsr;

    /* XSAVE must have been enabled by the OS. */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, retaddr);
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* SSE can be disabled, but only if AVX is disabled too. */
    if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /* Disallow enabling unimplemented features. */
    cpu_x86_cpuid(env, 0x0d, 0, &supported_lo, &unused, &unused,
                  &supported_hi);
    supported = ((uint64_t)supported_hi << 32) | supported_lo;
    if ((mask & ~supported) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    /*
     * Disallow enabling only half of MPX: scale the BNDREGS bit up to
     * the BNDCSR position and require that the two bits agree.
     */
    bndregs_as_bndcsr = mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK);
    if (((mask ^ bndregs_as_bndcsr) & XSTATE_BNDCSR_MASK) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    cpu_sync_avx_hflag(env);
}
32111b248f14SClaudio Fontana
32121b248f14SClaudio Fontana /* MMX/SSE */
32131b248f14SClaudio Fontana /* XXX: optimize by storing fptt and fptags in the static cpu state */
32141b248f14SClaudio Fontana
32151b248f14SClaudio Fontana #define SSE_DAZ 0x0040
3216314d3effSPaolo Bonzini #define SSE_RC_SHIFT 13
3217314d3effSPaolo Bonzini #define SSE_RC_MASK (3 << SSE_RC_SHIFT)
32181b248f14SClaudio Fontana #define SSE_FZ 0x8000
32191b248f14SClaudio Fontana
/*
 * Propagate env->mxcsr into env->sse_status: rounding mode, pending
 * exception flags, denormals-are-zero and flush-to-zero.
 */
void update_mxcsr_status(CPUX86State *env)
{
    const uint32_t mxcsr = env->mxcsr;
    const int rounding = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
    int pending = 0;

    /* Rounding-control field. */
    set_x86_rounding_mode(rounding, &env->sse_status);

    /* Translate each MXCSR exception bit into its softfloat flag. */
    if (mxcsr & FPUS_IE) {
        pending |= float_flag_invalid;
    }
    if (mxcsr & FPUS_ZE) {
        pending |= float_flag_divbyzero;
    }
    if (mxcsr & FPUS_OE) {
        pending |= float_flag_overflow;
    }
    if (mxcsr & FPUS_UE) {
        pending |= float_flag_underflow;
    }
    if (mxcsr & FPUS_PE) {
        pending |= float_flag_inexact;
    }
    set_float_exception_flags(pending, &env->sse_status);

    /* Denormals are zero. */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) != 0, &env->sse_status);

    /* Flush to zero. */
    set_flush_to_zero((mxcsr & SSE_FZ) != 0, &env->sse_status);
}
32431b248f14SClaudio Fontana
/*
 * OR the exception flags accumulated in env->sse_status into env->mxcsr.
 * Bits already set in MXCSR are never cleared here.
 */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    const uint8_t flags = get_float_exception_flags(&env->sse_status);
    uint32_t raised = 0;

    /*
     * float_flag_input_denormal is deliberately not converted: the
     * softfloat code sets it only when an input denormal is flushed to
     * zero, whereas the MXCSR denormal flag is set only when the input is
     * not flushed, so the two have opposite meanings.
     */
    if (flags & float_flag_invalid) {
        raised |= FPUS_IE;
    }
    if (flags & float_flag_divbyzero) {
        raised |= FPUS_ZE;
    }
    if (flags & float_flag_overflow) {
        raised |= FPUS_OE;
    }
    if (flags & float_flag_underflow) {
        raised |= FPUS_UE;
    }
    if (flags & float_flag_inexact) {
        raised |= FPUS_PE;
    }
    /* A flushed output denormal is reported as underflow plus inexact. */
    if (flags & float_flag_output_denormal) {
        raised |= FPUS_UE | FPUS_PE;
    }

    env->mxcsr |= raised;
}
32621b248f14SClaudio Fontana
/* TCG helper: fold the pending SSE softfloat flags into env->mxcsr. */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
32671b248f14SClaudio Fontana
/* TCG helper behind LDMXCSR: install @val as the new MXCSR. */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
32721b248f14SClaudio Fontana
/*
 * TCG helper run on entry to MMX code: reset the x87 stack top to 0 and
 * clear every tag byte.  helper_emms() uses 1 for "empty", so 0 marks
 * each register as not empty.
 */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    /*
     * Clear the tags with memset instead of storing through a pointer
     * cast to uint32_t: the cast form type-puns the tag array, which the
     * C effective-type rules do not permit and which also presumes the
     * array is suitably aligned.
     * NOTE(review): assumes fptags is an 8-byte array, matching the two
     * 4-byte stores this replaces -- confirm against the declaration.
     */
    memset(env->fptags, 0, sizeof(env->fptags));
}
32791b248f14SClaudio Fontana
/*
 * TCG helper behind EMMS: mark every x87 register as empty by setting
 * each tag byte to 1.
 */
void helper_emms(CPUX86State *env)
{
    /*
     * Set the tags with memset instead of storing 0x01010101 through a
     * pointer cast to uint32_t: the cast form type-puns the tag array,
     * which the C effective-type rules do not permit and which also
     * presumes the array is suitably aligned.
     * NOTE(review): assumes fptags is an 8-byte array, matching the two
     * 4-byte stores this replaces -- confirm against the declaration.
     */
    memset(env->fptags, 1, sizeof(env->fptags));
}
32861b248f14SClaudio Fontana
32871b248f14SClaudio Fontana #define SHIFT 0
32881b248f14SClaudio Fontana #include "ops_sse.h"
32891b248f14SClaudio Fontana
32901b248f14SClaudio Fontana #define SHIFT 1
32911b248f14SClaudio Fontana #include "ops_sse.h"
3292b98f886cSPaolo Bonzini
3293b98f886cSPaolo Bonzini #define SHIFT 2
3294b98f886cSPaolo Bonzini #include "ops_sse.h"
3295