1*1b248f14SClaudio Fontana /* 2*1b248f14SClaudio Fontana * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3*1b248f14SClaudio Fontana * 4*1b248f14SClaudio Fontana * Copyright (c) 2003 Fabrice Bellard 5*1b248f14SClaudio Fontana * 6*1b248f14SClaudio Fontana * This library is free software; you can redistribute it and/or 7*1b248f14SClaudio Fontana * modify it under the terms of the GNU Lesser General Public 8*1b248f14SClaudio Fontana * License as published by the Free Software Foundation; either 9*1b248f14SClaudio Fontana * version 2.1 of the License, or (at your option) any later version. 10*1b248f14SClaudio Fontana * 11*1b248f14SClaudio Fontana * This library is distributed in the hope that it will be useful, 12*1b248f14SClaudio Fontana * but WITHOUT ANY WARRANTY; without even the implied warranty of 13*1b248f14SClaudio Fontana * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*1b248f14SClaudio Fontana * Lesser General Public License for more details. 15*1b248f14SClaudio Fontana * 16*1b248f14SClaudio Fontana * You should have received a copy of the GNU Lesser General Public 17*1b248f14SClaudio Fontana * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
18*1b248f14SClaudio Fontana */ 19*1b248f14SClaudio Fontana 20*1b248f14SClaudio Fontana #include "qemu/osdep.h" 21*1b248f14SClaudio Fontana #include <math.h> 22*1b248f14SClaudio Fontana #include "cpu.h" 23*1b248f14SClaudio Fontana #include "exec/helper-proto.h" 24*1b248f14SClaudio Fontana #include "qemu/host-utils.h" 25*1b248f14SClaudio Fontana #include "exec/exec-all.h" 26*1b248f14SClaudio Fontana #include "exec/cpu_ldst.h" 27*1b248f14SClaudio Fontana #include "fpu/softfloat.h" 28*1b248f14SClaudio Fontana #include "fpu/softfloat-macros.h" 29*1b248f14SClaudio Fontana 30*1b248f14SClaudio Fontana #ifdef CONFIG_SOFTMMU 31*1b248f14SClaudio Fontana #include "hw/irq.h" 32*1b248f14SClaudio Fontana #endif 33*1b248f14SClaudio Fontana 34*1b248f14SClaudio Fontana #define FPU_RC_MASK 0xc00 35*1b248f14SClaudio Fontana #define FPU_RC_NEAR 0x000 36*1b248f14SClaudio Fontana #define FPU_RC_DOWN 0x400 37*1b248f14SClaudio Fontana #define FPU_RC_UP 0x800 38*1b248f14SClaudio Fontana #define FPU_RC_CHOP 0xc00 39*1b248f14SClaudio Fontana 40*1b248f14SClaudio Fontana #define MAXTAN 9223372036854775808.0 41*1b248f14SClaudio Fontana 42*1b248f14SClaudio Fontana /* the following deal with x86 long double-precision numbers */ 43*1b248f14SClaudio Fontana #define MAXEXPD 0x7fff 44*1b248f14SClaudio Fontana #define EXPBIAS 16383 45*1b248f14SClaudio Fontana #define EXPD(fp) (fp.l.upper & 0x7fff) 46*1b248f14SClaudio Fontana #define SIGND(fp) ((fp.l.upper) & 0x8000) 47*1b248f14SClaudio Fontana #define MANTD(fp) (fp.l.lower) 48*1b248f14SClaudio Fontana #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 49*1b248f14SClaudio Fontana 50*1b248f14SClaudio Fontana #define FPUS_IE (1 << 0) 51*1b248f14SClaudio Fontana #define FPUS_DE (1 << 1) 52*1b248f14SClaudio Fontana #define FPUS_ZE (1 << 2) 53*1b248f14SClaudio Fontana #define FPUS_OE (1 << 3) 54*1b248f14SClaudio Fontana #define FPUS_UE (1 << 4) 55*1b248f14SClaudio Fontana #define FPUS_PE (1 << 5) 56*1b248f14SClaudio Fontana #define 
FPUS_SF (1 << 6) 57*1b248f14SClaudio Fontana #define FPUS_SE (1 << 7) 58*1b248f14SClaudio Fontana #define FPUS_B (1 << 15) 59*1b248f14SClaudio Fontana 60*1b248f14SClaudio Fontana #define FPUC_EM 0x3f 61*1b248f14SClaudio Fontana 62*1b248f14SClaudio Fontana #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 63*1b248f14SClaudio Fontana #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 64*1b248f14SClaudio Fontana #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 65*1b248f14SClaudio Fontana #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 66*1b248f14SClaudio Fontana #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 67*1b248f14SClaudio Fontana #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 68*1b248f14SClaudio Fontana #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 69*1b248f14SClaudio Fontana #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 70*1b248f14SClaudio Fontana 71*1b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY) 72*1b248f14SClaudio Fontana static qemu_irq ferr_irq; 73*1b248f14SClaudio Fontana 74*1b248f14SClaudio Fontana void x86_register_ferr_irq(qemu_irq irq) 75*1b248f14SClaudio Fontana { 76*1b248f14SClaudio Fontana ferr_irq = irq; 77*1b248f14SClaudio Fontana } 78*1b248f14SClaudio Fontana 79*1b248f14SClaudio Fontana static void cpu_clear_ignne(void) 80*1b248f14SClaudio Fontana { 81*1b248f14SClaudio Fontana CPUX86State *env = &X86_CPU(first_cpu)->env; 82*1b248f14SClaudio Fontana env->hflags2 &= ~HF2_IGNNE_MASK; 83*1b248f14SClaudio Fontana } 84*1b248f14SClaudio Fontana 85*1b248f14SClaudio Fontana void cpu_set_ignne(void) 86*1b248f14SClaudio Fontana { 87*1b248f14SClaudio Fontana CPUX86State *env = &X86_CPU(first_cpu)->env; 88*1b248f14SClaudio Fontana env->hflags2 |= HF2_IGNNE_MASK; 89*1b248f14SClaudio Fontana /* 90*1b248f14SClaudio Fontana * We get here in response to a write to port F0h. 
The chipset should 91*1b248f14SClaudio Fontana * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is 92*1b248f14SClaudio Fontana * cleared, because FERR# and FP_IRQ are two separate pins on real 93*1b248f14SClaudio Fontana * hardware. However, we don't model FERR# as a qemu_irq, so we just 94*1b248f14SClaudio Fontana * do directly what the chipset would do, i.e. deassert FP_IRQ. 95*1b248f14SClaudio Fontana */ 96*1b248f14SClaudio Fontana qemu_irq_lower(ferr_irq); 97*1b248f14SClaudio Fontana } 98*1b248f14SClaudio Fontana #endif 99*1b248f14SClaudio Fontana 100*1b248f14SClaudio Fontana 101*1b248f14SClaudio Fontana static inline void fpush(CPUX86State *env) 102*1b248f14SClaudio Fontana { 103*1b248f14SClaudio Fontana env->fpstt = (env->fpstt - 1) & 7; 104*1b248f14SClaudio Fontana env->fptags[env->fpstt] = 0; /* validate stack entry */ 105*1b248f14SClaudio Fontana } 106*1b248f14SClaudio Fontana 107*1b248f14SClaudio Fontana static inline void fpop(CPUX86State *env) 108*1b248f14SClaudio Fontana { 109*1b248f14SClaudio Fontana env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 110*1b248f14SClaudio Fontana env->fpstt = (env->fpstt + 1) & 7; 111*1b248f14SClaudio Fontana } 112*1b248f14SClaudio Fontana 113*1b248f14SClaudio Fontana static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr, 114*1b248f14SClaudio Fontana uintptr_t retaddr) 115*1b248f14SClaudio Fontana { 116*1b248f14SClaudio Fontana CPU_LDoubleU temp; 117*1b248f14SClaudio Fontana 118*1b248f14SClaudio Fontana temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 119*1b248f14SClaudio Fontana temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 120*1b248f14SClaudio Fontana return temp.d; 121*1b248f14SClaudio Fontana } 122*1b248f14SClaudio Fontana 123*1b248f14SClaudio Fontana static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 124*1b248f14SClaudio Fontana uintptr_t retaddr) 125*1b248f14SClaudio Fontana { 126*1b248f14SClaudio Fontana CPU_LDoubleU temp; 
127*1b248f14SClaudio Fontana 128*1b248f14SClaudio Fontana temp.d = f; 129*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 130*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 131*1b248f14SClaudio Fontana } 132*1b248f14SClaudio Fontana 133*1b248f14SClaudio Fontana /* x87 FPU helpers */ 134*1b248f14SClaudio Fontana 135*1b248f14SClaudio Fontana static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 136*1b248f14SClaudio Fontana { 137*1b248f14SClaudio Fontana union { 138*1b248f14SClaudio Fontana float64 f64; 139*1b248f14SClaudio Fontana double d; 140*1b248f14SClaudio Fontana } u; 141*1b248f14SClaudio Fontana 142*1b248f14SClaudio Fontana u.f64 = floatx80_to_float64(a, &env->fp_status); 143*1b248f14SClaudio Fontana return u.d; 144*1b248f14SClaudio Fontana } 145*1b248f14SClaudio Fontana 146*1b248f14SClaudio Fontana static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 147*1b248f14SClaudio Fontana { 148*1b248f14SClaudio Fontana union { 149*1b248f14SClaudio Fontana float64 f64; 150*1b248f14SClaudio Fontana double d; 151*1b248f14SClaudio Fontana } u; 152*1b248f14SClaudio Fontana 153*1b248f14SClaudio Fontana u.d = a; 154*1b248f14SClaudio Fontana return float64_to_floatx80(u.f64, &env->fp_status); 155*1b248f14SClaudio Fontana } 156*1b248f14SClaudio Fontana 157*1b248f14SClaudio Fontana static void fpu_set_exception(CPUX86State *env, int mask) 158*1b248f14SClaudio Fontana { 159*1b248f14SClaudio Fontana env->fpus |= mask; 160*1b248f14SClaudio Fontana if (env->fpus & (~env->fpuc & FPUC_EM)) { 161*1b248f14SClaudio Fontana env->fpus |= FPUS_SE | FPUS_B; 162*1b248f14SClaudio Fontana } 163*1b248f14SClaudio Fontana } 164*1b248f14SClaudio Fontana 165*1b248f14SClaudio Fontana static inline uint8_t save_exception_flags(CPUX86State *env) 166*1b248f14SClaudio Fontana { 167*1b248f14SClaudio Fontana uint8_t old_flags = get_float_exception_flags(&env->fp_status); 168*1b248f14SClaudio Fontana 
set_float_exception_flags(0, &env->fp_status); 169*1b248f14SClaudio Fontana return old_flags; 170*1b248f14SClaudio Fontana } 171*1b248f14SClaudio Fontana 172*1b248f14SClaudio Fontana static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 173*1b248f14SClaudio Fontana { 174*1b248f14SClaudio Fontana uint8_t new_flags = get_float_exception_flags(&env->fp_status); 175*1b248f14SClaudio Fontana float_raise(old_flags, &env->fp_status); 176*1b248f14SClaudio Fontana fpu_set_exception(env, 177*1b248f14SClaudio Fontana ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 178*1b248f14SClaudio Fontana (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 179*1b248f14SClaudio Fontana (new_flags & float_flag_overflow ? FPUS_OE : 0) | 180*1b248f14SClaudio Fontana (new_flags & float_flag_underflow ? FPUS_UE : 0) | 181*1b248f14SClaudio Fontana (new_flags & float_flag_inexact ? FPUS_PE : 0) | 182*1b248f14SClaudio Fontana (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 183*1b248f14SClaudio Fontana } 184*1b248f14SClaudio Fontana 185*1b248f14SClaudio Fontana static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 186*1b248f14SClaudio Fontana { 187*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 188*1b248f14SClaudio Fontana floatx80 ret = floatx80_div(a, b, &env->fp_status); 189*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 190*1b248f14SClaudio Fontana return ret; 191*1b248f14SClaudio Fontana } 192*1b248f14SClaudio Fontana 193*1b248f14SClaudio Fontana static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 194*1b248f14SClaudio Fontana { 195*1b248f14SClaudio Fontana if (env->cr[0] & CR0_NE_MASK) { 196*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP10_COPR, retaddr); 197*1b248f14SClaudio Fontana } 198*1b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY) 199*1b248f14SClaudio Fontana else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { 200*1b248f14SClaudio Fontana 
qemu_irq_raise(ferr_irq); 201*1b248f14SClaudio Fontana } 202*1b248f14SClaudio Fontana #endif 203*1b248f14SClaudio Fontana } 204*1b248f14SClaudio Fontana 205*1b248f14SClaudio Fontana void helper_flds_FT0(CPUX86State *env, uint32_t val) 206*1b248f14SClaudio Fontana { 207*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 208*1b248f14SClaudio Fontana union { 209*1b248f14SClaudio Fontana float32 f; 210*1b248f14SClaudio Fontana uint32_t i; 211*1b248f14SClaudio Fontana } u; 212*1b248f14SClaudio Fontana 213*1b248f14SClaudio Fontana u.i = val; 214*1b248f14SClaudio Fontana FT0 = float32_to_floatx80(u.f, &env->fp_status); 215*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 216*1b248f14SClaudio Fontana } 217*1b248f14SClaudio Fontana 218*1b248f14SClaudio Fontana void helper_fldl_FT0(CPUX86State *env, uint64_t val) 219*1b248f14SClaudio Fontana { 220*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 221*1b248f14SClaudio Fontana union { 222*1b248f14SClaudio Fontana float64 f; 223*1b248f14SClaudio Fontana uint64_t i; 224*1b248f14SClaudio Fontana } u; 225*1b248f14SClaudio Fontana 226*1b248f14SClaudio Fontana u.i = val; 227*1b248f14SClaudio Fontana FT0 = float64_to_floatx80(u.f, &env->fp_status); 228*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 229*1b248f14SClaudio Fontana } 230*1b248f14SClaudio Fontana 231*1b248f14SClaudio Fontana void helper_fildl_FT0(CPUX86State *env, int32_t val) 232*1b248f14SClaudio Fontana { 233*1b248f14SClaudio Fontana FT0 = int32_to_floatx80(val, &env->fp_status); 234*1b248f14SClaudio Fontana } 235*1b248f14SClaudio Fontana 236*1b248f14SClaudio Fontana void helper_flds_ST0(CPUX86State *env, uint32_t val) 237*1b248f14SClaudio Fontana { 238*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 239*1b248f14SClaudio Fontana int new_fpstt; 240*1b248f14SClaudio Fontana union { 241*1b248f14SClaudio Fontana float32 f; 242*1b248f14SClaudio Fontana uint32_t i; 
243*1b248f14SClaudio Fontana } u; 244*1b248f14SClaudio Fontana 245*1b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 246*1b248f14SClaudio Fontana u.i = val; 247*1b248f14SClaudio Fontana env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 248*1b248f14SClaudio Fontana env->fpstt = new_fpstt; 249*1b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 250*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 251*1b248f14SClaudio Fontana } 252*1b248f14SClaudio Fontana 253*1b248f14SClaudio Fontana void helper_fldl_ST0(CPUX86State *env, uint64_t val) 254*1b248f14SClaudio Fontana { 255*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 256*1b248f14SClaudio Fontana int new_fpstt; 257*1b248f14SClaudio Fontana union { 258*1b248f14SClaudio Fontana float64 f; 259*1b248f14SClaudio Fontana uint64_t i; 260*1b248f14SClaudio Fontana } u; 261*1b248f14SClaudio Fontana 262*1b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 263*1b248f14SClaudio Fontana u.i = val; 264*1b248f14SClaudio Fontana env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 265*1b248f14SClaudio Fontana env->fpstt = new_fpstt; 266*1b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 267*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 268*1b248f14SClaudio Fontana } 269*1b248f14SClaudio Fontana 270*1b248f14SClaudio Fontana void helper_fildl_ST0(CPUX86State *env, int32_t val) 271*1b248f14SClaudio Fontana { 272*1b248f14SClaudio Fontana int new_fpstt; 273*1b248f14SClaudio Fontana 274*1b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 275*1b248f14SClaudio Fontana env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 276*1b248f14SClaudio Fontana env->fpstt = new_fpstt; 277*1b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 278*1b248f14SClaudio Fontana } 279*1b248f14SClaudio Fontana 280*1b248f14SClaudio Fontana void 
helper_fildll_ST0(CPUX86State *env, int64_t val) 281*1b248f14SClaudio Fontana { 282*1b248f14SClaudio Fontana int new_fpstt; 283*1b248f14SClaudio Fontana 284*1b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 285*1b248f14SClaudio Fontana env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 286*1b248f14SClaudio Fontana env->fpstt = new_fpstt; 287*1b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 288*1b248f14SClaudio Fontana } 289*1b248f14SClaudio Fontana 290*1b248f14SClaudio Fontana uint32_t helper_fsts_ST0(CPUX86State *env) 291*1b248f14SClaudio Fontana { 292*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 293*1b248f14SClaudio Fontana union { 294*1b248f14SClaudio Fontana float32 f; 295*1b248f14SClaudio Fontana uint32_t i; 296*1b248f14SClaudio Fontana } u; 297*1b248f14SClaudio Fontana 298*1b248f14SClaudio Fontana u.f = floatx80_to_float32(ST0, &env->fp_status); 299*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 300*1b248f14SClaudio Fontana return u.i; 301*1b248f14SClaudio Fontana } 302*1b248f14SClaudio Fontana 303*1b248f14SClaudio Fontana uint64_t helper_fstl_ST0(CPUX86State *env) 304*1b248f14SClaudio Fontana { 305*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 306*1b248f14SClaudio Fontana union { 307*1b248f14SClaudio Fontana float64 f; 308*1b248f14SClaudio Fontana uint64_t i; 309*1b248f14SClaudio Fontana } u; 310*1b248f14SClaudio Fontana 311*1b248f14SClaudio Fontana u.f = floatx80_to_float64(ST0, &env->fp_status); 312*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 313*1b248f14SClaudio Fontana return u.i; 314*1b248f14SClaudio Fontana } 315*1b248f14SClaudio Fontana 316*1b248f14SClaudio Fontana int32_t helper_fist_ST0(CPUX86State *env) 317*1b248f14SClaudio Fontana { 318*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 319*1b248f14SClaudio Fontana int32_t val; 320*1b248f14SClaudio Fontana 321*1b248f14SClaudio 
Fontana val = floatx80_to_int32(ST0, &env->fp_status); 322*1b248f14SClaudio Fontana if (val != (int16_t)val) { 323*1b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 324*1b248f14SClaudio Fontana val = -32768; 325*1b248f14SClaudio Fontana } 326*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 327*1b248f14SClaudio Fontana return val; 328*1b248f14SClaudio Fontana } 329*1b248f14SClaudio Fontana 330*1b248f14SClaudio Fontana int32_t helper_fistl_ST0(CPUX86State *env) 331*1b248f14SClaudio Fontana { 332*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 333*1b248f14SClaudio Fontana int32_t val; 334*1b248f14SClaudio Fontana 335*1b248f14SClaudio Fontana val = floatx80_to_int32(ST0, &env->fp_status); 336*1b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 337*1b248f14SClaudio Fontana val = 0x80000000; 338*1b248f14SClaudio Fontana } 339*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 340*1b248f14SClaudio Fontana return val; 341*1b248f14SClaudio Fontana } 342*1b248f14SClaudio Fontana 343*1b248f14SClaudio Fontana int64_t helper_fistll_ST0(CPUX86State *env) 344*1b248f14SClaudio Fontana { 345*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 346*1b248f14SClaudio Fontana int64_t val; 347*1b248f14SClaudio Fontana 348*1b248f14SClaudio Fontana val = floatx80_to_int64(ST0, &env->fp_status); 349*1b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 350*1b248f14SClaudio Fontana val = 0x8000000000000000ULL; 351*1b248f14SClaudio Fontana } 352*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 353*1b248f14SClaudio Fontana return val; 354*1b248f14SClaudio Fontana } 355*1b248f14SClaudio Fontana 356*1b248f14SClaudio Fontana int32_t helper_fistt_ST0(CPUX86State *env) 357*1b248f14SClaudio Fontana { 358*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 
359*1b248f14SClaudio Fontana int32_t val; 360*1b248f14SClaudio Fontana 361*1b248f14SClaudio Fontana val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 362*1b248f14SClaudio Fontana if (val != (int16_t)val) { 363*1b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 364*1b248f14SClaudio Fontana val = -32768; 365*1b248f14SClaudio Fontana } 366*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 367*1b248f14SClaudio Fontana return val; 368*1b248f14SClaudio Fontana } 369*1b248f14SClaudio Fontana 370*1b248f14SClaudio Fontana int32_t helper_fisttl_ST0(CPUX86State *env) 371*1b248f14SClaudio Fontana { 372*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 373*1b248f14SClaudio Fontana int32_t val; 374*1b248f14SClaudio Fontana 375*1b248f14SClaudio Fontana val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 376*1b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 377*1b248f14SClaudio Fontana val = 0x80000000; 378*1b248f14SClaudio Fontana } 379*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 380*1b248f14SClaudio Fontana return val; 381*1b248f14SClaudio Fontana } 382*1b248f14SClaudio Fontana 383*1b248f14SClaudio Fontana int64_t helper_fisttll_ST0(CPUX86State *env) 384*1b248f14SClaudio Fontana { 385*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 386*1b248f14SClaudio Fontana int64_t val; 387*1b248f14SClaudio Fontana 388*1b248f14SClaudio Fontana val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 389*1b248f14SClaudio Fontana if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 390*1b248f14SClaudio Fontana val = 0x8000000000000000ULL; 391*1b248f14SClaudio Fontana } 392*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 393*1b248f14SClaudio Fontana return val; 394*1b248f14SClaudio Fontana } 395*1b248f14SClaudio Fontana 396*1b248f14SClaudio Fontana void 
helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 397*1b248f14SClaudio Fontana { 398*1b248f14SClaudio Fontana int new_fpstt; 399*1b248f14SClaudio Fontana 400*1b248f14SClaudio Fontana new_fpstt = (env->fpstt - 1) & 7; 401*1b248f14SClaudio Fontana env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC()); 402*1b248f14SClaudio Fontana env->fpstt = new_fpstt; 403*1b248f14SClaudio Fontana env->fptags[new_fpstt] = 0; /* validate stack entry */ 404*1b248f14SClaudio Fontana } 405*1b248f14SClaudio Fontana 406*1b248f14SClaudio Fontana void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 407*1b248f14SClaudio Fontana { 408*1b248f14SClaudio Fontana helper_fstt(env, ST0, ptr, GETPC()); 409*1b248f14SClaudio Fontana } 410*1b248f14SClaudio Fontana 411*1b248f14SClaudio Fontana void helper_fpush(CPUX86State *env) 412*1b248f14SClaudio Fontana { 413*1b248f14SClaudio Fontana fpush(env); 414*1b248f14SClaudio Fontana } 415*1b248f14SClaudio Fontana 416*1b248f14SClaudio Fontana void helper_fpop(CPUX86State *env) 417*1b248f14SClaudio Fontana { 418*1b248f14SClaudio Fontana fpop(env); 419*1b248f14SClaudio Fontana } 420*1b248f14SClaudio Fontana 421*1b248f14SClaudio Fontana void helper_fdecstp(CPUX86State *env) 422*1b248f14SClaudio Fontana { 423*1b248f14SClaudio Fontana env->fpstt = (env->fpstt - 1) & 7; 424*1b248f14SClaudio Fontana env->fpus &= ~0x4700; 425*1b248f14SClaudio Fontana } 426*1b248f14SClaudio Fontana 427*1b248f14SClaudio Fontana void helper_fincstp(CPUX86State *env) 428*1b248f14SClaudio Fontana { 429*1b248f14SClaudio Fontana env->fpstt = (env->fpstt + 1) & 7; 430*1b248f14SClaudio Fontana env->fpus &= ~0x4700; 431*1b248f14SClaudio Fontana } 432*1b248f14SClaudio Fontana 433*1b248f14SClaudio Fontana /* FPU move */ 434*1b248f14SClaudio Fontana 435*1b248f14SClaudio Fontana void helper_ffree_STN(CPUX86State *env, int st_index) 436*1b248f14SClaudio Fontana { 437*1b248f14SClaudio Fontana env->fptags[(env->fpstt + st_index) & 7] = 1; 438*1b248f14SClaudio Fontana } 439*1b248f14SClaudio 
Fontana 440*1b248f14SClaudio Fontana void helper_fmov_ST0_FT0(CPUX86State *env) 441*1b248f14SClaudio Fontana { 442*1b248f14SClaudio Fontana ST0 = FT0; 443*1b248f14SClaudio Fontana } 444*1b248f14SClaudio Fontana 445*1b248f14SClaudio Fontana void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 446*1b248f14SClaudio Fontana { 447*1b248f14SClaudio Fontana FT0 = ST(st_index); 448*1b248f14SClaudio Fontana } 449*1b248f14SClaudio Fontana 450*1b248f14SClaudio Fontana void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 451*1b248f14SClaudio Fontana { 452*1b248f14SClaudio Fontana ST0 = ST(st_index); 453*1b248f14SClaudio Fontana } 454*1b248f14SClaudio Fontana 455*1b248f14SClaudio Fontana void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 456*1b248f14SClaudio Fontana { 457*1b248f14SClaudio Fontana ST(st_index) = ST0; 458*1b248f14SClaudio Fontana } 459*1b248f14SClaudio Fontana 460*1b248f14SClaudio Fontana void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 461*1b248f14SClaudio Fontana { 462*1b248f14SClaudio Fontana floatx80 tmp; 463*1b248f14SClaudio Fontana 464*1b248f14SClaudio Fontana tmp = ST(st_index); 465*1b248f14SClaudio Fontana ST(st_index) = ST0; 466*1b248f14SClaudio Fontana ST0 = tmp; 467*1b248f14SClaudio Fontana } 468*1b248f14SClaudio Fontana 469*1b248f14SClaudio Fontana /* FPU operations */ 470*1b248f14SClaudio Fontana 471*1b248f14SClaudio Fontana static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 472*1b248f14SClaudio Fontana 473*1b248f14SClaudio Fontana void helper_fcom_ST0_FT0(CPUX86State *env) 474*1b248f14SClaudio Fontana { 475*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 476*1b248f14SClaudio Fontana FloatRelation ret; 477*1b248f14SClaudio Fontana 478*1b248f14SClaudio Fontana ret = floatx80_compare(ST0, FT0, &env->fp_status); 479*1b248f14SClaudio Fontana env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 480*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 481*1b248f14SClaudio Fontana 
} 482*1b248f14SClaudio Fontana 483*1b248f14SClaudio Fontana void helper_fucom_ST0_FT0(CPUX86State *env) 484*1b248f14SClaudio Fontana { 485*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 486*1b248f14SClaudio Fontana FloatRelation ret; 487*1b248f14SClaudio Fontana 488*1b248f14SClaudio Fontana ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 489*1b248f14SClaudio Fontana env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 490*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 491*1b248f14SClaudio Fontana } 492*1b248f14SClaudio Fontana 493*1b248f14SClaudio Fontana static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 494*1b248f14SClaudio Fontana 495*1b248f14SClaudio Fontana void helper_fcomi_ST0_FT0(CPUX86State *env) 496*1b248f14SClaudio Fontana { 497*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 498*1b248f14SClaudio Fontana int eflags; 499*1b248f14SClaudio Fontana FloatRelation ret; 500*1b248f14SClaudio Fontana 501*1b248f14SClaudio Fontana ret = floatx80_compare(ST0, FT0, &env->fp_status); 502*1b248f14SClaudio Fontana eflags = cpu_cc_compute_all(env, CC_OP); 503*1b248f14SClaudio Fontana eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 504*1b248f14SClaudio Fontana CC_SRC = eflags; 505*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 506*1b248f14SClaudio Fontana } 507*1b248f14SClaudio Fontana 508*1b248f14SClaudio Fontana void helper_fucomi_ST0_FT0(CPUX86State *env) 509*1b248f14SClaudio Fontana { 510*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 511*1b248f14SClaudio Fontana int eflags; 512*1b248f14SClaudio Fontana FloatRelation ret; 513*1b248f14SClaudio Fontana 514*1b248f14SClaudio Fontana ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 515*1b248f14SClaudio Fontana eflags = cpu_cc_compute_all(env, CC_OP); 516*1b248f14SClaudio Fontana eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 
517*1b248f14SClaudio Fontana CC_SRC = eflags; 518*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 519*1b248f14SClaudio Fontana } 520*1b248f14SClaudio Fontana 521*1b248f14SClaudio Fontana void helper_fadd_ST0_FT0(CPUX86State *env) 522*1b248f14SClaudio Fontana { 523*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 524*1b248f14SClaudio Fontana ST0 = floatx80_add(ST0, FT0, &env->fp_status); 525*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 526*1b248f14SClaudio Fontana } 527*1b248f14SClaudio Fontana 528*1b248f14SClaudio Fontana void helper_fmul_ST0_FT0(CPUX86State *env) 529*1b248f14SClaudio Fontana { 530*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 531*1b248f14SClaudio Fontana ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 532*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 533*1b248f14SClaudio Fontana } 534*1b248f14SClaudio Fontana 535*1b248f14SClaudio Fontana void helper_fsub_ST0_FT0(CPUX86State *env) 536*1b248f14SClaudio Fontana { 537*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 538*1b248f14SClaudio Fontana ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 539*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 540*1b248f14SClaudio Fontana } 541*1b248f14SClaudio Fontana 542*1b248f14SClaudio Fontana void helper_fsubr_ST0_FT0(CPUX86State *env) 543*1b248f14SClaudio Fontana { 544*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 545*1b248f14SClaudio Fontana ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 546*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 547*1b248f14SClaudio Fontana } 548*1b248f14SClaudio Fontana 549*1b248f14SClaudio Fontana void helper_fdiv_ST0_FT0(CPUX86State *env) 550*1b248f14SClaudio Fontana { 551*1b248f14SClaudio Fontana ST0 = helper_fdiv(env, ST0, FT0); 552*1b248f14SClaudio Fontana } 553*1b248f14SClaudio Fontana 554*1b248f14SClaudio Fontana void helper_fdivr_ST0_FT0(CPUX86State 
*env) 555*1b248f14SClaudio Fontana { 556*1b248f14SClaudio Fontana ST0 = helper_fdiv(env, FT0, ST0); 557*1b248f14SClaudio Fontana } 558*1b248f14SClaudio Fontana 559*1b248f14SClaudio Fontana /* fp operations between STN and ST0 */ 560*1b248f14SClaudio Fontana 561*1b248f14SClaudio Fontana void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 562*1b248f14SClaudio Fontana { 563*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 564*1b248f14SClaudio Fontana ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 565*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 566*1b248f14SClaudio Fontana } 567*1b248f14SClaudio Fontana 568*1b248f14SClaudio Fontana void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 569*1b248f14SClaudio Fontana { 570*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 571*1b248f14SClaudio Fontana ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 572*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 573*1b248f14SClaudio Fontana } 574*1b248f14SClaudio Fontana 575*1b248f14SClaudio Fontana void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 576*1b248f14SClaudio Fontana { 577*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 578*1b248f14SClaudio Fontana ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 579*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 580*1b248f14SClaudio Fontana } 581*1b248f14SClaudio Fontana 582*1b248f14SClaudio Fontana void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 583*1b248f14SClaudio Fontana { 584*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 585*1b248f14SClaudio Fontana ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 586*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 587*1b248f14SClaudio Fontana } 588*1b248f14SClaudio Fontana 589*1b248f14SClaudio Fontana void helper_fdiv_STN_ST0(CPUX86State *env, int 
st_index) 590*1b248f14SClaudio Fontana { 591*1b248f14SClaudio Fontana floatx80 *p; 592*1b248f14SClaudio Fontana 593*1b248f14SClaudio Fontana p = &ST(st_index); 594*1b248f14SClaudio Fontana *p = helper_fdiv(env, *p, ST0); 595*1b248f14SClaudio Fontana } 596*1b248f14SClaudio Fontana 597*1b248f14SClaudio Fontana void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 598*1b248f14SClaudio Fontana { 599*1b248f14SClaudio Fontana floatx80 *p; 600*1b248f14SClaudio Fontana 601*1b248f14SClaudio Fontana p = &ST(st_index); 602*1b248f14SClaudio Fontana *p = helper_fdiv(env, ST0, *p); 603*1b248f14SClaudio Fontana } 604*1b248f14SClaudio Fontana 605*1b248f14SClaudio Fontana /* misc FPU operations */ 606*1b248f14SClaudio Fontana void helper_fchs_ST0(CPUX86State *env) 607*1b248f14SClaudio Fontana { 608*1b248f14SClaudio Fontana ST0 = floatx80_chs(ST0); 609*1b248f14SClaudio Fontana } 610*1b248f14SClaudio Fontana 611*1b248f14SClaudio Fontana void helper_fabs_ST0(CPUX86State *env) 612*1b248f14SClaudio Fontana { 613*1b248f14SClaudio Fontana ST0 = floatx80_abs(ST0); 614*1b248f14SClaudio Fontana } 615*1b248f14SClaudio Fontana 616*1b248f14SClaudio Fontana void helper_fld1_ST0(CPUX86State *env) 617*1b248f14SClaudio Fontana { 618*1b248f14SClaudio Fontana ST0 = floatx80_one; 619*1b248f14SClaudio Fontana } 620*1b248f14SClaudio Fontana 621*1b248f14SClaudio Fontana void helper_fldl2t_ST0(CPUX86State *env) 622*1b248f14SClaudio Fontana { 623*1b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 624*1b248f14SClaudio Fontana case FPU_RC_UP: 625*1b248f14SClaudio Fontana ST0 = floatx80_l2t_u; 626*1b248f14SClaudio Fontana break; 627*1b248f14SClaudio Fontana default: 628*1b248f14SClaudio Fontana ST0 = floatx80_l2t; 629*1b248f14SClaudio Fontana break; 630*1b248f14SClaudio Fontana } 631*1b248f14SClaudio Fontana } 632*1b248f14SClaudio Fontana 633*1b248f14SClaudio Fontana void helper_fldl2e_ST0(CPUX86State *env) 634*1b248f14SClaudio Fontana { 635*1b248f14SClaudio Fontana switch (env->fpuc & 
FPU_RC_MASK) { 636*1b248f14SClaudio Fontana case FPU_RC_DOWN: 637*1b248f14SClaudio Fontana case FPU_RC_CHOP: 638*1b248f14SClaudio Fontana ST0 = floatx80_l2e_d; 639*1b248f14SClaudio Fontana break; 640*1b248f14SClaudio Fontana default: 641*1b248f14SClaudio Fontana ST0 = floatx80_l2e; 642*1b248f14SClaudio Fontana break; 643*1b248f14SClaudio Fontana } 644*1b248f14SClaudio Fontana } 645*1b248f14SClaudio Fontana 646*1b248f14SClaudio Fontana void helper_fldpi_ST0(CPUX86State *env) 647*1b248f14SClaudio Fontana { 648*1b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 649*1b248f14SClaudio Fontana case FPU_RC_DOWN: 650*1b248f14SClaudio Fontana case FPU_RC_CHOP: 651*1b248f14SClaudio Fontana ST0 = floatx80_pi_d; 652*1b248f14SClaudio Fontana break; 653*1b248f14SClaudio Fontana default: 654*1b248f14SClaudio Fontana ST0 = floatx80_pi; 655*1b248f14SClaudio Fontana break; 656*1b248f14SClaudio Fontana } 657*1b248f14SClaudio Fontana } 658*1b248f14SClaudio Fontana 659*1b248f14SClaudio Fontana void helper_fldlg2_ST0(CPUX86State *env) 660*1b248f14SClaudio Fontana { 661*1b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 662*1b248f14SClaudio Fontana case FPU_RC_DOWN: 663*1b248f14SClaudio Fontana case FPU_RC_CHOP: 664*1b248f14SClaudio Fontana ST0 = floatx80_lg2_d; 665*1b248f14SClaudio Fontana break; 666*1b248f14SClaudio Fontana default: 667*1b248f14SClaudio Fontana ST0 = floatx80_lg2; 668*1b248f14SClaudio Fontana break; 669*1b248f14SClaudio Fontana } 670*1b248f14SClaudio Fontana } 671*1b248f14SClaudio Fontana 672*1b248f14SClaudio Fontana void helper_fldln2_ST0(CPUX86State *env) 673*1b248f14SClaudio Fontana { 674*1b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 675*1b248f14SClaudio Fontana case FPU_RC_DOWN: 676*1b248f14SClaudio Fontana case FPU_RC_CHOP: 677*1b248f14SClaudio Fontana ST0 = floatx80_ln2_d; 678*1b248f14SClaudio Fontana break; 679*1b248f14SClaudio Fontana default: 680*1b248f14SClaudio Fontana ST0 = floatx80_ln2; 681*1b248f14SClaudio Fontana break; 
682*1b248f14SClaudio Fontana } 683*1b248f14SClaudio Fontana } 684*1b248f14SClaudio Fontana 685*1b248f14SClaudio Fontana void helper_fldz_ST0(CPUX86State *env) 686*1b248f14SClaudio Fontana { 687*1b248f14SClaudio Fontana ST0 = floatx80_zero; 688*1b248f14SClaudio Fontana } 689*1b248f14SClaudio Fontana 690*1b248f14SClaudio Fontana void helper_fldz_FT0(CPUX86State *env) 691*1b248f14SClaudio Fontana { 692*1b248f14SClaudio Fontana FT0 = floatx80_zero; 693*1b248f14SClaudio Fontana } 694*1b248f14SClaudio Fontana 695*1b248f14SClaudio Fontana uint32_t helper_fnstsw(CPUX86State *env) 696*1b248f14SClaudio Fontana { 697*1b248f14SClaudio Fontana return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 698*1b248f14SClaudio Fontana } 699*1b248f14SClaudio Fontana 700*1b248f14SClaudio Fontana uint32_t helper_fnstcw(CPUX86State *env) 701*1b248f14SClaudio Fontana { 702*1b248f14SClaudio Fontana return env->fpuc; 703*1b248f14SClaudio Fontana } 704*1b248f14SClaudio Fontana 705*1b248f14SClaudio Fontana void update_fp_status(CPUX86State *env) 706*1b248f14SClaudio Fontana { 707*1b248f14SClaudio Fontana int rnd_type; 708*1b248f14SClaudio Fontana 709*1b248f14SClaudio Fontana /* set rounding mode */ 710*1b248f14SClaudio Fontana switch (env->fpuc & FPU_RC_MASK) { 711*1b248f14SClaudio Fontana default: 712*1b248f14SClaudio Fontana case FPU_RC_NEAR: 713*1b248f14SClaudio Fontana rnd_type = float_round_nearest_even; 714*1b248f14SClaudio Fontana break; 715*1b248f14SClaudio Fontana case FPU_RC_DOWN: 716*1b248f14SClaudio Fontana rnd_type = float_round_down; 717*1b248f14SClaudio Fontana break; 718*1b248f14SClaudio Fontana case FPU_RC_UP: 719*1b248f14SClaudio Fontana rnd_type = float_round_up; 720*1b248f14SClaudio Fontana break; 721*1b248f14SClaudio Fontana case FPU_RC_CHOP: 722*1b248f14SClaudio Fontana rnd_type = float_round_to_zero; 723*1b248f14SClaudio Fontana break; 724*1b248f14SClaudio Fontana } 725*1b248f14SClaudio Fontana set_float_rounding_mode(rnd_type, &env->fp_status); 726*1b248f14SClaudio 
Fontana switch ((env->fpuc >> 8) & 3) { 727*1b248f14SClaudio Fontana case 0: 728*1b248f14SClaudio Fontana rnd_type = 32; 729*1b248f14SClaudio Fontana break; 730*1b248f14SClaudio Fontana case 2: 731*1b248f14SClaudio Fontana rnd_type = 64; 732*1b248f14SClaudio Fontana break; 733*1b248f14SClaudio Fontana case 3: 734*1b248f14SClaudio Fontana default: 735*1b248f14SClaudio Fontana rnd_type = 80; 736*1b248f14SClaudio Fontana break; 737*1b248f14SClaudio Fontana } 738*1b248f14SClaudio Fontana set_floatx80_rounding_precision(rnd_type, &env->fp_status); 739*1b248f14SClaudio Fontana } 740*1b248f14SClaudio Fontana 741*1b248f14SClaudio Fontana void helper_fldcw(CPUX86State *env, uint32_t val) 742*1b248f14SClaudio Fontana { 743*1b248f14SClaudio Fontana cpu_set_fpuc(env, val); 744*1b248f14SClaudio Fontana } 745*1b248f14SClaudio Fontana 746*1b248f14SClaudio Fontana void helper_fclex(CPUX86State *env) 747*1b248f14SClaudio Fontana { 748*1b248f14SClaudio Fontana env->fpus &= 0x7f00; 749*1b248f14SClaudio Fontana } 750*1b248f14SClaudio Fontana 751*1b248f14SClaudio Fontana void helper_fwait(CPUX86State *env) 752*1b248f14SClaudio Fontana { 753*1b248f14SClaudio Fontana if (env->fpus & FPUS_SE) { 754*1b248f14SClaudio Fontana fpu_raise_exception(env, GETPC()); 755*1b248f14SClaudio Fontana } 756*1b248f14SClaudio Fontana } 757*1b248f14SClaudio Fontana 758*1b248f14SClaudio Fontana void helper_fninit(CPUX86State *env) 759*1b248f14SClaudio Fontana { 760*1b248f14SClaudio Fontana env->fpus = 0; 761*1b248f14SClaudio Fontana env->fpstt = 0; 762*1b248f14SClaudio Fontana cpu_set_fpuc(env, 0x37f); 763*1b248f14SClaudio Fontana env->fptags[0] = 1; 764*1b248f14SClaudio Fontana env->fptags[1] = 1; 765*1b248f14SClaudio Fontana env->fptags[2] = 1; 766*1b248f14SClaudio Fontana env->fptags[3] = 1; 767*1b248f14SClaudio Fontana env->fptags[4] = 1; 768*1b248f14SClaudio Fontana env->fptags[5] = 1; 769*1b248f14SClaudio Fontana env->fptags[6] = 1; 770*1b248f14SClaudio Fontana env->fptags[7] = 1; 771*1b248f14SClaudio 
Fontana } 772*1b248f14SClaudio Fontana 773*1b248f14SClaudio Fontana /* BCD ops */ 774*1b248f14SClaudio Fontana 775*1b248f14SClaudio Fontana void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 776*1b248f14SClaudio Fontana { 777*1b248f14SClaudio Fontana floatx80 tmp; 778*1b248f14SClaudio Fontana uint64_t val; 779*1b248f14SClaudio Fontana unsigned int v; 780*1b248f14SClaudio Fontana int i; 781*1b248f14SClaudio Fontana 782*1b248f14SClaudio Fontana val = 0; 783*1b248f14SClaudio Fontana for (i = 8; i >= 0; i--) { 784*1b248f14SClaudio Fontana v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 785*1b248f14SClaudio Fontana val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 786*1b248f14SClaudio Fontana } 787*1b248f14SClaudio Fontana tmp = int64_to_floatx80(val, &env->fp_status); 788*1b248f14SClaudio Fontana if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 789*1b248f14SClaudio Fontana tmp = floatx80_chs(tmp); 790*1b248f14SClaudio Fontana } 791*1b248f14SClaudio Fontana fpush(env); 792*1b248f14SClaudio Fontana ST0 = tmp; 793*1b248f14SClaudio Fontana } 794*1b248f14SClaudio Fontana 795*1b248f14SClaudio Fontana void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 796*1b248f14SClaudio Fontana { 797*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 798*1b248f14SClaudio Fontana int v; 799*1b248f14SClaudio Fontana target_ulong mem_ref, mem_end; 800*1b248f14SClaudio Fontana int64_t val; 801*1b248f14SClaudio Fontana CPU_LDoubleU temp; 802*1b248f14SClaudio Fontana 803*1b248f14SClaudio Fontana temp.d = ST0; 804*1b248f14SClaudio Fontana 805*1b248f14SClaudio Fontana val = floatx80_to_int64(ST0, &env->fp_status); 806*1b248f14SClaudio Fontana mem_ref = ptr; 807*1b248f14SClaudio Fontana if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 808*1b248f14SClaudio Fontana set_float_exception_flags(float_flag_invalid, &env->fp_status); 809*1b248f14SClaudio Fontana while (mem_ref < ptr + 7) { 810*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0, 
GETPC()); 811*1b248f14SClaudio Fontana } 812*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 813*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 814*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 815*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 816*1b248f14SClaudio Fontana return; 817*1b248f14SClaudio Fontana } 818*1b248f14SClaudio Fontana mem_end = mem_ref + 9; 819*1b248f14SClaudio Fontana if (SIGND(temp)) { 820*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 821*1b248f14SClaudio Fontana val = -val; 822*1b248f14SClaudio Fontana } else { 823*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 824*1b248f14SClaudio Fontana } 825*1b248f14SClaudio Fontana while (mem_ref < mem_end) { 826*1b248f14SClaudio Fontana if (val == 0) { 827*1b248f14SClaudio Fontana break; 828*1b248f14SClaudio Fontana } 829*1b248f14SClaudio Fontana v = val % 100; 830*1b248f14SClaudio Fontana val = val / 100; 831*1b248f14SClaudio Fontana v = ((v / 10) << 4) | (v % 10); 832*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 833*1b248f14SClaudio Fontana } 834*1b248f14SClaudio Fontana while (mem_ref < mem_end) { 835*1b248f14SClaudio Fontana cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 836*1b248f14SClaudio Fontana } 837*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 838*1b248f14SClaudio Fontana } 839*1b248f14SClaudio Fontana 840*1b248f14SClaudio Fontana /* 128-bit significand of log(2). */ 841*1b248f14SClaudio Fontana #define ln2_sig_high 0xb17217f7d1cf79abULL 842*1b248f14SClaudio Fontana #define ln2_sig_low 0xc9e3b39803f2f6afULL 843*1b248f14SClaudio Fontana 844*1b248f14SClaudio Fontana /* 845*1b248f14SClaudio Fontana * Polynomial coefficients for an approximation to (2^x - 1) / x, on 846*1b248f14SClaudio Fontana * the interval [-1/64, 1/64]. 
847*1b248f14SClaudio Fontana */ 848*1b248f14SClaudio Fontana #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 849*1b248f14SClaudio Fontana #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 850*1b248f14SClaudio Fontana #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 851*1b248f14SClaudio Fontana #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 852*1b248f14SClaudio Fontana #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 853*1b248f14SClaudio Fontana #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 854*1b248f14SClaudio Fontana #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 855*1b248f14SClaudio Fontana #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 856*1b248f14SClaudio Fontana #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 857*1b248f14SClaudio Fontana 858*1b248f14SClaudio Fontana struct f2xm1_data { 859*1b248f14SClaudio Fontana /* 860*1b248f14SClaudio Fontana * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 861*1b248f14SClaudio Fontana * are very close to exact floatx80 values. 862*1b248f14SClaudio Fontana */ 863*1b248f14SClaudio Fontana floatx80 t; 864*1b248f14SClaudio Fontana /* The value of 2^t. */ 865*1b248f14SClaudio Fontana floatx80 exp2; 866*1b248f14SClaudio Fontana /* The value of 2^t - 1. 
*/ 867*1b248f14SClaudio Fontana floatx80 exp2m1; 868*1b248f14SClaudio Fontana }; 869*1b248f14SClaudio Fontana 870*1b248f14SClaudio Fontana static const struct f2xm1_data f2xm1_table[65] = { 871*1b248f14SClaudio Fontana { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 872*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 873*1b248f14SClaudio Fontana make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 874*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 875*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 876*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 877*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 878*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 879*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 880*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 881*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 882*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 883*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 884*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 885*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 886*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 887*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 888*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 889*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 890*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 891*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 892*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 
893*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 894*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 895*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 896*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 897*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 898*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 899*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 900*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 901*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 902*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 903*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 904*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 905*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 906*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 907*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 908*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 909*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 910*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 911*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 912*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 913*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 914*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 915*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 916*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 917*1b248f14SClaudio Fontana 
make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 918*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 919*1b248f14SClaudio Fontana { make_floatx80_init(0xbffe, 0x800000000000227dULL), 920*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 921*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 922*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 923*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 924*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 925*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 926*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 927*1b248f14SClaudio Fontana make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 928*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 929*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 930*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 931*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 932*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 933*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 934*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 935*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 936*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 937*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 938*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 939*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 940*1b248f14SClaudio Fontana { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 941*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 
0xd2a81d91f12afb2bULL), 942*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 943*1b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 944*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 945*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 946*1b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 947*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 948*1b248f14SClaudio Fontana make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 949*1b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 950*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 951*1b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 952*1b248f14SClaudio Fontana { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 953*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 954*1b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 955*1b248f14SClaudio Fontana { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 956*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 957*1b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 958*1b248f14SClaudio Fontana { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 959*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 960*1b248f14SClaudio Fontana make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 961*1b248f14SClaudio Fontana { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 962*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 963*1b248f14SClaudio Fontana make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 964*1b248f14SClaudio Fontana { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 965*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 966*1b248f14SClaudio 
Fontana make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 967*1b248f14SClaudio Fontana { floatx80_zero_init, 968*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8000000000000000ULL), 969*1b248f14SClaudio Fontana floatx80_zero_init }, 970*1b248f14SClaudio Fontana { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 971*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 972*1b248f14SClaudio Fontana make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 973*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 974*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 975*1b248f14SClaudio Fontana make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 976*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 977*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 978*1b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 979*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 980*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 981*1b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 982*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 983*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 984*1b248f14SClaudio Fontana make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 985*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 986*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 987*1b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 988*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 989*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 990*1b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 991*1b248f14SClaudio Fontana { 
make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 992*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 993*1b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 994*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 995*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 996*1b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 997*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 998*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 999*1b248f14SClaudio Fontana make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1000*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1001*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1002*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1003*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1004*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1005*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1006*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1007*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1008*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1009*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1010*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1011*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1012*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1013*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1014*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1015*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffd, 
0xffffffffffff1c23ULL), 1016*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1017*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1018*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1019*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1020*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1021*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1022*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1023*1b248f14SClaudio Fontana make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1024*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1025*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1026*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1027*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1028*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1029*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1030*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1031*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1032*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1033*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1034*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1035*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1036*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1037*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1038*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1039*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 
1040*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1041*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1042*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1043*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1044*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1045*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1046*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1047*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1048*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1049*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1050*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1051*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1052*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1053*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1054*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1055*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1056*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1057*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1058*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1059*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1060*1b248f14SClaudio Fontana { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1061*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1062*1b248f14SClaudio Fontana make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1063*1b248f14SClaudio Fontana { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1064*1b248f14SClaudio 
Fontana make_floatx80_init(0x4000, 0x8000000000000000ULL), 1065*1b248f14SClaudio Fontana make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1066*1b248f14SClaudio Fontana }; 1067*1b248f14SClaudio Fontana 1068*1b248f14SClaudio Fontana void helper_f2xm1(CPUX86State *env) 1069*1b248f14SClaudio Fontana { 1070*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1071*1b248f14SClaudio Fontana uint64_t sig = extractFloatx80Frac(ST0); 1072*1b248f14SClaudio Fontana int32_t exp = extractFloatx80Exp(ST0); 1073*1b248f14SClaudio Fontana bool sign = extractFloatx80Sign(ST0); 1074*1b248f14SClaudio Fontana 1075*1b248f14SClaudio Fontana if (floatx80_invalid_encoding(ST0)) { 1076*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1077*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 1078*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 1079*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1080*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1081*1b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1082*1b248f14SClaudio Fontana } 1083*1b248f14SClaudio Fontana } else if (exp > 0x3fff || 1084*1b248f14SClaudio Fontana (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1085*1b248f14SClaudio Fontana /* Out of range for the instruction, treat as invalid. */ 1086*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1087*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 1088*1b248f14SClaudio Fontana } else if (exp == 0x3fff) { 1089*1b248f14SClaudio Fontana /* Argument 1 or -1, exact result 1 or -0.5. 
*/ 1090*1b248f14SClaudio Fontana if (sign) { 1091*1b248f14SClaudio Fontana ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1092*1b248f14SClaudio Fontana } 1093*1b248f14SClaudio Fontana } else if (exp < 0x3fb0) { 1094*1b248f14SClaudio Fontana if (!floatx80_is_zero(ST0)) { 1095*1b248f14SClaudio Fontana /* 1096*1b248f14SClaudio Fontana * Multiplying the argument by an extra-precision version 1097*1b248f14SClaudio Fontana * of log(2) is sufficiently precise. Zero arguments are 1098*1b248f14SClaudio Fontana * returned unchanged. 1099*1b248f14SClaudio Fontana */ 1100*1b248f14SClaudio Fontana uint64_t sig0, sig1, sig2; 1101*1b248f14SClaudio Fontana if (exp == 0) { 1102*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(sig, &exp, &sig); 1103*1b248f14SClaudio Fontana } 1104*1b248f14SClaudio Fontana mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1105*1b248f14SClaudio Fontana &sig2); 1106*1b248f14SClaudio Fontana /* This result is inexact. */ 1107*1b248f14SClaudio Fontana sig1 |= 1; 1108*1b248f14SClaudio Fontana ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1, 1109*1b248f14SClaudio Fontana &env->fp_status); 1110*1b248f14SClaudio Fontana } 1111*1b248f14SClaudio Fontana } else { 1112*1b248f14SClaudio Fontana floatx80 tmp, y, accum; 1113*1b248f14SClaudio Fontana bool asign, bsign; 1114*1b248f14SClaudio Fontana int32_t n, aexp, bexp; 1115*1b248f14SClaudio Fontana uint64_t asig0, asig1, asig2, bsig0, bsig1; 1116*1b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1117*1b248f14SClaudio Fontana signed char save_prec = env->fp_status.floatx80_rounding_precision; 1118*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 1119*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = 80; 1120*1b248f14SClaudio Fontana 1121*1b248f14SClaudio Fontana /* Find the nearest multiple of 1/32 to the argument. 
*/ 1122*1b248f14SClaudio Fontana tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1123*1b248f14SClaudio Fontana n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1124*1b248f14SClaudio Fontana y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1125*1b248f14SClaudio Fontana 1126*1b248f14SClaudio Fontana if (floatx80_is_zero(y)) { 1127*1b248f14SClaudio Fontana /* 1128*1b248f14SClaudio Fontana * Use the value of 2^t - 1 from the table, to avoid 1129*1b248f14SClaudio Fontana * needing to special-case zero as a result of 1130*1b248f14SClaudio Fontana * multiplication below. 1131*1b248f14SClaudio Fontana */ 1132*1b248f14SClaudio Fontana ST0 = f2xm1_table[n].t; 1133*1b248f14SClaudio Fontana set_float_exception_flags(float_flag_inexact, &env->fp_status); 1134*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 1135*1b248f14SClaudio Fontana } else { 1136*1b248f14SClaudio Fontana /* 1137*1b248f14SClaudio Fontana * Compute the lower parts of a polynomial expansion for 1138*1b248f14SClaudio Fontana * (2^y - 1) / y. 
1139*1b248f14SClaudio Fontana */ 1140*1b248f14SClaudio Fontana accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1141*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1142*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1143*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1144*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1145*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1146*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1147*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1148*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1149*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1150*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1151*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1152*1b248f14SClaudio Fontana accum = floatx80_mul(accum, y, &env->fp_status); 1153*1b248f14SClaudio Fontana accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1154*1b248f14SClaudio Fontana 1155*1b248f14SClaudio Fontana /* 1156*1b248f14SClaudio Fontana * The full polynomial expansion is f2xm1_coeff_0 + accum 1157*1b248f14SClaudio Fontana * (where accum has much lower magnitude, and so, in 1158*1b248f14SClaudio Fontana * particular, carry out of the addition is not possible). 1159*1b248f14SClaudio Fontana * (This expansion is only accurate to about 70 bits, not 1160*1b248f14SClaudio Fontana * 128 bits.) 
1161*1b248f14SClaudio Fontana */ 1162*1b248f14SClaudio Fontana aexp = extractFloatx80Exp(f2xm1_coeff_0); 1163*1b248f14SClaudio Fontana asign = extractFloatx80Sign(f2xm1_coeff_0); 1164*1b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0, 1165*1b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum), 1166*1b248f14SClaudio Fontana &asig0, &asig1); 1167*1b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1168*1b248f14SClaudio Fontana bsig1 = 0; 1169*1b248f14SClaudio Fontana if (asign == extractFloatx80Sign(accum)) { 1170*1b248f14SClaudio Fontana add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1171*1b248f14SClaudio Fontana } else { 1172*1b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1173*1b248f14SClaudio Fontana } 1174*1b248f14SClaudio Fontana /* And thus compute an approximation to 2^y - 1. */ 1175*1b248f14SClaudio Fontana mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1176*1b248f14SClaudio Fontana &asig0, &asig1, &asig2); 1177*1b248f14SClaudio Fontana aexp += extractFloatx80Exp(y) - 0x3ffe; 1178*1b248f14SClaudio Fontana asign ^= extractFloatx80Sign(y); 1179*1b248f14SClaudio Fontana if (n != 32) { 1180*1b248f14SClaudio Fontana /* 1181*1b248f14SClaudio Fontana * Multiply this by the precomputed value of 2^t and 1182*1b248f14SClaudio Fontana * add that of 2^t - 1. 
1183*1b248f14SClaudio Fontana */ 1184*1b248f14SClaudio Fontana mul128By64To192(asig0, asig1, 1185*1b248f14SClaudio Fontana extractFloatx80Frac(f2xm1_table[n].exp2), 1186*1b248f14SClaudio Fontana &asig0, &asig1, &asig2); 1187*1b248f14SClaudio Fontana aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1188*1b248f14SClaudio Fontana bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1189*1b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1190*1b248f14SClaudio Fontana bsig1 = 0; 1191*1b248f14SClaudio Fontana if (bexp < aexp) { 1192*1b248f14SClaudio Fontana shift128RightJamming(bsig0, bsig1, aexp - bexp, 1193*1b248f14SClaudio Fontana &bsig0, &bsig1); 1194*1b248f14SClaudio Fontana } else if (aexp < bexp) { 1195*1b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, bexp - aexp, 1196*1b248f14SClaudio Fontana &asig0, &asig1); 1197*1b248f14SClaudio Fontana aexp = bexp; 1198*1b248f14SClaudio Fontana } 1199*1b248f14SClaudio Fontana /* The sign of 2^t - 1 is always that of the result. */ 1200*1b248f14SClaudio Fontana bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1201*1b248f14SClaudio Fontana if (asign == bsign) { 1202*1b248f14SClaudio Fontana /* Avoid possible carry out of the addition. */ 1203*1b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, 1, 1204*1b248f14SClaudio Fontana &asig0, &asig1); 1205*1b248f14SClaudio Fontana shift128RightJamming(bsig0, bsig1, 1, 1206*1b248f14SClaudio Fontana &bsig0, &bsig1); 1207*1b248f14SClaudio Fontana ++aexp; 1208*1b248f14SClaudio Fontana add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1209*1b248f14SClaudio Fontana } else { 1210*1b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1211*1b248f14SClaudio Fontana asign = bsign; 1212*1b248f14SClaudio Fontana } 1213*1b248f14SClaudio Fontana } 1214*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 1215*1b248f14SClaudio Fontana /* This result is inexact. 
*/ 1216*1b248f14SClaudio Fontana asig1 |= 1; 1217*1b248f14SClaudio Fontana ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1, 1218*1b248f14SClaudio Fontana &env->fp_status); 1219*1b248f14SClaudio Fontana } 1220*1b248f14SClaudio Fontana 1221*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 1222*1b248f14SClaudio Fontana } 1223*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1224*1b248f14SClaudio Fontana } 1225*1b248f14SClaudio Fontana 1226*1b248f14SClaudio Fontana void helper_fptan(CPUX86State *env) 1227*1b248f14SClaudio Fontana { 1228*1b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 1229*1b248f14SClaudio Fontana 1230*1b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1231*1b248f14SClaudio Fontana env->fpus |= 0x400; 1232*1b248f14SClaudio Fontana } else { 1233*1b248f14SClaudio Fontana fptemp = tan(fptemp); 1234*1b248f14SClaudio Fontana ST0 = double_to_floatx80(env, fptemp); 1235*1b248f14SClaudio Fontana fpush(env); 1236*1b248f14SClaudio Fontana ST0 = floatx80_one; 1237*1b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 1238*1b248f14SClaudio Fontana /* the above code is for |arg| < 2**52 only */ 1239*1b248f14SClaudio Fontana } 1240*1b248f14SClaudio Fontana } 1241*1b248f14SClaudio Fontana 1242*1b248f14SClaudio Fontana /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is described by three macros: <name>_exp is the exponent
 * field and <name>_sig_high / <name>_sig_low are the upper and lower 64
 * bits of the significand.  helper_fpatan() passes such a triple to
 * normalizeRoundAndPackFloatx80(), either directly as its result or as
 * the base value that arctan(x) is added to or subtracted from.
 *
 * pi/4, pi/2 and pi are the same significand with consecutive exponents;
 * only 3pi/4 needs a significand of its own.
 */
/* pi/4 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
/* pi/2: pi/4 with the exponent raised by one. */
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
/* 3pi/4 */
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
/* pi: pi/2 with the exponent raised by one. */
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * helper_fpatan() evaluates the polynomial in z^2 by Horner's scheme
 * starting from fpatan_coeff_6, so fpatan_coeff_k is the coefficient of
 * z^(2k+1); fpatan_coeff_0 (exactly 1) is added separately with 128-bit
 * arithmetic.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

/*
 * One entry of the arctangent lookup table used by helper_fpatan().
 * The value is stored as the sum of two floatx80 numbers so that more
 * bits are available than a single floatx80 significand holds.
 */
struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};

/*
 * Table indexed by n, where helper_fpatan() picks t = n/8 as the nearest
 * multiple of 1/8 to its reduced argument; entry n supplies arctan(t).
 * The low part is aligned to the high part's exponent and then added or
 * subtracted according to its sign.
 */
static const struct fpatan_data fpatan_table[9] = {
    /* n = 0 */
    { floatx80_zero_init,
      floatx80_zero_init },
    /* n = 1 */
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    /* n = 2 */
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    /* n = 3 */
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    /* n = 4 */
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    /* n = 5 */
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    /* n = 6 */
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    /* n = 7 */
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    /* n = 8: high part is pi/4 rounded to 64 bits. */
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
/*
 * FPATAN helper.
 *
 * Replaces ST1 with an approximation of arctan(ST1 / ST0), extended to
 * all four quadrants using the signs of both operands, and then calls
 * fpop().  Exception flags raised along the way are folded back into the
 * FPU state by merge_exception_flags().
 *
 * Outline:
 *   1. NaNs and invalid encodings are handled first.
 *   2. If ST1 is zero, or ST0 is so much larger than ST1 that
 *      arctan(ST1/ST0) equals ST1/ST0 up to rounding, and ST0 is
 *      positive, the result is ST1 itself or a plain division.
 *   3. Infinities, zeros and widely separated exponents in the other
 *      quadrants produce a fixed multiple of pi/4.
 *   4. Otherwise x = min/max of the magnitudes is computed with 128-bit
 *      significands, split as x = t + y with t = n/8, and
 *      arctan(x) = arctan(t) + arctan(y / (1 + t*x)) is evaluated using
 *      fpatan_table[] and the fpatan_coeff_* polynomial.  The result is
 *      then combined with 0, pi/2 or pi depending on the quadrant.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    /* Decompose both operands once; arg0 is ST0, arg1 is ST1. */
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        /* A signaling NaN in ST0 takes priority over anything in ST1. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaN in ST0: it becomes the result. */
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        /* Divide at full 80-bit precision regardless of the guest setting. */
        env->fp_status.floatx80_rounding_precision = 80;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            /*
             * High word decremented and low word all ones: the 128-bit
             * significand immediately below the exact quotient.
             */
            ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        /* The result always takes the sign of ST1. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                /* Both infinite: 3pi/4 or pi/4 depending on ST0's sign. */
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                /* Infinite ST1, finite ST0: pi/2. */
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            /* ST1 dominates ST0 completely: pi/2. */
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            /*
             * Naming: each quantity q is held as an exponent qexp plus a
             * 128-bit significand qsig0:qsig1 (high:low).
             *   adj - base value (0, pi/2 or pi) combined with arctan(x)
             *   x   - reduced argument num/den
             *   t   - n/8, nearest multiple of 1/8 to x
             *   y   - x - t
             *   z   - y / (1 + t*x)
             *   az  - arctan(z)
             *   ax  - arctan(x)
             *   rem, m - remainder and product scratch for the divisions
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            /*
             * Intermediate softfloat operations run in round-to-nearest
             * at 80-bit precision; the guest's settings are restored
             * before the final rounding.
             */
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            signed char save_prec = env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = 80;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            /* Put the larger magnitude in the denominator so x <= 1. */
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                /* Keep the dividend's high word below the divisor. */
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct an over-estimated quotient until the remainder is >= 0. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            /* xexp + 3 scales x by 8 so that rounding to integer yields n. */
            x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                /* t is zero (marked by texp == 0), so y is x itself. */
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                /*
                 * Normalize n into a 64-bit significand.  0x403b is
                 * 0x3fff + 63 - 3, so the exponent below makes
                 * tsig represent n/8.
                 */
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    /* Same exponent: subtract significands directly. */
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* x == t exactly; yexp == 0 marks y == 0. */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        /* x < t: negate to get |y| and renormalize. */
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    /* Align x to t's exponent, then y = -(t - x). */
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t == 0 or y == 0: the denominator is irrelevant, z = y. */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                /* The denominator is in [1, 2), hence the initial yexp - 1. */
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct an over-estimated quotient digit. */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                /* z == 0, so arctan(z) == 0. */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(80, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                /* Horner evaluation in z^2, highest coefficient first. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                /* Align |accum| to coefficient 0 and subtract it in 128 bits. */
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                /* Only the top 128 bits (azsig0:azsig1) are used afterwards. */
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                /* Rebuild arctan(t) from the table's high and low parts. */
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Bring both terms to a common exponent one above the
                 * larger of the two, leaving a spare top bit for the sum.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                /* No quadrant adjustment: arctan(x) is the magnitude. */
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                /* Same alignment scheme as above, with one spare top bit. */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            /* Restore the guest's rounding settings for the final rounding. */
            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        /* Force a sticky low bit so the final rounding reports inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}
*/ 1722*1b248f14SClaudio Fontana rsig1 |= 1; 1723*1b248f14SClaudio Fontana ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp, 1724*1b248f14SClaudio Fontana rsig0, rsig1, &env->fp_status); 1725*1b248f14SClaudio Fontana } 1726*1b248f14SClaudio Fontana 1727*1b248f14SClaudio Fontana fpop(env); 1728*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1729*1b248f14SClaudio Fontana } 1730*1b248f14SClaudio Fontana 1731*1b248f14SClaudio Fontana void helper_fxtract(CPUX86State *env) 1732*1b248f14SClaudio Fontana { 1733*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1734*1b248f14SClaudio Fontana CPU_LDoubleU temp; 1735*1b248f14SClaudio Fontana 1736*1b248f14SClaudio Fontana temp.d = ST0; 1737*1b248f14SClaudio Fontana 1738*1b248f14SClaudio Fontana if (floatx80_is_zero(ST0)) { 1739*1b248f14SClaudio Fontana /* Easy way to generate -inf and raising division by 0 exception */ 1740*1b248f14SClaudio Fontana ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1741*1b248f14SClaudio Fontana &env->fp_status); 1742*1b248f14SClaudio Fontana fpush(env); 1743*1b248f14SClaudio Fontana ST0 = temp.d; 1744*1b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0)) { 1745*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1746*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 1747*1b248f14SClaudio Fontana fpush(env); 1748*1b248f14SClaudio Fontana ST0 = ST1; 1749*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 1750*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1751*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1752*1b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1753*1b248f14SClaudio Fontana } 1754*1b248f14SClaudio Fontana fpush(env); 1755*1b248f14SClaudio Fontana ST0 = ST1; 1756*1b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST0)) { 1757*1b248f14SClaudio Fontana fpush(env); 
1758*1b248f14SClaudio Fontana ST0 = ST1; 1759*1b248f14SClaudio Fontana ST1 = floatx80_infinity; 1760*1b248f14SClaudio Fontana } else { 1761*1b248f14SClaudio Fontana int expdif; 1762*1b248f14SClaudio Fontana 1763*1b248f14SClaudio Fontana if (EXPD(temp) == 0) { 1764*1b248f14SClaudio Fontana int shift = clz64(temp.l.lower); 1765*1b248f14SClaudio Fontana temp.l.lower <<= shift; 1766*1b248f14SClaudio Fontana expdif = 1 - EXPBIAS - shift; 1767*1b248f14SClaudio Fontana float_raise(float_flag_input_denormal, &env->fp_status); 1768*1b248f14SClaudio Fontana } else { 1769*1b248f14SClaudio Fontana expdif = EXPD(temp) - EXPBIAS; 1770*1b248f14SClaudio Fontana } 1771*1b248f14SClaudio Fontana /* DP exponent bias */ 1772*1b248f14SClaudio Fontana ST0 = int32_to_floatx80(expdif, &env->fp_status); 1773*1b248f14SClaudio Fontana fpush(env); 1774*1b248f14SClaudio Fontana BIASEXPONENT(temp); 1775*1b248f14SClaudio Fontana ST0 = temp.d; 1776*1b248f14SClaudio Fontana } 1777*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1778*1b248f14SClaudio Fontana } 1779*1b248f14SClaudio Fontana 1780*1b248f14SClaudio Fontana static void helper_fprem_common(CPUX86State *env, bool mod) 1781*1b248f14SClaudio Fontana { 1782*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1783*1b248f14SClaudio Fontana uint64_t quotient; 1784*1b248f14SClaudio Fontana CPU_LDoubleU temp0, temp1; 1785*1b248f14SClaudio Fontana int exp0, exp1, expdiff; 1786*1b248f14SClaudio Fontana 1787*1b248f14SClaudio Fontana temp0.d = ST0; 1788*1b248f14SClaudio Fontana temp1.d = ST1; 1789*1b248f14SClaudio Fontana exp0 = EXPD(temp0); 1790*1b248f14SClaudio Fontana exp1 = EXPD(temp1); 1791*1b248f14SClaudio Fontana 1792*1b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1793*1b248f14SClaudio Fontana if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1794*1b248f14SClaudio Fontana exp0 == 0x7fff || exp1 == 0x7fff || 1795*1b248f14SClaudio Fontana floatx80_invalid_encoding(ST0) || 
floatx80_invalid_encoding(ST1)) { 1796*1b248f14SClaudio Fontana ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1797*1b248f14SClaudio Fontana } else { 1798*1b248f14SClaudio Fontana if (exp0 == 0) { 1799*1b248f14SClaudio Fontana exp0 = 1 - clz64(temp0.l.lower); 1800*1b248f14SClaudio Fontana } 1801*1b248f14SClaudio Fontana if (exp1 == 0) { 1802*1b248f14SClaudio Fontana exp1 = 1 - clz64(temp1.l.lower); 1803*1b248f14SClaudio Fontana } 1804*1b248f14SClaudio Fontana expdiff = exp0 - exp1; 1805*1b248f14SClaudio Fontana if (expdiff < 64) { 1806*1b248f14SClaudio Fontana ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1807*1b248f14SClaudio Fontana env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1808*1b248f14SClaudio Fontana env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1809*1b248f14SClaudio Fontana env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1810*1b248f14SClaudio Fontana } else { 1811*1b248f14SClaudio Fontana /* 1812*1b248f14SClaudio Fontana * Partial remainder. This choice of how many bits to 1813*1b248f14SClaudio Fontana * process at once is specified in AMD instruction set 1814*1b248f14SClaudio Fontana * manuals, and empirically is followed by Intel 1815*1b248f14SClaudio Fontana * processors as well; it ensures that the final remainder 1816*1b248f14SClaudio Fontana * operation in a loop does produce the correct low three 1817*1b248f14SClaudio Fontana * bits of the quotient. AMD manuals specify that the 1818*1b248f14SClaudio Fontana * flags other than C2 are cleared, and empirically Intel 1819*1b248f14SClaudio Fontana * processors clear them as well. 
1820*1b248f14SClaudio Fontana */ 1821*1b248f14SClaudio Fontana int n = 32 + (expdiff % 32); 1822*1b248f14SClaudio Fontana temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1823*1b248f14SClaudio Fontana ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1824*1b248f14SClaudio Fontana env->fpus |= 0x400; /* C2 <-- 1 */ 1825*1b248f14SClaudio Fontana } 1826*1b248f14SClaudio Fontana } 1827*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 1828*1b248f14SClaudio Fontana } 1829*1b248f14SClaudio Fontana 1830*1b248f14SClaudio Fontana void helper_fprem1(CPUX86State *env) 1831*1b248f14SClaudio Fontana { 1832*1b248f14SClaudio Fontana helper_fprem_common(env, false); 1833*1b248f14SClaudio Fontana } 1834*1b248f14SClaudio Fontana 1835*1b248f14SClaudio Fontana void helper_fprem(CPUX86State *env) 1836*1b248f14SClaudio Fontana { 1837*1b248f14SClaudio Fontana helper_fprem_common(env, true); 1838*1b248f14SClaudio Fontana } 1839*1b248f14SClaudio Fontana 1840*1b248f14SClaudio Fontana /* 128-bit significand of log2(e). */ 1841*1b248f14SClaudio Fontana #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1842*1b248f14SClaudio Fontana #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1843*1b248f14SClaudio Fontana 1844*1b248f14SClaudio Fontana /* 1845*1b248f14SClaudio Fontana * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1846*1b248f14SClaudio Fontana * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1847*1b248f14SClaudio Fontana * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1848*1b248f14SClaudio Fontana * interval [sqrt(2)/2, sqrt(2)]. 
1849*1b248f14SClaudio Fontana */ 1850*1b248f14SClaudio Fontana #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1851*1b248f14SClaudio Fontana #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1852*1b248f14SClaudio Fontana #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1853*1b248f14SClaudio Fontana #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1854*1b248f14SClaudio Fontana #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1855*1b248f14SClaudio Fontana #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1856*1b248f14SClaudio Fontana #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1857*1b248f14SClaudio Fontana #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1858*1b248f14SClaudio Fontana #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1859*1b248f14SClaudio Fontana #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1860*1b248f14SClaudio Fontana #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1861*1b248f14SClaudio Fontana 1862*1b248f14SClaudio Fontana /* 1863*1b248f14SClaudio Fontana * Compute an approximation of log2(1+arg), where 1+arg is in the 1864*1b248f14SClaudio Fontana * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1865*1b248f14SClaudio Fontana * function is called, rounding precision is set to 80 and the 1866*1b248f14SClaudio Fontana * round-to-nearest mode is in effect. arg must not be exactly zero, 1867*1b248f14SClaudio Fontana * and must not be so close to zero that underflow might occur. 
1868*1b248f14SClaudio Fontana */ 1869*1b248f14SClaudio Fontana static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1870*1b248f14SClaudio Fontana uint64_t *sig0, uint64_t *sig1) 1871*1b248f14SClaudio Fontana { 1872*1b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(arg); 1873*1b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(arg); 1874*1b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(arg); 1875*1b248f14SClaudio Fontana bool asign; 1876*1b248f14SClaudio Fontana int32_t dexp, texp, aexp; 1877*1b248f14SClaudio Fontana uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1878*1b248f14SClaudio Fontana uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1879*1b248f14SClaudio Fontana uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1880*1b248f14SClaudio Fontana floatx80 t2, accum; 1881*1b248f14SClaudio Fontana 1882*1b248f14SClaudio Fontana /* 1883*1b248f14SClaudio Fontana * Compute an approximation of arg/(2+arg), with extra precision, 1884*1b248f14SClaudio Fontana * as the argument to a polynomial approximation. The extra 1885*1b248f14SClaudio Fontana * precision is only needed for the first term of the 1886*1b248f14SClaudio Fontana * approximation, with subsequent terms being significantly 1887*1b248f14SClaudio Fontana * smaller; the approximation only uses odd exponents, and the 1888*1b248f14SClaudio Fontana * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
1889*1b248f14SClaudio Fontana */ 1890*1b248f14SClaudio Fontana if (arg0_sign) { 1891*1b248f14SClaudio Fontana dexp = 0x3fff; 1892*1b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1893*1b248f14SClaudio Fontana sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1894*1b248f14SClaudio Fontana } else { 1895*1b248f14SClaudio Fontana dexp = 0x4000; 1896*1b248f14SClaudio Fontana shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1897*1b248f14SClaudio Fontana dsig0 |= 0x8000000000000000ULL; 1898*1b248f14SClaudio Fontana } 1899*1b248f14SClaudio Fontana texp = arg0_exp - dexp + 0x3ffe; 1900*1b248f14SClaudio Fontana rsig0 = arg0_sig; 1901*1b248f14SClaudio Fontana rsig1 = 0; 1902*1b248f14SClaudio Fontana rsig2 = 0; 1903*1b248f14SClaudio Fontana if (dsig0 <= rsig0) { 1904*1b248f14SClaudio Fontana shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1905*1b248f14SClaudio Fontana ++texp; 1906*1b248f14SClaudio Fontana } 1907*1b248f14SClaudio Fontana tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1908*1b248f14SClaudio Fontana mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1909*1b248f14SClaudio Fontana sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1910*1b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2); 1911*1b248f14SClaudio Fontana while ((int64_t) rsig0 < 0) { 1912*1b248f14SClaudio Fontana --tsig0; 1913*1b248f14SClaudio Fontana add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1914*1b248f14SClaudio Fontana &rsig0, &rsig1, &rsig2); 1915*1b248f14SClaudio Fontana } 1916*1b248f14SClaudio Fontana tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1917*1b248f14SClaudio Fontana /* 1918*1b248f14SClaudio Fontana * No need to correct any estimation error in tsig1; even with 1919*1b248f14SClaudio Fontana * such error, it is accurate enough. Now compute the square of 1920*1b248f14SClaudio Fontana * that approximation. 
1921*1b248f14SClaudio Fontana */ 1922*1b248f14SClaudio Fontana mul128To256(tsig0, tsig1, tsig0, tsig1, 1923*1b248f14SClaudio Fontana &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1924*1b248f14SClaudio Fontana t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe, 1925*1b248f14SClaudio Fontana t2sig0, t2sig1, &env->fp_status); 1926*1b248f14SClaudio Fontana 1927*1b248f14SClaudio Fontana /* Compute the lower parts of the polynomial expansion. */ 1928*1b248f14SClaudio Fontana accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1929*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1930*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1931*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1932*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1933*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1934*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1935*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1936*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1937*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1938*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1939*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1940*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1941*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1942*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1943*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1944*1b248f14SClaudio Fontana accum = floatx80_mul(accum, t2, &env->fp_status); 1945*1b248f14SClaudio Fontana accum = floatx80_add(fyl2x_coeff_0_low, accum, 
&env->fp_status); 1946*1b248f14SClaudio Fontana 1947*1b248f14SClaudio Fontana /* 1948*1b248f14SClaudio Fontana * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1949*1b248f14SClaudio Fontana * accum has much lower magnitude, and so, in particular, carry 1950*1b248f14SClaudio Fontana * out of the addition is not possible), multiplied by t. (This 1951*1b248f14SClaudio Fontana * expansion is only accurate to about 70 bits, not 128 bits.) 1952*1b248f14SClaudio Fontana */ 1953*1b248f14SClaudio Fontana aexp = extractFloatx80Exp(fyl2x_coeff_0); 1954*1b248f14SClaudio Fontana asign = extractFloatx80Sign(fyl2x_coeff_0); 1955*1b248f14SClaudio Fontana shift128RightJamming(extractFloatx80Frac(accum), 0, 1956*1b248f14SClaudio Fontana aexp - extractFloatx80Exp(accum), 1957*1b248f14SClaudio Fontana &asig0, &asig1); 1958*1b248f14SClaudio Fontana bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1959*1b248f14SClaudio Fontana bsig1 = 0; 1960*1b248f14SClaudio Fontana if (asign == extractFloatx80Sign(accum)) { 1961*1b248f14SClaudio Fontana add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1962*1b248f14SClaudio Fontana } else { 1963*1b248f14SClaudio Fontana sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1964*1b248f14SClaudio Fontana } 1965*1b248f14SClaudio Fontana /* Multiply by t to compute the required result. 
*/ 1966*1b248f14SClaudio Fontana mul128To256(asig0, asig1, tsig0, tsig1, 1967*1b248f14SClaudio Fontana &asig0, &asig1, &asig2, &asig3); 1968*1b248f14SClaudio Fontana aexp += texp - 0x3ffe; 1969*1b248f14SClaudio Fontana *exp = aexp; 1970*1b248f14SClaudio Fontana *sig0 = asig0; 1971*1b248f14SClaudio Fontana *sig1 = asig1; 1972*1b248f14SClaudio Fontana } 1973*1b248f14SClaudio Fontana 1974*1b248f14SClaudio Fontana void helper_fyl2xp1(CPUX86State *env) 1975*1b248f14SClaudio Fontana { 1976*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 1977*1b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(ST0); 1978*1b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(ST0); 1979*1b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(ST0); 1980*1b248f14SClaudio Fontana uint64_t arg1_sig = extractFloatx80Frac(ST1); 1981*1b248f14SClaudio Fontana int32_t arg1_exp = extractFloatx80Exp(ST1); 1982*1b248f14SClaudio Fontana bool arg1_sign = extractFloatx80Sign(ST1); 1983*1b248f14SClaudio Fontana 1984*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1985*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1986*1b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1987*1b248f14SClaudio Fontana } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1988*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1989*1b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1990*1b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0) || 1991*1b248f14SClaudio Fontana floatx80_invalid_encoding(ST1)) { 1992*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 1993*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 1994*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 1995*1b248f14SClaudio Fontana ST1 = ST0; 1996*1b248f14SClaudio Fontana } else if 
(floatx80_is_any_nan(ST1)) { 1997*1b248f14SClaudio Fontana /* Pass this NaN through. */ 1998*1b248f14SClaudio Fontana } else if (arg0_exp > 0x3ffd || 1999*1b248f14SClaudio Fontana (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2000*1b248f14SClaudio Fontana 0x95f619980c4336f7ULL : 2001*1b248f14SClaudio Fontana 0xd413cccfe7799211ULL))) { 2002*1b248f14SClaudio Fontana /* 2003*1b248f14SClaudio Fontana * Out of range for the instruction (ST0 must have absolute 2004*1b248f14SClaudio Fontana * value less than 1 - sqrt(2)/2 = 0.292..., according to 2005*1b248f14SClaudio Fontana * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2006*1b248f14SClaudio Fontana * to sqrt(2) - 1, which we allow here), treat as invalid. 2007*1b248f14SClaudio Fontana */ 2008*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2009*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2010*1b248f14SClaudio Fontana } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2011*1b248f14SClaudio Fontana arg1_exp == 0x7fff) { 2012*1b248f14SClaudio Fontana /* 2013*1b248f14SClaudio Fontana * One argument is zero, or multiplying by infinity; correct 2014*1b248f14SClaudio Fontana * result is exact and can be obtained by multiplying the 2015*1b248f14SClaudio Fontana * arguments. 2016*1b248f14SClaudio Fontana */ 2017*1b248f14SClaudio Fontana ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2018*1b248f14SClaudio Fontana } else if (arg0_exp < 0x3fb0) { 2019*1b248f14SClaudio Fontana /* 2020*1b248f14SClaudio Fontana * Multiplying both arguments and an extra-precision version 2021*1b248f14SClaudio Fontana * of log2(e) is sufficiently precise. 
2022*1b248f14SClaudio Fontana */ 2023*1b248f14SClaudio Fontana uint64_t sig0, sig1, sig2; 2024*1b248f14SClaudio Fontana int32_t exp; 2025*1b248f14SClaudio Fontana if (arg0_exp == 0) { 2026*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2027*1b248f14SClaudio Fontana } 2028*1b248f14SClaudio Fontana if (arg1_exp == 0) { 2029*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2030*1b248f14SClaudio Fontana } 2031*1b248f14SClaudio Fontana mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2032*1b248f14SClaudio Fontana &sig0, &sig1, &sig2); 2033*1b248f14SClaudio Fontana exp = arg0_exp + 1; 2034*1b248f14SClaudio Fontana mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2035*1b248f14SClaudio Fontana exp += arg1_exp - 0x3ffe; 2036*1b248f14SClaudio Fontana /* This result is inexact. */ 2037*1b248f14SClaudio Fontana sig1 |= 1; 2038*1b248f14SClaudio Fontana ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp, 2039*1b248f14SClaudio Fontana sig0, sig1, &env->fp_status); 2040*1b248f14SClaudio Fontana } else { 2041*1b248f14SClaudio Fontana int32_t aexp; 2042*1b248f14SClaudio Fontana uint64_t asig0, asig1, asig2; 2043*1b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2044*1b248f14SClaudio Fontana signed char save_prec = env->fp_status.floatx80_rounding_precision; 2045*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 2046*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = 80; 2047*1b248f14SClaudio Fontana 2048*1b248f14SClaudio Fontana helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2049*1b248f14SClaudio Fontana /* 2050*1b248f14SClaudio Fontana * Multiply by the second argument to compute the required 2051*1b248f14SClaudio Fontana * result. 
2052*1b248f14SClaudio Fontana */ 2053*1b248f14SClaudio Fontana if (arg1_exp == 0) { 2054*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2055*1b248f14SClaudio Fontana } 2056*1b248f14SClaudio Fontana mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2057*1b248f14SClaudio Fontana aexp += arg1_exp - 0x3ffe; 2058*1b248f14SClaudio Fontana /* This result is inexact. */ 2059*1b248f14SClaudio Fontana asig1 |= 1; 2060*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 2061*1b248f14SClaudio Fontana ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp, 2062*1b248f14SClaudio Fontana asig0, asig1, &env->fp_status); 2063*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 2064*1b248f14SClaudio Fontana } 2065*1b248f14SClaudio Fontana fpop(env); 2066*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2067*1b248f14SClaudio Fontana } 2068*1b248f14SClaudio Fontana 2069*1b248f14SClaudio Fontana void helper_fyl2x(CPUX86State *env) 2070*1b248f14SClaudio Fontana { 2071*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2072*1b248f14SClaudio Fontana uint64_t arg0_sig = extractFloatx80Frac(ST0); 2073*1b248f14SClaudio Fontana int32_t arg0_exp = extractFloatx80Exp(ST0); 2074*1b248f14SClaudio Fontana bool arg0_sign = extractFloatx80Sign(ST0); 2075*1b248f14SClaudio Fontana uint64_t arg1_sig = extractFloatx80Frac(ST1); 2076*1b248f14SClaudio Fontana int32_t arg1_exp = extractFloatx80Exp(ST1); 2077*1b248f14SClaudio Fontana bool arg1_sign = extractFloatx80Sign(ST1); 2078*1b248f14SClaudio Fontana 2079*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2080*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2081*1b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2082*1b248f14SClaudio Fontana } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2083*1b248f14SClaudio 
Fontana float_raise(float_flag_invalid, &env->fp_status); 2084*1b248f14SClaudio Fontana ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2085*1b248f14SClaudio Fontana } else if (floatx80_invalid_encoding(ST0) || 2086*1b248f14SClaudio Fontana floatx80_invalid_encoding(ST1)) { 2087*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2088*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2089*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST0)) { 2090*1b248f14SClaudio Fontana ST1 = ST0; 2091*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) { 2092*1b248f14SClaudio Fontana /* Pass this NaN through. */ 2093*1b248f14SClaudio Fontana } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2094*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2095*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2096*1b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST1)) { 2097*1b248f14SClaudio Fontana FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2098*1b248f14SClaudio Fontana &env->fp_status); 2099*1b248f14SClaudio Fontana switch (cmp) { 2100*1b248f14SClaudio Fontana case float_relation_less: 2101*1b248f14SClaudio Fontana ST1 = floatx80_chs(ST1); 2102*1b248f14SClaudio Fontana break; 2103*1b248f14SClaudio Fontana case float_relation_greater: 2104*1b248f14SClaudio Fontana /* Result is infinity of the same sign as ST1. 
*/ 2105*1b248f14SClaudio Fontana break; 2106*1b248f14SClaudio Fontana default: 2107*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2108*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2109*1b248f14SClaudio Fontana break; 2110*1b248f14SClaudio Fontana } 2111*1b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST0)) { 2112*1b248f14SClaudio Fontana if (floatx80_is_zero(ST1)) { 2113*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2114*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2115*1b248f14SClaudio Fontana } else if (arg1_sign) { 2116*1b248f14SClaudio Fontana ST1 = floatx80_chs(ST0); 2117*1b248f14SClaudio Fontana } else { 2118*1b248f14SClaudio Fontana ST1 = ST0; 2119*1b248f14SClaudio Fontana } 2120*1b248f14SClaudio Fontana } else if (floatx80_is_zero(ST0)) { 2121*1b248f14SClaudio Fontana if (floatx80_is_zero(ST1)) { 2122*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2123*1b248f14SClaudio Fontana ST1 = floatx80_default_nan(&env->fp_status); 2124*1b248f14SClaudio Fontana } else { 2125*1b248f14SClaudio Fontana /* Result is infinity with opposite sign to ST1. */ 2126*1b248f14SClaudio Fontana float_raise(float_flag_divbyzero, &env->fp_status); 2127*1b248f14SClaudio Fontana ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2128*1b248f14SClaudio Fontana 0x8000000000000000ULL); 2129*1b248f14SClaudio Fontana } 2130*1b248f14SClaudio Fontana } else if (floatx80_is_zero(ST1)) { 2131*1b248f14SClaudio Fontana if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2132*1b248f14SClaudio Fontana ST1 = floatx80_chs(ST1); 2133*1b248f14SClaudio Fontana } 2134*1b248f14SClaudio Fontana /* Otherwise, ST1 is already the correct result. 
*/ 2135*1b248f14SClaudio Fontana } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2136*1b248f14SClaudio Fontana if (arg1_sign) { 2137*1b248f14SClaudio Fontana ST1 = floatx80_chs(floatx80_zero); 2138*1b248f14SClaudio Fontana } else { 2139*1b248f14SClaudio Fontana ST1 = floatx80_zero; 2140*1b248f14SClaudio Fontana } 2141*1b248f14SClaudio Fontana } else { 2142*1b248f14SClaudio Fontana int32_t int_exp; 2143*1b248f14SClaudio Fontana floatx80 arg0_m1; 2144*1b248f14SClaudio Fontana FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2145*1b248f14SClaudio Fontana signed char save_prec = env->fp_status.floatx80_rounding_precision; 2146*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = float_round_nearest_even; 2147*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = 80; 2148*1b248f14SClaudio Fontana 2149*1b248f14SClaudio Fontana if (arg0_exp == 0) { 2150*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2151*1b248f14SClaudio Fontana } 2152*1b248f14SClaudio Fontana if (arg1_exp == 0) { 2153*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2154*1b248f14SClaudio Fontana } 2155*1b248f14SClaudio Fontana int_exp = arg0_exp - 0x3fff; 2156*1b248f14SClaudio Fontana if (arg0_sig > 0xb504f333f9de6484ULL) { 2157*1b248f14SClaudio Fontana ++int_exp; 2158*1b248f14SClaudio Fontana } 2159*1b248f14SClaudio Fontana arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2160*1b248f14SClaudio Fontana &env->fp_status), 2161*1b248f14SClaudio Fontana floatx80_one, &env->fp_status); 2162*1b248f14SClaudio Fontana if (floatx80_is_zero(arg0_m1)) { 2163*1b248f14SClaudio Fontana /* Exact power of 2; multiply by ST1. 
*/ 2164*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 2165*1b248f14SClaudio Fontana ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2166*1b248f14SClaudio Fontana ST1, &env->fp_status); 2167*1b248f14SClaudio Fontana } else { 2168*1b248f14SClaudio Fontana bool asign = extractFloatx80Sign(arg0_m1); 2169*1b248f14SClaudio Fontana int32_t aexp; 2170*1b248f14SClaudio Fontana uint64_t asig0, asig1, asig2; 2171*1b248f14SClaudio Fontana helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2172*1b248f14SClaudio Fontana if (int_exp != 0) { 2173*1b248f14SClaudio Fontana bool isign = (int_exp < 0); 2174*1b248f14SClaudio Fontana int32_t iexp; 2175*1b248f14SClaudio Fontana uint64_t isig; 2176*1b248f14SClaudio Fontana int shift; 2177*1b248f14SClaudio Fontana int_exp = isign ? -int_exp : int_exp; 2178*1b248f14SClaudio Fontana shift = clz32(int_exp) + 32; 2179*1b248f14SClaudio Fontana isig = int_exp; 2180*1b248f14SClaudio Fontana isig <<= shift; 2181*1b248f14SClaudio Fontana iexp = 0x403e - shift; 2182*1b248f14SClaudio Fontana shift128RightJamming(asig0, asig1, iexp - aexp, 2183*1b248f14SClaudio Fontana &asig0, &asig1); 2184*1b248f14SClaudio Fontana if (asign == isign) { 2185*1b248f14SClaudio Fontana add128(isig, 0, asig0, asig1, &asig0, &asig1); 2186*1b248f14SClaudio Fontana } else { 2187*1b248f14SClaudio Fontana sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2188*1b248f14SClaudio Fontana } 2189*1b248f14SClaudio Fontana aexp = iexp; 2190*1b248f14SClaudio Fontana asign = isign; 2191*1b248f14SClaudio Fontana } 2192*1b248f14SClaudio Fontana /* 2193*1b248f14SClaudio Fontana * Multiply by the second argument to compute the required 2194*1b248f14SClaudio Fontana * result. 
2195*1b248f14SClaudio Fontana */ 2196*1b248f14SClaudio Fontana if (arg1_exp == 0) { 2197*1b248f14SClaudio Fontana normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2198*1b248f14SClaudio Fontana } 2199*1b248f14SClaudio Fontana mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2200*1b248f14SClaudio Fontana aexp += arg1_exp - 0x3ffe; 2201*1b248f14SClaudio Fontana /* This result is inexact. */ 2202*1b248f14SClaudio Fontana asig1 |= 1; 2203*1b248f14SClaudio Fontana env->fp_status.float_rounding_mode = save_mode; 2204*1b248f14SClaudio Fontana ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp, 2205*1b248f14SClaudio Fontana asig0, asig1, &env->fp_status); 2206*1b248f14SClaudio Fontana } 2207*1b248f14SClaudio Fontana 2208*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save_prec; 2209*1b248f14SClaudio Fontana } 2210*1b248f14SClaudio Fontana fpop(env); 2211*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2212*1b248f14SClaudio Fontana } 2213*1b248f14SClaudio Fontana 2214*1b248f14SClaudio Fontana void helper_fsqrt(CPUX86State *env) 2215*1b248f14SClaudio Fontana { 2216*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2217*1b248f14SClaudio Fontana if (floatx80_is_neg(ST0)) { 2218*1b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2219*1b248f14SClaudio Fontana env->fpus |= 0x400; 2220*1b248f14SClaudio Fontana } 2221*1b248f14SClaudio Fontana ST0 = floatx80_sqrt(ST0, &env->fp_status); 2222*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2223*1b248f14SClaudio Fontana } 2224*1b248f14SClaudio Fontana 2225*1b248f14SClaudio Fontana void helper_fsincos(CPUX86State *env) 2226*1b248f14SClaudio Fontana { 2227*1b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 2228*1b248f14SClaudio Fontana 2229*1b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2230*1b248f14SClaudio Fontana env->fpus |= 0x400; 
2231*1b248f14SClaudio Fontana } else { 2232*1b248f14SClaudio Fontana ST0 = double_to_floatx80(env, sin(fptemp)); 2233*1b248f14SClaudio Fontana fpush(env); 2234*1b248f14SClaudio Fontana ST0 = double_to_floatx80(env, cos(fptemp)); 2235*1b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 2236*1b248f14SClaudio Fontana /* the above code is for |arg| < 2**63 only */ 2237*1b248f14SClaudio Fontana } 2238*1b248f14SClaudio Fontana } 2239*1b248f14SClaudio Fontana 2240*1b248f14SClaudio Fontana void helper_frndint(CPUX86State *env) 2241*1b248f14SClaudio Fontana { 2242*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2243*1b248f14SClaudio Fontana ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2244*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 2245*1b248f14SClaudio Fontana } 2246*1b248f14SClaudio Fontana 2247*1b248f14SClaudio Fontana void helper_fscale(CPUX86State *env) 2248*1b248f14SClaudio Fontana { 2249*1b248f14SClaudio Fontana uint8_t old_flags = save_exception_flags(env); 2250*1b248f14SClaudio Fontana if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2251*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2252*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 2253*1b248f14SClaudio Fontana } else if (floatx80_is_any_nan(ST1)) { 2254*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2255*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2256*1b248f14SClaudio Fontana } 2257*1b248f14SClaudio Fontana ST0 = ST1; 2258*1b248f14SClaudio Fontana if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2259*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2260*1b248f14SClaudio Fontana ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2261*1b248f14SClaudio Fontana } 2262*1b248f14SClaudio Fontana } else if (floatx80_is_infinity(ST1) && 2263*1b248f14SClaudio Fontana 
!floatx80_invalid_encoding(ST0) && 2264*1b248f14SClaudio Fontana !floatx80_is_any_nan(ST0)) { 2265*1b248f14SClaudio Fontana if (floatx80_is_neg(ST1)) { 2266*1b248f14SClaudio Fontana if (floatx80_is_infinity(ST0)) { 2267*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2268*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 2269*1b248f14SClaudio Fontana } else { 2270*1b248f14SClaudio Fontana ST0 = (floatx80_is_neg(ST0) ? 2271*1b248f14SClaudio Fontana floatx80_chs(floatx80_zero) : 2272*1b248f14SClaudio Fontana floatx80_zero); 2273*1b248f14SClaudio Fontana } 2274*1b248f14SClaudio Fontana } else { 2275*1b248f14SClaudio Fontana if (floatx80_is_zero(ST0)) { 2276*1b248f14SClaudio Fontana float_raise(float_flag_invalid, &env->fp_status); 2277*1b248f14SClaudio Fontana ST0 = floatx80_default_nan(&env->fp_status); 2278*1b248f14SClaudio Fontana } else { 2279*1b248f14SClaudio Fontana ST0 = (floatx80_is_neg(ST0) ? 2280*1b248f14SClaudio Fontana floatx80_chs(floatx80_infinity) : 2281*1b248f14SClaudio Fontana floatx80_infinity); 2282*1b248f14SClaudio Fontana } 2283*1b248f14SClaudio Fontana } 2284*1b248f14SClaudio Fontana } else { 2285*1b248f14SClaudio Fontana int n; 2286*1b248f14SClaudio Fontana signed char save = env->fp_status.floatx80_rounding_precision; 2287*1b248f14SClaudio Fontana uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2288*1b248f14SClaudio Fontana set_float_exception_flags(0, &env->fp_status); 2289*1b248f14SClaudio Fontana n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2290*1b248f14SClaudio Fontana set_float_exception_flags(save_flags, &env->fp_status); 2291*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = 80; 2292*1b248f14SClaudio Fontana ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2293*1b248f14SClaudio Fontana env->fp_status.floatx80_rounding_precision = save; 2294*1b248f14SClaudio Fontana } 2295*1b248f14SClaudio Fontana merge_exception_flags(env, old_flags); 
2296*1b248f14SClaudio Fontana } 2297*1b248f14SClaudio Fontana 2298*1b248f14SClaudio Fontana void helper_fsin(CPUX86State *env) 2299*1b248f14SClaudio Fontana { 2300*1b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 2301*1b248f14SClaudio Fontana 2302*1b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2303*1b248f14SClaudio Fontana env->fpus |= 0x400; 2304*1b248f14SClaudio Fontana } else { 2305*1b248f14SClaudio Fontana ST0 = double_to_floatx80(env, sin(fptemp)); 2306*1b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 2307*1b248f14SClaudio Fontana /* the above code is for |arg| < 2**53 only */ 2308*1b248f14SClaudio Fontana } 2309*1b248f14SClaudio Fontana } 2310*1b248f14SClaudio Fontana 2311*1b248f14SClaudio Fontana void helper_fcos(CPUX86State *env) 2312*1b248f14SClaudio Fontana { 2313*1b248f14SClaudio Fontana double fptemp = floatx80_to_double(env, ST0); 2314*1b248f14SClaudio Fontana 2315*1b248f14SClaudio Fontana if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2316*1b248f14SClaudio Fontana env->fpus |= 0x400; 2317*1b248f14SClaudio Fontana } else { 2318*1b248f14SClaudio Fontana ST0 = double_to_floatx80(env, cos(fptemp)); 2319*1b248f14SClaudio Fontana env->fpus &= ~0x400; /* C2 <-- 0 */ 2320*1b248f14SClaudio Fontana /* the above code is for |arg| < 2**63 only */ 2321*1b248f14SClaudio Fontana } 2322*1b248f14SClaudio Fontana } 2323*1b248f14SClaudio Fontana 2324*1b248f14SClaudio Fontana void helper_fxam_ST0(CPUX86State *env) 2325*1b248f14SClaudio Fontana { 2326*1b248f14SClaudio Fontana CPU_LDoubleU temp; 2327*1b248f14SClaudio Fontana int expdif; 2328*1b248f14SClaudio Fontana 2329*1b248f14SClaudio Fontana temp.d = ST0; 2330*1b248f14SClaudio Fontana 2331*1b248f14SClaudio Fontana env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2332*1b248f14SClaudio Fontana if (SIGND(temp)) { 2333*1b248f14SClaudio Fontana env->fpus |= 0x200; /* C1 <-- 1 */ 2334*1b248f14SClaudio Fontana } 2335*1b248f14SClaudio Fontana 2336*1b248f14SClaudio 
Fontana if (env->fptags[env->fpstt]) { 2337*1b248f14SClaudio Fontana env->fpus |= 0x4100; /* Empty */ 2338*1b248f14SClaudio Fontana return; 2339*1b248f14SClaudio Fontana } 2340*1b248f14SClaudio Fontana 2341*1b248f14SClaudio Fontana expdif = EXPD(temp); 2342*1b248f14SClaudio Fontana if (expdif == MAXEXPD) { 2343*1b248f14SClaudio Fontana if (MANTD(temp) == 0x8000000000000000ULL) { 2344*1b248f14SClaudio Fontana env->fpus |= 0x500; /* Infinity */ 2345*1b248f14SClaudio Fontana } else if (MANTD(temp) & 0x8000000000000000ULL) { 2346*1b248f14SClaudio Fontana env->fpus |= 0x100; /* NaN */ 2347*1b248f14SClaudio Fontana } 2348*1b248f14SClaudio Fontana } else if (expdif == 0) { 2349*1b248f14SClaudio Fontana if (MANTD(temp) == 0) { 2350*1b248f14SClaudio Fontana env->fpus |= 0x4000; /* Zero */ 2351*1b248f14SClaudio Fontana } else { 2352*1b248f14SClaudio Fontana env->fpus |= 0x4400; /* Denormal */ 2353*1b248f14SClaudio Fontana } 2354*1b248f14SClaudio Fontana } else if (MANTD(temp) & 0x8000000000000000ULL) { 2355*1b248f14SClaudio Fontana env->fpus |= 0x400; 2356*1b248f14SClaudio Fontana } 2357*1b248f14SClaudio Fontana } 2358*1b248f14SClaudio Fontana 2359*1b248f14SClaudio Fontana static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2360*1b248f14SClaudio Fontana uintptr_t retaddr) 2361*1b248f14SClaudio Fontana { 2362*1b248f14SClaudio Fontana int fpus, fptag, exp, i; 2363*1b248f14SClaudio Fontana uint64_t mant; 2364*1b248f14SClaudio Fontana CPU_LDoubleU tmp; 2365*1b248f14SClaudio Fontana 2366*1b248f14SClaudio Fontana fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2367*1b248f14SClaudio Fontana fptag = 0; 2368*1b248f14SClaudio Fontana for (i = 7; i >= 0; i--) { 2369*1b248f14SClaudio Fontana fptag <<= 2; 2370*1b248f14SClaudio Fontana if (env->fptags[i]) { 2371*1b248f14SClaudio Fontana fptag |= 3; 2372*1b248f14SClaudio Fontana } else { 2373*1b248f14SClaudio Fontana tmp.d = env->fpregs[i].d; 2374*1b248f14SClaudio Fontana exp = EXPD(tmp); 2375*1b248f14SClaudio 
Fontana mant = MANTD(tmp); 2376*1b248f14SClaudio Fontana if (exp == 0 && mant == 0) { 2377*1b248f14SClaudio Fontana /* zero */ 2378*1b248f14SClaudio Fontana fptag |= 1; 2379*1b248f14SClaudio Fontana } else if (exp == 0 || exp == MAXEXPD 2380*1b248f14SClaudio Fontana || (mant & (1LL << 63)) == 0) { 2381*1b248f14SClaudio Fontana /* NaNs, infinity, denormal */ 2382*1b248f14SClaudio Fontana fptag |= 2; 2383*1b248f14SClaudio Fontana } 2384*1b248f14SClaudio Fontana } 2385*1b248f14SClaudio Fontana } 2386*1b248f14SClaudio Fontana if (data32) { 2387*1b248f14SClaudio Fontana /* 32 bit */ 2388*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2389*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2390*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2391*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */ 2392*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */ 2393*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */ 2394*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */ 2395*1b248f14SClaudio Fontana } else { 2396*1b248f14SClaudio Fontana /* 16 bit */ 2397*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2398*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2399*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2400*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 6, 0, retaddr); 2401*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 8, 0, retaddr); 2402*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 10, 0, retaddr); 2403*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + 12, 0, retaddr); 2404*1b248f14SClaudio Fontana } 2405*1b248f14SClaudio Fontana } 2406*1b248f14SClaudio Fontana 2407*1b248f14SClaudio Fontana void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2408*1b248f14SClaudio Fontana { 
2409*1b248f14SClaudio Fontana do_fstenv(env, ptr, data32, GETPC()); 2410*1b248f14SClaudio Fontana } 2411*1b248f14SClaudio Fontana 2412*1b248f14SClaudio Fontana static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2413*1b248f14SClaudio Fontana { 2414*1b248f14SClaudio Fontana env->fpstt = (fpus >> 11) & 7; 2415*1b248f14SClaudio Fontana env->fpus = fpus & ~0x3800 & ~FPUS_B; 2416*1b248f14SClaudio Fontana env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2417*1b248f14SClaudio Fontana #if !defined(CONFIG_USER_ONLY) 2418*1b248f14SClaudio Fontana if (!(env->fpus & FPUS_SE)) { 2419*1b248f14SClaudio Fontana /* 2420*1b248f14SClaudio Fontana * Here the processor deasserts FERR#; in response, the chipset deasserts 2421*1b248f14SClaudio Fontana * IGNNE#. 2422*1b248f14SClaudio Fontana */ 2423*1b248f14SClaudio Fontana cpu_clear_ignne(); 2424*1b248f14SClaudio Fontana } 2425*1b248f14SClaudio Fontana #endif 2426*1b248f14SClaudio Fontana } 2427*1b248f14SClaudio Fontana 2428*1b248f14SClaudio Fontana static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2429*1b248f14SClaudio Fontana uintptr_t retaddr) 2430*1b248f14SClaudio Fontana { 2431*1b248f14SClaudio Fontana int i, fpus, fptag; 2432*1b248f14SClaudio Fontana 2433*1b248f14SClaudio Fontana if (data32) { 2434*1b248f14SClaudio Fontana cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2435*1b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2436*1b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2437*1b248f14SClaudio Fontana } else { 2438*1b248f14SClaudio Fontana cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2439*1b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2440*1b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2441*1b248f14SClaudio Fontana } 2442*1b248f14SClaudio Fontana cpu_set_fpus(env, fpus); 2443*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2444*1b248f14SClaudio Fontana env->fptags[i] = ((fptag & 3) 
== 3); 2445*1b248f14SClaudio Fontana fptag >>= 2; 2446*1b248f14SClaudio Fontana } 2447*1b248f14SClaudio Fontana } 2448*1b248f14SClaudio Fontana 2449*1b248f14SClaudio Fontana void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2450*1b248f14SClaudio Fontana { 2451*1b248f14SClaudio Fontana do_fldenv(env, ptr, data32, GETPC()); 2452*1b248f14SClaudio Fontana } 2453*1b248f14SClaudio Fontana 2454*1b248f14SClaudio Fontana void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2455*1b248f14SClaudio Fontana { 2456*1b248f14SClaudio Fontana floatx80 tmp; 2457*1b248f14SClaudio Fontana int i; 2458*1b248f14SClaudio Fontana 2459*1b248f14SClaudio Fontana do_fstenv(env, ptr, data32, GETPC()); 2460*1b248f14SClaudio Fontana 2461*1b248f14SClaudio Fontana ptr += (14 << data32); 2462*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2463*1b248f14SClaudio Fontana tmp = ST(i); 2464*1b248f14SClaudio Fontana helper_fstt(env, tmp, ptr, GETPC()); 2465*1b248f14SClaudio Fontana ptr += 10; 2466*1b248f14SClaudio Fontana } 2467*1b248f14SClaudio Fontana 2468*1b248f14SClaudio Fontana /* fninit */ 2469*1b248f14SClaudio Fontana env->fpus = 0; 2470*1b248f14SClaudio Fontana env->fpstt = 0; 2471*1b248f14SClaudio Fontana cpu_set_fpuc(env, 0x37f); 2472*1b248f14SClaudio Fontana env->fptags[0] = 1; 2473*1b248f14SClaudio Fontana env->fptags[1] = 1; 2474*1b248f14SClaudio Fontana env->fptags[2] = 1; 2475*1b248f14SClaudio Fontana env->fptags[3] = 1; 2476*1b248f14SClaudio Fontana env->fptags[4] = 1; 2477*1b248f14SClaudio Fontana env->fptags[5] = 1; 2478*1b248f14SClaudio Fontana env->fptags[6] = 1; 2479*1b248f14SClaudio Fontana env->fptags[7] = 1; 2480*1b248f14SClaudio Fontana } 2481*1b248f14SClaudio Fontana 2482*1b248f14SClaudio Fontana void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2483*1b248f14SClaudio Fontana { 2484*1b248f14SClaudio Fontana floatx80 tmp; 2485*1b248f14SClaudio Fontana int i; 2486*1b248f14SClaudio Fontana 2487*1b248f14SClaudio Fontana do_fldenv(env, 
ptr, data32, GETPC()); 2488*1b248f14SClaudio Fontana ptr += (14 << data32); 2489*1b248f14SClaudio Fontana 2490*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2491*1b248f14SClaudio Fontana tmp = helper_fldt(env, ptr, GETPC()); 2492*1b248f14SClaudio Fontana ST(i) = tmp; 2493*1b248f14SClaudio Fontana ptr += 10; 2494*1b248f14SClaudio Fontana } 2495*1b248f14SClaudio Fontana } 2496*1b248f14SClaudio Fontana 2497*1b248f14SClaudio Fontana #if defined(CONFIG_USER_ONLY) 2498*1b248f14SClaudio Fontana void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2499*1b248f14SClaudio Fontana { 2500*1b248f14SClaudio Fontana helper_fsave(env, ptr, data32); 2501*1b248f14SClaudio Fontana } 2502*1b248f14SClaudio Fontana 2503*1b248f14SClaudio Fontana void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2504*1b248f14SClaudio Fontana { 2505*1b248f14SClaudio Fontana helper_frstor(env, ptr, data32); 2506*1b248f14SClaudio Fontana } 2507*1b248f14SClaudio Fontana #endif 2508*1b248f14SClaudio Fontana 2509*1b248f14SClaudio Fontana #define XO(X) offsetof(X86XSaveArea, X) 2510*1b248f14SClaudio Fontana 2511*1b248f14SClaudio Fontana static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2512*1b248f14SClaudio Fontana { 2513*1b248f14SClaudio Fontana int fpus, fptag, i; 2514*1b248f14SClaudio Fontana target_ulong addr; 2515*1b248f14SClaudio Fontana 2516*1b248f14SClaudio Fontana fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2517*1b248f14SClaudio Fontana fptag = 0; 2518*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2519*1b248f14SClaudio Fontana fptag |= (env->fptags[i] << i); 2520*1b248f14SClaudio Fontana } 2521*1b248f14SClaudio Fontana 2522*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2523*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2524*1b248f14SClaudio Fontana cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2525*1b248f14SClaudio Fontana 
2526*1b248f14SClaudio Fontana /* In 32-bit mode this is eip, sel, dp, sel. 2527*1b248f14SClaudio Fontana In 64-bit mode this is rip, rdp. 2528*1b248f14SClaudio Fontana But in either case we don't write actual data, just zeros. */ 2529*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2530*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2531*1b248f14SClaudio Fontana 2532*1b248f14SClaudio Fontana addr = ptr + XO(legacy.fpregs); 2533*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2534*1b248f14SClaudio Fontana floatx80 tmp = ST(i); 2535*1b248f14SClaudio Fontana helper_fstt(env, tmp, addr, ra); 2536*1b248f14SClaudio Fontana addr += 16; 2537*1b248f14SClaudio Fontana } 2538*1b248f14SClaudio Fontana } 2539*1b248f14SClaudio Fontana 2540*1b248f14SClaudio Fontana static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2541*1b248f14SClaudio Fontana { 2542*1b248f14SClaudio Fontana update_mxcsr_from_sse_status(env); 2543*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2544*1b248f14SClaudio Fontana cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2545*1b248f14SClaudio Fontana } 2546*1b248f14SClaudio Fontana 2547*1b248f14SClaudio Fontana static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2548*1b248f14SClaudio Fontana { 2549*1b248f14SClaudio Fontana int i, nb_xmm_regs; 2550*1b248f14SClaudio Fontana target_ulong addr; 2551*1b248f14SClaudio Fontana 2552*1b248f14SClaudio Fontana if (env->hflags & HF_CS64_MASK) { 2553*1b248f14SClaudio Fontana nb_xmm_regs = 16; 2554*1b248f14SClaudio Fontana } else { 2555*1b248f14SClaudio Fontana nb_xmm_regs = 8; 2556*1b248f14SClaudio Fontana } 2557*1b248f14SClaudio Fontana 2558*1b248f14SClaudio Fontana addr = ptr + XO(legacy.xmm_regs); 2559*1b248f14SClaudio Fontana for (i = 0; i < nb_xmm_regs; i++) { 2560*1b248f14SClaudio Fontana cpu_stq_data_ra(env, addr, 
env->xmm_regs[i].ZMM_Q(0), ra); 2561*1b248f14SClaudio Fontana cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2562*1b248f14SClaudio Fontana addr += 16; 2563*1b248f14SClaudio Fontana } 2564*1b248f14SClaudio Fontana } 2565*1b248f14SClaudio Fontana 2566*1b248f14SClaudio Fontana static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2567*1b248f14SClaudio Fontana { 2568*1b248f14SClaudio Fontana target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2569*1b248f14SClaudio Fontana int i; 2570*1b248f14SClaudio Fontana 2571*1b248f14SClaudio Fontana for (i = 0; i < 4; i++, addr += 16) { 2572*1b248f14SClaudio Fontana cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2573*1b248f14SClaudio Fontana cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2574*1b248f14SClaudio Fontana } 2575*1b248f14SClaudio Fontana } 2576*1b248f14SClaudio Fontana 2577*1b248f14SClaudio Fontana static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2578*1b248f14SClaudio Fontana { 2579*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2580*1b248f14SClaudio Fontana env->bndcs_regs.cfgu, ra); 2581*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2582*1b248f14SClaudio Fontana env->bndcs_regs.sts, ra); 2583*1b248f14SClaudio Fontana } 2584*1b248f14SClaudio Fontana 2585*1b248f14SClaudio Fontana static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2586*1b248f14SClaudio Fontana { 2587*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr, env->pkru, ra); 2588*1b248f14SClaudio Fontana } 2589*1b248f14SClaudio Fontana 2590*1b248f14SClaudio Fontana void helper_fxsave(CPUX86State *env, target_ulong ptr) 2591*1b248f14SClaudio Fontana { 2592*1b248f14SClaudio Fontana uintptr_t ra = GETPC(); 2593*1b248f14SClaudio Fontana 2594*1b248f14SClaudio Fontana /* The operand must be 16 byte aligned */ 2595*1b248f14SClaudio Fontana if (ptr & 0xf) { 
2596*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2597*1b248f14SClaudio Fontana } 2598*1b248f14SClaudio Fontana 2599*1b248f14SClaudio Fontana do_xsave_fpu(env, ptr, ra); 2600*1b248f14SClaudio Fontana 2601*1b248f14SClaudio Fontana if (env->cr[4] & CR4_OSFXSR_MASK) { 2602*1b248f14SClaudio Fontana do_xsave_mxcsr(env, ptr, ra); 2603*1b248f14SClaudio Fontana /* Fast FXSAVE leaves out the XMM registers */ 2604*1b248f14SClaudio Fontana if (!(env->efer & MSR_EFER_FFXSR) 2605*1b248f14SClaudio Fontana || (env->hflags & HF_CPL_MASK) 2606*1b248f14SClaudio Fontana || !(env->hflags & HF_LMA_MASK)) { 2607*1b248f14SClaudio Fontana do_xsave_sse(env, ptr, ra); 2608*1b248f14SClaudio Fontana } 2609*1b248f14SClaudio Fontana } 2610*1b248f14SClaudio Fontana } 2611*1b248f14SClaudio Fontana 2612*1b248f14SClaudio Fontana static uint64_t get_xinuse(CPUX86State *env) 2613*1b248f14SClaudio Fontana { 2614*1b248f14SClaudio Fontana uint64_t inuse = -1; 2615*1b248f14SClaudio Fontana 2616*1b248f14SClaudio Fontana /* For the most part, we don't track XINUSE. We could calculate it 2617*1b248f14SClaudio Fontana here for all components, but it's probably less work to simply 2618*1b248f14SClaudio Fontana indicate in use. That said, the state of BNDREGS is important 2619*1b248f14SClaudio Fontana enough to track in HFLAGS, so we might as well use that here. */ 2620*1b248f14SClaudio Fontana if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2621*1b248f14SClaudio Fontana inuse &= ~XSTATE_BNDREGS_MASK; 2622*1b248f14SClaudio Fontana } 2623*1b248f14SClaudio Fontana return inuse; 2624*1b248f14SClaudio Fontana } 2625*1b248f14SClaudio Fontana 2626*1b248f14SClaudio Fontana static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2627*1b248f14SClaudio Fontana uint64_t inuse, uint64_t opt, uintptr_t ra) 2628*1b248f14SClaudio Fontana { 2629*1b248f14SClaudio Fontana uint64_t old_bv, new_bv; 2630*1b248f14SClaudio Fontana 2631*1b248f14SClaudio Fontana /* The OS must have enabled XSAVE. 
*/ 2632*1b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2633*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, ra); 2634*1b248f14SClaudio Fontana } 2635*1b248f14SClaudio Fontana 2636*1b248f14SClaudio Fontana /* The operand must be 64 byte aligned. */ 2637*1b248f14SClaudio Fontana if (ptr & 63) { 2638*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2639*1b248f14SClaudio Fontana } 2640*1b248f14SClaudio Fontana 2641*1b248f14SClaudio Fontana /* Never save anything not enabled by XCR0. */ 2642*1b248f14SClaudio Fontana rfbm &= env->xcr0; 2643*1b248f14SClaudio Fontana opt &= rfbm; 2644*1b248f14SClaudio Fontana 2645*1b248f14SClaudio Fontana if (opt & XSTATE_FP_MASK) { 2646*1b248f14SClaudio Fontana do_xsave_fpu(env, ptr, ra); 2647*1b248f14SClaudio Fontana } 2648*1b248f14SClaudio Fontana if (rfbm & XSTATE_SSE_MASK) { 2649*1b248f14SClaudio Fontana /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2650*1b248f14SClaudio Fontana do_xsave_mxcsr(env, ptr, ra); 2651*1b248f14SClaudio Fontana } 2652*1b248f14SClaudio Fontana if (opt & XSTATE_SSE_MASK) { 2653*1b248f14SClaudio Fontana do_xsave_sse(env, ptr, ra); 2654*1b248f14SClaudio Fontana } 2655*1b248f14SClaudio Fontana if (opt & XSTATE_BNDREGS_MASK) { 2656*1b248f14SClaudio Fontana do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2657*1b248f14SClaudio Fontana } 2658*1b248f14SClaudio Fontana if (opt & XSTATE_BNDCSR_MASK) { 2659*1b248f14SClaudio Fontana do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2660*1b248f14SClaudio Fontana } 2661*1b248f14SClaudio Fontana if (opt & XSTATE_PKRU_MASK) { 2662*1b248f14SClaudio Fontana do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2663*1b248f14SClaudio Fontana } 2664*1b248f14SClaudio Fontana 2665*1b248f14SClaudio Fontana /* Update the XSTATE_BV field. 
*/ 2666*1b248f14SClaudio Fontana old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2667*1b248f14SClaudio Fontana new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2668*1b248f14SClaudio Fontana cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2669*1b248f14SClaudio Fontana } 2670*1b248f14SClaudio Fontana 2671*1b248f14SClaudio Fontana void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2672*1b248f14SClaudio Fontana { 2673*1b248f14SClaudio Fontana do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2674*1b248f14SClaudio Fontana } 2675*1b248f14SClaudio Fontana 2676*1b248f14SClaudio Fontana void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2677*1b248f14SClaudio Fontana { 2678*1b248f14SClaudio Fontana uint64_t inuse = get_xinuse(env); 2679*1b248f14SClaudio Fontana do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2680*1b248f14SClaudio Fontana } 2681*1b248f14SClaudio Fontana 2682*1b248f14SClaudio Fontana static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2683*1b248f14SClaudio Fontana { 2684*1b248f14SClaudio Fontana int i, fpuc, fpus, fptag; 2685*1b248f14SClaudio Fontana target_ulong addr; 2686*1b248f14SClaudio Fontana 2687*1b248f14SClaudio Fontana fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2688*1b248f14SClaudio Fontana fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2689*1b248f14SClaudio Fontana fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2690*1b248f14SClaudio Fontana cpu_set_fpuc(env, fpuc); 2691*1b248f14SClaudio Fontana cpu_set_fpus(env, fpus); 2692*1b248f14SClaudio Fontana fptag ^= 0xff; 2693*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2694*1b248f14SClaudio Fontana env->fptags[i] = ((fptag >> i) & 1); 2695*1b248f14SClaudio Fontana } 2696*1b248f14SClaudio Fontana 2697*1b248f14SClaudio Fontana addr = ptr + XO(legacy.fpregs); 2698*1b248f14SClaudio Fontana for (i = 0; i < 8; i++) { 2699*1b248f14SClaudio Fontana floatx80 tmp = helper_fldt(env, addr, 
ra); 2700*1b248f14SClaudio Fontana ST(i) = tmp; 2701*1b248f14SClaudio Fontana addr += 16; 2702*1b248f14SClaudio Fontana } 2703*1b248f14SClaudio Fontana } 2704*1b248f14SClaudio Fontana 2705*1b248f14SClaudio Fontana static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2706*1b248f14SClaudio Fontana { 2707*1b248f14SClaudio Fontana cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2708*1b248f14SClaudio Fontana } 2709*1b248f14SClaudio Fontana 2710*1b248f14SClaudio Fontana static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2711*1b248f14SClaudio Fontana { 2712*1b248f14SClaudio Fontana int i, nb_xmm_regs; 2713*1b248f14SClaudio Fontana target_ulong addr; 2714*1b248f14SClaudio Fontana 2715*1b248f14SClaudio Fontana if (env->hflags & HF_CS64_MASK) { 2716*1b248f14SClaudio Fontana nb_xmm_regs = 16; 2717*1b248f14SClaudio Fontana } else { 2718*1b248f14SClaudio Fontana nb_xmm_regs = 8; 2719*1b248f14SClaudio Fontana } 2720*1b248f14SClaudio Fontana 2721*1b248f14SClaudio Fontana addr = ptr + XO(legacy.xmm_regs); 2722*1b248f14SClaudio Fontana for (i = 0; i < nb_xmm_regs; i++) { 2723*1b248f14SClaudio Fontana env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2724*1b248f14SClaudio Fontana env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2725*1b248f14SClaudio Fontana addr += 16; 2726*1b248f14SClaudio Fontana } 2727*1b248f14SClaudio Fontana } 2728*1b248f14SClaudio Fontana 2729*1b248f14SClaudio Fontana static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2730*1b248f14SClaudio Fontana { 2731*1b248f14SClaudio Fontana target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2732*1b248f14SClaudio Fontana int i; 2733*1b248f14SClaudio Fontana 2734*1b248f14SClaudio Fontana for (i = 0; i < 4; i++, addr += 16) { 2735*1b248f14SClaudio Fontana env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2736*1b248f14SClaudio Fontana env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, 
ra); 2737*1b248f14SClaudio Fontana } 2738*1b248f14SClaudio Fontana } 2739*1b248f14SClaudio Fontana 2740*1b248f14SClaudio Fontana static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2741*1b248f14SClaudio Fontana { 2742*1b248f14SClaudio Fontana /* FIXME: Extend highest implemented bit of linear address. */ 2743*1b248f14SClaudio Fontana env->bndcs_regs.cfgu 2744*1b248f14SClaudio Fontana = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2745*1b248f14SClaudio Fontana env->bndcs_regs.sts 2746*1b248f14SClaudio Fontana = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2747*1b248f14SClaudio Fontana } 2748*1b248f14SClaudio Fontana 2749*1b248f14SClaudio Fontana static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2750*1b248f14SClaudio Fontana { 2751*1b248f14SClaudio Fontana env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2752*1b248f14SClaudio Fontana } 2753*1b248f14SClaudio Fontana 2754*1b248f14SClaudio Fontana void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2755*1b248f14SClaudio Fontana { 2756*1b248f14SClaudio Fontana uintptr_t ra = GETPC(); 2757*1b248f14SClaudio Fontana 2758*1b248f14SClaudio Fontana /* The operand must be 16 byte aligned */ 2759*1b248f14SClaudio Fontana if (ptr & 0xf) { 2760*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2761*1b248f14SClaudio Fontana } 2762*1b248f14SClaudio Fontana 2763*1b248f14SClaudio Fontana do_xrstor_fpu(env, ptr, ra); 2764*1b248f14SClaudio Fontana 2765*1b248f14SClaudio Fontana if (env->cr[4] & CR4_OSFXSR_MASK) { 2766*1b248f14SClaudio Fontana do_xrstor_mxcsr(env, ptr, ra); 2767*1b248f14SClaudio Fontana /* Fast FXRSTOR leaves out the XMM registers */ 2768*1b248f14SClaudio Fontana if (!(env->efer & MSR_EFER_FFXSR) 2769*1b248f14SClaudio Fontana || (env->hflags & HF_CPL_MASK) 2770*1b248f14SClaudio Fontana || !(env->hflags & HF_LMA_MASK)) { 2771*1b248f14SClaudio Fontana do_xrstor_sse(env, ptr, ra); 2772*1b248f14SClaudio Fontana } 
2773*1b248f14SClaudio Fontana } 2774*1b248f14SClaudio Fontana } 2775*1b248f14SClaudio Fontana 2776*1b248f14SClaudio Fontana #if defined(CONFIG_USER_ONLY) 2777*1b248f14SClaudio Fontana void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2778*1b248f14SClaudio Fontana { 2779*1b248f14SClaudio Fontana helper_fxsave(env, ptr); 2780*1b248f14SClaudio Fontana } 2781*1b248f14SClaudio Fontana 2782*1b248f14SClaudio Fontana void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2783*1b248f14SClaudio Fontana { 2784*1b248f14SClaudio Fontana helper_fxrstor(env, ptr); 2785*1b248f14SClaudio Fontana } 2786*1b248f14SClaudio Fontana #endif 2787*1b248f14SClaudio Fontana 2788*1b248f14SClaudio Fontana void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2789*1b248f14SClaudio Fontana { 2790*1b248f14SClaudio Fontana uintptr_t ra = GETPC(); 2791*1b248f14SClaudio Fontana uint64_t xstate_bv, xcomp_bv, reserve0; 2792*1b248f14SClaudio Fontana 2793*1b248f14SClaudio Fontana rfbm &= env->xcr0; 2794*1b248f14SClaudio Fontana 2795*1b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 2796*1b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2797*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, ra); 2798*1b248f14SClaudio Fontana } 2799*1b248f14SClaudio Fontana 2800*1b248f14SClaudio Fontana /* The operand must be 64 byte aligned. */ 2801*1b248f14SClaudio Fontana if (ptr & 63) { 2802*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2803*1b248f14SClaudio Fontana } 2804*1b248f14SClaudio Fontana 2805*1b248f14SClaudio Fontana xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2806*1b248f14SClaudio Fontana 2807*1b248f14SClaudio Fontana if ((int64_t)xstate_bv < 0) { 2808*1b248f14SClaudio Fontana /* FIXME: Compact form. */ 2809*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2810*1b248f14SClaudio Fontana } 2811*1b248f14SClaudio Fontana 2812*1b248f14SClaudio Fontana /* Standard form. 
*/ 2813*1b248f14SClaudio Fontana 2814*1b248f14SClaudio Fontana /* The XSTATE_BV field must not set bits not present in XCR0. */ 2815*1b248f14SClaudio Fontana if (xstate_bv & ~env->xcr0) { 2816*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2817*1b248f14SClaudio Fontana } 2818*1b248f14SClaudio Fontana 2819*1b248f14SClaudio Fontana /* The XCOMP_BV field must be zero. Note that, as of the April 2016 2820*1b248f14SClaudio Fontana revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2821*1b248f14SClaudio Fontana describes only XCOMP_BV, but the description of the standard form 2822*1b248f14SClaudio Fontana of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2823*1b248f14SClaudio Fontana includes the next 64-bit field. */ 2824*1b248f14SClaudio Fontana xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2825*1b248f14SClaudio Fontana reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2826*1b248f14SClaudio Fontana if (xcomp_bv || reserve0) { 2827*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, ra); 2828*1b248f14SClaudio Fontana } 2829*1b248f14SClaudio Fontana 2830*1b248f14SClaudio Fontana if (rfbm & XSTATE_FP_MASK) { 2831*1b248f14SClaudio Fontana if (xstate_bv & XSTATE_FP_MASK) { 2832*1b248f14SClaudio Fontana do_xrstor_fpu(env, ptr, ra); 2833*1b248f14SClaudio Fontana } else { 2834*1b248f14SClaudio Fontana helper_fninit(env); 2835*1b248f14SClaudio Fontana memset(env->fpregs, 0, sizeof(env->fpregs)); 2836*1b248f14SClaudio Fontana } 2837*1b248f14SClaudio Fontana } 2838*1b248f14SClaudio Fontana if (rfbm & XSTATE_SSE_MASK) { 2839*1b248f14SClaudio Fontana /* Note that the standard form of XRSTOR loads MXCSR from memory 2840*1b248f14SClaudio Fontana whether or not the XSTATE_BV bit is set. 
*/ 2841*1b248f14SClaudio Fontana do_xrstor_mxcsr(env, ptr, ra); 2842*1b248f14SClaudio Fontana if (xstate_bv & XSTATE_SSE_MASK) { 2843*1b248f14SClaudio Fontana do_xrstor_sse(env, ptr, ra); 2844*1b248f14SClaudio Fontana } else { 2845*1b248f14SClaudio Fontana /* ??? When AVX is implemented, we may have to be more 2846*1b248f14SClaudio Fontana selective in the clearing. */ 2847*1b248f14SClaudio Fontana memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); 2848*1b248f14SClaudio Fontana } 2849*1b248f14SClaudio Fontana } 2850*1b248f14SClaudio Fontana if (rfbm & XSTATE_BNDREGS_MASK) { 2851*1b248f14SClaudio Fontana if (xstate_bv & XSTATE_BNDREGS_MASK) { 2852*1b248f14SClaudio Fontana do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2853*1b248f14SClaudio Fontana env->hflags |= HF_MPX_IU_MASK; 2854*1b248f14SClaudio Fontana } else { 2855*1b248f14SClaudio Fontana memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2856*1b248f14SClaudio Fontana env->hflags &= ~HF_MPX_IU_MASK; 2857*1b248f14SClaudio Fontana } 2858*1b248f14SClaudio Fontana } 2859*1b248f14SClaudio Fontana if (rfbm & XSTATE_BNDCSR_MASK) { 2860*1b248f14SClaudio Fontana if (xstate_bv & XSTATE_BNDCSR_MASK) { 2861*1b248f14SClaudio Fontana do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2862*1b248f14SClaudio Fontana } else { 2863*1b248f14SClaudio Fontana memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2864*1b248f14SClaudio Fontana } 2865*1b248f14SClaudio Fontana cpu_sync_bndcs_hflags(env); 2866*1b248f14SClaudio Fontana } 2867*1b248f14SClaudio Fontana if (rfbm & XSTATE_PKRU_MASK) { 2868*1b248f14SClaudio Fontana uint64_t old_pkru = env->pkru; 2869*1b248f14SClaudio Fontana if (xstate_bv & XSTATE_PKRU_MASK) { 2870*1b248f14SClaudio Fontana do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2871*1b248f14SClaudio Fontana } else { 2872*1b248f14SClaudio Fontana env->pkru = 0; 2873*1b248f14SClaudio Fontana } 2874*1b248f14SClaudio Fontana if (env->pkru != old_pkru) { 2875*1b248f14SClaudio Fontana CPUState *cs = env_cpu(env); 
2876*1b248f14SClaudio Fontana tlb_flush(cs); 2877*1b248f14SClaudio Fontana } 2878*1b248f14SClaudio Fontana } 2879*1b248f14SClaudio Fontana } 2880*1b248f14SClaudio Fontana 2881*1b248f14SClaudio Fontana #undef XO 2882*1b248f14SClaudio Fontana 2883*1b248f14SClaudio Fontana uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2884*1b248f14SClaudio Fontana { 2885*1b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 2886*1b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2887*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2888*1b248f14SClaudio Fontana } 2889*1b248f14SClaudio Fontana 2890*1b248f14SClaudio Fontana switch (ecx) { 2891*1b248f14SClaudio Fontana case 0: 2892*1b248f14SClaudio Fontana return env->xcr0; 2893*1b248f14SClaudio Fontana case 1: 2894*1b248f14SClaudio Fontana if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2895*1b248f14SClaudio Fontana return env->xcr0 & get_xinuse(env); 2896*1b248f14SClaudio Fontana } 2897*1b248f14SClaudio Fontana break; 2898*1b248f14SClaudio Fontana } 2899*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2900*1b248f14SClaudio Fontana } 2901*1b248f14SClaudio Fontana 2902*1b248f14SClaudio Fontana void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 2903*1b248f14SClaudio Fontana { 2904*1b248f14SClaudio Fontana uint32_t dummy, ena_lo, ena_hi; 2905*1b248f14SClaudio Fontana uint64_t ena; 2906*1b248f14SClaudio Fontana 2907*1b248f14SClaudio Fontana /* The OS must have enabled XSAVE. */ 2908*1b248f14SClaudio Fontana if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2909*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2910*1b248f14SClaudio Fontana } 2911*1b248f14SClaudio Fontana 2912*1b248f14SClaudio Fontana /* Only XCR0 is defined at present; the FPU may not be disabled. 
*/ 2913*1b248f14SClaudio Fontana if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 2914*1b248f14SClaudio Fontana goto do_gpf; 2915*1b248f14SClaudio Fontana } 2916*1b248f14SClaudio Fontana 2917*1b248f14SClaudio Fontana /* Disallow enabling unimplemented features. */ 2918*1b248f14SClaudio Fontana cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 2919*1b248f14SClaudio Fontana ena = ((uint64_t)ena_hi << 32) | ena_lo; 2920*1b248f14SClaudio Fontana if (mask & ~ena) { 2921*1b248f14SClaudio Fontana goto do_gpf; 2922*1b248f14SClaudio Fontana } 2923*1b248f14SClaudio Fontana 2924*1b248f14SClaudio Fontana /* Disallow enabling only half of MPX. */ 2925*1b248f14SClaudio Fontana if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 2926*1b248f14SClaudio Fontana & XSTATE_BNDCSR_MASK) { 2927*1b248f14SClaudio Fontana goto do_gpf; 2928*1b248f14SClaudio Fontana } 2929*1b248f14SClaudio Fontana 2930*1b248f14SClaudio Fontana env->xcr0 = mask; 2931*1b248f14SClaudio Fontana cpu_sync_bndcs_hflags(env); 2932*1b248f14SClaudio Fontana return; 2933*1b248f14SClaudio Fontana 2934*1b248f14SClaudio Fontana do_gpf: 2935*1b248f14SClaudio Fontana raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2936*1b248f14SClaudio Fontana } 2937*1b248f14SClaudio Fontana 2938*1b248f14SClaudio Fontana /* MMX/SSE */ 2939*1b248f14SClaudio Fontana /* XXX: optimize by storing fptt and fptags in the static cpu state */ 2940*1b248f14SClaudio Fontana 2941*1b248f14SClaudio Fontana #define SSE_DAZ 0x0040 2942*1b248f14SClaudio Fontana #define SSE_RC_MASK 0x6000 2943*1b248f14SClaudio Fontana #define SSE_RC_NEAR 0x0000 2944*1b248f14SClaudio Fontana #define SSE_RC_DOWN 0x2000 2945*1b248f14SClaudio Fontana #define SSE_RC_UP 0x4000 2946*1b248f14SClaudio Fontana #define SSE_RC_CHOP 0x6000 2947*1b248f14SClaudio Fontana #define SSE_FZ 0x8000 2948*1b248f14SClaudio Fontana 2949*1b248f14SClaudio Fontana void update_mxcsr_status(CPUX86State *env) 2950*1b248f14SClaudio Fontana { 2951*1b248f14SClaudio Fontana uint32_t 
mxcsr = env->mxcsr; 2952*1b248f14SClaudio Fontana int rnd_type; 2953*1b248f14SClaudio Fontana 2954*1b248f14SClaudio Fontana /* set rounding mode */ 2955*1b248f14SClaudio Fontana switch (mxcsr & SSE_RC_MASK) { 2956*1b248f14SClaudio Fontana default: 2957*1b248f14SClaudio Fontana case SSE_RC_NEAR: 2958*1b248f14SClaudio Fontana rnd_type = float_round_nearest_even; 2959*1b248f14SClaudio Fontana break; 2960*1b248f14SClaudio Fontana case SSE_RC_DOWN: 2961*1b248f14SClaudio Fontana rnd_type = float_round_down; 2962*1b248f14SClaudio Fontana break; 2963*1b248f14SClaudio Fontana case SSE_RC_UP: 2964*1b248f14SClaudio Fontana rnd_type = float_round_up; 2965*1b248f14SClaudio Fontana break; 2966*1b248f14SClaudio Fontana case SSE_RC_CHOP: 2967*1b248f14SClaudio Fontana rnd_type = float_round_to_zero; 2968*1b248f14SClaudio Fontana break; 2969*1b248f14SClaudio Fontana } 2970*1b248f14SClaudio Fontana set_float_rounding_mode(rnd_type, &env->sse_status); 2971*1b248f14SClaudio Fontana 2972*1b248f14SClaudio Fontana /* Set exception flags. */ 2973*1b248f14SClaudio Fontana set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 2974*1b248f14SClaudio Fontana (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 2975*1b248f14SClaudio Fontana (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 2976*1b248f14SClaudio Fontana (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 2977*1b248f14SClaudio Fontana (mxcsr & FPUS_PE ? float_flag_inexact : 0), 2978*1b248f14SClaudio Fontana &env->sse_status); 2979*1b248f14SClaudio Fontana 2980*1b248f14SClaudio Fontana /* set denormals are zero */ 2981*1b248f14SClaudio Fontana set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 2982*1b248f14SClaudio Fontana 2983*1b248f14SClaudio Fontana /* set flush to zero */ 2984*1b248f14SClaudio Fontana set_flush_to_zero((mxcsr & SSE_FZ) ? 
1 : 0, &env->sse_status); 2985*1b248f14SClaudio Fontana } 2986*1b248f14SClaudio Fontana 2987*1b248f14SClaudio Fontana void update_mxcsr_from_sse_status(CPUX86State *env) 2988*1b248f14SClaudio Fontana { 2989*1b248f14SClaudio Fontana if (tcg_enabled()) { 2990*1b248f14SClaudio Fontana uint8_t flags = get_float_exception_flags(&env->sse_status); 2991*1b248f14SClaudio Fontana /* 2992*1b248f14SClaudio Fontana * The MXCSR denormal flag has opposite semantics to 2993*1b248f14SClaudio Fontana * float_flag_input_denormal (the softfloat code sets that flag 2994*1b248f14SClaudio Fontana * only when flushing input denormals to zero, but SSE sets it 2995*1b248f14SClaudio Fontana * only when not flushing them to zero), so is not converted 2996*1b248f14SClaudio Fontana * here. 2997*1b248f14SClaudio Fontana */ 2998*1b248f14SClaudio Fontana env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 2999*1b248f14SClaudio Fontana (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3000*1b248f14SClaudio Fontana (flags & float_flag_overflow ? FPUS_OE : 0) | 3001*1b248f14SClaudio Fontana (flags & float_flag_underflow ? FPUS_UE : 0) | 3002*1b248f14SClaudio Fontana (flags & float_flag_inexact ? FPUS_PE : 0) | 3003*1b248f14SClaudio Fontana (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : 3004*1b248f14SClaudio Fontana 0)); 3005*1b248f14SClaudio Fontana } 3006*1b248f14SClaudio Fontana } 3007*1b248f14SClaudio Fontana 3008*1b248f14SClaudio Fontana void helper_update_mxcsr(CPUX86State *env) 3009*1b248f14SClaudio Fontana { 3010*1b248f14SClaudio Fontana update_mxcsr_from_sse_status(env); 3011*1b248f14SClaudio Fontana } 3012*1b248f14SClaudio Fontana 3013*1b248f14SClaudio Fontana void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3014*1b248f14SClaudio Fontana { 3015*1b248f14SClaudio Fontana cpu_set_mxcsr(env, val); 3016*1b248f14SClaudio Fontana } 3017*1b248f14SClaudio Fontana 3018*1b248f14SClaudio Fontana void helper_enter_mmx(CPUX86State *env) 3019*1b248f14SClaudio Fontana { 3020*1b248f14SClaudio Fontana env->fpstt = 0; 3021*1b248f14SClaudio Fontana *(uint32_t *)(env->fptags) = 0; 3022*1b248f14SClaudio Fontana *(uint32_t *)(env->fptags + 4) = 0; 3023*1b248f14SClaudio Fontana } 3024*1b248f14SClaudio Fontana 3025*1b248f14SClaudio Fontana void helper_emms(CPUX86State *env) 3026*1b248f14SClaudio Fontana { 3027*1b248f14SClaudio Fontana /* set to empty state */ 3028*1b248f14SClaudio Fontana *(uint32_t *)(env->fptags) = 0x01010101; 3029*1b248f14SClaudio Fontana *(uint32_t *)(env->fptags + 4) = 0x01010101; 3030*1b248f14SClaudio Fontana } 3031*1b248f14SClaudio Fontana 3032*1b248f14SClaudio Fontana /* XXX: suppress */ 3033*1b248f14SClaudio Fontana void helper_movq(CPUX86State *env, void *d, void *s) 3034*1b248f14SClaudio Fontana { 3035*1b248f14SClaudio Fontana *(uint64_t *)d = *(uint64_t *)s; 3036*1b248f14SClaudio Fontana } 3037*1b248f14SClaudio Fontana 3038*1b248f14SClaudio Fontana #define SHIFT 0 3039*1b248f14SClaudio Fontana #include "ops_sse.h" 3040*1b248f14SClaudio Fontana 3041*1b248f14SClaudio Fontana #define SHIFT 1 3042*1b248f14SClaudio Fontana #include "ops_sse.h" 3043