/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"

#ifdef CONFIG_SOFTMMU
#include "hw/irq.h"
#endif

/* float macros */
#define FT0    (env->ft0)
#define ST0    (env->fpregs[env->fpstt].d)
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1    ST(1)

#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        (fp.l.upper & 0x7fff)
#define SIGND(fp)       ((fp.l.upper) & 0x8000)
#define MANTD(fp)       (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

#if !defined(CONFIG_USER_ONLY)
static qemu_irq ferr_irq;

void x86_register_ferr_irq(qemu_irq irq)
{
    ferr_irq = irq;
}

static void cpu_clear_ignne(void)
{
    CPUX86State *env = &X86_CPU(first_cpu)->env;
    env->hflags2 &= ~HF2_IGNNE_MASK;
}

void cpu_set_ignne(void)
{
    CPUX86State *env = &X86_CPU(first_cpu)->env;
    env->hflags2 |= HF2_IGNNE_MASK;
    /*
     * We get here in response to a write to port F0h.  The chipset should
     * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is
     * cleared, because FERR# and FP_IRQ are two separate pins on real
     * hardware.  However, we don't model FERR# as a qemu_irq, so we just
     * do directly what the chipset would do, i.e. deassert FP_IRQ.
     */
    qemu_irq_lower(ferr_irq);
}
#endif
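
/*
 * The FT0/ST0/ST(n) macros above address the physical register file
 * relative to the current top-of-stack index; fpush() and fpop() below
 * only move that index and update the per-register tag array
 * (0 = valid, 1 = empty), leaving the register contents to the caller.
 */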

static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                    uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}

/* x87 FPU helpers */

static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
}
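
/*
 * Division goes through helper_fdiv() so that softfloat's invalid and
 * divide-by-zero flags are folded into the x87 status word by
 * merge_exception_flags(); unmasked exceptions are later reported via
 * fpu_raise_exception() (see helper_fwait()), either as #MF when CR0.NE
 * is set or by raising FERR# through the chipset IRQ.
 */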
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) {
        qemu_irq_raise(ferr_irq);
    }
#endif
}

void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}
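
/*
 * The 16-bit FIST/FISTP helper converts via int32 and checks the range by
 * hand: an out-of-range value is flagged as invalid and replaced with the
 * 16-bit integer indefinite value (-32768).  The 32/64-bit variants below
 * rely on softfloat's invalid flag instead.
 */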
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    do_fstt(env, ST0, ptr, GETPC());
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */
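
/*
 * floatx80_compare() returns a FloatRelation: -1 (less), 0 (equal),
 * 1 (greater) or 2 (unordered).  Indexing with "ret + 1" picks the
 * C3/C2/C0 pattern FCOM must set: 0x0100 = C0, 0x4000 = C3 and
 * 0x4500 = C3|C2|C0 for the unordered case.
 */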
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

void update_fp_status(CPUX86State *env)
{
    int rnd_type;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_type = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->fp_status);
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_type = 32;
        break;
    case 2:
        rnd_type = 64;
        break;
    case 3:
    default:
        rnd_type = 80;
        break;
    }
    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
}

void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

void helper_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

/* BCD ops */
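
/*
 * FBLD/FBST use the 80-bit packed BCD format: bytes 0..8 hold 18 decimal
 * digits, two per byte with the low nibble the less significant digit,
 * and bit 7 of byte 9 holds the sign.
 */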
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;

    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
        }
        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    if (SIGND(temp)) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2).  */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
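
/*
 * Rough outline of helper_f2xm1() below: the argument x (|x| <= 1) is
 * split as x = t + y, where t is the entry of f2xm1_table closest to a
 * multiple of 1/32; the polynomial above evaluates (2^y - 1) / y, and the
 * result is reassembled as (2^y - 1) * 2^t + (2^t - 1) using the 2^t and
 * 2^t - 1 values stored in the table.
 */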
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};

static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid.  */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5.  */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        /* Find the nearest multiple of 1/32 to the argument.  */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1.  */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result.  */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition.  */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
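
/*
 * Rough outline of helper_fpatan() below: the ratio of the smaller to the
 * larger operand magnitude gives an x in [0, 1], which is split as
 * x = n/8 + y; atan(n/8) is taken from fpatan_table and
 * atan(y / (1 + x*n/8)) from the polynomial above, and the sum is then
 * used as-is, subtracted from pi, or combined with pi/2, depending on the
 * operands' signs and relative magnitudes.
 */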
struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};

static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = 80;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            signed char save_prec = env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = 80;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(80, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
*/ 1728 rsig1 |= 1; 1729 ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp, 1730 rsig0, rsig1, &env->fp_status); 1731 } 1732 1733 fpop(env); 1734 merge_exception_flags(env, old_flags); 1735 } 1736 1737 void helper_fxtract(CPUX86State *env) 1738 { 1739 uint8_t old_flags = save_exception_flags(env); 1740 CPU_LDoubleU temp; 1741 1742 temp.d = ST0; 1743 1744 if (floatx80_is_zero(ST0)) { 1745 /* Easy way to generate -inf and raising division by 0 exception */ 1746 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1747 &env->fp_status); 1748 fpush(env); 1749 ST0 = temp.d; 1750 } else if (floatx80_invalid_encoding(ST0)) { 1751 float_raise(float_flag_invalid, &env->fp_status); 1752 ST0 = floatx80_default_nan(&env->fp_status); 1753 fpush(env); 1754 ST0 = ST1; 1755 } else if (floatx80_is_any_nan(ST0)) { 1756 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1757 float_raise(float_flag_invalid, &env->fp_status); 1758 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1759 } 1760 fpush(env); 1761 ST0 = ST1; 1762 } else if (floatx80_is_infinity(ST0)) { 1763 fpush(env); 1764 ST0 = ST1; 1765 ST1 = floatx80_infinity; 1766 } else { 1767 int expdif; 1768 1769 if (EXPD(temp) == 0) { 1770 int shift = clz64(temp.l.lower); 1771 temp.l.lower <<= shift; 1772 expdif = 1 - EXPBIAS - shift; 1773 float_raise(float_flag_input_denormal, &env->fp_status); 1774 } else { 1775 expdif = EXPD(temp) - EXPBIAS; 1776 } 1777 /* DP exponent bias */ 1778 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1779 fpush(env); 1780 BIASEXPONENT(temp); 1781 ST0 = temp.d; 1782 } 1783 merge_exception_flags(env, old_flags); 1784 } 1785 1786 static void helper_fprem_common(CPUX86State *env, bool mod) 1787 { 1788 uint8_t old_flags = save_exception_flags(env); 1789 uint64_t quotient; 1790 CPU_LDoubleU temp0, temp1; 1791 int exp0, exp1, expdiff; 1792 1793 temp0.d = ST0; 1794 temp1.d = ST1; 1795 exp0 = EXPD(temp0); 1796 exp1 = EXPD(temp1); 1797 1798 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1799 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1800 exp0 == 0x7fff || exp1 == 0x7fff || 1801 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1802 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1803 } else { 1804 if (exp0 == 0) { 1805 exp0 = 1 - clz64(temp0.l.lower); 1806 } 1807 if (exp1 == 0) { 1808 exp1 = 1 - clz64(temp1.l.lower); 1809 } 1810 expdiff = exp0 - exp1; 1811 if (expdiff < 64) { 1812 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1813 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1814 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1815 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1816 } else { 1817 /* 1818 * Partial remainder. This choice of how many bits to 1819 * process at once is specified in AMD instruction set 1820 * manuals, and empirically is followed by Intel 1821 * processors as well; it ensures that the final remainder 1822 * operation in a loop does produce the correct low three 1823 * bits of the quotient. AMD manuals specify that the 1824 * flags other than C2 are cleared, and empirically Intel 1825 * processors clear them as well. 
1826 */ 1827 int n = 32 + (expdiff % 32); 1828 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1829 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1830 env->fpus |= 0x400; /* C2 <-- 1 */ 1831 } 1832 } 1833 merge_exception_flags(env, old_flags); 1834 } 1835 1836 void helper_fprem1(CPUX86State *env) 1837 { 1838 helper_fprem_common(env, false); 1839 } 1840 1841 void helper_fprem(CPUX86State *env) 1842 { 1843 helper_fprem_common(env, true); 1844 } 1845 1846 /* 128-bit significand of log2(e). */ 1847 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1848 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1849 1850 /* 1851 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1852 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1853 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1854 * interval [sqrt(2)/2, sqrt(2)]. 1855 */ 1856 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1857 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1858 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1859 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1860 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1861 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1862 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1863 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1864 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1865 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1866 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1867 1868 /* 1869 * Compute an approximation of log2(1+arg), where 1+arg is in the 1870 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1871 * function is called, rounding precision is set to 80 and the 1872 * round-to-nearest mode is in effect. arg must not be exactly zero, 1873 * and must not be so close to zero that underflow might occur. 1874 */ 1875 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1876 uint64_t *sig0, uint64_t *sig1) 1877 { 1878 uint64_t arg0_sig = extractFloatx80Frac(arg); 1879 int32_t arg0_exp = extractFloatx80Exp(arg); 1880 bool arg0_sign = extractFloatx80Sign(arg); 1881 bool asign; 1882 int32_t dexp, texp, aexp; 1883 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1884 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1885 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1886 floatx80 t2, accum; 1887 1888 /* 1889 * Compute an approximation of arg/(2+arg), with extra precision, 1890 * as the argument to a polynomial approximation. The extra 1891 * precision is only needed for the first term of the 1892 * approximation, with subsequent terms being significantly 1893 * smaller; the approximation only uses odd exponents, and the 1894 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
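* (With t = arg/(2+arg), (1 + t)/(1 - t) simplifies to 1 + arg, which is why the log2((1+x)/(1-x)) coefficients above can be applied to t.)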
1895 */ 1896 if (arg0_sign) { 1897 dexp = 0x3fff; 1898 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1899 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1900 } else { 1901 dexp = 0x4000; 1902 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1903 dsig0 |= 0x8000000000000000ULL; 1904 } 1905 texp = arg0_exp - dexp + 0x3ffe; 1906 rsig0 = arg0_sig; 1907 rsig1 = 0; 1908 rsig2 = 0; 1909 if (dsig0 <= rsig0) { 1910 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1911 ++texp; 1912 } 1913 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1914 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1915 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1916 &rsig0, &rsig1, &rsig2); 1917 while ((int64_t) rsig0 < 0) { 1918 --tsig0; 1919 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1920 &rsig0, &rsig1, &rsig2); 1921 } 1922 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1923 /* 1924 * No need to correct any estimation error in tsig1; even with 1925 * such error, it is accurate enough. Now compute the square of 1926 * that approximation. 1927 */ 1928 mul128To256(tsig0, tsig1, tsig0, tsig1, 1929 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1930 t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe, 1931 t2sig0, t2sig1, &env->fp_status); 1932 1933 /* Compute the lower parts of the polynomial expansion. */ 1934 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1935 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1936 accum = floatx80_mul(accum, t2, &env->fp_status); 1937 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1938 accum = floatx80_mul(accum, t2, &env->fp_status); 1939 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1940 accum = floatx80_mul(accum, t2, &env->fp_status); 1941 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1942 accum = floatx80_mul(accum, t2, &env->fp_status); 1943 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1944 accum = floatx80_mul(accum, t2, &env->fp_status); 1945 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1946 accum = floatx80_mul(accum, t2, &env->fp_status); 1947 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1948 accum = floatx80_mul(accum, t2, &env->fp_status); 1949 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1950 accum = floatx80_mul(accum, t2, &env->fp_status); 1951 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 1952 1953 /* 1954 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1955 * accum has much lower magnitude, and so, in particular, carry 1956 * out of the addition is not possible), multiplied by t. (This 1957 * expansion is only accurate to about 70 bits, not 128 bits.) 1958 */ 1959 aexp = extractFloatx80Exp(fyl2x_coeff_0); 1960 asign = extractFloatx80Sign(fyl2x_coeff_0); 1961 shift128RightJamming(extractFloatx80Frac(accum), 0, 1962 aexp - extractFloatx80Exp(accum), 1963 &asig0, &asig1); 1964 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1965 bsig1 = 0; 1966 if (asign == extractFloatx80Sign(accum)) { 1967 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1968 } else { 1969 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1970 } 1971 /* Multiply by t to compute the required result. 
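Only the high 128 bits of the 256-bit product are kept; the expansion is only accurate to about 70 bits, so nothing useful is lost, and the callers fold in the integer part of the logarithm where needed, multiply by ST1 and perform the single final rounding.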
*/ 1972 mul128To256(asig0, asig1, tsig0, tsig1, 1973 &asig0, &asig1, &asig2, &asig3); 1974 aexp += texp - 0x3ffe; 1975 *exp = aexp; 1976 *sig0 = asig0; 1977 *sig1 = asig1; 1978 } 1979 1980 void helper_fyl2xp1(CPUX86State *env) 1981 { 1982 uint8_t old_flags = save_exception_flags(env); 1983 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1984 int32_t arg0_exp = extractFloatx80Exp(ST0); 1985 bool arg0_sign = extractFloatx80Sign(ST0); 1986 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1987 int32_t arg1_exp = extractFloatx80Exp(ST1); 1988 bool arg1_sign = extractFloatx80Sign(ST1); 1989 1990 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1991 float_raise(float_flag_invalid, &env->fp_status); 1992 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1993 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1994 float_raise(float_flag_invalid, &env->fp_status); 1995 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1996 } else if (floatx80_invalid_encoding(ST0) || 1997 floatx80_invalid_encoding(ST1)) { 1998 float_raise(float_flag_invalid, &env->fp_status); 1999 ST1 = floatx80_default_nan(&env->fp_status); 2000 } else if (floatx80_is_any_nan(ST0)) { 2001 ST1 = ST0; 2002 } else if (floatx80_is_any_nan(ST1)) { 2003 /* Pass this NaN through. */ 2004 } else if (arg0_exp > 0x3ffd || 2005 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2006 0x95f619980c4336f7ULL : 2007 0xd413cccfe7799211ULL))) { 2008 /* 2009 * Out of range for the instruction (ST0 must have absolute 2010 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2011 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2012 * to sqrt(2) - 1, which we allow here), treat as invalid. 2013 */ 2014 float_raise(float_flag_invalid, &env->fp_status); 2015 ST1 = floatx80_default_nan(&env->fp_status); 2016 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2017 arg1_exp == 0x7fff) { 2018 /* 2019 * One argument is zero, or multiplying by infinity; correct 2020 * result is exact and can be obtained by multiplying the 2021 * arguments. 2022 */ 2023 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2024 } else if (arg0_exp < 0x3fb0) { 2025 /* 2026 * Multiplying both arguments and an extra-precision version 2027 * of log2(e) is sufficiently precise. 2028 */ 2029 uint64_t sig0, sig1, sig2; 2030 int32_t exp; 2031 if (arg0_exp == 0) { 2032 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2033 } 2034 if (arg1_exp == 0) { 2035 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2036 } 2037 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2038 &sig0, &sig1, &sig2); 2039 exp = arg0_exp + 1; 2040 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2041 exp += arg1_exp - 0x3ffe; 2042 /* This result is inexact. */ 2043 sig1 |= 1; 2044 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp, 2045 sig0, sig1, &env->fp_status); 2046 } else { 2047 int32_t aexp; 2048 uint64_t asig0, asig1, asig2; 2049 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2050 signed char save_prec = env->fp_status.floatx80_rounding_precision; 2051 env->fp_status.float_rounding_mode = float_round_nearest_even; 2052 env->fp_status.floatx80_rounding_precision = 80; 2053 2054 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2055 /* 2056 * Multiply by the second argument to compute the required 2057 * result. 
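* The product ST1 * log2(ST0 + 1) is therefore kept in extra precision and rounded only once, by normalizeRoundAndPackFloatx80() below, after the caller's rounding mode has been restored.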
2058 */ 2059 if (arg1_exp == 0) { 2060 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2061 } 2062 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2063 aexp += arg1_exp - 0x3ffe; 2064 /* This result is inexact. */ 2065 asig1 |= 1; 2066 env->fp_status.float_rounding_mode = save_mode; 2067 ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp, 2068 asig0, asig1, &env->fp_status); 2069 env->fp_status.floatx80_rounding_precision = save_prec; 2070 } 2071 fpop(env); 2072 merge_exception_flags(env, old_flags); 2073 } 2074 2075 void helper_fyl2x(CPUX86State *env) 2076 { 2077 uint8_t old_flags = save_exception_flags(env); 2078 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2079 int32_t arg0_exp = extractFloatx80Exp(ST0); 2080 bool arg0_sign = extractFloatx80Sign(ST0); 2081 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2082 int32_t arg1_exp = extractFloatx80Exp(ST1); 2083 bool arg1_sign = extractFloatx80Sign(ST1); 2084 2085 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2086 float_raise(float_flag_invalid, &env->fp_status); 2087 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2088 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2089 float_raise(float_flag_invalid, &env->fp_status); 2090 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2091 } else if (floatx80_invalid_encoding(ST0) || 2092 floatx80_invalid_encoding(ST1)) { 2093 float_raise(float_flag_invalid, &env->fp_status); 2094 ST1 = floatx80_default_nan(&env->fp_status); 2095 } else if (floatx80_is_any_nan(ST0)) { 2096 ST1 = ST0; 2097 } else if (floatx80_is_any_nan(ST1)) { 2098 /* Pass this NaN through. */ 2099 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2100 float_raise(float_flag_invalid, &env->fp_status); 2101 ST1 = floatx80_default_nan(&env->fp_status); 2102 } else if (floatx80_is_infinity(ST1)) { 2103 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2104 &env->fp_status); 2105 switch (cmp) { 2106 case float_relation_less: 2107 ST1 = floatx80_chs(ST1); 2108 break; 2109 case float_relation_greater: 2110 /* Result is infinity of the same sign as ST1. */ 2111 break; 2112 default: 2113 float_raise(float_flag_invalid, &env->fp_status); 2114 ST1 = floatx80_default_nan(&env->fp_status); 2115 break; 2116 } 2117 } else if (floatx80_is_infinity(ST0)) { 2118 if (floatx80_is_zero(ST1)) { 2119 float_raise(float_flag_invalid, &env->fp_status); 2120 ST1 = floatx80_default_nan(&env->fp_status); 2121 } else if (arg1_sign) { 2122 ST1 = floatx80_chs(ST0); 2123 } else { 2124 ST1 = ST0; 2125 } 2126 } else if (floatx80_is_zero(ST0)) { 2127 if (floatx80_is_zero(ST1)) { 2128 float_raise(float_flag_invalid, &env->fp_status); 2129 ST1 = floatx80_default_nan(&env->fp_status); 2130 } else { 2131 /* Result is infinity with opposite sign to ST1. */ 2132 float_raise(float_flag_divbyzero, &env->fp_status); 2133 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2134 0x8000000000000000ULL); 2135 } 2136 } else if (floatx80_is_zero(ST1)) { 2137 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2138 ST1 = floatx80_chs(ST1); 2139 } 2140 /* Otherwise, ST1 is already the correct result. 
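y * log2(x) with y == 0 yields a zero whose sign is flipped exactly when log2(x) is negative, i.e. when x < 1.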
*/ 2141 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2142 if (arg1_sign) { 2143 ST1 = floatx80_chs(floatx80_zero); 2144 } else { 2145 ST1 = floatx80_zero; 2146 } 2147 } else { 2148 int32_t int_exp; 2149 floatx80 arg0_m1; 2150 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2151 signed char save_prec = env->fp_status.floatx80_rounding_precision; 2152 env->fp_status.float_rounding_mode = float_round_nearest_even; 2153 env->fp_status.floatx80_rounding_precision = 80; 2154 2155 if (arg0_exp == 0) { 2156 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2157 } 2158 if (arg1_exp == 0) { 2159 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2160 } 2161 int_exp = arg0_exp - 0x3fff; 2162 if (arg0_sig > 0xb504f333f9de6484ULL) { 2163 ++int_exp; 2164 } 2165 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2166 &env->fp_status), 2167 floatx80_one, &env->fp_status); 2168 if (floatx80_is_zero(arg0_m1)) { 2169 /* Exact power of 2; multiply by ST1. */ 2170 env->fp_status.float_rounding_mode = save_mode; 2171 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2172 ST1, &env->fp_status); 2173 } else { 2174 bool asign = extractFloatx80Sign(arg0_m1); 2175 int32_t aexp; 2176 uint64_t asig0, asig1, asig2; 2177 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2178 if (int_exp != 0) { 2179 bool isign = (int_exp < 0); 2180 int32_t iexp; 2181 uint64_t isig; 2182 int shift; 2183 int_exp = isign ? -int_exp : int_exp; 2184 shift = clz32(int_exp) + 32; 2185 isig = int_exp; 2186 isig <<= shift; 2187 iexp = 0x403e - shift; 2188 shift128RightJamming(asig0, asig1, iexp - aexp, 2189 &asig0, &asig1); 2190 if (asign == isign) { 2191 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2192 } else { 2193 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2194 } 2195 aexp = iexp; 2196 asign = isign; 2197 } 2198 /* 2199 * Multiply by the second argument to compute the required 2200 * result. 2201 */ 2202 if (arg1_exp == 0) { 2203 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2204 } 2205 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2206 aexp += arg1_exp - 0x3ffe; 2207 /* This result is inexact. 
*/ 2208 asig1 |= 1; 2209 env->fp_status.float_rounding_mode = save_mode; 2210 ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp, 2211 asig0, asig1, &env->fp_status); 2212 } 2213 2214 env->fp_status.floatx80_rounding_precision = save_prec; 2215 } 2216 fpop(env); 2217 merge_exception_flags(env, old_flags); 2218 } 2219 2220 void helper_fsqrt(CPUX86State *env) 2221 { 2222 uint8_t old_flags = save_exception_flags(env); 2223 if (floatx80_is_neg(ST0)) { 2224 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2225 env->fpus |= 0x400; 2226 } 2227 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2228 merge_exception_flags(env, old_flags); 2229 } 2230 2231 void helper_fsincos(CPUX86State *env) 2232 { 2233 double fptemp = floatx80_to_double(env, ST0); 2234 2235 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2236 env->fpus |= 0x400; 2237 } else { 2238 ST0 = double_to_floatx80(env, sin(fptemp)); 2239 fpush(env); 2240 ST0 = double_to_floatx80(env, cos(fptemp)); 2241 env->fpus &= ~0x400; /* C2 <-- 0 */ 2242 /* the above code is for |arg| < 2**63 only */ 2243 } 2244 } 2245 2246 void helper_frndint(CPUX86State *env) 2247 { 2248 uint8_t old_flags = save_exception_flags(env); 2249 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2250 merge_exception_flags(env, old_flags); 2251 } 2252 2253 void helper_fscale(CPUX86State *env) 2254 { 2255 uint8_t old_flags = save_exception_flags(env); 2256 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2257 float_raise(float_flag_invalid, &env->fp_status); 2258 ST0 = floatx80_default_nan(&env->fp_status); 2259 } else if (floatx80_is_any_nan(ST1)) { 2260 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2261 float_raise(float_flag_invalid, &env->fp_status); 2262 } 2263 ST0 = ST1; 2264 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2265 float_raise(float_flag_invalid, &env->fp_status); 2266 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2267 } 2268 } else if (floatx80_is_infinity(ST1) && 2269 !floatx80_invalid_encoding(ST0) && 2270 !floatx80_is_any_nan(ST0)) { 2271 if (floatx80_is_neg(ST1)) { 2272 if (floatx80_is_infinity(ST0)) { 2273 float_raise(float_flag_invalid, &env->fp_status); 2274 ST0 = floatx80_default_nan(&env->fp_status); 2275 } else { 2276 ST0 = (floatx80_is_neg(ST0) ? 2277 floatx80_chs(floatx80_zero) : 2278 floatx80_zero); 2279 } 2280 } else { 2281 if (floatx80_is_zero(ST0)) { 2282 float_raise(float_flag_invalid, &env->fp_status); 2283 ST0 = floatx80_default_nan(&env->fp_status); 2284 } else { 2285 ST0 = (floatx80_is_neg(ST0) ? 
2286 floatx80_chs(floatx80_infinity) : 2287 floatx80_infinity); 2288 } 2289 } 2290 } else { 2291 int n; 2292 signed char save = env->fp_status.floatx80_rounding_precision; 2293 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2294 set_float_exception_flags(0, &env->fp_status); 2295 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2296 set_float_exception_flags(save_flags, &env->fp_status); 2297 env->fp_status.floatx80_rounding_precision = 80; 2298 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2299 env->fp_status.floatx80_rounding_precision = save; 2300 } 2301 merge_exception_flags(env, old_flags); 2302 } 2303 2304 void helper_fsin(CPUX86State *env) 2305 { 2306 double fptemp = floatx80_to_double(env, ST0); 2307 2308 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2309 env->fpus |= 0x400; 2310 } else { 2311 ST0 = double_to_floatx80(env, sin(fptemp)); 2312 env->fpus &= ~0x400; /* C2 <-- 0 */ 2313 /* the above code is for |arg| < 2**53 only */ 2314 } 2315 } 2316 2317 void helper_fcos(CPUX86State *env) 2318 { 2319 double fptemp = floatx80_to_double(env, ST0); 2320 2321 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2322 env->fpus |= 0x400; 2323 } else { 2324 ST0 = double_to_floatx80(env, cos(fptemp)); 2325 env->fpus &= ~0x400; /* C2 <-- 0 */ 2326 /* the above code is for |arg| < 2**63 only */ 2327 } 2328 } 2329 2330 void helper_fxam_ST0(CPUX86State *env) 2331 { 2332 CPU_LDoubleU temp; 2333 int expdif; 2334 2335 temp.d = ST0; 2336 2337 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2338 if (SIGND(temp)) { 2339 env->fpus |= 0x200; /* C1 <-- 1 */ 2340 } 2341 2342 if (env->fptags[env->fpstt]) { 2343 env->fpus |= 0x4100; /* Empty */ 2344 return; 2345 } 2346 2347 expdif = EXPD(temp); 2348 if (expdif == MAXEXPD) { 2349 if (MANTD(temp) == 0x8000000000000000ULL) { 2350 env->fpus |= 0x500; /* Infinity */ 2351 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2352 env->fpus |= 0x100; /* NaN */ 2353 } 2354 } else if (expdif == 0) { 2355 if (MANTD(temp) == 0) { 2356 env->fpus |= 0x4000; /* Zero */ 2357 } else { 2358 env->fpus |= 0x4400; /* Denormal */ 2359 } 2360 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2361 env->fpus |= 0x400; 2362 } 2363 } 2364 2365 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2366 uintptr_t retaddr) 2367 { 2368 int fpus, fptag, exp, i; 2369 uint64_t mant; 2370 CPU_LDoubleU tmp; 2371 2372 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2373 fptag = 0; 2374 for (i = 7; i >= 0; i--) { 2375 fptag <<= 2; 2376 if (env->fptags[i]) { 2377 fptag |= 3; 2378 } else { 2379 tmp.d = env->fpregs[i].d; 2380 exp = EXPD(tmp); 2381 mant = MANTD(tmp); 2382 if (exp == 0 && mant == 0) { 2383 /* zero */ 2384 fptag |= 1; 2385 } else if (exp == 0 || exp == MAXEXPD 2386 || (mant & (1LL << 63)) == 0) { 2387 /* NaNs, infinity, denormal */ 2388 fptag |= 2; 2389 } 2390 } 2391 } 2392 if (data32) { 2393 /* 32 bit */ 2394 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2395 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2396 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2397 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */ 2398 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */ 2399 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */ 2400 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */ 2401 } else { 2402 /* 16 bit */ 2403 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2404 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2405 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2406 cpu_stw_data_ra(env, ptr + 6, 0, retaddr); 2407 
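/* As in the 32-bit branch, the FPU instruction and operand pointer words are not tracked and are stored as zeros. */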
cpu_stw_data_ra(env, ptr + 8, 0, retaddr); 2408 cpu_stw_data_ra(env, ptr + 10, 0, retaddr); 2409 cpu_stw_data_ra(env, ptr + 12, 0, retaddr); 2410 } 2411 } 2412 2413 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2414 { 2415 do_fstenv(env, ptr, data32, GETPC()); 2416 } 2417 2418 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2419 { 2420 env->fpstt = (fpus >> 11) & 7; 2421 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2422 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2423 #if !defined(CONFIG_USER_ONLY) 2424 if (!(env->fpus & FPUS_SE)) { 2425 /* 2426 * Here the processor deasserts FERR#; in response, the chipset deasserts 2427 * IGNNE#. 2428 */ 2429 cpu_clear_ignne(); 2430 } 2431 #endif 2432 } 2433 2434 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2435 uintptr_t retaddr) 2436 { 2437 int i, fpus, fptag; 2438 2439 if (data32) { 2440 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2441 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2442 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2443 } else { 2444 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2445 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2446 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2447 } 2448 cpu_set_fpus(env, fpus); 2449 for (i = 0; i < 8; i++) { 2450 env->fptags[i] = ((fptag & 3) == 3); 2451 fptag >>= 2; 2452 } 2453 } 2454 2455 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2456 { 2457 do_fldenv(env, ptr, data32, GETPC()); 2458 } 2459 2460 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2461 uintptr_t retaddr) 2462 { 2463 floatx80 tmp; 2464 int i; 2465 2466 do_fstenv(env, ptr, data32, retaddr); 2467 2468 ptr += (14 << data32); 2469 for (i = 0; i < 8; i++) { 2470 tmp = ST(i); 2471 do_fstt(env, tmp, ptr, retaddr); 2472 ptr += 10; 2473 } 2474 2475 /* fninit */ 2476 env->fpus = 0; 2477 env->fpstt = 0; 2478 cpu_set_fpuc(env, 0x37f); 2479 env->fptags[0] = 1; 2480 env->fptags[1] = 1; 2481 env->fptags[2] = 1; 2482 env->fptags[3] = 1; 2483 env->fptags[4] = 1; 2484 env->fptags[5] = 1; 2485 env->fptags[6] = 1; 2486 env->fptags[7] = 1; 2487 } 2488 2489 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2490 { 2491 do_fsave(env, ptr, data32, GETPC()); 2492 } 2493 2494 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2495 uintptr_t retaddr) 2496 { 2497 floatx80 tmp; 2498 int i; 2499 2500 do_fldenv(env, ptr, data32, retaddr); 2501 ptr += (14 << data32); 2502 2503 for (i = 0; i < 8; i++) { 2504 tmp = do_fldt(env, ptr, retaddr); 2505 ST(i) = tmp; 2506 ptr += 10; 2507 } 2508 } 2509 2510 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2511 { 2512 do_frstor(env, ptr, data32, GETPC()); 2513 } 2514 2515 #if defined(CONFIG_USER_ONLY) 2516 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2517 { 2518 do_fsave(env, ptr, data32, 0); 2519 } 2520 2521 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2522 { 2523 do_frstor(env, ptr, data32, 0); 2524 } 2525 #endif 2526 2527 #define XO(X) offsetof(X86XSaveArea, X) 2528 2529 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2530 { 2531 int fpus, fptag, i; 2532 target_ulong addr; 2533 2534 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2535 fptag = 0; 2536 for (i = 0; i < 8; i++) { 2537 fptag |= (env->fptags[i] << i); 2538 } 2539 2540 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2541 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2542 
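/* The abridged tag word has one bit per register, set when the register is valid, hence the inversion of fptags (which use 1 to mean empty). */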
cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2543 2544 /* In 32-bit mode this is eip, sel, dp, sel. 2545 In 64-bit mode this is rip, rdp. 2546 But in either case we don't write actual data, just zeros. */ 2547 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2548 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2549 2550 addr = ptr + XO(legacy.fpregs); 2551 for (i = 0; i < 8; i++) { 2552 floatx80 tmp = ST(i); 2553 do_fstt(env, tmp, addr, ra); 2554 addr += 16; 2555 } 2556 } 2557 2558 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2559 { 2560 update_mxcsr_from_sse_status(env); 2561 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2562 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2563 } 2564 2565 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2566 { 2567 int i, nb_xmm_regs; 2568 target_ulong addr; 2569 2570 if (env->hflags & HF_CS64_MASK) { 2571 nb_xmm_regs = 16; 2572 } else { 2573 nb_xmm_regs = 8; 2574 } 2575 2576 addr = ptr + XO(legacy.xmm_regs); 2577 for (i = 0; i < nb_xmm_regs; i++) { 2578 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2579 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2580 addr += 16; 2581 } 2582 } 2583 2584 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2585 { 2586 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2587 int i; 2588 2589 for (i = 0; i < 4; i++, addr += 16) { 2590 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2591 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2592 } 2593 } 2594 2595 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2596 { 2597 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2598 env->bndcs_regs.cfgu, ra); 2599 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2600 env->bndcs_regs.sts, ra); 2601 } 2602 2603 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2604 { 2605 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2606 } 2607 2608 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2609 { 2610 /* The operand must be 16 byte aligned */ 2611 if (ptr & 0xf) { 2612 raise_exception_ra(env, EXCP0D_GPF, ra); 2613 } 2614 2615 do_xsave_fpu(env, ptr, ra); 2616 2617 if (env->cr[4] & CR4_OSFXSR_MASK) { 2618 do_xsave_mxcsr(env, ptr, ra); 2619 /* Fast FXSAVE leaves out the XMM registers */ 2620 if (!(env->efer & MSR_EFER_FFXSR) 2621 || (env->hflags & HF_CPL_MASK) 2622 || !(env->hflags & HF_LMA_MASK)) { 2623 do_xsave_sse(env, ptr, ra); 2624 } 2625 } 2626 } 2627 2628 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2629 { 2630 do_fxsave(env, ptr, GETPC()); 2631 } 2632 2633 static uint64_t get_xinuse(CPUX86State *env) 2634 { 2635 uint64_t inuse = -1; 2636 2637 /* For the most part, we don't track XINUSE. We could calculate it 2638 here for all components, but it's probably less work to simply 2639 indicate in use. That said, the state of BNDREGS is important 2640 enough to track in HFLAGS, so we might as well use that here. */ 2641 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2642 inuse &= ~XSTATE_BNDREGS_MASK; 2643 } 2644 return inuse; 2645 } 2646 2647 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2648 uint64_t inuse, uint64_t opt, uintptr_t ra) 2649 { 2650 uint64_t old_bv, new_bv; 2651 2652 /* The OS must have enabled XSAVE. 
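Otherwise the instruction faults with #UD (EXCP06_ILLOP), not #GP.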
*/ 2653 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2654 raise_exception_ra(env, EXCP06_ILLOP, ra); 2655 } 2656 2657 /* The operand must be 64 byte aligned. */ 2658 if (ptr & 63) { 2659 raise_exception_ra(env, EXCP0D_GPF, ra); 2660 } 2661 2662 /* Never save anything not enabled by XCR0. */ 2663 rfbm &= env->xcr0; 2664 opt &= rfbm; 2665 2666 if (opt & XSTATE_FP_MASK) { 2667 do_xsave_fpu(env, ptr, ra); 2668 } 2669 if (rfbm & XSTATE_SSE_MASK) { 2670 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2671 do_xsave_mxcsr(env, ptr, ra); 2672 } 2673 if (opt & XSTATE_SSE_MASK) { 2674 do_xsave_sse(env, ptr, ra); 2675 } 2676 if (opt & XSTATE_BNDREGS_MASK) { 2677 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2678 } 2679 if (opt & XSTATE_BNDCSR_MASK) { 2680 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2681 } 2682 if (opt & XSTATE_PKRU_MASK) { 2683 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2684 } 2685 2686 /* Update the XSTATE_BV field. */ 2687 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2688 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2689 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2690 } 2691 2692 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2693 { 2694 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2695 } 2696 2697 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2698 { 2699 uint64_t inuse = get_xinuse(env); 2700 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2701 } 2702 2703 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2704 { 2705 int i, fpuc, fpus, fptag; 2706 target_ulong addr; 2707 2708 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2709 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2710 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2711 cpu_set_fpuc(env, fpuc); 2712 cpu_set_fpus(env, fpus); 2713 fptag ^= 0xff; 2714 for (i = 0; i < 8; i++) { 2715 env->fptags[i] = ((fptag >> i) & 1); 2716 } 2717 2718 addr = ptr + XO(legacy.fpregs); 2719 for (i = 0; i < 8; i++) { 2720 floatx80 tmp = do_fldt(env, addr, ra); 2721 ST(i) = tmp; 2722 addr += 16; 2723 } 2724 } 2725 2726 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2727 { 2728 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2729 } 2730 2731 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2732 { 2733 int i, nb_xmm_regs; 2734 target_ulong addr; 2735 2736 if (env->hflags & HF_CS64_MASK) { 2737 nb_xmm_regs = 16; 2738 } else { 2739 nb_xmm_regs = 8; 2740 } 2741 2742 addr = ptr + XO(legacy.xmm_regs); 2743 for (i = 0; i < nb_xmm_regs; i++) { 2744 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2745 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2746 addr += 16; 2747 } 2748 } 2749 2750 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2751 { 2752 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2753 int i; 2754 2755 for (i = 0; i < 4; i++, addr += 16) { 2756 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2757 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2758 } 2759 } 2760 2761 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2762 { 2763 /* FIXME: Extend highest implemented bit of linear address. 
*/ 2764 env->bndcs_regs.cfgu 2765 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2766 env->bndcs_regs.sts 2767 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2768 } 2769 2770 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2771 { 2772 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2773 } 2774 2775 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2776 { 2777 /* The operand must be 16 byte aligned */ 2778 if (ptr & 0xf) { 2779 raise_exception_ra(env, EXCP0D_GPF, ra); 2780 } 2781 2782 do_xrstor_fpu(env, ptr, ra); 2783 2784 if (env->cr[4] & CR4_OSFXSR_MASK) { 2785 do_xrstor_mxcsr(env, ptr, ra); 2786 /* Fast FXRSTOR leaves out the XMM registers */ 2787 if (!(env->efer & MSR_EFER_FFXSR) 2788 || (env->hflags & HF_CPL_MASK) 2789 || !(env->hflags & HF_LMA_MASK)) { 2790 do_xrstor_sse(env, ptr, ra); 2791 } 2792 } 2793 } 2794 2795 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2796 { 2797 do_fxrstor(env, ptr, GETPC()); 2798 } 2799 2800 #if defined(CONFIG_USER_ONLY) 2801 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2802 { 2803 do_fxsave(env, ptr, 0); 2804 } 2805 2806 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2807 { 2808 do_fxrstor(env, ptr, 0); 2809 } 2810 #endif 2811 2812 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2813 { 2814 uintptr_t ra = GETPC(); 2815 uint64_t xstate_bv, xcomp_bv, reserve0; 2816 2817 rfbm &= env->xcr0; 2818 2819 /* The OS must have enabled XSAVE. */ 2820 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2821 raise_exception_ra(env, EXCP06_ILLOP, ra); 2822 } 2823 2824 /* The operand must be 64 byte aligned. */ 2825 if (ptr & 63) { 2826 raise_exception_ra(env, EXCP0D_GPF, ra); 2827 } 2828 2829 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2830 2831 if ((int64_t)xstate_bv < 0) { 2832 /* FIXME: Compact form. */ 2833 raise_exception_ra(env, EXCP0D_GPF, ra); 2834 } 2835 2836 /* Standard form. */ 2837 2838 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2839 if (xstate_bv & ~env->xcr0) { 2840 raise_exception_ra(env, EXCP0D_GPF, ra); 2841 } 2842 2843 /* The XCOMP_BV field must be zero. Note that, as of the April 2016 2844 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2845 describes only XCOMP_BV, but the description of the standard form 2846 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2847 includes the next 64-bit field. */ 2848 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2849 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2850 if (xcomp_bv || reserve0) { 2851 raise_exception_ra(env, EXCP0D_GPF, ra); 2852 } 2853 2854 if (rfbm & XSTATE_FP_MASK) { 2855 if (xstate_bv & XSTATE_FP_MASK) { 2856 do_xrstor_fpu(env, ptr, ra); 2857 } else { 2858 helper_fninit(env); 2859 memset(env->fpregs, 0, sizeof(env->fpregs)); 2860 } 2861 } 2862 if (rfbm & XSTATE_SSE_MASK) { 2863 /* Note that the standard form of XRSTOR loads MXCSR from memory 2864 whether or not the XSTATE_BV bit is set. */ 2865 do_xrstor_mxcsr(env, ptr, ra); 2866 if (xstate_bv & XSTATE_SSE_MASK) { 2867 do_xrstor_sse(env, ptr, ra); 2868 } else { 2869 /* ??? When AVX is implemented, we may have to be more 2870 selective in the clearing. 
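env->xmm_regs holds the full-width vector registers, so this memset also clears bytes that will belong to the AVX state components.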
*/ 2871 memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); 2872 } 2873 } 2874 if (rfbm & XSTATE_BNDREGS_MASK) { 2875 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2876 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2877 env->hflags |= HF_MPX_IU_MASK; 2878 } else { 2879 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2880 env->hflags &= ~HF_MPX_IU_MASK; 2881 } 2882 } 2883 if (rfbm & XSTATE_BNDCSR_MASK) { 2884 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2885 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2886 } else { 2887 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2888 } 2889 cpu_sync_bndcs_hflags(env); 2890 } 2891 if (rfbm & XSTATE_PKRU_MASK) { 2892 uint64_t old_pkru = env->pkru; 2893 if (xstate_bv & XSTATE_PKRU_MASK) { 2894 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2895 } else { 2896 env->pkru = 0; 2897 } 2898 if (env->pkru != old_pkru) { 2899 CPUState *cs = env_cpu(env); 2900 tlb_flush(cs); 2901 } 2902 } 2903 } 2904 2905 #undef XO 2906 2907 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2908 { 2909 /* The OS must have enabled XSAVE. */ 2910 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2911 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2912 } 2913 2914 switch (ecx) { 2915 case 0: 2916 return env->xcr0; 2917 case 1: 2918 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2919 return env->xcr0 & get_xinuse(env); 2920 } 2921 break; 2922 } 2923 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2924 } 2925 2926 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 2927 { 2928 uint32_t dummy, ena_lo, ena_hi; 2929 uint64_t ena; 2930 2931 /* The OS must have enabled XSAVE. */ 2932 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2933 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2934 } 2935 2936 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 2937 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 2938 goto do_gpf; 2939 } 2940 2941 /* Disallow enabling unimplemented features. */ 2942 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 2943 ena = ((uint64_t)ena_hi << 32) | ena_lo; 2944 if (mask & ~ena) { 2945 goto do_gpf; 2946 } 2947 2948 /* Disallow enabling only half of MPX. */ 2949 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 2950 & XSTATE_BNDCSR_MASK) { 2951 goto do_gpf; 2952 } 2953 2954 env->xcr0 = mask; 2955 cpu_sync_bndcs_hflags(env); 2956 return; 2957 2958 do_gpf: 2959 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2960 } 2961 2962 /* MMX/SSE */ 2963 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 2964 2965 #define SSE_DAZ 0x0040 2966 #define SSE_RC_MASK 0x6000 2967 #define SSE_RC_NEAR 0x0000 2968 #define SSE_RC_DOWN 0x2000 2969 #define SSE_RC_UP 0x4000 2970 #define SSE_RC_CHOP 0x6000 2971 #define SSE_FZ 0x8000 2972 2973 void update_mxcsr_status(CPUX86State *env) 2974 { 2975 uint32_t mxcsr = env->mxcsr; 2976 int rnd_type; 2977 2978 /* set rounding mode */ 2979 switch (mxcsr & SSE_RC_MASK) { 2980 default: 2981 case SSE_RC_NEAR: 2982 rnd_type = float_round_nearest_even; 2983 break; 2984 case SSE_RC_DOWN: 2985 rnd_type = float_round_down; 2986 break; 2987 case SSE_RC_UP: 2988 rnd_type = float_round_up; 2989 break; 2990 case SSE_RC_CHOP: 2991 rnd_type = float_round_to_zero; 2992 break; 2993 } 2994 set_float_rounding_mode(rnd_type, &env->sse_status); 2995 2996 /* Set exception flags. */ 2997 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 2998 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 2999 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3000 (mxcsr & FPUS_UE ? 
float_flag_underflow : 0) | 3001 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3002 &env->sse_status); 3003 3004 /* set denormals are zero */ 3005 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3006 3007 /* set flush to zero */ 3008 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3009 } 3010 3011 void update_mxcsr_from_sse_status(CPUX86State *env) 3012 { 3013 uint8_t flags = get_float_exception_flags(&env->sse_status); 3014 /* 3015 * The MXCSR denormal flag has opposite semantics to 3016 * float_flag_input_denormal (the softfloat code sets that flag 3017 * only when flushing input denormals to zero, but SSE sets it 3018 * only when not flushing them to zero), so is not converted 3019 * here. 3020 */ 3021 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3022 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3023 (flags & float_flag_overflow ? FPUS_OE : 0) | 3024 (flags & float_flag_underflow ? FPUS_UE : 0) | 3025 (flags & float_flag_inexact ? FPUS_PE : 0) | 3026 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3027 0)); 3028 } 3029 3030 void helper_update_mxcsr(CPUX86State *env) 3031 { 3032 update_mxcsr_from_sse_status(env); 3033 } 3034 3035 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3036 { 3037 cpu_set_mxcsr(env, val); 3038 } 3039 3040 void helper_enter_mmx(CPUX86State *env) 3041 { 3042 env->fpstt = 0; 3043 *(uint32_t *)(env->fptags) = 0; 3044 *(uint32_t *)(env->fptags + 4) = 0; 3045 } 3046 3047 void helper_emms(CPUX86State *env) 3048 { 3049 /* set to empty state */ 3050 *(uint32_t *)(env->fptags) = 0x01010101; 3051 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3052 } 3053 3054 /* XXX: suppress */ 3055 void helper_movq(CPUX86State *env, void *d, void *s) 3056 { 3057 *(uint64_t *)d = *(uint64_t *)s; 3058 } 3059 3060 #define SHIFT 0 3061 #include "ops_sse.h" 3062 3063 #define SHIFT 1 3064 #include "ops_sse.h" 3065
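/* ops_sse.h is included twice so that each helper is instantiated for both operand widths: SHIFT 0 builds the 64-bit MMX variants and SHIFT 1 builds the 128-bit SSE variants. */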