1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "exec/helper-proto.h" 24 #include "qemu/host-utils.h" 25 #include "exec/exec-all.h" 26 #include "exec/cpu_ldst.h" 27 #include "fpu/softfloat.h" 28 #include "fpu/softfloat-macros.h" 29 #include "helper-tcg.h" 30 31 #ifdef CONFIG_SOFTMMU 32 #include "hw/irq.h" 33 #endif 34 35 /* float macros */ 36 #define FT0 (env->ft0) 37 #define ST0 (env->fpregs[env->fpstt].d) 38 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 39 #define ST1 ST(1) 40 41 #define FPU_RC_MASK 0xc00 42 #define FPU_RC_NEAR 0x000 43 #define FPU_RC_DOWN 0x400 44 #define FPU_RC_UP 0x800 45 #define FPU_RC_CHOP 0xc00 46 47 #define MAXTAN 9223372036854775808.0 48 49 /* the following deal with x86 long double-precision numbers */ 50 #define MAXEXPD 0x7fff 51 #define EXPBIAS 16383 52 #define EXPD(fp) (fp.l.upper & 0x7fff) 53 #define SIGND(fp) ((fp.l.upper) & 0x8000) 54 #define MANTD(fp) (fp.l.lower) 55 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 56 57 #define FPUS_IE (1 << 0) 58 #define FPUS_DE (1 << 1) 59 #define FPUS_ZE (1 << 2) 60 #define FPUS_OE (1 << 3) 61 #define FPUS_UE (1 << 4) 62 #define FPUS_PE (1 << 5) 63 #define FPUS_SF (1 << 6) 64 #define FPUS_SE (1 << 7) 65 #define FPUS_B (1 << 15) 66 67 #define FPUC_EM 0x3f 68 69 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 70 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 71 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 72 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 73 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 74 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 75 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 76 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 77 78 #if !defined(CONFIG_USER_ONLY) 79 static qemu_irq ferr_irq; 80 81 void x86_register_ferr_irq(qemu_irq irq) 82 { 83 ferr_irq = irq; 84 } 85 86 static void cpu_clear_ignne(void) 87 { 88 CPUX86State *env = &X86_CPU(first_cpu)->env; 89 env->hflags2 &= ~HF2_IGNNE_MASK; 90 } 91 92 void cpu_set_ignne(void) 93 { 94 CPUX86State *env = &X86_CPU(first_cpu)->env; 95 env->hflags2 |= HF2_IGNNE_MASK; 96 /* 97 * We get here in response to a write to port F0h. The chipset should 98 * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is 99 * cleared, because FERR# and FP_IRQ are two separate pins on real 100 * hardware. However, we don't model FERR# as a qemu_irq, so we just 101 * do directly what the chipset would do, i.e. deassert FP_IRQ. 
102 */ 103 qemu_irq_lower(ferr_irq); 104 } 105 #endif 106 107 108 static inline void fpush(CPUX86State *env) 109 { 110 env->fpstt = (env->fpstt - 1) & 7; 111 env->fptags[env->fpstt] = 0; /* validate stack entry */ 112 } 113 114 static inline void fpop(CPUX86State *env) 115 { 116 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 117 env->fpstt = (env->fpstt + 1) & 7; 118 } 119 120 static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr, 121 uintptr_t retaddr) 122 { 123 CPU_LDoubleU temp; 124 125 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 126 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 127 return temp.d; 128 } 129 130 static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 131 uintptr_t retaddr) 132 { 133 CPU_LDoubleU temp; 134 135 temp.d = f; 136 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 137 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 138 } 139 140 /* x87 FPU helpers */ 141 142 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 143 { 144 union { 145 float64 f64; 146 double d; 147 } u; 148 149 u.f64 = floatx80_to_float64(a, &env->fp_status); 150 return u.d; 151 } 152 153 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 154 { 155 union { 156 float64 f64; 157 double d; 158 } u; 159 160 u.d = a; 161 return float64_to_floatx80(u.f64, &env->fp_status); 162 } 163 164 static void fpu_set_exception(CPUX86State *env, int mask) 165 { 166 env->fpus |= mask; 167 if (env->fpus & (~env->fpuc & FPUC_EM)) { 168 env->fpus |= FPUS_SE | FPUS_B; 169 } 170 } 171 172 static inline uint8_t save_exception_flags(CPUX86State *env) 173 { 174 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 175 set_float_exception_flags(0, &env->fp_status); 176 return old_flags; 177 } 178 179 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 180 { 181 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 182 float_raise(old_flags, &env->fp_status); 183 fpu_set_exception(env, 184 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 185 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 186 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 187 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 188 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 189 (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); 190 } 191 192 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 193 { 194 uint8_t old_flags = save_exception_flags(env); 195 floatx80 ret = floatx80_div(a, b, &env->fp_status); 196 merge_exception_flags(env, old_flags); 197 return ret; 198 } 199 200 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 201 { 202 if (env->cr[0] & CR0_NE_MASK) { 203 raise_exception_ra(env, EXCP10_COPR, retaddr); 204 } 205 #if !defined(CONFIG_USER_ONLY) 206 else if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { 207 qemu_irq_raise(ferr_irq); 208 } 209 #endif 210 } 211 212 void helper_flds_FT0(CPUX86State *env, uint32_t val) 213 { 214 uint8_t old_flags = save_exception_flags(env); 215 union { 216 float32 f; 217 uint32_t i; 218 } u; 219 220 u.i = val; 221 FT0 = float32_to_floatx80(u.f, &env->fp_status); 222 merge_exception_flags(env, old_flags); 223 } 224 225 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 226 { 227 uint8_t old_flags = save_exception_flags(env); 228 union { 229 float64 f; 230 uint64_t i; 231 } u; 232 233 u.i = val; 234 FT0 = float64_to_floatx80(u.f, &env->fp_status); 235 merge_exception_flags(env, old_flags); 236 } 237 238 void helper_fildl_FT0(CPUX86State *env, int32_t val) 239 { 240 FT0 = int32_to_floatx80(val, &env->fp_status); 241 } 242 243 void helper_flds_ST0(CPUX86State *env, uint32_t val) 244 { 245 uint8_t old_flags = save_exception_flags(env); 246 int new_fpstt; 247 union { 248 float32 f; 249 uint32_t i; 250 } u; 251 252 new_fpstt = (env->fpstt - 1) & 7; 253 u.i = val; 254 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 255 env->fpstt = new_fpstt; 256 env->fptags[new_fpstt] = 0; /* validate stack entry */ 257 merge_exception_flags(env, old_flags); 258 } 259 260 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 261 { 262 uint8_t old_flags = save_exception_flags(env); 263 int new_fpstt; 264 union { 265 float64 f; 266 uint64_t i; 267 } u; 268 269 new_fpstt = (env->fpstt - 1) & 7; 270 u.i = val; 271 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 272 env->fpstt = new_fpstt; 273 env->fptags[new_fpstt] = 0; /* validate stack entry */ 274 merge_exception_flags(env, old_flags); 275 } 276 277 void helper_fildl_ST0(CPUX86State *env, int32_t val) 278 { 279 int new_fpstt; 280 281 new_fpstt = (env->fpstt - 1) & 7; 282 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 283 env->fpstt = new_fpstt; 284 env->fptags[new_fpstt] = 0; /* validate stack entry */ 285 } 286 287 void helper_fildll_ST0(CPUX86State *env, int64_t val) 288 { 289 int new_fpstt; 290 291 new_fpstt = (env->fpstt - 1) & 7; 292 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 293 env->fpstt = new_fpstt; 294 env->fptags[new_fpstt] = 0; /* validate stack entry */ 295 } 296 297 uint32_t helper_fsts_ST0(CPUX86State *env) 298 { 299 uint8_t old_flags = save_exception_flags(env); 300 union { 301 float32 f; 302 uint32_t i; 303 } u; 304 305 u.f = floatx80_to_float32(ST0, &env->fp_status); 306 merge_exception_flags(env, old_flags); 307 return u.i; 308 } 309 310 uint64_t helper_fstl_ST0(CPUX86State *env) 311 { 312 uint8_t old_flags = save_exception_flags(env); 313 union { 314 float64 f; 315 uint64_t i; 316 } u; 317 318 u.f = floatx80_to_float64(ST0, &env->fp_status); 319 merge_exception_flags(env, old_flags); 320 return u.i; 321 } 322 323 int32_t helper_fist_ST0(CPUX86State *env) 324 { 325 uint8_t old_flags = save_exception_flags(env); 326 int32_t val; 327 328 val = floatx80_to_int32(ST0, 
&env->fp_status); 329 if (val != (int16_t)val) { 330 set_float_exception_flags(float_flag_invalid, &env->fp_status); 331 val = -32768; 332 } 333 merge_exception_flags(env, old_flags); 334 return val; 335 } 336 337 int32_t helper_fistl_ST0(CPUX86State *env) 338 { 339 uint8_t old_flags = save_exception_flags(env); 340 int32_t val; 341 342 val = floatx80_to_int32(ST0, &env->fp_status); 343 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 344 val = 0x80000000; 345 } 346 merge_exception_flags(env, old_flags); 347 return val; 348 } 349 350 int64_t helper_fistll_ST0(CPUX86State *env) 351 { 352 uint8_t old_flags = save_exception_flags(env); 353 int64_t val; 354 355 val = floatx80_to_int64(ST0, &env->fp_status); 356 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 357 val = 0x8000000000000000ULL; 358 } 359 merge_exception_flags(env, old_flags); 360 return val; 361 } 362 363 int32_t helper_fistt_ST0(CPUX86State *env) 364 { 365 uint8_t old_flags = save_exception_flags(env); 366 int32_t val; 367 368 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 369 if (val != (int16_t)val) { 370 set_float_exception_flags(float_flag_invalid, &env->fp_status); 371 val = -32768; 372 } 373 merge_exception_flags(env, old_flags); 374 return val; 375 } 376 377 int32_t helper_fisttl_ST0(CPUX86State *env) 378 { 379 uint8_t old_flags = save_exception_flags(env); 380 int32_t val; 381 382 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 383 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 384 val = 0x80000000; 385 } 386 merge_exception_flags(env, old_flags); 387 return val; 388 } 389 390 int64_t helper_fisttll_ST0(CPUX86State *env) 391 { 392 uint8_t old_flags = save_exception_flags(env); 393 int64_t val; 394 395 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 396 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 397 val = 0x8000000000000000ULL; 398 } 399 merge_exception_flags(env, old_flags); 400 return val; 401 } 402 403 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 404 { 405 int new_fpstt; 406 407 new_fpstt = (env->fpstt - 1) & 7; 408 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC()); 409 env->fpstt = new_fpstt; 410 env->fptags[new_fpstt] = 0; /* validate stack entry */ 411 } 412 413 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 414 { 415 helper_fstt(env, ST0, ptr, GETPC()); 416 } 417 418 void helper_fpush(CPUX86State *env) 419 { 420 fpush(env); 421 } 422 423 void helper_fpop(CPUX86State *env) 424 { 425 fpop(env); 426 } 427 428 void helper_fdecstp(CPUX86State *env) 429 { 430 env->fpstt = (env->fpstt - 1) & 7; 431 env->fpus &= ~0x4700; 432 } 433 434 void helper_fincstp(CPUX86State *env) 435 { 436 env->fpstt = (env->fpstt + 1) & 7; 437 env->fpus &= ~0x4700; 438 } 439 440 /* FPU move */ 441 442 void helper_ffree_STN(CPUX86State *env, int st_index) 443 { 444 env->fptags[(env->fpstt + st_index) & 7] = 1; 445 } 446 447 void helper_fmov_ST0_FT0(CPUX86State *env) 448 { 449 ST0 = FT0; 450 } 451 452 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 453 { 454 FT0 = ST(st_index); 455 } 456 457 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 458 { 459 ST0 = ST(st_index); 460 } 461 462 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 463 { 464 ST(st_index) = ST0; 465 } 466 467 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 468 { 469 floatx80 tmp; 470 471 tmp = ST(st_index); 472 ST(st_index) = ST0; 473 ST0 = tmp; 474 } 475 476 /* FPU operations */ 477 
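/*
 * Condition-code values used by FCOM/FUCOM below, indexed by
 * FloatRelation + 1 (less, equal, greater, unordered).  C0 is
 * status-word bit 8, C2 is bit 10 and C3 is bit 14, so 0x4500 masks
 * all three and an unordered compare sets C3, C2 and C0 together.
 */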
478 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 479 480 void helper_fcom_ST0_FT0(CPUX86State *env) 481 { 482 uint8_t old_flags = save_exception_flags(env); 483 FloatRelation ret; 484 485 ret = floatx80_compare(ST0, FT0, &env->fp_status); 486 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 487 merge_exception_flags(env, old_flags); 488 } 489 490 void helper_fucom_ST0_FT0(CPUX86State *env) 491 { 492 uint8_t old_flags = save_exception_flags(env); 493 FloatRelation ret; 494 495 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 496 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 497 merge_exception_flags(env, old_flags); 498 } 499 500 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 501 502 void helper_fcomi_ST0_FT0(CPUX86State *env) 503 { 504 uint8_t old_flags = save_exception_flags(env); 505 int eflags; 506 FloatRelation ret; 507 508 ret = floatx80_compare(ST0, FT0, &env->fp_status); 509 eflags = cpu_cc_compute_all(env, CC_OP); 510 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 511 CC_SRC = eflags; 512 merge_exception_flags(env, old_flags); 513 } 514 515 void helper_fucomi_ST0_FT0(CPUX86State *env) 516 { 517 uint8_t old_flags = save_exception_flags(env); 518 int eflags; 519 FloatRelation ret; 520 521 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 522 eflags = cpu_cc_compute_all(env, CC_OP); 523 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 524 CC_SRC = eflags; 525 merge_exception_flags(env, old_flags); 526 } 527 528 void helper_fadd_ST0_FT0(CPUX86State *env) 529 { 530 uint8_t old_flags = save_exception_flags(env); 531 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 532 merge_exception_flags(env, old_flags); 533 } 534 535 void helper_fmul_ST0_FT0(CPUX86State *env) 536 { 537 uint8_t old_flags = save_exception_flags(env); 538 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 539 merge_exception_flags(env, old_flags); 540 } 541 542 void helper_fsub_ST0_FT0(CPUX86State *env) 543 { 544 uint8_t old_flags = save_exception_flags(env); 545 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 546 merge_exception_flags(env, old_flags); 547 } 548 549 void helper_fsubr_ST0_FT0(CPUX86State *env) 550 { 551 uint8_t old_flags = save_exception_flags(env); 552 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 553 merge_exception_flags(env, old_flags); 554 } 555 556 void helper_fdiv_ST0_FT0(CPUX86State *env) 557 { 558 ST0 = helper_fdiv(env, ST0, FT0); 559 } 560 561 void helper_fdivr_ST0_FT0(CPUX86State *env) 562 { 563 ST0 = helper_fdiv(env, FT0, ST0); 564 } 565 566 /* fp operations between STN and ST0 */ 567 568 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 569 { 570 uint8_t old_flags = save_exception_flags(env); 571 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 572 merge_exception_flags(env, old_flags); 573 } 574 575 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 576 { 577 uint8_t old_flags = save_exception_flags(env); 578 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 579 merge_exception_flags(env, old_flags); 580 } 581 582 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 583 { 584 uint8_t old_flags = save_exception_flags(env); 585 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 586 merge_exception_flags(env, old_flags); 587 } 588 589 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 590 { 591 uint8_t old_flags = save_exception_flags(env); 592 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 593 
merge_exception_flags(env, old_flags); 594 } 595 596 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 597 { 598 floatx80 *p; 599 600 p = &ST(st_index); 601 *p = helper_fdiv(env, *p, ST0); 602 } 603 604 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 605 { 606 floatx80 *p; 607 608 p = &ST(st_index); 609 *p = helper_fdiv(env, ST0, *p); 610 } 611 612 /* misc FPU operations */ 613 void helper_fchs_ST0(CPUX86State *env) 614 { 615 ST0 = floatx80_chs(ST0); 616 } 617 618 void helper_fabs_ST0(CPUX86State *env) 619 { 620 ST0 = floatx80_abs(ST0); 621 } 622 623 void helper_fld1_ST0(CPUX86State *env) 624 { 625 ST0 = floatx80_one; 626 } 627 628 void helper_fldl2t_ST0(CPUX86State *env) 629 { 630 switch (env->fpuc & FPU_RC_MASK) { 631 case FPU_RC_UP: 632 ST0 = floatx80_l2t_u; 633 break; 634 default: 635 ST0 = floatx80_l2t; 636 break; 637 } 638 } 639 640 void helper_fldl2e_ST0(CPUX86State *env) 641 { 642 switch (env->fpuc & FPU_RC_MASK) { 643 case FPU_RC_DOWN: 644 case FPU_RC_CHOP: 645 ST0 = floatx80_l2e_d; 646 break; 647 default: 648 ST0 = floatx80_l2e; 649 break; 650 } 651 } 652 653 void helper_fldpi_ST0(CPUX86State *env) 654 { 655 switch (env->fpuc & FPU_RC_MASK) { 656 case FPU_RC_DOWN: 657 case FPU_RC_CHOP: 658 ST0 = floatx80_pi_d; 659 break; 660 default: 661 ST0 = floatx80_pi; 662 break; 663 } 664 } 665 666 void helper_fldlg2_ST0(CPUX86State *env) 667 { 668 switch (env->fpuc & FPU_RC_MASK) { 669 case FPU_RC_DOWN: 670 case FPU_RC_CHOP: 671 ST0 = floatx80_lg2_d; 672 break; 673 default: 674 ST0 = floatx80_lg2; 675 break; 676 } 677 } 678 679 void helper_fldln2_ST0(CPUX86State *env) 680 { 681 switch (env->fpuc & FPU_RC_MASK) { 682 case FPU_RC_DOWN: 683 case FPU_RC_CHOP: 684 ST0 = floatx80_ln2_d; 685 break; 686 default: 687 ST0 = floatx80_ln2; 688 break; 689 } 690 } 691 692 void helper_fldz_ST0(CPUX86State *env) 693 { 694 ST0 = floatx80_zero; 695 } 696 697 void helper_fldz_FT0(CPUX86State *env) 698 { 699 FT0 = floatx80_zero; 700 } 701 702 uint32_t helper_fnstsw(CPUX86State *env) 703 { 704 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 705 } 706 707 uint32_t helper_fnstcw(CPUX86State *env) 708 { 709 return env->fpuc; 710 } 711 712 void update_fp_status(CPUX86State *env) 713 { 714 int rnd_type; 715 716 /* set rounding mode */ 717 switch (env->fpuc & FPU_RC_MASK) { 718 default: 719 case FPU_RC_NEAR: 720 rnd_type = float_round_nearest_even; 721 break; 722 case FPU_RC_DOWN: 723 rnd_type = float_round_down; 724 break; 725 case FPU_RC_UP: 726 rnd_type = float_round_up; 727 break; 728 case FPU_RC_CHOP: 729 rnd_type = float_round_to_zero; 730 break; 731 } 732 set_float_rounding_mode(rnd_type, &env->fp_status); 733 switch ((env->fpuc >> 8) & 3) { 734 case 0: 735 rnd_type = 32; 736 break; 737 case 2: 738 rnd_type = 64; 739 break; 740 case 3: 741 default: 742 rnd_type = 80; 743 break; 744 } 745 set_floatx80_rounding_precision(rnd_type, &env->fp_status); 746 } 747 748 void helper_fldcw(CPUX86State *env, uint32_t val) 749 { 750 cpu_set_fpuc(env, val); 751 } 752 753 void helper_fclex(CPUX86State *env) 754 { 755 env->fpus &= 0x7f00; 756 } 757 758 void helper_fwait(CPUX86State *env) 759 { 760 if (env->fpus & FPUS_SE) { 761 fpu_raise_exception(env, GETPC()); 762 } 763 } 764 765 void helper_fninit(CPUX86State *env) 766 { 767 env->fpus = 0; 768 env->fpstt = 0; 769 cpu_set_fpuc(env, 0x37f); 770 env->fptags[0] = 1; 771 env->fptags[1] = 1; 772 env->fptags[2] = 1; 773 env->fptags[3] = 1; 774 env->fptags[4] = 1; 775 env->fptags[5] = 1; 776 env->fptags[6] = 1; 777 env->fptags[7] = 1; 778 } 779 780 /* BCD 
ops */ 781 782 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 783 { 784 floatx80 tmp; 785 uint64_t val; 786 unsigned int v; 787 int i; 788 789 val = 0; 790 for (i = 8; i >= 0; i--) { 791 v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 792 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 793 } 794 tmp = int64_to_floatx80(val, &env->fp_status); 795 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 796 tmp = floatx80_chs(tmp); 797 } 798 fpush(env); 799 ST0 = tmp; 800 } 801 802 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 803 { 804 uint8_t old_flags = save_exception_flags(env); 805 int v; 806 target_ulong mem_ref, mem_end; 807 int64_t val; 808 CPU_LDoubleU temp; 809 810 temp.d = ST0; 811 812 val = floatx80_to_int64(ST0, &env->fp_status); 813 mem_ref = ptr; 814 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 815 set_float_exception_flags(float_flag_invalid, &env->fp_status); 816 while (mem_ref < ptr + 7) { 817 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 818 } 819 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 820 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 821 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 822 merge_exception_flags(env, old_flags); 823 return; 824 } 825 mem_end = mem_ref + 9; 826 if (SIGND(temp)) { 827 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 828 val = -val; 829 } else { 830 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 831 } 832 while (mem_ref < mem_end) { 833 if (val == 0) { 834 break; 835 } 836 v = val % 100; 837 val = val / 100; 838 v = ((v / 10) << 4) | (v % 10); 839 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 840 } 841 while (mem_ref < mem_end) { 842 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 843 } 844 merge_exception_flags(env, old_flags); 845 } 846 847 /* 128-bit significand of log(2). */ 848 #define ln2_sig_high 0xb17217f7d1cf79abULL 849 #define ln2_sig_low 0xc9e3b39803f2f6afULL 850 851 /* 852 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 853 * the interval [-1/64, 1/64]. 854 */ 855 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 856 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 857 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 858 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 859 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 860 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 861 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 862 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 863 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 864 865 struct f2xm1_data { 866 /* 867 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 868 * are very close to exact floatx80 values. 869 */ 870 floatx80 t; 871 /* The value of 2^t. */ 872 floatx80 exp2; 873 /* The value of 2^t - 1. 
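     * (2^t - 1 is tabulated separately rather than derived as
     * exp2 - 1 so that it keeps full relative precision even when it
     * is much smaller than 2^t.)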
*/ 874 floatx80 exp2m1; 875 }; 876 877 static const struct f2xm1_data f2xm1_table[65] = { 878 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 879 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 880 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 881 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 882 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 883 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 884 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 885 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 886 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 887 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 888 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 889 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 890 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 891 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 892 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 893 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 894 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 895 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 896 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 897 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 898 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 899 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 900 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 901 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 902 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 903 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 904 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 905 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 906 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 907 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 908 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 909 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 910 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 911 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 912 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 913 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 914 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 915 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 916 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 917 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 918 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 919 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 920 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 921 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 922 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 923 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 924 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 925 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 926 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 927 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 928 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 929 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 930 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 931 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 932 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 933 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 934 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 935 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 936 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 937 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 938 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 939 
make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 940 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 941 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 942 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 943 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 944 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 945 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 946 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 947 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 948 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 949 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 950 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 951 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 952 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 953 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 954 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 955 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 956 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 957 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 958 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 959 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 960 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 961 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 962 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 963 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 964 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 965 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 966 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 967 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 968 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 969 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 970 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 971 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 972 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 973 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 974 { floatx80_zero_init, 975 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 976 floatx80_zero_init }, 977 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 978 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 979 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 980 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 981 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 982 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 983 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 984 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 985 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 986 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 987 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 988 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 989 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 990 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 991 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 992 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 993 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 994 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 995 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 996 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 997 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 998 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 999 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 1000 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1001 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 1002 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 1003 
make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1004 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1005 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1006 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1007 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1008 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1009 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1010 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1011 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1012 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1013 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1014 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1015 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1016 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1017 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1018 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1019 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1020 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1021 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1022 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1023 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1024 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1025 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1026 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1027 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1028 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1029 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1030 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1031 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1032 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1033 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1034 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1035 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1036 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1037 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1038 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1039 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1040 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1041 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1042 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1043 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1044 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1045 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1046 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1047 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1048 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1049 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1050 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1051 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1052 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1053 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1054 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1055 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1056 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1057 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1058 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1059 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1060 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1061 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1062 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1063 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1064 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact. */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
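             * The coefficients are applied below with Horner's scheme,
             * highest-degree term first; only the low part of
             * coefficient 0 is folded in here, its high part is added
             * in 128-bit arithmetic further down.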
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.
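             * Setting the low significand bit below acts as a sticky
             * bit, so the final rounding cannot see an exact value and
             * always raises the inexact flag.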
*/ 1223 asig1 |= 1; 1224 ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1, 1225 &env->fp_status); 1226 } 1227 1228 env->fp_status.floatx80_rounding_precision = save_prec; 1229 } 1230 merge_exception_flags(env, old_flags); 1231 } 1232 1233 void helper_fptan(CPUX86State *env) 1234 { 1235 double fptemp = floatx80_to_double(env, ST0); 1236 1237 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1238 env->fpus |= 0x400; 1239 } else { 1240 fptemp = tan(fptemp); 1241 ST0 = double_to_floatx80(env, fptemp); 1242 fpush(env); 1243 ST0 = floatx80_one; 1244 env->fpus &= ~0x400; /* C2 <-- 0 */ 1245 /* the above code is for |arg| < 2**52 only */ 1246 } 1247 } 1248 1249 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */ 1250 #define pi_4_exp 0x3ffe 1251 #define pi_4_sig_high 0xc90fdaa22168c234ULL 1252 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL 1253 #define pi_2_exp 0x3fff 1254 #define pi_2_sig_high 0xc90fdaa22168c234ULL 1255 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL 1256 #define pi_34_exp 0x4000 1257 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL 1258 #define pi_34_sig_low 0x9394c9e8a0a5159dULL 1259 #define pi_exp 0x4000 1260 #define pi_sig_high 0xc90fdaa22168c234ULL 1261 #define pi_sig_low 0xc4c6628b80dc1cd1ULL 1262 1263 /* 1264 * Polynomial coefficients for an approximation to atan(x), with only 1265 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike 1266 * for some other approximations, no low part is needed for the first 1267 * coefficient here to achieve a sufficiently accurate result, because 1268 * the coefficient in this minimax approximation is very close to 1269 * exactly 1.) 1270 */ 1271 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) 1272 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) 1273 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) 1274 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) 1275 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) 1276 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) 1277 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) 1278 1279 struct fpatan_data { 1280 /* High and low parts of atan(x). 
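     * (atan_high is atan(n/8) rounded to floatx80 and atan_low is the
     * remaining error, so accumulating both in the 128-bit arithmetic
     * below gives roughly twice the working precision.)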
*/ 1281 floatx80 atan_high, atan_low; 1282 }; 1283 1284 static const struct fpatan_data fpatan_table[9] = { 1285 { floatx80_zero_init, 1286 floatx80_zero_init }, 1287 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), 1288 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, 1289 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), 1290 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, 1291 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), 1292 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, 1293 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), 1294 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, 1295 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), 1296 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, 1297 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), 1298 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, 1299 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), 1300 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, 1301 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), 1302 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, 1303 }; 1304 1305 void helper_fpatan(CPUX86State *env) 1306 { 1307 uint8_t old_flags = save_exception_flags(env); 1308 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1309 int32_t arg0_exp = extractFloatx80Exp(ST0); 1310 bool arg0_sign = extractFloatx80Sign(ST0); 1311 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1312 int32_t arg1_exp = extractFloatx80Exp(ST1); 1313 bool arg1_sign = extractFloatx80Sign(ST1); 1314 1315 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1316 float_raise(float_flag_invalid, &env->fp_status); 1317 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1318 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1319 float_raise(float_flag_invalid, &env->fp_status); 1320 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1321 } else if (floatx80_invalid_encoding(ST0) || 1322 floatx80_invalid_encoding(ST1)) { 1323 float_raise(float_flag_invalid, &env->fp_status); 1324 ST1 = floatx80_default_nan(&env->fp_status); 1325 } else if (floatx80_is_any_nan(ST0)) { 1326 ST1 = ST0; 1327 } else if (floatx80_is_any_nan(ST1)) { 1328 /* Pass this NaN through. */ 1329 } else if (floatx80_is_zero(ST1) && !arg0_sign) { 1330 /* Pass this zero through. */ 1331 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || 1332 arg0_exp - arg1_exp >= 80) && 1333 !arg0_sign) { 1334 /* 1335 * Dividing ST1 by ST0 gives the correct result up to 1336 * rounding, and avoids spurious underflow exceptions that 1337 * might result from passing some small values through the 1338 * polynomial approximation, but if a finite nonzero result of 1339 * division is exact, the result of fpatan is still inexact 1340 * (and underflowing where appropriate). 1341 */ 1342 signed char save_prec = env->fp_status.floatx80_rounding_precision; 1343 env->fp_status.floatx80_rounding_precision = 80; 1344 ST1 = floatx80_div(ST1, ST0, &env->fp_status); 1345 env->fp_status.floatx80_rounding_precision = save_prec; 1346 if (!floatx80_is_zero(ST1) && 1347 !(get_float_exception_flags(&env->fp_status) & 1348 float_flag_inexact)) { 1349 /* 1350 * The mathematical result is very slightly closer to zero 1351 * than this exact result. Round a value with the 1352 * significand adjusted accordingly to get the correct 1353 * exceptions, and possibly an adjusted result depending 1354 * on the rounding mode. 
1355 */ 1356 uint64_t sig = extractFloatx80Frac(ST1); 1357 int32_t exp = extractFloatx80Exp(ST1); 1358 bool sign = extractFloatx80Sign(ST1); 1359 if (exp == 0) { 1360 normalizeFloatx80Subnormal(sig, &exp, &sig); 1361 } 1362 ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1, 1363 -1, &env->fp_status); 1364 } 1365 } else { 1366 /* The result is inexact. */ 1367 bool rsign = arg1_sign; 1368 int32_t rexp; 1369 uint64_t rsig0, rsig1; 1370 if (floatx80_is_zero(ST1)) { 1371 /* 1372 * ST0 is negative. The result is pi with the sign of 1373 * ST1. 1374 */ 1375 rexp = pi_exp; 1376 rsig0 = pi_sig_high; 1377 rsig1 = pi_sig_low; 1378 } else if (floatx80_is_infinity(ST1)) { 1379 if (floatx80_is_infinity(ST0)) { 1380 if (arg0_sign) { 1381 rexp = pi_34_exp; 1382 rsig0 = pi_34_sig_high; 1383 rsig1 = pi_34_sig_low; 1384 } else { 1385 rexp = pi_4_exp; 1386 rsig0 = pi_4_sig_high; 1387 rsig1 = pi_4_sig_low; 1388 } 1389 } else { 1390 rexp = pi_2_exp; 1391 rsig0 = pi_2_sig_high; 1392 rsig1 = pi_2_sig_low; 1393 } 1394 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { 1395 rexp = pi_2_exp; 1396 rsig0 = pi_2_sig_high; 1397 rsig1 = pi_2_sig_low; 1398 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { 1399 /* ST0 is negative. */ 1400 rexp = pi_exp; 1401 rsig0 = pi_sig_high; 1402 rsig1 = pi_sig_low; 1403 } else { 1404 /* 1405 * ST0 and ST1 are finite, nonzero and with exponents not 1406 * too far apart. 1407 */ 1408 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; 1409 int32_t azexp, axexp; 1410 bool adj_sub, ysign, zsign; 1411 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; 1412 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; 1413 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; 1414 uint64_t azsig0, azsig1; 1415 uint64_t azsig2, azsig3, axsig0, axsig1; 1416 floatx80 x8; 1417 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1418 signed char save_prec = env->fp_status.floatx80_rounding_precision; 1419 env->fp_status.float_rounding_mode = float_round_nearest_even; 1420 env->fp_status.floatx80_rounding_precision = 80; 1421 1422 if (arg0_exp == 0) { 1423 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 1424 } 1425 if (arg1_exp == 0) { 1426 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 1427 } 1428 if (arg0_exp > arg1_exp || 1429 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { 1430 /* Work with abs(ST1) / abs(ST0). */ 1431 num_exp = arg1_exp; 1432 num_sig = arg1_sig; 1433 den_exp = arg0_exp; 1434 den_sig = arg0_sig; 1435 if (arg0_sign) { 1436 /* The result is subtracted from pi. */ 1437 adj_exp = pi_exp; 1438 adj_sig0 = pi_sig_high; 1439 adj_sig1 = pi_sig_low; 1440 adj_sub = true; 1441 } else { 1442 /* The result is used as-is. */ 1443 adj_exp = 0; 1444 adj_sig0 = 0; 1445 adj_sig1 = 0; 1446 adj_sub = false; 1447 } 1448 } else { 1449 /* Work with abs(ST0) / abs(ST1). */ 1450 num_exp = arg0_exp; 1451 num_sig = arg0_sig; 1452 den_exp = arg1_exp; 1453 den_sig = arg1_sig; 1454 /* The result is added to or subtracted from pi/2. */ 1455 adj_exp = pi_2_exp; 1456 adj_sig0 = pi_2_sig_high; 1457 adj_sig1 = pi_2_sig_low; 1458 adj_sub = !arg0_sign; 1459 } 1460 1461 /* 1462 * Compute x = num/den, where 0 < x <= 1 and x is not too 1463 * small. 
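             * The division below is a two-word long division:
             * estimateDiv128To64() guesses each 64-bit quotient word
             * and the high word's guess is corrected against the
             * running remainder.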
1464 */ 1465 xexp = num_exp - den_exp + 0x3ffe; 1466 remsig0 = num_sig; 1467 remsig1 = 0; 1468 if (den_sig <= remsig0) { 1469 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1470 ++xexp; 1471 } 1472 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); 1473 mul64To128(den_sig, xsig0, &msig0, &msig1); 1474 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); 1475 while ((int64_t) remsig0 < 0) { 1476 --xsig0; 1477 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); 1478 } 1479 xsig1 = estimateDiv128To64(remsig1, 0, den_sig); 1480 /* 1481 * No need to correct any estimation error in xsig1; even 1482 * with such error, it is accurate enough. 1483 */ 1484 1485 /* 1486 * Split x as x = t + y, where t = n/8 is the nearest 1487 * multiple of 1/8 to x. 1488 */ 1489 x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0, 1490 xsig1, &env->fp_status); 1491 n = floatx80_to_int32(x8, &env->fp_status); 1492 if (n == 0) { 1493 ysign = false; 1494 yexp = xexp; 1495 ysig0 = xsig0; 1496 ysig1 = xsig1; 1497 texp = 0; 1498 tsig = 0; 1499 } else { 1500 int shift = clz32(n) + 32; 1501 texp = 0x403b - shift; 1502 tsig = n; 1503 tsig <<= shift; 1504 if (texp == xexp) { 1505 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); 1506 if ((int64_t) ysig0 >= 0) { 1507 ysign = false; 1508 if (ysig0 == 0) { 1509 if (ysig1 == 0) { 1510 yexp = 0; 1511 } else { 1512 shift = clz64(ysig1) + 64; 1513 yexp = xexp - shift; 1514 shift128Left(ysig0, ysig1, shift, 1515 &ysig0, &ysig1); 1516 } 1517 } else { 1518 shift = clz64(ysig0); 1519 yexp = xexp - shift; 1520 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1521 } 1522 } else { 1523 ysign = true; 1524 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); 1525 if (ysig0 == 0) { 1526 shift = clz64(ysig1) + 64; 1527 } else { 1528 shift = clz64(ysig0); 1529 } 1530 yexp = xexp - shift; 1531 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1532 } 1533 } else { 1534 /* 1535 * t's exponent must be greater than x's because t 1536 * is positive and the nearest multiple of 1/8 to 1537 * x, and if x has a greater exponent, the power 1538 * of 2 with that exponent is also a multiple of 1539 * 1/8. 1540 */ 1541 uint64_t usig0, usig1; 1542 shift128RightJamming(xsig0, xsig1, texp - xexp, 1543 &usig0, &usig1); 1544 ysign = true; 1545 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); 1546 if (ysig0 == 0) { 1547 shift = clz64(ysig1) + 64; 1548 } else { 1549 shift = clz64(ysig0); 1550 } 1551 yexp = texp - shift; 1552 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1553 } 1554 } 1555 1556 /* 1557 * Compute z = y/(1+tx), so arctan(x) = arctan(t) + 1558 * arctan(z). 1559 */ 1560 zsign = ysign; 1561 if (texp == 0 || yexp == 0) { 1562 zexp = yexp; 1563 zsig0 = ysig0; 1564 zsig1 = ysig1; 1565 } else { 1566 /* 1567 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 1568 */ 1569 int32_t dexp = texp + xexp - 0x3ffe; 1570 uint64_t dsig0, dsig1, dsig2; 1571 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); 1572 /* 1573 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 1574 * bit). Add 1 to produce the denominator 1+tx. 
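                 * (After the jamming shift, dsig0:dsig1 holds the
                 * fraction tx with bit 63 of dsig0 as the units
                 * position; OR-ing in that bit supplies the leading 1,
                 * so the pair represents 1+tx, a value in [1, 2).)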
1575 */ 1576 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, 1577 &dsig0, &dsig1); 1578 dsig0 |= 0x8000000000000000ULL; 1579 zexp = yexp - 1; 1580 remsig0 = ysig0; 1581 remsig1 = ysig1; 1582 remsig2 = 0; 1583 if (dsig0 <= remsig0) { 1584 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1585 ++zexp; 1586 } 1587 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); 1588 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); 1589 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, 1590 &remsig0, &remsig1, &remsig2); 1591 while ((int64_t) remsig0 < 0) { 1592 --zsig0; 1593 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, 1594 &remsig0, &remsig1, &remsig2); 1595 } 1596 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); 1597 /* No need to correct any estimation error in zsig1. */ 1598 } 1599 1600 if (zexp == 0) { 1601 azexp = 0; 1602 azsig0 = 0; 1603 azsig1 = 0; 1604 } else { 1605 floatx80 z2, accum; 1606 uint64_t z2sig0, z2sig1, z2sig2, z2sig3; 1607 /* Compute z^2. */ 1608 mul128To256(zsig0, zsig1, zsig0, zsig1, 1609 &z2sig0, &z2sig1, &z2sig2, &z2sig3); 1610 z2 = normalizeRoundAndPackFloatx80(80, false, 1611 zexp + zexp - 0x3ffe, 1612 z2sig0, z2sig1, 1613 &env->fp_status); 1614 1615 /* Compute the lower parts of the polynomial expansion. */ 1616 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); 1617 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); 1618 accum = floatx80_mul(accum, z2, &env->fp_status); 1619 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); 1620 accum = floatx80_mul(accum, z2, &env->fp_status); 1621 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); 1622 accum = floatx80_mul(accum, z2, &env->fp_status); 1623 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); 1624 accum = floatx80_mul(accum, z2, &env->fp_status); 1625 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); 1626 accum = floatx80_mul(accum, z2, &env->fp_status); 1627 1628 /* 1629 * The full polynomial expansion is z*(fpatan_coeff_0 + accum). 1630 * fpatan_coeff_0 is 1, and accum is negative and much smaller. 1631 */ 1632 aexp = extractFloatx80Exp(fpatan_coeff_0); 1633 shift128RightJamming(extractFloatx80Frac(accum), 0, 1634 aexp - extractFloatx80Exp(accum), 1635 &asig0, &asig1); 1636 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, 1637 &asig0, &asig1); 1638 /* Multiply by z to compute arctan(z). */ 1639 azexp = aexp + zexp - 0x3ffe; 1640 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, 1641 &azsig2, &azsig3); 1642 } 1643 1644 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ 1645 if (texp == 0) { 1646 /* z is positive. 
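                 * (texp == 0 means t == 0, so arctan(x) is just
                 * arctan(z) and no table entry needs to be added.)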
*/ 1647 axexp = azexp; 1648 axsig0 = azsig0; 1649 axsig1 = azsig1; 1650 } else { 1651 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); 1652 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); 1653 uint64_t low_sig0 = 1654 extractFloatx80Frac(fpatan_table[n].atan_low); 1655 uint64_t low_sig1 = 0; 1656 axexp = extractFloatx80Exp(fpatan_table[n].atan_high); 1657 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); 1658 axsig1 = 0; 1659 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, 1660 &low_sig0, &low_sig1); 1661 if (low_sign) { 1662 sub128(axsig0, axsig1, low_sig0, low_sig1, 1663 &axsig0, &axsig1); 1664 } else { 1665 add128(axsig0, axsig1, low_sig0, low_sig1, 1666 &axsig0, &axsig1); 1667 } 1668 if (azexp >= axexp) { 1669 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, 1670 &axsig0, &axsig1); 1671 axexp = azexp + 1; 1672 shift128RightJamming(azsig0, azsig1, 1, 1673 &azsig0, &azsig1); 1674 } else { 1675 shift128RightJamming(axsig0, axsig1, 1, 1676 &axsig0, &axsig1); 1677 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, 1678 &azsig0, &azsig1); 1679 ++axexp; 1680 } 1681 if (zsign) { 1682 sub128(axsig0, axsig1, azsig0, azsig1, 1683 &axsig0, &axsig1); 1684 } else { 1685 add128(axsig0, axsig1, azsig0, azsig1, 1686 &axsig0, &axsig1); 1687 } 1688 } 1689 1690 if (adj_exp == 0) { 1691 rexp = axexp; 1692 rsig0 = axsig0; 1693 rsig1 = axsig1; 1694 } else { 1695 /* 1696 * Add or subtract arctan(x) (exponent axexp, 1697 * significand axsig0 and axsig1, positive, not 1698 * necessarily normalized) to the number given by 1699 * adj_exp, adj_sig0 and adj_sig1, according to 1700 * adj_sub. 1701 */ 1702 if (adj_exp >= axexp) { 1703 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1704 &axsig0, &axsig1); 1705 rexp = adj_exp + 1; 1706 shift128RightJamming(adj_sig0, adj_sig1, 1, 1707 &adj_sig0, &adj_sig1); 1708 } else { 1709 shift128RightJamming(axsig0, axsig1, 1, 1710 &axsig0, &axsig1); 1711 shift128RightJamming(adj_sig0, adj_sig1, 1712 axexp - adj_exp + 1, 1713 &adj_sig0, &adj_sig1); 1714 rexp = axexp + 1; 1715 } 1716 if (adj_sub) { 1717 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1718 &rsig0, &rsig1); 1719 } else { 1720 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1721 &rsig0, &rsig1); 1722 } 1723 } 1724 1725 env->fp_status.float_rounding_mode = save_mode; 1726 env->fp_status.floatx80_rounding_precision = save_prec; 1727 } 1728 /* This result is inexact. 
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
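
/*
 * Illustrative note (not from the original sources): FXTRACT decomposes
 * ST0 = m * 2^e and pushes, so that ST1 receives the exponent e (as a
 * floatx80) and ST0 receives the significand m scaled into [1.0, 2.0).
 * For example, 12.0 = 1.5 * 2^3 yields ST1 = 3.0 and ST0 = 1.5.
 */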

static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
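
/*
 * Added note: helper_fprem uses truncation ("mod" semantics, matching the
 * legacy FPREM instruction), while helper_fprem1 uses the IEEE remainder
 * with a round-to-nearest quotient (FPREM1).  When the exponent difference
 * is 64 or more, only a partial reduction is performed and C2 is set, so
 * guest code is expected to retry until C2 reads as 0, the same software
 * loop real hardware requires.
 */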

/* 128-bit significand of log2(e). */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}
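
/*
 * Added note: helper_fyl2x_common returns an unnormalized exponent and
 * 128-bit significand rather than a rounded floatx80, so that the callers
 * below (helper_fyl2xp1 and helper_fyl2x) can fold in the multiplication
 * by ST1 before doing a single final rounding.
 */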

void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
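
/*
 * Added note: FYL2XP1 computes ST1 * log2(1 + ST0) and pops, and is meant
 * for arguments close to zero where forming 1 + ST0 explicitly would lose
 * precision.  For example, with ST0 = 2^-30 and ST1 = 1.0 the result is
 * roughly 2^-30 * log2(e), about 1.34e-9, which a plain FYL2X applied to
 * the rounded sum 1 + 2^-30 would deliver with far fewer accurate bits.
 */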

void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact. */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}
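
/*
 * Added note: unlike the softfloat-based helpers above, FSINCOS here (and
 * FSIN/FCOS further below) round through the host 'double' type and libm,
 * so the results are only approximations of what real x87 hardware
 * produces, and the |arg| >= 2**63 case simply reports C2=1 without
 * reducing the argument.
 */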

void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        signed char save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = 80;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400;
    }
}
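
/*
 * Added note: the helper above encodes the FXAM class in C3/C2/C0 (FSW
 * bits 14, 10 and 8), with C1 holding the sign: 0x100 -> NaN, 0x400 ->
 * normal finite, 0x500 -> infinity, 0x4000 -> zero, 0x4100 -> empty
 * register, 0x4400 -> denormal; an unsupported (invalid-encoding)
 * operand leaves C3/C2/C0 all clear.
 */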

static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}

static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}

static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    cpu_set_fpus(env, fpus);
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}

void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fstenv(env, ptr, data32, GETPC());

    ptr += (14 << data32);
    for (i = 0; i < 8; i++) {
        tmp = ST(i);
        helper_fstt(env, tmp, ptr, GETPC());
        ptr += 10;
    }

    /* fninit */
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, GETPC());
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, ptr, GETPC());
        ST(i) = tmp;
        ptr += 10;
    }
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
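
/*
 * Added note: the legacy FSAVE/FRSTOR image is the FSTENV/FLDENV
 * environment (14 bytes in 16-bit mode, 28 bytes in 32-bit mode, hence
 * the "ptr += 14 << data32" above) followed by the eight stack registers
 * stored as 10-byte extended values; FSAVE additionally reinitialises the
 * FPU as FNINIT would.
 */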

#define XO(X) offsetof(X86XSaveArea, X)

static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        helper_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    update_mxcsr_from_sse_status(env);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
}

static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
                    env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
                    env->bndcs_regs.sts, ra);
}

static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr, env->pkru, ra);
}

void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}
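
/*
 * Added note: for the XSAVE-family helpers the requested-feature bitmap
 * (rfbm) comes from EDX:EAX and is masked with XCR0 before use; the "opt"
 * mask is what XSAVEOPT may actually write, while XSTATE_BV in the header
 * is updated so that each managed component reads back as either in use
 * or in its initial configuration.
 */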

static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0. */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
    }

    /* Update the XSTATE_BV field. */
    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = helper_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
}

static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address. */
    env->bndcs_regs.cfgu
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
    env->bndcs_regs.sts
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
}

static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif
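
/*
 * Added note: the standard-form XRSTOR below handles each component that
 * is requested in rfbm in one of two ways: if the corresponding XSTATE_BV
 * bit is set in the header, the component is loaded from memory;
 * otherwise it is reset to its initial configuration (FNINIT state,
 * zeroed XMM/BND/PKRU registers), with MXCSR always loaded when SSE state
 * is requested.
 */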

void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form. */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form. */

    /* The XSTATE_BV field must not set bits not present in XCR0. */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field.  */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set. */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing. */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}

#undef XO

uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features. */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX. */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */

#define SSE_DAZ             0x0040
#define SSE_RC_MASK         0x6000
#define SSE_RC_NEAR         0x0000
#define SSE_RC_DOWN         0x2000
#define SSE_RC_UP           0x4000
#define SSE_RC_CHOP         0x6000
#define SSE_FZ              0x8000
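
/*
 * Added note: MXCSR layout, for reference against the masks above and the
 * FPUS_* flag bits reused below: bits 0-5 are the sticky exception flags
 * (IE/DE/ZE/OE/UE/PE), bit 6 is DAZ, bits 7-12 are the exception mask
 * bits, bits 13-14 select the rounding mode and bit 15 is FZ.
 */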

void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int rnd_type;

    /* set rounding mode */
    switch (mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /* Set exception flags. */
    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
                              &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
     */
    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                   (flags & float_flag_overflow ? FPUS_OE : 0) |
                   (flags & float_flag_underflow ? FPUS_UE : 0) |
                   (flags & float_flag_inexact ? FPUS_PE : 0) |
                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
                    0));
}

void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}

void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}

void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    *(uint32_t *)(env->fptags) = 0;
    *(uint32_t *)(env->fptags + 4) = 0;
}

void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    *(uint32_t *)(env->fptags) = 0x01010101;
    *(uint32_t *)(env->fptags + 4) = 0x01010101;
}

/* XXX: suppress */
void helper_movq(CPUX86State *env, void *d, void *s)
{
    *(uint64_t *)d = *(uint64_t *)s;
}

#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"
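
/*
 * Added note: "ops_sse.h" is a template that is deliberately expanded
 * twice: with SHIFT defined as 0 it generates the 64-bit MMX variants of
 * the vector helpers, and with SHIFT defined as 1 it generates the
 * 128-bit SSE variants operating on the XMM registers.
 */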