/*
 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"

/* float macros */
/* FT0: scratch second operand for two-operand x87 instructions. */
#define FT0 (env->ft0)
/* ST0: top of the 8-entry circular register stack (indexed by fpstt). */
#define ST0 (env->fpregs[env->fpstt].d)
/* ST(n): n-th register below the top of stack, modulo 8. */
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/* Rounding-control field of the FPU control word (bits 10-11). */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/*
 * FPU status-word bits.  The first six are the exception flags that
 * merge_exception_flags() sets from the softfloat exception flags.
 */
#define FPUS_IE (1 << 0)   /* invalid operation */
#define FPUS_DE (1 << 1)   /* denormal operand */
#define FPUS_ZE (1 << 2)   /* divide by zero */
#define FPUS_OE (1 << 3)   /* overflow */
#define FPUS_UE (1 << 4)   /* underflow */
#define FPUS_PE (1 << 5)   /* precision (inexact) */
#define FPUS_SF (1 << 6)   /* stack fault */
#define FPUS_SE (1 << 7)   /* error summary */
#define FPUS_B (1 << 15)   /* busy */

/* Exception-mask bits in the FPU control word (one per FPUS_* flag). */
#define FPUC_EM 0x3f

/*
 * Extended-precision constants for the FLD<const> helpers below.  The
 * _d ("rounded down") and _u ("rounded up") variants differ from the
 * base value by one ulp and are selected according to the rounding
 * control in force when the constant is loaded.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Push: decrement TOP (mod 8) and mark the new top register as valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: mark the current top register as empty, then increment TOP (mod 8). */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/*
 * Load an 80-bit value from guest memory: 8-byte significand at ptr,
 * 2-byte sign/exponent at ptr + 8.  retaddr is the host return address
 * used to unwind if the guest access faults.
 */
static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

/* Store an 80-bit value to guest memory in the same 8 + 2 byte layout. */
static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                    uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}

/* x87 FPU helpers */

/* Convert floatx80 to a host double by punning the float64 bit pattern. */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Convert a host double back to floatx80, again punning through float64. */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * OR the given bits into the status word; if any pending exception is
 * unmasked (its mask bit clear in the control word), also set the
 * error-summary and busy flags.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

/*
 * Return the currently accumulated softfloat exception flags and clear
 * them, so flags raised by the following operation can be isolated.
 * Pairs with merge_exception_flags() below.
 */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the softfloat flags raised since the matching
 * save_exception_flags() into the x87 status word, then re-raise the
 * previously saved flags on top.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
}

/* Divide with x87 status-word exception bookkeeping. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/*
 * Deliver a pending FPU exception: #MF when CR0.NE is set, otherwise
 * (system emulation only) via the legacy FERR# interrupt path.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

/* FLD m32fp into the scratch operand FT0 (val is the raw bit pattern). */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FLD m64fp into the scratch operand FT0 (val is the raw bit pattern). */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FILD m32int into FT0; the conversion is exact, so no flags to merge. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

/* FLD m32fp: push the converted value onto the register stack. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FLD m64fp: push the converted value onto the register stack. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FILD m32int: push; int32 -> floatx80 is always exact. */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILD m64int: push; int64 -> floatx80 is always exact. */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FST(P) m32fp: round ST0 to single precision, return the bit pattern. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FST(P) m64fp: round ST0 to double precision, return the bit pattern. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/*
 * FIST(P) m16int: convert with the current rounding mode.  A result
 * that does not fit in int16 raises invalid and yields -32768, the
 * 16-bit "integer indefinite" value.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/*
 * FIST(P) m32int: floatx80_to_int32() already raises invalid for
 * out-of-range input; force the 32-bit integer indefinite in that case.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST(P) m64int: as above, with the 64-bit integer indefinite. */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTTP m16int: truncating (round-to-zero) variant of helper_fist_ST0. */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTTP m32int: truncating variant of helper_fistl_ST0. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTTP m64int: truncating variant of helper_fistll_ST0. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FLD m80fp: push the 10-byte value at ptr unchanged (no rounding). */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FST(P) m80fp: store ST0 as a 10-byte value (exact, no flags raised). */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    do_fstt(env, ST0, ptr, GETPC());
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: rotate TOP down one register; C0..C3 are cleared. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: rotate TOP up one register; C0..C3 are cleared. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE ST(i): mark the register as empty in the tag word. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: swap ST0 with ST(i). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/*
 * Condition-code settings for FCOM, indexed by FloatRelation + 1
 * (less, equal, greater, unordered): less sets C0, equal sets C3,
 * unordered sets C3|C2|C0 = 0x4500.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: compare ST0 with FT0; any NaN operand signals invalid. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare; only signaling NaNs raise invalid. */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* EFLAGS settings for FCOMI, indexed by FloatRelation + 1. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: compare ST0 with FT0, reporting the result in ZF/PF/CF. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: quiet-compare variant of FCOMI. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: ST0 = FT0 - ST0 (operands reversed). */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* FDIVR: ST0 = FT0 / ST0 (operands reversed). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR ST(i): ST(i) = ST0 - ST(i). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* FDIVR ST(i): ST(i) = ST0 / ST(i). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */

/* FCHS: pure sign-bit flip; never raises exceptions. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

/* FABS: pure sign-bit clear; never raises exceptions. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/*
 * The FLD<const> helpers below pick between two one-ulp-apart values
 * of the constant according to the current rounding control; only the
 * direction(s) in which the default value is not correctly rounded
 * need the alternate constant.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current TOP inserted at bits 11-13. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/*
 * Propagate the rounding-control and precision-control fields of the
 * FPU control word into the softfloat status.
 */
void update_fp_status(CPUX86State *env)
{
    FloatRoundMode rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_mode = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_mode = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_mode = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_mode = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_mode, &env->fp_status);

    /*
     * Precision control (bits 8-9): 0 = single, 2 = double,
     * 3 = extended; the reserved value 1 is treated as extended.
     */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/*
 * FNCLEX: clear the exception, summary and busy bits (0-7 and 15),
 * keeping TOP and the condition codes.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked FPU exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/*
 * FNINIT: reset to the architectural initial state: empty stack,
 * TOP = 0, control word 0x37f (all exceptions masked, round-nearest,
 * extended precision), instruction/data pointers cleared.
 */
static void do_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}

/* BCD ops */

/*
 * FBLD: load an 18-digit packed-BCD integer (9 bytes, two decimal
 * digits per byte, least significant byte first) with a sign byte at
 * offset 9, and push it as floatx80.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/*
 * FBSTP: store ST0 as an 18-digit packed-BCD integer.  A magnitude of
 * 10^18 or more raises invalid and stores the packed-BCD indefinite
 * encoding (digit bytes 0, then 0xc0 0xff 0xff in the top three bytes).
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;

    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
        }
        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    /*
     * The sign byte is taken from ST0's own sign bit rather than the
     * converted integer, so negative values that round to 0 (and -0)
     * keep a negative sign.
     */
    if (SIGND(temp)) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    /* Emit two decimal digits per byte, least significant first. */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    /* Zero-fill the remaining digit bytes. */
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
828 */ 829 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 830 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 831 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 832 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 833 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 834 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 835 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 836 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 837 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 838 839 struct f2xm1_data { 840 /* 841 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 842 * are very close to exact floatx80 values. 843 */ 844 floatx80 t; 845 /* The value of 2^t. */ 846 floatx80 exp2; 847 /* The value of 2^t - 1. */ 848 floatx80 exp2m1; 849 }; 850 851 static const struct f2xm1_data f2xm1_table[65] = { 852 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 853 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 854 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 855 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 856 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 857 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 858 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 859 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 860 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 861 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 862 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 863 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 864 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 865 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 866 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 867 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 868 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 869 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 870 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 871 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 872 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 873 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 874 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 875 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 876 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 877 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 878 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 879 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 880 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 881 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 882 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 883 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 884 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 885 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 886 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 887 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 888 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 889 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 890 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 891 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 892 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 893 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 894 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 895 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 896 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 897 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 898 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 899 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 900 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 901 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 902 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 903 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 904 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
905 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 906 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 907 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 908 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 909 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 910 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 911 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 912 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 913 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 914 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 915 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 916 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 917 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 918 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 919 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 920 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 921 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 922 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 923 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 924 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 925 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 926 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 927 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 928 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 929 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 930 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 931 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 932 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 933 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 934 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 935 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 936 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 937 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 938 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 939 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 940 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 941 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 942 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 943 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 944 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 945 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 946 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 947 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 948 { floatx80_zero_init, 949 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 950 floatx80_zero_init }, 951 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 952 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 953 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 954 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 955 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 956 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 957 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 958 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 959 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 960 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 961 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 962 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 963 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 964 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 965 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 966 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 967 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 968 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 969 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 970 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 971 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 972 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 973 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 974 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 975 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 976 make_floatx80_init(0x3fff, 
0x9b8d39b9d54e3a79ULL), 977 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 978 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 979 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 980 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 981 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 982 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 983 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 984 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 985 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 986 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 987 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 988 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 989 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 990 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 991 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 992 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 993 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 994 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 995 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 996 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 997 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 998 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 999 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1000 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1001 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1002 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1003 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1004 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1005 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1006 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1007 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1008 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1009 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1010 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1011 { make_floatx80_init(0x3ffe, 
0xa7fffffffffff80dULL), 1012 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1013 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1014 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1015 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1016 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1017 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1018 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1019 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1020 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1021 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1022 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1023 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1024 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1025 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1026 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1027 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1028 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1029 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1030 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1031 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1032 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1033 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1034 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1035 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1036 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1037 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1038 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1039 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1040 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1041 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1042 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1043 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1044 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1045 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1046 
make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1047 }; 1048 1049 void helper_f2xm1(CPUX86State *env) 1050 { 1051 uint8_t old_flags = save_exception_flags(env); 1052 uint64_t sig = extractFloatx80Frac(ST0); 1053 int32_t exp = extractFloatx80Exp(ST0); 1054 bool sign = extractFloatx80Sign(ST0); 1055 1056 if (floatx80_invalid_encoding(ST0)) { 1057 float_raise(float_flag_invalid, &env->fp_status); 1058 ST0 = floatx80_default_nan(&env->fp_status); 1059 } else if (floatx80_is_any_nan(ST0)) { 1060 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1061 float_raise(float_flag_invalid, &env->fp_status); 1062 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1063 } 1064 } else if (exp > 0x3fff || 1065 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1066 /* Out of range for the instruction, treat as invalid. */ 1067 float_raise(float_flag_invalid, &env->fp_status); 1068 ST0 = floatx80_default_nan(&env->fp_status); 1069 } else if (exp == 0x3fff) { 1070 /* Argument 1 or -1, exact result 1 or -0.5. */ 1071 if (sign) { 1072 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1073 } 1074 } else if (exp < 0x3fb0) { 1075 if (!floatx80_is_zero(ST0)) { 1076 /* 1077 * Multiplying the argument by an extra-precision version 1078 * of log(2) is sufficiently precise. Zero arguments are 1079 * returned unchanged. 1080 */ 1081 uint64_t sig0, sig1, sig2; 1082 if (exp == 0) { 1083 normalizeFloatx80Subnormal(sig, &exp, &sig); 1084 } 1085 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1086 &sig2); 1087 /* This result is inexact. 
*/ 1088 sig1 |= 1; 1089 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1090 sign, exp, sig0, sig1, 1091 &env->fp_status); 1092 } 1093 } else { 1094 floatx80 tmp, y, accum; 1095 bool asign, bsign; 1096 int32_t n, aexp, bexp; 1097 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1098 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1099 FloatX80RoundPrec save_prec = 1100 env->fp_status.floatx80_rounding_precision; 1101 env->fp_status.float_rounding_mode = float_round_nearest_even; 1102 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1103 1104 /* Find the nearest multiple of 1/32 to the argument. */ 1105 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1106 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1107 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1108 1109 if (floatx80_is_zero(y)) { 1110 /* 1111 * Use the value of 2^t - 1 from the table, to avoid 1112 * needing to special-case zero as a result of 1113 * multiplication below. 1114 */ 1115 ST0 = f2xm1_table[n].t; 1116 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1117 env->fp_status.float_rounding_mode = save_mode; 1118 } else { 1119 /* 1120 * Compute the lower parts of a polynomial expansion for 1121 * (2^y - 1) / y. 
1122 */ 1123 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1124 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1125 accum = floatx80_mul(accum, y, &env->fp_status); 1126 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1127 accum = floatx80_mul(accum, y, &env->fp_status); 1128 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1129 accum = floatx80_mul(accum, y, &env->fp_status); 1130 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1131 accum = floatx80_mul(accum, y, &env->fp_status); 1132 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1133 accum = floatx80_mul(accum, y, &env->fp_status); 1134 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1135 accum = floatx80_mul(accum, y, &env->fp_status); 1136 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1137 1138 /* 1139 * The full polynomial expansion is f2xm1_coeff_0 + accum 1140 * (where accum has much lower magnitude, and so, in 1141 * particular, carry out of the addition is not possible). 1142 * (This expansion is only accurate to about 70 bits, not 1143 * 128 bits.) 1144 */ 1145 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1146 asign = extractFloatx80Sign(f2xm1_coeff_0); 1147 shift128RightJamming(extractFloatx80Frac(accum), 0, 1148 aexp - extractFloatx80Exp(accum), 1149 &asig0, &asig1); 1150 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1151 bsig1 = 0; 1152 if (asign == extractFloatx80Sign(accum)) { 1153 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1154 } else { 1155 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1156 } 1157 /* And thus compute an approximation to 2^y - 1. */ 1158 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1159 &asig0, &asig1, &asig2); 1160 aexp += extractFloatx80Exp(y) - 0x3ffe; 1161 asign ^= extractFloatx80Sign(y); 1162 if (n != 32) { 1163 /* 1164 * Multiply this by the precomputed value of 2^t and 1165 * add that of 2^t - 1. 
1166 */ 1167 mul128By64To192(asig0, asig1, 1168 extractFloatx80Frac(f2xm1_table[n].exp2), 1169 &asig0, &asig1, &asig2); 1170 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1171 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1172 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1173 bsig1 = 0; 1174 if (bexp < aexp) { 1175 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1176 &bsig0, &bsig1); 1177 } else if (aexp < bexp) { 1178 shift128RightJamming(asig0, asig1, bexp - aexp, 1179 &asig0, &asig1); 1180 aexp = bexp; 1181 } 1182 /* The sign of 2^t - 1 is always that of the result. */ 1183 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1184 if (asign == bsign) { 1185 /* Avoid possible carry out of the addition. */ 1186 shift128RightJamming(asig0, asig1, 1, 1187 &asig0, &asig1); 1188 shift128RightJamming(bsig0, bsig1, 1, 1189 &bsig0, &bsig1); 1190 ++aexp; 1191 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1192 } else { 1193 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1194 asign = bsign; 1195 } 1196 } 1197 env->fp_status.float_rounding_mode = save_mode; 1198 /* This result is inexact. */ 1199 asig1 |= 1; 1200 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1201 asign, aexp, asig0, asig1, 1202 &env->fp_status); 1203 } 1204 1205 env->fp_status.floatx80_rounding_precision = save_prec; 1206 } 1207 merge_exception_flags(env, old_flags); 1208 } 1209 1210 void helper_fptan(CPUX86State *env) 1211 { 1212 double fptemp = floatx80_to_double(env, ST0); 1213 1214 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1215 env->fpus |= 0x400; 1216 } else { 1217 fptemp = tan(fptemp); 1218 ST0 = double_to_floatx80(env, fptemp); 1219 fpush(env); 1220 ST0 = floatx80_one; 1221 env->fpus &= ~0x400; /* C2 <-- 0 */ 1222 /* the above code is for |arg| < 2**52 only */ 1223 } 1224 } 1225 1226 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
 */
/*
 * pi/4, pi/2 and pi share the same 128-bit significand; only the
 * exponent differs.  3pi/4 has its own significand.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).
 */
    floatx80 atan_high, atan_low;
};

/*
 * atan(t) for t = n/8, n = 0..8, each split into a high part and a
 * much smaller (low) correction part; entry 8 is atan(1) = pi/4.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: replace ST1 with arctan(ST1 / ST0), using the signs of both
 * operands to select the quadrant (adding or subtracting pi/4, pi/2,
 * 3pi/4 or pi as appropriate), then pop the stack.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.  128-bit long division: one estimated quotient
             * word, remainder correction loop, then a second word.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * FXTRACT: split ST0 into significand (pushed, new ST0) and unbiased
 * exponent (left in ST1).  Zero, NaN, infinity and invalid encodings
 * each get their architected special-case handling below.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) ==
            0) {
            /* Denormal: normalize the significand and adjust the exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Common code for FPREM1 (mod == false) and FPREM (mod == true): set
 * ST0 to the remainder of ST0 / ST1 and report the low quotient bits
 * in C0/C3/C1, or set C2 and compute only a partial remainder when the
 * exponents are too far apart to finish in one step.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32); /* 32 <= n <= 63 quotient bits */
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

/* FPREM1: IEEE remainder (round-to-nearest quotient). */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

/* FPREM: x87 partial remainder (truncated quotient). */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e). */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.
 * arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 * The result (exponent plus 128-bit significand) is returned through
 * *exp, *sig0 and *sig1; the sign is that of arg.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        /* 2 + arg is in [2 + (sqrt(2)/2 - 1), 2): denominator exponent 0. */
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        /* 2 + arg is in [2, 2 + (sqrt(2) - 1)]: denominator exponent 1. */
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    /* 128-bit division of arg by the denominator built above. */
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/* FYL2XP1: compute ST1 * log2(1 + ST0), the result replacing ST1. */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
1996 */ 1997 float_raise(float_flag_invalid, &env->fp_status); 1998 ST1 = floatx80_default_nan(&env->fp_status); 1999 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2000 arg1_exp == 0x7fff) { 2001 /* 2002 * One argument is zero, or multiplying by infinity; correct 2003 * result is exact and can be obtained by multiplying the 2004 * arguments. 2005 */ 2006 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2007 } else if (arg0_exp < 0x3fb0) { 2008 /* 2009 * Multiplying both arguments and an extra-precision version 2010 * of log2(e) is sufficiently precise. 2011 */ 2012 uint64_t sig0, sig1, sig2; 2013 int32_t exp; 2014 if (arg0_exp == 0) { 2015 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2016 } 2017 if (arg1_exp == 0) { 2018 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2019 } 2020 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2021 &sig0, &sig1, &sig2); 2022 exp = arg0_exp + 1; 2023 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2024 exp += arg1_exp - 0x3ffe; 2025 /* This result is inexact. */ 2026 sig1 |= 1; 2027 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2028 arg0_sign ^ arg1_sign, exp, 2029 sig0, sig1, &env->fp_status); 2030 } else { 2031 int32_t aexp; 2032 uint64_t asig0, asig1, asig2; 2033 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2034 FloatX80RoundPrec save_prec = 2035 env->fp_status.floatx80_rounding_precision; 2036 env->fp_status.float_rounding_mode = float_round_nearest_even; 2037 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2038 2039 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2040 /* 2041 * Multiply by the second argument to compute the required 2042 * result. 2043 */ 2044 if (arg1_exp == 0) { 2045 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2046 } 2047 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2048 aexp += arg1_exp - 0x3ffe; 2049 /* This result is inexact. 
*/ 2050 asig1 |= 1; 2051 env->fp_status.float_rounding_mode = save_mode; 2052 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2053 arg0_sign ^ arg1_sign, aexp, 2054 asig0, asig1, &env->fp_status); 2055 env->fp_status.floatx80_rounding_precision = save_prec; 2056 } 2057 fpop(env); 2058 merge_exception_flags(env, old_flags); 2059 } 2060 2061 void helper_fyl2x(CPUX86State *env) 2062 { 2063 uint8_t old_flags = save_exception_flags(env); 2064 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2065 int32_t arg0_exp = extractFloatx80Exp(ST0); 2066 bool arg0_sign = extractFloatx80Sign(ST0); 2067 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2068 int32_t arg1_exp = extractFloatx80Exp(ST1); 2069 bool arg1_sign = extractFloatx80Sign(ST1); 2070 2071 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2072 float_raise(float_flag_invalid, &env->fp_status); 2073 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2074 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2075 float_raise(float_flag_invalid, &env->fp_status); 2076 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2077 } else if (floatx80_invalid_encoding(ST0) || 2078 floatx80_invalid_encoding(ST1)) { 2079 float_raise(float_flag_invalid, &env->fp_status); 2080 ST1 = floatx80_default_nan(&env->fp_status); 2081 } else if (floatx80_is_any_nan(ST0)) { 2082 ST1 = ST0; 2083 } else if (floatx80_is_any_nan(ST1)) { 2084 /* Pass this NaN through. */ 2085 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2086 float_raise(float_flag_invalid, &env->fp_status); 2087 ST1 = floatx80_default_nan(&env->fp_status); 2088 } else if (floatx80_is_infinity(ST1)) { 2089 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2090 &env->fp_status); 2091 switch (cmp) { 2092 case float_relation_less: 2093 ST1 = floatx80_chs(ST1); 2094 break; 2095 case float_relation_greater: 2096 /* Result is infinity of the same sign as ST1. 
             */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1.  */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            /* log2(ST0) < 0, so the zero changes sign. */
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result.  */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; preserve the sign convention of ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        /*
         * The common helper requires round-to-nearest with full
         * extended precision; save and restore the caller's settings.
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split log2(ST0) into an integer part int_exp and the
         * fractional contribution from arg0_m1 = ST0/2^int_exp - 1.
         * The 0xb504... threshold keeps the reduced argument near 1
         * (it is the significand of sqrt(2)).
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1.  */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /*
                 * Add the integer part of the logarithm to the
                 * fractional part, in 128-bit fixed point.
                 */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact.  */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* FSQRT: ST0 <- sqrt(ST0); a negative operand clears C3,C2,C0 and sets C1. */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/*
 * FSINCOS: push cos(ST0) after replacing ST0 with sin(ST0).
 * Computed via host 'double' libm, so only ~53 bits of precision.
 * Out-of-range arguments set C2 and leave the stack unchanged.
 */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FRNDINT: round ST0 to an integer using the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/*
 * FSCALE: ST0 <- ST0 * 2^trunc(ST1).
 * NaN, invalid-encoding and infinity operands are resolved explicitly;
 * the general case uses floatx80_scalbn with ST1 truncated to int.
 */
void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by -inf drives finite values to zero. */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by +inf drives nonzero values to infinity. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        /*
         * Truncate ST1 to an integer without letting the conversion's
         * exception flags leak into the visible status.
         */
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * FSIN: ST0 <- sin(ST0), via host 'double' libm (limited precision).
 * Out-of-range arguments set C2 and leave ST0 unchanged.
 */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/*
 * FCOS: ST0 <- cos(ST0), via host 'double' libm (limited precision).
 * Out-of-range arguments set C2 and leave ST0 unchanged.
 */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/*
 * FXAM: classify ST0 into the C3..C0 condition bits (empty, NaN,
 * infinity, zero, denormal or normal; C1 reflects the sign).
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
        /* NOTE(review): pseudo-NaNs (integer bit clear) set no class bits. */
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400; /* Normal finite number */
    }
}

/*
 * Store the x87 environment (control, status and tag words plus
 * instruction/data pointers) in 16- or 32-bit layout at ptr.
 * The 2-bit-per-register tag word is recomputed from the registers.
 */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Fold the current top-of-stack index into status word bits 13:11. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3; /* empty */
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpdp (operand offset) */
        cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpds (operand selector) */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
        cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
        cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
        cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
    }
}

/* FSTENV/FNSTENV entry point. */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}

/*
 * Load a new FPU status word: extract the stack top from bits 13:11,
 * recompute the busy flag from the summary-exception flag, and clear
 * the FERR#/IGNNE# interrupt line when no exception remains pending.
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}

/*
 * Load the x87 environment from ptr (16- or 32-bit layout) and
 * reconstruct the 1-bit-per-register empty flags from the tag word.
 */
static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    cpu_set_fpus(env, fpus);
    for (i = 0; i < 8; i++) {
        /* Only tag value 3 means "empty"; 0/1/2 are all "valid". */
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

/* FLDENV entry point. */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}

/*
 * FSAVE: store the environment followed by all eight 10-byte
 * registers, then reinitialise the FPU.
 */
static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
                     uintptr_t retaddr)
{
    floatx80 tmp;
    int i;

    do_fstenv(env, ptr, data32, retaddr);

    /* Register image follows the 14- or 28-byte environment. */
    ptr += (14 << data32);
    for (i = 0; i < 8; i++) {
        tmp = ST(i);
        do_fstt(env, tmp, ptr, retaddr);
        ptr += 10;
    }

    do_fninit(env);
}

/* FSAVE entry point. */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fsave(env, ptr, data32, GETPC());
}

/*
 * FRSTOR: load the environment followed by all eight 10-byte
 * registers.
 */
static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, retaddr);
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = do_fldt(env, ptr, retaddr);
        ST(i) = tmp;
        ptr += 10;
    }
}

/* FRSTOR entry point. */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    do_frstor(env, ptr, data32, GETPC());
}

#if defined(CONFIG_USER_ONLY)
/* User-mode wrappers (no guest return address available). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fsave(env, ptr, data32, 0);
}

void
cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    do_frstor(env, ptr, data32, 0);
}
#endif

/* Shorthand for offsets inside the XSAVE area. */
#define XO(X)  offsetof(X86XSaveArea, X)

/*
 * Store the legacy FPU portion of the XSAVE/FXSAVE image: control,
 * status and (abridged, inverted) tag words, zeroed pointer fields,
 * and the eight registers at 16-byte stride.
 */
static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
    /* FXSAVE's abridged tag word uses 1 = valid, hence the inversion. */
    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

/* Store MXCSR and its (fixed) mask into the legacy XSAVE area. */
static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    update_mxcsr_from_sse_status(env);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
}

/* Store the XMM registers (16 in 64-bit code segments, else 8). */
static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

/* Store the four MPX bound registers (lower/upper, 16 bytes each). */
static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

/* Store the MPX bound-config and bound-status registers. */
static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
                    env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
                    env->bndcs_regs.sts, ra);
}

/* Store the PKRU register. */
static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr, env->pkru, ra);
}

/*
 * FXSAVE: store FPU state and, when OSFXSR is enabled, MXCSR and
 * possibly the XMM registers (skipped by "fast FXSAVE" in 64-bit
 * ring 0 with EFER.FFXSR set).
 */
static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

/* FXSAVE entry point. */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, GETPC());
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.
     */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Common XSAVE/XSAVEOPT implementation: save the components selected
 * by rfbm (masked by XCR0), honouring the per-component optimisation
 * mask 'opt', and update XSTATE_BV in the header.
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
    }

    /* Update the XSTATE_BV field.  */
    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
}

/* XSAVE: always writes every requested component. */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

/* XSAVEOPT: may skip components whose XINUSE bit is clear. */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/*
 * Load the legacy FPU portion of an XSAVE/FXSAVE image, converting
 * the abridged (inverted) tag word back to per-register empty flags.
 */
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

/* Load MXCSR from the legacy XSAVE area. */
static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
}

/* Load the XMM registers (16 in 64-bit code segments, else 8). */
static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

/* Load the four MPX bound registers. */
static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

/* Load the MPX bound-config and bound-status registers. */
static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
    env->bndcs_regs.sts
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
}

/* Load the PKRU register. */
static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
}

/*
 * FXRSTOR: load FPU state and, when OSFXSR is enabled, MXCSR and
 * possibly the XMM registers (skipped by "fast FXRSTOR" in 64-bit
 * ring 0 with EFER.FFXSR set).
 */
static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

/* FXRSTOR entry point. */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, GETPC());
}

#if defined(CONFIG_USER_ONLY)
/* User-mode wrappers (no guest return address available). */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, 0);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, 0);
}
#endif

/*
 * XRSTOR (standard form): validate the header, then restore each
 * component selected by rfbm, resetting components whose XSTATE_BV
 * bit is clear to their initial configuration.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.
     */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field.  */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Component not present in the image: reset to init state. */
            do_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            /* Protection-key changes alter page permissions. */
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}

#undef XO

/*
 * XGETBV: read XCR0 (ecx == 0) or, with the XGETBV1 feature, the
 * in-use subset of XCR0 (ecx == 1); any other index faults.
 */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/*
 * XSETBV: write XCR0 after validating the index and mask against the
 * architectural constraints and the features reported by CPUID.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features.
     */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */

#define SSE_DAZ             0x0040
#define SSE_RC_MASK         0x6000
#define SSE_RC_NEAR         0x0000
#define SSE_RC_DOWN         0x2000
#define SSE_RC_UP           0x4000
#define SSE_RC_CHOP         0x6000
#define SSE_FZ              0x8000

/*
 * Propagate the guest MXCSR into the softfloat SSE status: rounding
 * mode, sticky exception flags, DAZ and FTZ.
 */
void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int rnd_type;

    /* set rounding mode */
    switch (mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /*
     * Set exception flags.  The FPUS_* masks are reused here because
     * MXCSR's low exception-flag bits share the x87 status-word layout.
     */
    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
                              &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

/* Fold accumulated softfloat SSE exception flags back into MXCSR. */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
     */
    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                   (flags & float_flag_overflow ? FPUS_OE : 0) |
                   (flags & float_flag_underflow ? FPUS_UE : 0) |
                   (flags & float_flag_inexact ? FPUS_PE : 0) |
                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
                    0));
}

/* Helper entry point for refreshing MXCSR before it is read. */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}

/* LDMXCSR: load a new MXCSR value. */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}

/*
 * Switch the FP stack to MMX mode: reset the top-of-stack and mark
 * all registers valid.  NOTE(review): the 32-bit stores assume
 * env->fptags is 8 contiguous bytes — confirm against CPUX86State.
 */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    *(uint32_t *)(env->fptags) = 0;
    *(uint32_t *)(env->fptags + 4) = 0;
}

/* EMMS: mark all eight FP/MMX registers empty. */
void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    *(uint32_t *)(env->fptags) = 0x01010101;
    *(uint32_t *)(env->fptags + 4) = 0x01010101;
}

/* XXX: suppress */
void helper_movq(CPUX86State *env, void *d, void *s)
{
    *(uint64_t *)d = *(uint64_t *)s;
}

/* Instantiate the SSE op templates for MMX (SHIFT 0) and SSE (SHIFT 1). */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"