1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/cpu_ldst.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "fpu/softfloat-macros.h" 28 #include "helper-tcg.h" 29 30 /* float macros */ 31 #define FT0 (env->ft0) 32 #define ST0 (env->fpregs[env->fpstt].d) 33 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 34 #define ST1 ST(1) 35 36 #define FPU_RC_SHIFT 10 37 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 38 #define FPU_RC_NEAR 0x000 39 #define FPU_RC_DOWN 0x400 40 #define FPU_RC_UP 0x800 41 #define FPU_RC_CHOP 0xc00 42 43 #define MAXTAN 9223372036854775808.0 44 45 /* the following deal with x86 long double-precision numbers */ 46 #define MAXEXPD 0x7fff 47 #define EXPBIAS 16383 48 #define EXPD(fp) (fp.l.upper & 0x7fff) 49 #define SIGND(fp) ((fp.l.upper) & 0x8000) 50 #define MANTD(fp) (fp.l.lower) 51 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 52 53 #define FPUS_IE (1 << 0) 54 #define FPUS_DE (1 << 1) 55 #define FPUS_ZE (1 << 2) 56 #define FPUS_OE (1 << 3) 57 #define FPUS_UE (1 << 4) 58 #define FPUS_PE (1 << 5) 59 #define FPUS_SF (1 << 6) 60 #define FPUS_SE (1 
<< 7) 61 #define FPUS_B (1 << 15) 62 63 #define FPUC_EM 0x3f 64 65 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 66 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 67 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 68 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 69 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 70 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 71 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 72 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 73 74 static inline void fpush(CPUX86State *env) 75 { 76 env->fpstt = (env->fpstt - 1) & 7; 77 env->fptags[env->fpstt] = 0; /* validate stack entry */ 78 } 79 80 static inline void fpop(CPUX86State *env) 81 { 82 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 83 env->fpstt = (env->fpstt + 1) & 7; 84 } 85 86 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 87 { 88 CPU_LDoubleU temp; 89 90 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 91 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 92 return temp.d; 93 } 94 95 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 96 uintptr_t retaddr) 97 { 98 CPU_LDoubleU temp; 99 100 temp.d = f; 101 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 102 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 103 } 104 105 /* x87 FPU helpers */ 106 107 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 108 { 109 union { 110 float64 f64; 111 double d; 112 } u; 113 114 u.f64 = floatx80_to_float64(a, &env->fp_status); 115 return u.d; 116 } 117 118 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 119 { 120 union { 121 float64 f64; 122 double d; 123 } u; 124 125 u.d = a; 126 return float64_to_floatx80(u.f64, &env->fp_status); 127 } 128 129 static void fpu_set_exception(CPUX86State *env, int mask) 130 { 131 env->fpus |= mask; 132 
if (env->fpus & (~env->fpuc & FPUC_EM)) { 133 env->fpus |= FPUS_SE | FPUS_B; 134 } 135 } 136 137 static inline uint8_t save_exception_flags(CPUX86State *env) 138 { 139 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 140 set_float_exception_flags(0, &env->fp_status); 141 return old_flags; 142 } 143 144 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 145 { 146 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 147 float_raise(old_flags, &env->fp_status); 148 fpu_set_exception(env, 149 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 150 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 151 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 152 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 153 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 154 (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 155 } 156 157 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 158 { 159 uint8_t old_flags = save_exception_flags(env); 160 floatx80 ret = floatx80_div(a, b, &env->fp_status); 161 merge_exception_flags(env, old_flags); 162 return ret; 163 } 164 165 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 166 { 167 if (env->cr[0] & CR0_NE_MASK) { 168 raise_exception_ra(env, EXCP10_COPR, retaddr); 169 } 170 #if !defined(CONFIG_USER_ONLY) 171 else { 172 fpu_check_raise_ferr_irq(env); 173 } 174 #endif 175 } 176 177 void helper_flds_FT0(CPUX86State *env, uint32_t val) 178 { 179 uint8_t old_flags = save_exception_flags(env); 180 union { 181 float32 f; 182 uint32_t i; 183 } u; 184 185 u.i = val; 186 FT0 = float32_to_floatx80(u.f, &env->fp_status); 187 merge_exception_flags(env, old_flags); 188 } 189 190 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 191 { 192 uint8_t old_flags = save_exception_flags(env); 193 union { 194 float64 f; 195 uint64_t i; 196 } u; 197 198 u.i = val; 199 FT0 = float64_to_floatx80(u.f, &env->fp_status); 200 merge_exception_flags(env, old_flags); 201 } 
202 203 void helper_fildl_FT0(CPUX86State *env, int32_t val) 204 { 205 FT0 = int32_to_floatx80(val, &env->fp_status); 206 } 207 208 void helper_flds_ST0(CPUX86State *env, uint32_t val) 209 { 210 uint8_t old_flags = save_exception_flags(env); 211 int new_fpstt; 212 union { 213 float32 f; 214 uint32_t i; 215 } u; 216 217 new_fpstt = (env->fpstt - 1) & 7; 218 u.i = val; 219 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 220 env->fpstt = new_fpstt; 221 env->fptags[new_fpstt] = 0; /* validate stack entry */ 222 merge_exception_flags(env, old_flags); 223 } 224 225 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 226 { 227 uint8_t old_flags = save_exception_flags(env); 228 int new_fpstt; 229 union { 230 float64 f; 231 uint64_t i; 232 } u; 233 234 new_fpstt = (env->fpstt - 1) & 7; 235 u.i = val; 236 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 237 env->fpstt = new_fpstt; 238 env->fptags[new_fpstt] = 0; /* validate stack entry */ 239 merge_exception_flags(env, old_flags); 240 } 241 242 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 243 { 244 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 245 set_floatx80_rounding_precision(floatx80_precision_x, st); 246 return old; 247 } 248 249 void helper_fildl_ST0(CPUX86State *env, int32_t val) 250 { 251 int new_fpstt; 252 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 253 254 new_fpstt = (env->fpstt - 1) & 7; 255 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 256 env->fpstt = new_fpstt; 257 env->fptags[new_fpstt] = 0; /* validate stack entry */ 258 259 set_floatx80_rounding_precision(old, &env->fp_status); 260 } 261 262 void helper_fildll_ST0(CPUX86State *env, int64_t val) 263 { 264 int new_fpstt; 265 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 266 267 new_fpstt = (env->fpstt - 1) & 7; 268 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 269 env->fpstt = new_fpstt; 270 
env->fptags[new_fpstt] = 0; /* validate stack entry */ 271 272 set_floatx80_rounding_precision(old, &env->fp_status); 273 } 274 275 uint32_t helper_fsts_ST0(CPUX86State *env) 276 { 277 uint8_t old_flags = save_exception_flags(env); 278 union { 279 float32 f; 280 uint32_t i; 281 } u; 282 283 u.f = floatx80_to_float32(ST0, &env->fp_status); 284 merge_exception_flags(env, old_flags); 285 return u.i; 286 } 287 288 uint64_t helper_fstl_ST0(CPUX86State *env) 289 { 290 uint8_t old_flags = save_exception_flags(env); 291 union { 292 float64 f; 293 uint64_t i; 294 } u; 295 296 u.f = floatx80_to_float64(ST0, &env->fp_status); 297 merge_exception_flags(env, old_flags); 298 return u.i; 299 } 300 301 int32_t helper_fist_ST0(CPUX86State *env) 302 { 303 uint8_t old_flags = save_exception_flags(env); 304 int32_t val; 305 306 val = floatx80_to_int32(ST0, &env->fp_status); 307 if (val != (int16_t)val) { 308 set_float_exception_flags(float_flag_invalid, &env->fp_status); 309 val = -32768; 310 } 311 merge_exception_flags(env, old_flags); 312 return val; 313 } 314 315 int32_t helper_fistl_ST0(CPUX86State *env) 316 { 317 uint8_t old_flags = save_exception_flags(env); 318 int32_t val; 319 320 val = floatx80_to_int32(ST0, &env->fp_status); 321 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 322 val = 0x80000000; 323 } 324 merge_exception_flags(env, old_flags); 325 return val; 326 } 327 328 int64_t helper_fistll_ST0(CPUX86State *env) 329 { 330 uint8_t old_flags = save_exception_flags(env); 331 int64_t val; 332 333 val = floatx80_to_int64(ST0, &env->fp_status); 334 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 335 val = 0x8000000000000000ULL; 336 } 337 merge_exception_flags(env, old_flags); 338 return val; 339 } 340 341 int32_t helper_fistt_ST0(CPUX86State *env) 342 { 343 uint8_t old_flags = save_exception_flags(env); 344 int32_t val; 345 346 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 347 if (val != (int16_t)val) { 348 
set_float_exception_flags(float_flag_invalid, &env->fp_status); 349 val = -32768; 350 } 351 merge_exception_flags(env, old_flags); 352 return val; 353 } 354 355 int32_t helper_fisttl_ST0(CPUX86State *env) 356 { 357 uint8_t old_flags = save_exception_flags(env); 358 int32_t val; 359 360 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 361 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 362 val = 0x80000000; 363 } 364 merge_exception_flags(env, old_flags); 365 return val; 366 } 367 368 int64_t helper_fisttll_ST0(CPUX86State *env) 369 { 370 uint8_t old_flags = save_exception_flags(env); 371 int64_t val; 372 373 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 374 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 375 val = 0x8000000000000000ULL; 376 } 377 merge_exception_flags(env, old_flags); 378 return val; 379 } 380 381 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 382 { 383 int new_fpstt; 384 385 new_fpstt = (env->fpstt - 1) & 7; 386 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 387 env->fpstt = new_fpstt; 388 env->fptags[new_fpstt] = 0; /* validate stack entry */ 389 } 390 391 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 392 { 393 do_fstt(env, ST0, ptr, GETPC()); 394 } 395 396 void helper_fpush(CPUX86State *env) 397 { 398 fpush(env); 399 } 400 401 void helper_fpop(CPUX86State *env) 402 { 403 fpop(env); 404 } 405 406 void helper_fdecstp(CPUX86State *env) 407 { 408 env->fpstt = (env->fpstt - 1) & 7; 409 env->fpus &= ~0x4700; 410 } 411 412 void helper_fincstp(CPUX86State *env) 413 { 414 env->fpstt = (env->fpstt + 1) & 7; 415 env->fpus &= ~0x4700; 416 } 417 418 /* FPU move */ 419 420 void helper_ffree_STN(CPUX86State *env, int st_index) 421 { 422 env->fptags[(env->fpstt + st_index) & 7] = 1; 423 } 424 425 void helper_fmov_ST0_FT0(CPUX86State *env) 426 { 427 ST0 = FT0; 428 } 429 430 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 431 { 432 FT0 = ST(st_index); 
433 } 434 435 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 436 { 437 ST0 = ST(st_index); 438 } 439 440 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 441 { 442 ST(st_index) = ST0; 443 } 444 445 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 446 { 447 floatx80 tmp; 448 449 tmp = ST(st_index); 450 ST(st_index) = ST0; 451 ST0 = tmp; 452 } 453 454 /* FPU operations */ 455 456 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 457 458 void helper_fcom_ST0_FT0(CPUX86State *env) 459 { 460 uint8_t old_flags = save_exception_flags(env); 461 FloatRelation ret; 462 463 ret = floatx80_compare(ST0, FT0, &env->fp_status); 464 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 465 merge_exception_flags(env, old_flags); 466 } 467 468 void helper_fucom_ST0_FT0(CPUX86State *env) 469 { 470 uint8_t old_flags = save_exception_flags(env); 471 FloatRelation ret; 472 473 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 474 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 475 merge_exception_flags(env, old_flags); 476 } 477 478 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 479 480 void helper_fcomi_ST0_FT0(CPUX86State *env) 481 { 482 uint8_t old_flags = save_exception_flags(env); 483 int eflags; 484 FloatRelation ret; 485 486 ret = floatx80_compare(ST0, FT0, &env->fp_status); 487 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 488 CC_SRC = eflags | fcomi_ccval[ret + 1]; 489 merge_exception_flags(env, old_flags); 490 } 491 492 void helper_fucomi_ST0_FT0(CPUX86State *env) 493 { 494 uint8_t old_flags = save_exception_flags(env); 495 int eflags; 496 FloatRelation ret; 497 498 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 499 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 500 CC_SRC = eflags | fcomi_ccval[ret + 1]; 501 merge_exception_flags(env, old_flags); 502 } 503 504 void helper_fadd_ST0_FT0(CPUX86State *env) 505 { 506 uint8_t old_flags = save_exception_flags(env); 
507 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 508 merge_exception_flags(env, old_flags); 509 } 510 511 void helper_fmul_ST0_FT0(CPUX86State *env) 512 { 513 uint8_t old_flags = save_exception_flags(env); 514 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 515 merge_exception_flags(env, old_flags); 516 } 517 518 void helper_fsub_ST0_FT0(CPUX86State *env) 519 { 520 uint8_t old_flags = save_exception_flags(env); 521 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 522 merge_exception_flags(env, old_flags); 523 } 524 525 void helper_fsubr_ST0_FT0(CPUX86State *env) 526 { 527 uint8_t old_flags = save_exception_flags(env); 528 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 529 merge_exception_flags(env, old_flags); 530 } 531 532 void helper_fdiv_ST0_FT0(CPUX86State *env) 533 { 534 ST0 = helper_fdiv(env, ST0, FT0); 535 } 536 537 void helper_fdivr_ST0_FT0(CPUX86State *env) 538 { 539 ST0 = helper_fdiv(env, FT0, ST0); 540 } 541 542 /* fp operations between STN and ST0 */ 543 544 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 545 { 546 uint8_t old_flags = save_exception_flags(env); 547 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 548 merge_exception_flags(env, old_flags); 549 } 550 551 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 552 { 553 uint8_t old_flags = save_exception_flags(env); 554 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 555 merge_exception_flags(env, old_flags); 556 } 557 558 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 559 { 560 uint8_t old_flags = save_exception_flags(env); 561 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 562 merge_exception_flags(env, old_flags); 563 } 564 565 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 566 { 567 uint8_t old_flags = save_exception_flags(env); 568 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 569 merge_exception_flags(env, old_flags); 570 } 571 572 void helper_fdiv_STN_ST0(CPUX86State *env, int 
st_index) 573 { 574 floatx80 *p; 575 576 p = &ST(st_index); 577 *p = helper_fdiv(env, *p, ST0); 578 } 579 580 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 581 { 582 floatx80 *p; 583 584 p = &ST(st_index); 585 *p = helper_fdiv(env, ST0, *p); 586 } 587 588 /* misc FPU operations */ 589 void helper_fchs_ST0(CPUX86State *env) 590 { 591 ST0 = floatx80_chs(ST0); 592 } 593 594 void helper_fabs_ST0(CPUX86State *env) 595 { 596 ST0 = floatx80_abs(ST0); 597 } 598 599 void helper_fld1_ST0(CPUX86State *env) 600 { 601 ST0 = floatx80_one; 602 } 603 604 void helper_fldl2t_ST0(CPUX86State *env) 605 { 606 switch (env->fpuc & FPU_RC_MASK) { 607 case FPU_RC_UP: 608 ST0 = floatx80_l2t_u; 609 break; 610 default: 611 ST0 = floatx80_l2t; 612 break; 613 } 614 } 615 616 void helper_fldl2e_ST0(CPUX86State *env) 617 { 618 switch (env->fpuc & FPU_RC_MASK) { 619 case FPU_RC_DOWN: 620 case FPU_RC_CHOP: 621 ST0 = floatx80_l2e_d; 622 break; 623 default: 624 ST0 = floatx80_l2e; 625 break; 626 } 627 } 628 629 void helper_fldpi_ST0(CPUX86State *env) 630 { 631 switch (env->fpuc & FPU_RC_MASK) { 632 case FPU_RC_DOWN: 633 case FPU_RC_CHOP: 634 ST0 = floatx80_pi_d; 635 break; 636 default: 637 ST0 = floatx80_pi; 638 break; 639 } 640 } 641 642 void helper_fldlg2_ST0(CPUX86State *env) 643 { 644 switch (env->fpuc & FPU_RC_MASK) { 645 case FPU_RC_DOWN: 646 case FPU_RC_CHOP: 647 ST0 = floatx80_lg2_d; 648 break; 649 default: 650 ST0 = floatx80_lg2; 651 break; 652 } 653 } 654 655 void helper_fldln2_ST0(CPUX86State *env) 656 { 657 switch (env->fpuc & FPU_RC_MASK) { 658 case FPU_RC_DOWN: 659 case FPU_RC_CHOP: 660 ST0 = floatx80_ln2_d; 661 break; 662 default: 663 ST0 = floatx80_ln2; 664 break; 665 } 666 } 667 668 void helper_fldz_ST0(CPUX86State *env) 669 { 670 ST0 = floatx80_zero; 671 } 672 673 void helper_fldz_FT0(CPUX86State *env) 674 { 675 FT0 = floatx80_zero; 676 } 677 678 uint32_t helper_fnstsw(CPUX86State *env) 679 { 680 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 681 } 682 683 
uint32_t helper_fnstcw(CPUX86State *env) 684 { 685 return env->fpuc; 686 } 687 688 static void set_x86_rounding_mode(unsigned mode, float_status *status) 689 { 690 static FloatRoundMode x86_round_mode[4] = { 691 float_round_nearest_even, 692 float_round_down, 693 float_round_up, 694 float_round_to_zero 695 }; 696 assert(mode < ARRAY_SIZE(x86_round_mode)); 697 set_float_rounding_mode(x86_round_mode[mode], status); 698 } 699 700 void update_fp_status(CPUX86State *env) 701 { 702 int rnd_mode; 703 FloatX80RoundPrec rnd_prec; 704 705 /* set rounding mode */ 706 rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 707 set_x86_rounding_mode(rnd_mode, &env->fp_status); 708 709 switch ((env->fpuc >> 8) & 3) { 710 case 0: 711 rnd_prec = floatx80_precision_s; 712 break; 713 case 2: 714 rnd_prec = floatx80_precision_d; 715 break; 716 case 3: 717 default: 718 rnd_prec = floatx80_precision_x; 719 break; 720 } 721 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 722 } 723 724 void helper_fldcw(CPUX86State *env, uint32_t val) 725 { 726 cpu_set_fpuc(env, val); 727 } 728 729 void helper_fclex(CPUX86State *env) 730 { 731 env->fpus &= 0x7f00; 732 } 733 734 void helper_fwait(CPUX86State *env) 735 { 736 if (env->fpus & FPUS_SE) { 737 fpu_raise_exception(env, GETPC()); 738 } 739 } 740 741 static void do_fninit(CPUX86State *env) 742 { 743 env->fpus = 0; 744 env->fpstt = 0; 745 env->fpcs = 0; 746 env->fpds = 0; 747 env->fpip = 0; 748 env->fpdp = 0; 749 cpu_set_fpuc(env, 0x37f); 750 env->fptags[0] = 1; 751 env->fptags[1] = 1; 752 env->fptags[2] = 1; 753 env->fptags[3] = 1; 754 env->fptags[4] = 1; 755 env->fptags[5] = 1; 756 env->fptags[6] = 1; 757 env->fptags[7] = 1; 758 } 759 760 void helper_fninit(CPUX86State *env) 761 { 762 do_fninit(env); 763 } 764 765 /* BCD ops */ 766 767 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 768 { 769 floatx80 tmp; 770 uint64_t val; 771 unsigned int v; 772 int i; 773 774 val = 0; 775 for (i = 8; i >= 0; i--) { 776 v = 
cpu_ldub_data_ra(env, ptr + i, GETPC()); 777 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 778 } 779 tmp = int64_to_floatx80(val, &env->fp_status); 780 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 781 tmp = floatx80_chs(tmp); 782 } 783 fpush(env); 784 ST0 = tmp; 785 } 786 787 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 788 { 789 uint8_t old_flags = save_exception_flags(env); 790 int v; 791 target_ulong mem_ref, mem_end; 792 int64_t val; 793 CPU_LDoubleU temp; 794 795 temp.d = ST0; 796 797 val = floatx80_to_int64(ST0, &env->fp_status); 798 mem_ref = ptr; 799 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 800 set_float_exception_flags(float_flag_invalid, &env->fp_status); 801 while (mem_ref < ptr + 7) { 802 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 803 } 804 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 805 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 806 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 807 merge_exception_flags(env, old_flags); 808 return; 809 } 810 mem_end = mem_ref + 9; 811 if (SIGND(temp)) { 812 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 813 val = -val; 814 } else { 815 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 816 } 817 while (mem_ref < mem_end) { 818 if (val == 0) { 819 break; 820 } 821 v = val % 100; 822 val = val / 100; 823 v = ((v / 10) << 4) | (v % 10); 824 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 825 } 826 while (mem_ref < mem_end) { 827 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 828 } 829 merge_exception_flags(env, old_flags); 830 } 831 832 /* 128-bit significand of log(2). */ 833 #define ln2_sig_high 0xb17217f7d1cf79abULL 834 #define ln2_sig_low 0xc9e3b39803f2f6afULL 835 836 /* 837 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 838 * the interval [-1/64, 1/64]. 
839 */ 840 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 841 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 842 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 843 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 844 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 845 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 846 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 847 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 848 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 849 850 struct f2xm1_data { 851 /* 852 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 853 * are very close to exact floatx80 values. 854 */ 855 floatx80 t; 856 /* The value of 2^t. */ 857 floatx80 exp2; 858 /* The value of 2^t - 1. */ 859 floatx80 exp2m1; 860 }; 861 862 static const struct f2xm1_data f2xm1_table[65] = { 863 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 864 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 865 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 866 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 867 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 868 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 869 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 870 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 871 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 872 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 873 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 874 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 875 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 876 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 877 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 878 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 879 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 880 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 881 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 882 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 883 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 884 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 885 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 886 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 887 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 888 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 889 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 890 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 891 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 892 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 893 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 894 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 895 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 896 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 897 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 898 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 899 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 900 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 901 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 902 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 903 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 904 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 905 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 906 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 907 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 908 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 909 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 910 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 911 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 912 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 913 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 914 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 915 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
916 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 917 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 918 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 919 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 920 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 921 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 922 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 923 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 924 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 925 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 926 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 927 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 928 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 929 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 930 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 931 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 932 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 933 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 934 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 935 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 936 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 937 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 938 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 939 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 940 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 941 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 942 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 943 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 944 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 945 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 946 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 947 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 948 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 949 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 950 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 951 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 952 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 953 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 954 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 955 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 956 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 957 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 958 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 959 { floatx80_zero_init, 960 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 961 floatx80_zero_init }, 962 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 963 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 964 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 965 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 966 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 967 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 968 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 969 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 970 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 971 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 972 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 973 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 974 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 975 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 976 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 977 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 978 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 979 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 980 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 981 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 982 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 983 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 984 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 985 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 986 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 987 make_floatx80_init(0x3fff, 
0x9b8d39b9d54e3a79ULL), 988 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 989 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 990 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 991 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 992 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 993 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 994 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 995 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 996 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 997 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 998 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 999 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1000 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1001 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1002 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1003 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1004 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1005 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1006 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1007 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1008 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1009 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1010 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1011 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1012 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1013 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1014 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1015 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1016 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1017 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1018 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1019 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1020 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1021 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1022 { make_floatx80_init(0x3ffe, 
0xa7fffffffffff80dULL), 1023 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1024 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1025 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1026 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1027 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1028 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1029 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1030 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1031 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1032 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1033 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1034 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1035 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1036 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1037 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1038 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1039 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1040 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1041 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1042 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1043 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1044 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1045 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1046 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1047 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1048 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1049 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1050 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1051 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1052 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1053 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1054 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1055 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1056 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1057 
make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1058 }; 1059 1060 void helper_f2xm1(CPUX86State *env) 1061 { 1062 uint8_t old_flags = save_exception_flags(env); 1063 uint64_t sig = extractFloatx80Frac(ST0); 1064 int32_t exp = extractFloatx80Exp(ST0); 1065 bool sign = extractFloatx80Sign(ST0); 1066 1067 if (floatx80_invalid_encoding(ST0)) { 1068 float_raise(float_flag_invalid, &env->fp_status); 1069 ST0 = floatx80_default_nan(&env->fp_status); 1070 } else if (floatx80_is_any_nan(ST0)) { 1071 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1072 float_raise(float_flag_invalid, &env->fp_status); 1073 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1074 } 1075 } else if (exp > 0x3fff || 1076 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1077 /* Out of range for the instruction, treat as invalid. */ 1078 float_raise(float_flag_invalid, &env->fp_status); 1079 ST0 = floatx80_default_nan(&env->fp_status); 1080 } else if (exp == 0x3fff) { 1081 /* Argument 1 or -1, exact result 1 or -0.5. */ 1082 if (sign) { 1083 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1084 } 1085 } else if (exp < 0x3fb0) { 1086 if (!floatx80_is_zero(ST0)) { 1087 /* 1088 * Multiplying the argument by an extra-precision version 1089 * of log(2) is sufficiently precise. Zero arguments are 1090 * returned unchanged. 1091 */ 1092 uint64_t sig0, sig1, sig2; 1093 if (exp == 0) { 1094 normalizeFloatx80Subnormal(sig, &exp, &sig); 1095 } 1096 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1097 &sig2); 1098 /* This result is inexact. 
*/ 1099 sig1 |= 1; 1100 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1101 sign, exp, sig0, sig1, 1102 &env->fp_status); 1103 } 1104 } else { 1105 floatx80 tmp, y, accum; 1106 bool asign, bsign; 1107 int32_t n, aexp, bexp; 1108 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1109 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1110 FloatX80RoundPrec save_prec = 1111 env->fp_status.floatx80_rounding_precision; 1112 env->fp_status.float_rounding_mode = float_round_nearest_even; 1113 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1114 1115 /* Find the nearest multiple of 1/32 to the argument. */ 1116 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1117 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1118 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1119 1120 if (floatx80_is_zero(y)) { 1121 /* 1122 * Use the value of 2^t - 1 from the table, to avoid 1123 * needing to special-case zero as a result of 1124 * multiplication below. 1125 */ 1126 ST0 = f2xm1_table[n].t; 1127 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1128 env->fp_status.float_rounding_mode = save_mode; 1129 } else { 1130 /* 1131 * Compute the lower parts of a polynomial expansion for 1132 * (2^y - 1) / y. 
1133 */ 1134 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1135 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1136 accum = floatx80_mul(accum, y, &env->fp_status); 1137 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1138 accum = floatx80_mul(accum, y, &env->fp_status); 1139 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1140 accum = floatx80_mul(accum, y, &env->fp_status); 1141 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1142 accum = floatx80_mul(accum, y, &env->fp_status); 1143 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1144 accum = floatx80_mul(accum, y, &env->fp_status); 1145 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1146 accum = floatx80_mul(accum, y, &env->fp_status); 1147 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1148 1149 /* 1150 * The full polynomial expansion is f2xm1_coeff_0 + accum 1151 * (where accum has much lower magnitude, and so, in 1152 * particular, carry out of the addition is not possible). 1153 * (This expansion is only accurate to about 70 bits, not 1154 * 128 bits.) 1155 */ 1156 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1157 asign = extractFloatx80Sign(f2xm1_coeff_0); 1158 shift128RightJamming(extractFloatx80Frac(accum), 0, 1159 aexp - extractFloatx80Exp(accum), 1160 &asig0, &asig1); 1161 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1162 bsig1 = 0; 1163 if (asign == extractFloatx80Sign(accum)) { 1164 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1165 } else { 1166 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1167 } 1168 /* And thus compute an approximation to 2^y - 1. */ 1169 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1170 &asig0, &asig1, &asig2); 1171 aexp += extractFloatx80Exp(y) - 0x3ffe; 1172 asign ^= extractFloatx80Sign(y); 1173 if (n != 32) { 1174 /* 1175 * Multiply this by the precomputed value of 2^t and 1176 * add that of 2^t - 1. 
1177 */ 1178 mul128By64To192(asig0, asig1, 1179 extractFloatx80Frac(f2xm1_table[n].exp2), 1180 &asig0, &asig1, &asig2); 1181 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1182 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1183 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1184 bsig1 = 0; 1185 if (bexp < aexp) { 1186 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1187 &bsig0, &bsig1); 1188 } else if (aexp < bexp) { 1189 shift128RightJamming(asig0, asig1, bexp - aexp, 1190 &asig0, &asig1); 1191 aexp = bexp; 1192 } 1193 /* The sign of 2^t - 1 is always that of the result. */ 1194 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1195 if (asign == bsign) { 1196 /* Avoid possible carry out of the addition. */ 1197 shift128RightJamming(asig0, asig1, 1, 1198 &asig0, &asig1); 1199 shift128RightJamming(bsig0, bsig1, 1, 1200 &bsig0, &bsig1); 1201 ++aexp; 1202 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1203 } else { 1204 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1205 asign = bsign; 1206 } 1207 } 1208 env->fp_status.float_rounding_mode = save_mode; 1209 /* This result is inexact. */ 1210 asig1 |= 1; 1211 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1212 asign, aexp, asig0, asig1, 1213 &env->fp_status); 1214 } 1215 1216 env->fp_status.floatx80_rounding_precision = save_prec; 1217 } 1218 merge_exception_flags(env, old_flags); 1219 } 1220 1221 void helper_fptan(CPUX86State *env) 1222 { 1223 double fptemp = floatx80_to_double(env, ST0); 1224 1225 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1226 env->fpus |= 0x400; 1227 } else { 1228 fptemp = tan(fptemp); 1229 ST0 = double_to_floatx80(env, fptemp); 1230 fpush(env); 1231 ST0 = floatx80_one; 1232 env->fpus &= ~0x400; /* C2 <-- 0 */ 1233 /* the above code is for |arg| < 2**52 only */ 1234 } 1235 } 1236 1237 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  Each
 * value is given as a biased exponent plus the high and low 64-bit
 * halves of its significand.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};

/*
 * helper_fpatan() indexes this table with n, where t = n/8 is the
 * nearest multiple of 1/8 to its reduced argument, and uses the entry
 * as arctan(t).  Entry 0 is zero.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: store in ST1 the arctangent of ST1/ST0, placed in the quadrant
 * selected by the signs of both operands, then pop the register stack.
 *
 * NaNs and invalid encodings are dealt with first.  Cases where the
 * quotient alone is accurate enough (ST0 positive and either infinite
 * or at least 80 binary orders of magnitude larger than ST1) use a
 * single division.  The remaining cases either pick a 128-bit multiple
 * of pi/4 directly or run the table-plus-polynomial evaluation below.
 * The result always takes the sign of ST1.
 *
 * The exception flags saved on entry are handed back to
 * merge_exception_flags() before returning.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                 arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            /* sig - 1 with an all-ones low word: just below sig.  */
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                /* Both infinite: 3pi/4 or pi/4 by the sign of ST0.  */
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            /* ST1 dominates: the result is pi/2.  */
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            /*
             * Intermediate steps run in round-to-nearest at full
             * floatx80 precision; both settings are restored at the end
             * of this branch, before the final rounding.
             */
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Step the quotient estimate down while the remainder is negative.  */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                /* t is zero, so y is x itself; texp == 0 marks this case.  */
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                /* Build t = n/8 as a normalized 64-bit significand.  */
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* x == t exactly; yexp == 0 marks y as zero.  */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        /* x < t: negate the difference and renormalize.  */
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero, so z is simply y.  */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Step the quotient estimate down while the remainder is negative.  */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) contributes nothing.  */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Combine the high and low table parts into 128 bits.  */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Align both terms to one more than the larger
                 * exponent, leaving a spare top bit for the sum.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                /* No quadrant adjustment: the result is arctan(x).  */
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    /* The result was written to ST1; popping makes it the new ST0.  */
    fpop(env);
    merge_exception_flags(env, old_flags);
}
0) { 1763 int shift = clz64(temp.l.lower); 1764 temp.l.lower <<= shift; 1765 expdif = 1 - EXPBIAS - shift; 1766 float_raise(float_flag_input_denormal, &env->fp_status); 1767 } else { 1768 expdif = EXPD(temp) - EXPBIAS; 1769 } 1770 /* DP exponent bias */ 1771 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1772 fpush(env); 1773 BIASEXPONENT(temp); 1774 ST0 = temp.d; 1775 } 1776 merge_exception_flags(env, old_flags); 1777 } 1778 1779 static void helper_fprem_common(CPUX86State *env, bool mod) 1780 { 1781 uint8_t old_flags = save_exception_flags(env); 1782 uint64_t quotient; 1783 CPU_LDoubleU temp0, temp1; 1784 int exp0, exp1, expdiff; 1785 1786 temp0.d = ST0; 1787 temp1.d = ST1; 1788 exp0 = EXPD(temp0); 1789 exp1 = EXPD(temp1); 1790 1791 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1792 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1793 exp0 == 0x7fff || exp1 == 0x7fff || 1794 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1795 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1796 } else { 1797 if (exp0 == 0) { 1798 exp0 = 1 - clz64(temp0.l.lower); 1799 } 1800 if (exp1 == 0) { 1801 exp1 = 1 - clz64(temp1.l.lower); 1802 } 1803 expdiff = exp0 - exp1; 1804 if (expdiff < 64) { 1805 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1806 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1807 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1808 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1809 } else { 1810 /* 1811 * Partial remainder. This choice of how many bits to 1812 * process at once is specified in AMD instruction set 1813 * manuals, and empirically is followed by Intel 1814 * processors as well; it ensures that the final remainder 1815 * operation in a loop does produce the correct low three 1816 * bits of the quotient. AMD manuals specify that the 1817 * flags other than C2 are cleared, and empirically Intel 1818 * processors clear them as well. 
1819 */ 1820 int n = 32 + (expdiff % 32); 1821 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1822 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1823 env->fpus |= 0x400; /* C2 <-- 1 */ 1824 } 1825 } 1826 merge_exception_flags(env, old_flags); 1827 } 1828 1829 void helper_fprem1(CPUX86State *env) 1830 { 1831 helper_fprem_common(env, false); 1832 } 1833 1834 void helper_fprem(CPUX86State *env) 1835 { 1836 helper_fprem_common(env, true); 1837 } 1838 1839 /* 128-bit significand of log2(e). */ 1840 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1841 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1842 1843 /* 1844 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1845 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1846 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1847 * interval [sqrt(2)/2, sqrt(2)]. 1848 */ 1849 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1850 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1851 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1852 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1853 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1854 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1855 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1856 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1857 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1858 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1859 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1860 1861 /* 1862 * Compute an approximation of log2(1+arg), where 1+arg is in the 1863 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1864 * function is called, rounding precision is set to 80 and the 1865 * round-to-nearest mode is in effect. 
/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 *
 * The result is returned unrounded: *exp receives its biased exponent
 * and *sig0 / *sig1 the first two 64-bit words of the final 256-bit
 * product.  No sign is returned.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        /*
         * arg < 0, so 2+arg is below 2: align |arg| to exponent 0x3fff
         * and negate it modulo 2^128 to form 2 - |arg|.
         */
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        /*
         * arg > 0, so 2+arg is at least 2: align arg to exponent 0x4000
         * and set the top significand bit to add the 2.
         */
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    /* Long division of arg by the denominator, giving t = tsig0:tsig1.  */
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    /* Step the quotient estimate down while the remainder is negative.  */
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion.  */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result.  */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    /* Only the first two words of the product are handed back.  */
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}
2007 */ 2008 float_raise(float_flag_invalid, &env->fp_status); 2009 ST1 = floatx80_default_nan(&env->fp_status); 2010 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2011 arg1_exp == 0x7fff) { 2012 /* 2013 * One argument is zero, or multiplying by infinity; correct 2014 * result is exact and can be obtained by multiplying the 2015 * arguments. 2016 */ 2017 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2018 } else if (arg0_exp < 0x3fb0) { 2019 /* 2020 * Multiplying both arguments and an extra-precision version 2021 * of log2(e) is sufficiently precise. 2022 */ 2023 uint64_t sig0, sig1, sig2; 2024 int32_t exp; 2025 if (arg0_exp == 0) { 2026 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2027 } 2028 if (arg1_exp == 0) { 2029 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2030 } 2031 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2032 &sig0, &sig1, &sig2); 2033 exp = arg0_exp + 1; 2034 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2035 exp += arg1_exp - 0x3ffe; 2036 /* This result is inexact. */ 2037 sig1 |= 1; 2038 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2039 arg0_sign ^ arg1_sign, exp, 2040 sig0, sig1, &env->fp_status); 2041 } else { 2042 int32_t aexp; 2043 uint64_t asig0, asig1, asig2; 2044 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2045 FloatX80RoundPrec save_prec = 2046 env->fp_status.floatx80_rounding_precision; 2047 env->fp_status.float_rounding_mode = float_round_nearest_even; 2048 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2049 2050 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2051 /* 2052 * Multiply by the second argument to compute the required 2053 * result. 2054 */ 2055 if (arg1_exp == 0) { 2056 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2057 } 2058 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2059 aexp += arg1_exp - 0x3ffe; 2060 /* This result is inexact. 
*/ 2061 asig1 |= 1; 2062 env->fp_status.float_rounding_mode = save_mode; 2063 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2064 arg0_sign ^ arg1_sign, aexp, 2065 asig0, asig1, &env->fp_status); 2066 env->fp_status.floatx80_rounding_precision = save_prec; 2067 } 2068 fpop(env); 2069 merge_exception_flags(env, old_flags); 2070 } 2071 2072 void helper_fyl2x(CPUX86State *env) 2073 { 2074 uint8_t old_flags = save_exception_flags(env); 2075 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2076 int32_t arg0_exp = extractFloatx80Exp(ST0); 2077 bool arg0_sign = extractFloatx80Sign(ST0); 2078 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2079 int32_t arg1_exp = extractFloatx80Exp(ST1); 2080 bool arg1_sign = extractFloatx80Sign(ST1); 2081 2082 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2083 float_raise(float_flag_invalid, &env->fp_status); 2084 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2085 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2086 float_raise(float_flag_invalid, &env->fp_status); 2087 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2088 } else if (floatx80_invalid_encoding(ST0) || 2089 floatx80_invalid_encoding(ST1)) { 2090 float_raise(float_flag_invalid, &env->fp_status); 2091 ST1 = floatx80_default_nan(&env->fp_status); 2092 } else if (floatx80_is_any_nan(ST0)) { 2093 ST1 = ST0; 2094 } else if (floatx80_is_any_nan(ST1)) { 2095 /* Pass this NaN through. */ 2096 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2097 float_raise(float_flag_invalid, &env->fp_status); 2098 ST1 = floatx80_default_nan(&env->fp_status); 2099 } else if (floatx80_is_infinity(ST1)) { 2100 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2101 &env->fp_status); 2102 switch (cmp) { 2103 case float_relation_less: 2104 ST1 = floatx80_chs(ST1); 2105 break; 2106 case float_relation_greater: 2107 /* Result is infinity of the same sign as ST1. 
*/ 2108 break; 2109 default: 2110 float_raise(float_flag_invalid, &env->fp_status); 2111 ST1 = floatx80_default_nan(&env->fp_status); 2112 break; 2113 } 2114 } else if (floatx80_is_infinity(ST0)) { 2115 if (floatx80_is_zero(ST1)) { 2116 float_raise(float_flag_invalid, &env->fp_status); 2117 ST1 = floatx80_default_nan(&env->fp_status); 2118 } else if (arg1_sign) { 2119 ST1 = floatx80_chs(ST0); 2120 } else { 2121 ST1 = ST0; 2122 } 2123 } else if (floatx80_is_zero(ST0)) { 2124 if (floatx80_is_zero(ST1)) { 2125 float_raise(float_flag_invalid, &env->fp_status); 2126 ST1 = floatx80_default_nan(&env->fp_status); 2127 } else { 2128 /* Result is infinity with opposite sign to ST1. */ 2129 float_raise(float_flag_divbyzero, &env->fp_status); 2130 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2131 0x8000000000000000ULL); 2132 } 2133 } else if (floatx80_is_zero(ST1)) { 2134 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2135 ST1 = floatx80_chs(ST1); 2136 } 2137 /* Otherwise, ST1 is already the correct result. 
*/ 2138 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2139 if (arg1_sign) { 2140 ST1 = floatx80_chs(floatx80_zero); 2141 } else { 2142 ST1 = floatx80_zero; 2143 } 2144 } else { 2145 int32_t int_exp; 2146 floatx80 arg0_m1; 2147 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2148 FloatX80RoundPrec save_prec = 2149 env->fp_status.floatx80_rounding_precision; 2150 env->fp_status.float_rounding_mode = float_round_nearest_even; 2151 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2152 2153 if (arg0_exp == 0) { 2154 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2155 } 2156 if (arg1_exp == 0) { 2157 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2158 } 2159 int_exp = arg0_exp - 0x3fff; 2160 if (arg0_sig > 0xb504f333f9de6484ULL) { 2161 ++int_exp; 2162 } 2163 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2164 &env->fp_status), 2165 floatx80_one, &env->fp_status); 2166 if (floatx80_is_zero(arg0_m1)) { 2167 /* Exact power of 2; multiply by ST1. */ 2168 env->fp_status.float_rounding_mode = save_mode; 2169 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2170 ST1, &env->fp_status); 2171 } else { 2172 bool asign = extractFloatx80Sign(arg0_m1); 2173 int32_t aexp; 2174 uint64_t asig0, asig1, asig2; 2175 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2176 if (int_exp != 0) { 2177 bool isign = (int_exp < 0); 2178 int32_t iexp; 2179 uint64_t isig; 2180 int shift; 2181 int_exp = isign ? -int_exp : int_exp; 2182 shift = clz32(int_exp) + 32; 2183 isig = int_exp; 2184 isig <<= shift; 2185 iexp = 0x403e - shift; 2186 shift128RightJamming(asig0, asig1, iexp - aexp, 2187 &asig0, &asig1); 2188 if (asign == isign) { 2189 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2190 } else { 2191 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2192 } 2193 aexp = iexp; 2194 asign = isign; 2195 } 2196 /* 2197 * Multiply by the second argument to compute the required 2198 * result. 
2199 */ 2200 if (arg1_exp == 0) { 2201 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2202 } 2203 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2204 aexp += arg1_exp - 0x3ffe; 2205 /* This result is inexact. */ 2206 asig1 |= 1; 2207 env->fp_status.float_rounding_mode = save_mode; 2208 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2209 asign ^ arg1_sign, aexp, 2210 asig0, asig1, &env->fp_status); 2211 } 2212 2213 env->fp_status.floatx80_rounding_precision = save_prec; 2214 } 2215 fpop(env); 2216 merge_exception_flags(env, old_flags); 2217 } 2218 2219 void helper_fsqrt(CPUX86State *env) 2220 { 2221 uint8_t old_flags = save_exception_flags(env); 2222 if (floatx80_is_neg(ST0)) { 2223 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2224 env->fpus |= 0x400; 2225 } 2226 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2227 merge_exception_flags(env, old_flags); 2228 } 2229 2230 void helper_fsincos(CPUX86State *env) 2231 { 2232 double fptemp = floatx80_to_double(env, ST0); 2233 2234 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2235 env->fpus |= 0x400; 2236 } else { 2237 ST0 = double_to_floatx80(env, sin(fptemp)); 2238 fpush(env); 2239 ST0 = double_to_floatx80(env, cos(fptemp)); 2240 env->fpus &= ~0x400; /* C2 <-- 0 */ 2241 /* the above code is for |arg| < 2**63 only */ 2242 } 2243 } 2244 2245 void helper_frndint(CPUX86State *env) 2246 { 2247 uint8_t old_flags = save_exception_flags(env); 2248 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2249 merge_exception_flags(env, old_flags); 2250 } 2251 2252 void helper_fscale(CPUX86State *env) 2253 { 2254 uint8_t old_flags = save_exception_flags(env); 2255 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2256 float_raise(float_flag_invalid, &env->fp_status); 2257 ST0 = floatx80_default_nan(&env->fp_status); 2258 } else if (floatx80_is_any_nan(ST1)) { 2259 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2260 float_raise(float_flag_invalid, &env->fp_status); 
2261 } 2262 ST0 = ST1; 2263 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2264 float_raise(float_flag_invalid, &env->fp_status); 2265 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2266 } 2267 } else if (floatx80_is_infinity(ST1) && 2268 !floatx80_invalid_encoding(ST0) && 2269 !floatx80_is_any_nan(ST0)) { 2270 if (floatx80_is_neg(ST1)) { 2271 if (floatx80_is_infinity(ST0)) { 2272 float_raise(float_flag_invalid, &env->fp_status); 2273 ST0 = floatx80_default_nan(&env->fp_status); 2274 } else { 2275 ST0 = (floatx80_is_neg(ST0) ? 2276 floatx80_chs(floatx80_zero) : 2277 floatx80_zero); 2278 } 2279 } else { 2280 if (floatx80_is_zero(ST0)) { 2281 float_raise(float_flag_invalid, &env->fp_status); 2282 ST0 = floatx80_default_nan(&env->fp_status); 2283 } else { 2284 ST0 = (floatx80_is_neg(ST0) ? 2285 floatx80_chs(floatx80_infinity) : 2286 floatx80_infinity); 2287 } 2288 } 2289 } else { 2290 int n; 2291 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2292 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2293 set_float_exception_flags(0, &env->fp_status); 2294 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2295 set_float_exception_flags(save_flags, &env->fp_status); 2296 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2297 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2298 env->fp_status.floatx80_rounding_precision = save; 2299 } 2300 merge_exception_flags(env, old_flags); 2301 } 2302 2303 void helper_fsin(CPUX86State *env) 2304 { 2305 double fptemp = floatx80_to_double(env, ST0); 2306 2307 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2308 env->fpus |= 0x400; 2309 } else { 2310 ST0 = double_to_floatx80(env, sin(fptemp)); 2311 env->fpus &= ~0x400; /* C2 <-- 0 */ 2312 /* the above code is for |arg| < 2**53 only */ 2313 } 2314 } 2315 2316 void helper_fcos(CPUX86State *env) 2317 { 2318 double fptemp = floatx80_to_double(env, ST0); 2319 2320 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2321 
env->fpus |= 0x400; 2322 } else { 2323 ST0 = double_to_floatx80(env, cos(fptemp)); 2324 env->fpus &= ~0x400; /* C2 <-- 0 */ 2325 /* the above code is for |arg| < 2**63 only */ 2326 } 2327 } 2328 2329 void helper_fxam_ST0(CPUX86State *env) 2330 { 2331 CPU_LDoubleU temp; 2332 int expdif; 2333 2334 temp.d = ST0; 2335 2336 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2337 if (SIGND(temp)) { 2338 env->fpus |= 0x200; /* C1 <-- 1 */ 2339 } 2340 2341 if (env->fptags[env->fpstt]) { 2342 env->fpus |= 0x4100; /* Empty */ 2343 return; 2344 } 2345 2346 expdif = EXPD(temp); 2347 if (expdif == MAXEXPD) { 2348 if (MANTD(temp) == 0x8000000000000000ULL) { 2349 env->fpus |= 0x500; /* Infinity */ 2350 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2351 env->fpus |= 0x100; /* NaN */ 2352 } 2353 } else if (expdif == 0) { 2354 if (MANTD(temp) == 0) { 2355 env->fpus |= 0x4000; /* Zero */ 2356 } else { 2357 env->fpus |= 0x4400; /* Denormal */ 2358 } 2359 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2360 env->fpus |= 0x400; 2361 } 2362 } 2363 2364 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2365 uintptr_t retaddr) 2366 { 2367 int fpus, fptag, exp, i; 2368 uint64_t mant; 2369 CPU_LDoubleU tmp; 2370 2371 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2372 fptag = 0; 2373 for (i = 7; i >= 0; i--) { 2374 fptag <<= 2; 2375 if (env->fptags[i]) { 2376 fptag |= 3; 2377 } else { 2378 tmp.d = env->fpregs[i].d; 2379 exp = EXPD(tmp); 2380 mant = MANTD(tmp); 2381 if (exp == 0 && mant == 0) { 2382 /* zero */ 2383 fptag |= 1; 2384 } else if (exp == 0 || exp == MAXEXPD 2385 || (mant & (1LL << 63)) == 0) { 2386 /* NaNs, infinity, denormal */ 2387 fptag |= 2; 2388 } 2389 } 2390 } 2391 if (data32) { 2392 /* 32 bit */ 2393 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2394 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2395 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2396 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 2397 
cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 2398 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 2399 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 2400 } else { 2401 /* 16 bit */ 2402 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2403 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2404 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2405 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 2406 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 2407 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 2408 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 2409 } 2410 } 2411 2412 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2413 { 2414 do_fstenv(env, ptr, data32, GETPC()); 2415 } 2416 2417 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2418 { 2419 env->fpstt = (fpus >> 11) & 7; 2420 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2421 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2422 #if !defined(CONFIG_USER_ONLY) 2423 if (!(env->fpus & FPUS_SE)) { 2424 /* 2425 * Here the processor deasserts FERR#; in response, the chipset deasserts 2426 * IGNNE#. 
2427 */ 2428 cpu_clear_ignne(); 2429 } 2430 #endif 2431 } 2432 2433 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2434 uintptr_t retaddr) 2435 { 2436 int i, fpus, fptag; 2437 2438 if (data32) { 2439 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2440 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2441 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2442 } else { 2443 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2444 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2445 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2446 } 2447 cpu_set_fpus(env, fpus); 2448 for (i = 0; i < 8; i++) { 2449 env->fptags[i] = ((fptag & 3) == 3); 2450 fptag >>= 2; 2451 } 2452 } 2453 2454 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2455 { 2456 do_fldenv(env, ptr, data32, GETPC()); 2457 } 2458 2459 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2460 uintptr_t retaddr) 2461 { 2462 floatx80 tmp; 2463 int i; 2464 2465 do_fstenv(env, ptr, data32, retaddr); 2466 2467 ptr += (target_ulong)14 << data32; 2468 for (i = 0; i < 8; i++) { 2469 tmp = ST(i); 2470 do_fstt(env, tmp, ptr, retaddr); 2471 ptr += 10; 2472 } 2473 2474 do_fninit(env); 2475 } 2476 2477 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2478 { 2479 do_fsave(env, ptr, data32, GETPC()); 2480 } 2481 2482 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2483 uintptr_t retaddr) 2484 { 2485 floatx80 tmp; 2486 int i; 2487 2488 do_fldenv(env, ptr, data32, retaddr); 2489 ptr += (target_ulong)14 << data32; 2490 2491 for (i = 0; i < 8; i++) { 2492 tmp = do_fldt(env, ptr, retaddr); 2493 ST(i) = tmp; 2494 ptr += 10; 2495 } 2496 } 2497 2498 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2499 { 2500 do_frstor(env, ptr, data32, GETPC()); 2501 } 2502 2503 #define XO(X) offsetof(X86XSaveArea, X) 2504 2505 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2506 { 2507 int fpus, 
fptag, i; 2508 target_ulong addr; 2509 2510 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2511 fptag = 0; 2512 for (i = 0; i < 8; i++) { 2513 fptag |= (env->fptags[i] << i); 2514 } 2515 2516 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2517 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2518 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2519 2520 /* In 32-bit mode this is eip, sel, dp, sel. 2521 In 64-bit mode this is rip, rdp. 2522 But in either case we don't write actual data, just zeros. */ 2523 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2524 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2525 2526 addr = ptr + XO(legacy.fpregs); 2527 for (i = 0; i < 8; i++) { 2528 floatx80 tmp = ST(i); 2529 do_fstt(env, tmp, addr, ra); 2530 addr += 16; 2531 } 2532 } 2533 2534 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2535 { 2536 update_mxcsr_from_sse_status(env); 2537 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2538 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2539 } 2540 2541 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2542 { 2543 int i, nb_xmm_regs; 2544 target_ulong addr; 2545 2546 if (env->hflags & HF_CS64_MASK) { 2547 nb_xmm_regs = 16; 2548 } else { 2549 nb_xmm_regs = 8; 2550 } 2551 2552 addr = ptr + XO(legacy.xmm_regs); 2553 for (i = 0; i < nb_xmm_regs; i++) { 2554 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2555 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2556 addr += 16; 2557 } 2558 } 2559 2560 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2561 { 2562 int i, nb_xmm_regs; 2563 2564 if (env->hflags & HF_CS64_MASK) { 2565 nb_xmm_regs = 16; 2566 } else { 2567 nb_xmm_regs = 8; 2568 } 2569 2570 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2571 cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 2572 
cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 2573 } 2574 } 2575 2576 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2577 { 2578 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2579 int i; 2580 2581 for (i = 0; i < 4; i++, addr += 16) { 2582 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2583 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2584 } 2585 } 2586 2587 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2588 { 2589 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2590 env->bndcs_regs.cfgu, ra); 2591 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2592 env->bndcs_regs.sts, ra); 2593 } 2594 2595 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2596 { 2597 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2598 } 2599 2600 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2601 { 2602 /* The operand must be 16 byte aligned */ 2603 if (ptr & 0xf) { 2604 raise_exception_ra(env, EXCP0D_GPF, ra); 2605 } 2606 2607 do_xsave_fpu(env, ptr, ra); 2608 2609 if (env->cr[4] & CR4_OSFXSR_MASK) { 2610 do_xsave_mxcsr(env, ptr, ra); 2611 /* Fast FXSAVE leaves out the XMM registers */ 2612 if (!(env->efer & MSR_EFER_FFXSR) 2613 || (env->hflags & HF_CPL_MASK) 2614 || !(env->hflags & HF_LMA_MASK)) { 2615 do_xsave_sse(env, ptr, ra); 2616 } 2617 } 2618 } 2619 2620 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2621 { 2622 do_fxsave(env, ptr, GETPC()); 2623 } 2624 2625 static uint64_t get_xinuse(CPUX86State *env) 2626 { 2627 uint64_t inuse = -1; 2628 2629 /* For the most part, we don't track XINUSE. We could calculate it 2630 here for all components, but it's probably less work to simply 2631 indicate in use. That said, the state of BNDREGS is important 2632 enough to track in HFLAGS, so we might as well use that here. 
*/ 2633 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2634 inuse &= ~XSTATE_BNDREGS_MASK; 2635 } 2636 return inuse; 2637 } 2638 2639 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2640 uint64_t inuse, uint64_t opt, uintptr_t ra) 2641 { 2642 uint64_t old_bv, new_bv; 2643 2644 /* The OS must have enabled XSAVE. */ 2645 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2646 raise_exception_ra(env, EXCP06_ILLOP, ra); 2647 } 2648 2649 /* The operand must be 64 byte aligned. */ 2650 if (ptr & 63) { 2651 raise_exception_ra(env, EXCP0D_GPF, ra); 2652 } 2653 2654 /* Never save anything not enabled by XCR0. */ 2655 rfbm &= env->xcr0; 2656 opt &= rfbm; 2657 2658 if (opt & XSTATE_FP_MASK) { 2659 do_xsave_fpu(env, ptr, ra); 2660 } 2661 if (rfbm & XSTATE_SSE_MASK) { 2662 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2663 do_xsave_mxcsr(env, ptr, ra); 2664 } 2665 if (opt & XSTATE_SSE_MASK) { 2666 do_xsave_sse(env, ptr, ra); 2667 } 2668 if (opt & XSTATE_YMM_MASK) { 2669 do_xsave_ymmh(env, ptr + XO(avx_state), ra); 2670 } 2671 if (opt & XSTATE_BNDREGS_MASK) { 2672 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2673 } 2674 if (opt & XSTATE_BNDCSR_MASK) { 2675 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2676 } 2677 if (opt & XSTATE_PKRU_MASK) { 2678 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2679 } 2680 2681 /* Update the XSTATE_BV field. 
*/ 2682 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2683 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2684 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2685 } 2686 2687 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2688 { 2689 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2690 } 2691 2692 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2693 { 2694 uint64_t inuse = get_xinuse(env); 2695 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2696 } 2697 2698 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2699 { 2700 int i, fpuc, fpus, fptag; 2701 target_ulong addr; 2702 2703 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2704 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2705 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2706 cpu_set_fpuc(env, fpuc); 2707 cpu_set_fpus(env, fpus); 2708 fptag ^= 0xff; 2709 for (i = 0; i < 8; i++) { 2710 env->fptags[i] = ((fptag >> i) & 1); 2711 } 2712 2713 addr = ptr + XO(legacy.fpregs); 2714 for (i = 0; i < 8; i++) { 2715 floatx80 tmp = do_fldt(env, addr, ra); 2716 ST(i) = tmp; 2717 addr += 16; 2718 } 2719 } 2720 2721 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2722 { 2723 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2724 } 2725 2726 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2727 { 2728 int i, nb_xmm_regs; 2729 target_ulong addr; 2730 2731 if (env->hflags & HF_CS64_MASK) { 2732 nb_xmm_regs = 16; 2733 } else { 2734 nb_xmm_regs = 8; 2735 } 2736 2737 addr = ptr + XO(legacy.xmm_regs); 2738 for (i = 0; i < nb_xmm_regs; i++) { 2739 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2740 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2741 addr += 16; 2742 } 2743 } 2744 2745 static void do_clear_sse(CPUX86State *env) 2746 { 2747 int i, nb_xmm_regs; 2748 2749 if (env->hflags & 
HF_CS64_MASK) { 2750 nb_xmm_regs = 16; 2751 } else { 2752 nb_xmm_regs = 8; 2753 } 2754 2755 for (i = 0; i < nb_xmm_regs; i++) { 2756 env->xmm_regs[i].ZMM_Q(0) = 0; 2757 env->xmm_regs[i].ZMM_Q(1) = 0; 2758 } 2759 } 2760 2761 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2762 { 2763 int i, nb_xmm_regs; 2764 2765 if (env->hflags & HF_CS64_MASK) { 2766 nb_xmm_regs = 16; 2767 } else { 2768 nb_xmm_regs = 8; 2769 } 2770 2771 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2772 env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 2773 env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 2774 } 2775 } 2776 2777 static void do_clear_ymmh(CPUX86State *env) 2778 { 2779 int i, nb_xmm_regs; 2780 2781 if (env->hflags & HF_CS64_MASK) { 2782 nb_xmm_regs = 16; 2783 } else { 2784 nb_xmm_regs = 8; 2785 } 2786 2787 for (i = 0; i < nb_xmm_regs; i++) { 2788 env->xmm_regs[i].ZMM_Q(2) = 0; 2789 env->xmm_regs[i].ZMM_Q(3) = 0; 2790 } 2791 } 2792 2793 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2794 { 2795 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2796 int i; 2797 2798 for (i = 0; i < 4; i++, addr += 16) { 2799 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2800 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2801 } 2802 } 2803 2804 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2805 { 2806 /* FIXME: Extend highest implemented bit of linear address. 
*/ 2807 env->bndcs_regs.cfgu 2808 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2809 env->bndcs_regs.sts 2810 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2811 } 2812 2813 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2814 { 2815 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2816 } 2817 2818 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2819 { 2820 /* The operand must be 16 byte aligned */ 2821 if (ptr & 0xf) { 2822 raise_exception_ra(env, EXCP0D_GPF, ra); 2823 } 2824 2825 do_xrstor_fpu(env, ptr, ra); 2826 2827 if (env->cr[4] & CR4_OSFXSR_MASK) { 2828 do_xrstor_mxcsr(env, ptr, ra); 2829 /* Fast FXRSTOR leaves out the XMM registers */ 2830 if (!(env->efer & MSR_EFER_FFXSR) 2831 || (env->hflags & HF_CPL_MASK) 2832 || !(env->hflags & HF_LMA_MASK)) { 2833 do_xrstor_sse(env, ptr, ra); 2834 } 2835 } 2836 } 2837 2838 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2839 { 2840 do_fxrstor(env, ptr, GETPC()); 2841 } 2842 2843 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) 2844 { 2845 uint64_t xstate_bv, xcomp_bv, reserve0; 2846 2847 rfbm &= env->xcr0; 2848 2849 /* The OS must have enabled XSAVE. */ 2850 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2851 raise_exception_ra(env, EXCP06_ILLOP, ra); 2852 } 2853 2854 /* The operand must be 64 byte aligned. */ 2855 if (ptr & 63) { 2856 raise_exception_ra(env, EXCP0D_GPF, ra); 2857 } 2858 2859 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2860 2861 if ((int64_t)xstate_bv < 0) { 2862 /* FIXME: Compact form. */ 2863 raise_exception_ra(env, EXCP0D_GPF, ra); 2864 } 2865 2866 /* Standard form. */ 2867 2868 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2869 if (xstate_bv & ~env->xcr0) { 2870 raise_exception_ra(env, EXCP0D_GPF, ra); 2871 } 2872 2873 /* The XCOMP_BV field must be zero. 
Note that, as of the April 2016 2874 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2875 describes only XCOMP_BV, but the description of the standard form 2876 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2877 includes the next 64-bit field. */ 2878 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2879 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2880 if (xcomp_bv || reserve0) { 2881 raise_exception_ra(env, EXCP0D_GPF, ra); 2882 } 2883 2884 if (rfbm & XSTATE_FP_MASK) { 2885 if (xstate_bv & XSTATE_FP_MASK) { 2886 do_xrstor_fpu(env, ptr, ra); 2887 } else { 2888 do_fninit(env); 2889 memset(env->fpregs, 0, sizeof(env->fpregs)); 2890 } 2891 } 2892 if (rfbm & XSTATE_SSE_MASK) { 2893 /* Note that the standard form of XRSTOR loads MXCSR from memory 2894 whether or not the XSTATE_BV bit is set. */ 2895 do_xrstor_mxcsr(env, ptr, ra); 2896 if (xstate_bv & XSTATE_SSE_MASK) { 2897 do_xrstor_sse(env, ptr, ra); 2898 } else { 2899 do_clear_sse(env); 2900 } 2901 } 2902 if (rfbm & XSTATE_YMM_MASK) { 2903 if (xstate_bv & XSTATE_YMM_MASK) { 2904 do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 2905 } else { 2906 do_clear_ymmh(env); 2907 } 2908 } 2909 if (rfbm & XSTATE_BNDREGS_MASK) { 2910 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2911 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2912 env->hflags |= HF_MPX_IU_MASK; 2913 } else { 2914 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2915 env->hflags &= ~HF_MPX_IU_MASK; 2916 } 2917 } 2918 if (rfbm & XSTATE_BNDCSR_MASK) { 2919 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2920 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2921 } else { 2922 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2923 } 2924 cpu_sync_bndcs_hflags(env); 2925 } 2926 if (rfbm & XSTATE_PKRU_MASK) { 2927 uint64_t old_pkru = env->pkru; 2928 if (xstate_bv & XSTATE_PKRU_MASK) { 2929 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2930 } else { 2931 env->pkru = 0; 2932 } 2933 if (env->pkru != 
old_pkru) { 2934 CPUState *cs = env_cpu(env); 2935 tlb_flush(cs); 2936 } 2937 } 2938 } 2939 2940 #undef XO 2941 2942 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2943 { 2944 do_xrstor(env, ptr, rfbm, GETPC()); 2945 } 2946 2947 #if defined(CONFIG_USER_ONLY) 2948 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2949 { 2950 do_fsave(env, ptr, data32, 0); 2951 } 2952 2953 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2954 { 2955 do_frstor(env, ptr, data32, 0); 2956 } 2957 2958 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2959 { 2960 do_fxsave(env, ptr, 0); 2961 } 2962 2963 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2964 { 2965 do_fxrstor(env, ptr, 0); 2966 } 2967 2968 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 2969 { 2970 do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 2971 } 2972 2973 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 2974 { 2975 do_xrstor(env, ptr, -1, 0); 2976 } 2977 #endif 2978 2979 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2980 { 2981 /* The OS must have enabled XSAVE. */ 2982 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2983 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2984 } 2985 2986 switch (ecx) { 2987 case 0: 2988 return env->xcr0; 2989 case 1: 2990 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2991 return env->xcr0 & get_xinuse(env); 2992 } 2993 break; 2994 } 2995 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2996 } 2997 2998 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 2999 { 3000 uint32_t dummy, ena_lo, ena_hi; 3001 uint64_t ena; 3002 3003 /* The OS must have enabled XSAVE. */ 3004 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3005 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3006 } 3007 3008 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3009 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3010 goto do_gpf; 3011 } 3012 3013 /* Disallow enabling unimplemented features. 
*/ 3014 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3015 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3016 if (mask & ~ena) { 3017 goto do_gpf; 3018 } 3019 3020 /* Disallow enabling only half of MPX. */ 3021 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3022 & XSTATE_BNDCSR_MASK) { 3023 goto do_gpf; 3024 } 3025 3026 env->xcr0 = mask; 3027 cpu_sync_bndcs_hflags(env); 3028 cpu_sync_avx_hflag(env); 3029 return; 3030 3031 do_gpf: 3032 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3033 } 3034 3035 /* MMX/SSE */ 3036 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3037 3038 #define SSE_DAZ 0x0040 3039 #define SSE_RC_SHIFT 13 3040 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3041 #define SSE_FZ 0x8000 3042 3043 void update_mxcsr_status(CPUX86State *env) 3044 { 3045 uint32_t mxcsr = env->mxcsr; 3046 int rnd_type; 3047 3048 /* set rounding mode */ 3049 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3050 set_x86_rounding_mode(rnd_type, &env->sse_status); 3051 3052 /* Set exception flags. */ 3053 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3054 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3055 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3056 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3057 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3058 &env->sse_status); 3059 3060 /* set denormals are zero */ 3061 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3062 3063 /* set flush to zero */ 3064 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3065 } 3066 3067 void update_mxcsr_from_sse_status(CPUX86State *env) 3068 { 3069 uint8_t flags = get_float_exception_flags(&env->sse_status); 3070 /* 3071 * The MXCSR denormal flag has opposite semantics to 3072 * float_flag_input_denormal (the softfloat code sets that flag 3073 * only when flushing input denormals to zero, but SSE sets it 3074 * only when not flushing them to zero), so is not converted 3075 * here. 
3076 */ 3077 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3078 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3079 (flags & float_flag_overflow ? FPUS_OE : 0) | 3080 (flags & float_flag_underflow ? FPUS_UE : 0) | 3081 (flags & float_flag_inexact ? FPUS_PE : 0) | 3082 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3083 0)); 3084 } 3085 3086 void helper_update_mxcsr(CPUX86State *env) 3087 { 3088 update_mxcsr_from_sse_status(env); 3089 } 3090 3091 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3092 { 3093 cpu_set_mxcsr(env, val); 3094 } 3095 3096 void helper_enter_mmx(CPUX86State *env) 3097 { 3098 env->fpstt = 0; 3099 *(uint32_t *)(env->fptags) = 0; 3100 *(uint32_t *)(env->fptags + 4) = 0; 3101 } 3102 3103 void helper_emms(CPUX86State *env) 3104 { 3105 /* set to empty state */ 3106 *(uint32_t *)(env->fptags) = 0x01010101; 3107 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3108 } 3109 3110 #define SHIFT 0 3111 #include "ops_sse.h" 3112 3113 #define SHIFT 1 3114 #include "ops_sse.h" 3115 3116 #define SHIFT 2 3117 #include "ops_sse.h" 3118