/*
 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "tcg-cpu.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"

/* float macros */
/*
 * Register accessors: FT0 is the scratch operand register; ST(n)
 * addresses the x87 register stack relative to the current top of
 * stack env->fpstt (mod 8).
 */
#define FT0 (env->ft0)
#define ST0 (env->fpregs[env->fpstt].d)
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/* Rounding-control field of the FPU control word (bits 11:10). */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/* FPU status-word exception flags and summary/busy bits. */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* The six exception-mask bits of the FPU control word. */
#define FPUC_EM 0x3f

/*
 * Extended-precision constants for the FLDL2T/FLDL2E/FLDLG2/FLDLN2/FLDPI
 * instructions.  The "_d" variants are the values rounded down (used by
 * the helpers below when the rounding mode is down or to-zero) and
 * "_u" the value rounded up (used by FLDL2T when rounding up).
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Push: move the top-of-stack pointer down and mark the new slot in use. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop: mark the current top-of-stack slot empty and move the pointer up. */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/*
 * Load an 80-bit extended value from guest memory at @ptr: 64-bit
 * significand first, then the 16-bit sign/exponent word at @ptr + 8.
 */
static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

/* Store an 80-bit extended value to guest memory (layout as do_fldt). */
static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                    uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}

/* x87 FPU helpers */

/*
 * Convert a floatx80 to a host double, type-punning through the
 * float64 bit pattern via a union (avoids strict-aliasing issues).
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

/* Inverse of floatx80_to_double: host double -> floatx80. */
static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * OR @mask into the status word; if any exception bit set in FPUS is
 * unmasked in the control word, also set the summary and busy flags.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

/* Snapshot softfloat's accumulated exception flags and clear them. */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the softfloat exceptions raised since save_exception_flags()
 * into the x87 status word, then restore the previously saved flags
 * so they are not lost.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
}

/* Division with x87 status-word exception reporting. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/*
 * Deliver a pending FPU exception: #MF if CR0.NE is set, otherwise
 * (system emulation only) via the legacy FERR interrupt line.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

/* Load a 32-bit float into FT0, widening to extended precision. */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a 64-bit double into FT0, widening to extended precision. */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Load a signed 32-bit integer into FT0 (always exact). */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

/* FLD m32: push a 32-bit float onto the register stack. */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FLD m64: push a 64-bit double onto the register stack. */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* FILD m32: push a signed 32-bit integer (always exact). */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FILD m64: push a signed 64-bit integer (always exact in floatx80). */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FST m32: narrow ST0 to a 32-bit float, returning its bit pattern. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FST m64: narrow ST0 to a 64-bit double, returning its bit pattern. */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/*
 * FIST m16: convert ST0 to a 16-bit integer using the current rounding
 * mode.  Out-of-range results raise invalid and store the integer
 * indefinite value -32768 (the widening to int32 detects the overflow).
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/*
 * FIST m32: convert ST0 to a 32-bit integer; on invalid conversion
 * store the integer indefinite value 0x80000000.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/*
 * FIST m64: convert ST0 to a 64-bit integer; on invalid conversion
 * store the integer indefinite value 0x8000000000000000.
 */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m16: like FIST but always truncates (round toward zero). */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m32: truncating conversion to 32-bit integer. */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT m64: truncating conversion to 64-bit integer. */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FLD m80: push an 80-bit extended value loaded from guest memory. */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

/* FST m80: store ST0 to guest memory in 80-bit format. */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    do_fstt(env, ST0, ptr, GETPC());
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP: rotate the stack top down; clears C1 and the other CC bits. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

/* FINCSTP: rotate the stack top up; clears C1 and the other CC bits. */
void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE: mark ST(st_index) as empty in the tag word. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

/* FXCH: exchange ST0 with ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/*
 * Status-word C3/C2/C0 patterns indexed by FloatRelation + 1
 * (less / equal / greater / unordered).
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

/* FCOM: compare ST0 with FT0, signalling on any NaN operand. */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare — only signalling NaNs raise invalid. */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* EFLAGS ZF/PF/CF patterns indexed by FloatRelation + 1. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: compare ST0 with FT0, reporting the result in EFLAGS. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

/* FUCOMI: quiet variant of FCOMI. */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: reversed subtraction, FT0 - ST0. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

/* FDIVR: reversed division, FT0 / ST0. */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* Reversed: ST(n) = ST0 - ST(n). */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

/* Reversed: ST(n) = ST0 / ST(n). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
/* FCHS: flip the sign of ST0 (never raises exceptions). */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

/* FABS: clear the sign of ST0 (never raises exceptions). */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/*
 * FLDL2T: load log2(10).  The only constant that is rounded up in
 * round-up mode; all other modes use the rounded-down value.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

/* FLDL2E: load log2(e), honouring the rounding mode. */
void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

/* FLDPI: load pi, honouring the rounding mode. */
void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

/* FLDLG2: load log10(2), honouring the rounding mode. */
void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

/* FLDLN2: load ln(2), honouring the rounding mode. */
void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current top-of-stack in bits 13:11. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

/* FNSTCW: read the control word. */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/*
 * Propagate the rounding-control and precision-control fields of the
 * FPU control word into the softfloat status.
 */
void update_fp_status(CPUX86State *env)
{
    FloatRoundMode rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_mode = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_mode = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_mode = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_mode = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_mode, &env->fp_status);

    /* Precision control: 0 = single, 2 = double, 3 = extended
       (1 is reserved and treated as extended here). */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

/* FLDCW: write the control word (also refreshes fp_status). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear exception, summary and busy flags in the status word. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked FPU exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: reset status, stack top and control word; empty all tags. */
void helper_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

/* BCD ops */

/*
 * FBLD: read 9 bytes of packed BCD (two digits per byte, low byte
 * first) plus a sign byte at offset 9, and push the value.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/*
 * FBST: store ST0 as 10-byte packed BCD.  Values with more than 18
 * decimal digits raise invalid and store the packed-BCD indefinite
 * pattern (0xff 0xff 0xc0 in the top three bytes).
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;

    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
        }
        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    /* Sign byte comes from the original sign of ST0, not the rounded val. */
    if (SIGND(temp)) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    /* Emit two decimal digits per byte, least-significant first. */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    /* Zero-fill the remaining digit bytes. */
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)

struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.
*/ 837 floatx80 exp2; 838 /* The value of 2^t - 1. */ 839 floatx80 exp2m1; 840 }; 841 842 static const struct f2xm1_data f2xm1_table[65] = { 843 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 844 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 845 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 846 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 847 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 848 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 849 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 850 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 851 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 852 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 853 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 854 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 855 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 856 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 857 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 858 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 859 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 860 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 861 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 862 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 863 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 864 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 865 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 866 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 867 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 868 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 869 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 870 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 871 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 872 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 873 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 874 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 875 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 
876 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 877 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 878 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 879 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 880 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 881 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 882 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 883 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 884 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 885 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 886 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 887 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 888 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 889 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 890 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 891 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 892 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 893 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 894 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 895 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 896 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 897 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 898 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 899 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 900 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 901 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 902 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 903 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 904 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 905 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 906 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 907 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 908 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 909 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 910 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 911 
make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 912 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 913 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 914 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 915 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 916 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 917 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 918 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 919 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 920 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 921 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 922 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 923 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 924 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 925 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 926 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 927 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 928 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 929 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 930 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 931 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 932 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 933 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 934 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 935 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 936 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 937 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 938 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 939 { floatx80_zero_init, 940 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 941 floatx80_zero_init }, 942 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 943 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 944 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 945 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 946 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 947 make_floatx80_init(0x3ffa, 
0xb5586cf9891068a0ULL) }, 948 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 949 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 950 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 951 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 952 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 953 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 954 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 955 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 956 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 957 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 958 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 959 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 960 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 961 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 962 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 963 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 964 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 965 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 966 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 967 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 968 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 969 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 970 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 971 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 972 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 973 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 974 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 975 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 976 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 977 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 978 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 979 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 980 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 981 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 982 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 
983 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 984 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 985 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 986 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 987 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 988 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 989 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 990 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 991 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 992 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 993 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 994 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 995 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 996 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 997 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 998 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 999 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1000 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1001 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1002 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1003 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1004 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1005 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1006 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1007 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1008 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1009 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1010 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1011 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1012 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1013 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1014 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1015 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1016 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1017 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1018 
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

/*
 * F2XM1: replace ST0 with 2^ST0 - 1.  Arguments with absolute value
 * greater than 1, invalid encodings and signaling NaNs produce a NaN
 * and raise the invalid exception; quiet NaNs and zeroes pass through.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact. */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* Intermediate computations use full precision, round-to-nearest. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        /* |ST0| < 1 here, so n lies in [0, 64] and indexes f2xm1_table. */
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].t;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two significands to the larger exponent. */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact. */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * FPTAN: replace ST0 with tan(ST0) and push 1.0.  Implemented with
 * the host's double-precision tan(), so the result has less than full
 * extended precision; arguments with magnitude above MAXTAN (2^63)
 * are not reduced and only set C2 instead.
 */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 */
/*
 * Each constant is a biased floatx80 exponent plus the high and low
 * 64-bit halves of a 128-bit significand; they are combined into
 * results by the FPATAN helper below.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).
 */
    floatx80 atan_high, atan_low;
};

/*
 * atan(n/8) for n = 0..8, each as a high part plus a low correction
 * part (the table is indexed by the n computed in helper_fpatan when
 * splitting its ratio as t + y with t = n/8).
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: replace ST1 with arctan(ST1/ST0), using the signs of both
 * operands to determine the quadrant (atan2-style); the stack is
 * popped afterwards so the result ends up in the new ST0.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct any overestimate from estimateDiv128To64. */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct any overestimate from estimateDiv128To64. */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold the low correction part into the high part. */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
1673 */ 1674 if (adj_exp >= axexp) { 1675 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1676 &axsig0, &axsig1); 1677 rexp = adj_exp + 1; 1678 shift128RightJamming(adj_sig0, adj_sig1, 1, 1679 &adj_sig0, &adj_sig1); 1680 } else { 1681 shift128RightJamming(axsig0, axsig1, 1, 1682 &axsig0, &axsig1); 1683 shift128RightJamming(adj_sig0, adj_sig1, 1684 axexp - adj_exp + 1, 1685 &adj_sig0, &adj_sig1); 1686 rexp = axexp + 1; 1687 } 1688 if (adj_sub) { 1689 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1690 &rsig0, &rsig1); 1691 } else { 1692 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1693 &rsig0, &rsig1); 1694 } 1695 } 1696 1697 env->fp_status.float_rounding_mode = save_mode; 1698 env->fp_status.floatx80_rounding_precision = save_prec; 1699 } 1700 /* This result is inexact. */ 1701 rsig1 |= 1; 1702 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1703 rsig0, rsig1, &env->fp_status); 1704 } 1705 1706 fpop(env); 1707 merge_exception_flags(env, old_flags); 1708 } 1709 1710 void helper_fxtract(CPUX86State *env) 1711 { 1712 uint8_t old_flags = save_exception_flags(env); 1713 CPU_LDoubleU temp; 1714 1715 temp.d = ST0; 1716 1717 if (floatx80_is_zero(ST0)) { 1718 /* Easy way to generate -inf and raising division by 0 exception */ 1719 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1720 &env->fp_status); 1721 fpush(env); 1722 ST0 = temp.d; 1723 } else if (floatx80_invalid_encoding(ST0)) { 1724 float_raise(float_flag_invalid, &env->fp_status); 1725 ST0 = floatx80_default_nan(&env->fp_status); 1726 fpush(env); 1727 ST0 = ST1; 1728 } else if (floatx80_is_any_nan(ST0)) { 1729 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1730 float_raise(float_flag_invalid, &env->fp_status); 1731 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1732 } 1733 fpush(env); 1734 ST0 = ST1; 1735 } else if (floatx80_is_infinity(ST0)) { 1736 fpush(env); 1737 ST0 = ST1; 1738 ST1 = floatx80_infinity; 1739 } else { 1740 int expdif; 1741 1742 if (EXPD(temp) == 
0) { 1743 int shift = clz64(temp.l.lower); 1744 temp.l.lower <<= shift; 1745 expdif = 1 - EXPBIAS - shift; 1746 float_raise(float_flag_input_denormal, &env->fp_status); 1747 } else { 1748 expdif = EXPD(temp) - EXPBIAS; 1749 } 1750 /* DP exponent bias */ 1751 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1752 fpush(env); 1753 BIASEXPONENT(temp); 1754 ST0 = temp.d; 1755 } 1756 merge_exception_flags(env, old_flags); 1757 } 1758 1759 static void helper_fprem_common(CPUX86State *env, bool mod) 1760 { 1761 uint8_t old_flags = save_exception_flags(env); 1762 uint64_t quotient; 1763 CPU_LDoubleU temp0, temp1; 1764 int exp0, exp1, expdiff; 1765 1766 temp0.d = ST0; 1767 temp1.d = ST1; 1768 exp0 = EXPD(temp0); 1769 exp1 = EXPD(temp1); 1770 1771 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1772 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1773 exp0 == 0x7fff || exp1 == 0x7fff || 1774 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1775 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1776 } else { 1777 if (exp0 == 0) { 1778 exp0 = 1 - clz64(temp0.l.lower); 1779 } 1780 if (exp1 == 0) { 1781 exp1 = 1 - clz64(temp1.l.lower); 1782 } 1783 expdiff = exp0 - exp1; 1784 if (expdiff < 64) { 1785 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1786 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1787 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1788 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1789 } else { 1790 /* 1791 * Partial remainder. This choice of how many bits to 1792 * process at once is specified in AMD instruction set 1793 * manuals, and empirically is followed by Intel 1794 * processors as well; it ensures that the final remainder 1795 * operation in a loop does produce the correct low three 1796 * bits of the quotient. AMD manuals specify that the 1797 * flags other than C2 are cleared, and empirically Intel 1798 * processors clear them as well. 
1799 */ 1800 int n = 32 + (expdiff % 32); 1801 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1802 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1803 env->fpus |= 0x400; /* C2 <-- 1 */ 1804 } 1805 } 1806 merge_exception_flags(env, old_flags); 1807 } 1808 1809 void helper_fprem1(CPUX86State *env) 1810 { 1811 helper_fprem_common(env, false); 1812 } 1813 1814 void helper_fprem(CPUX86State *env) 1815 { 1816 helper_fprem_common(env, true); 1817 } 1818 1819 /* 128-bit significand of log2(e). */ 1820 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1821 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1822 1823 /* 1824 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1825 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1826 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1827 * interval [sqrt(2)/2, sqrt(2)]. 1828 */ 1829 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1830 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1831 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1832 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1833 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1834 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1835 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1836 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1837 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1838 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1839 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1840 1841 /* 1842 * Compute an approximation of log2(1+arg), where 1+arg is in the 1843 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1844 * function is called, rounding precision is set to 80 and the 1845 * round-to-nearest mode is in effect. 
 * arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        /* Negative arg: the denominator 2+arg is in [1, 2). */
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        /* Positive arg: the denominator 2+arg is in [2, 4). */
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    /* Correct any overestimate from estimateDiv128To64. */
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    /* Return exponent and top 128 significand bits (not normalized). */
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/*
 * FYL2XP1: replace ST1 with ST1 * log2(ST0 + 1) and pop the stack.
 * ST0 must be small (per Intel |ST0| < 1 - sqrt(2)/2; the wider AMD
 * range up to sqrt(2) - 1 is accepted here); out-of-range arguments
 * produce a NaN and raise the invalid exception.
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact.
*/ 2041 asig1 |= 1; 2042 env->fp_status.float_rounding_mode = save_mode; 2043 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2044 arg0_sign ^ arg1_sign, aexp, 2045 asig0, asig1, &env->fp_status); 2046 env->fp_status.floatx80_rounding_precision = save_prec; 2047 } 2048 fpop(env); 2049 merge_exception_flags(env, old_flags); 2050 } 2051 2052 void helper_fyl2x(CPUX86State *env) 2053 { 2054 uint8_t old_flags = save_exception_flags(env); 2055 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2056 int32_t arg0_exp = extractFloatx80Exp(ST0); 2057 bool arg0_sign = extractFloatx80Sign(ST0); 2058 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2059 int32_t arg1_exp = extractFloatx80Exp(ST1); 2060 bool arg1_sign = extractFloatx80Sign(ST1); 2061 2062 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2063 float_raise(float_flag_invalid, &env->fp_status); 2064 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2065 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2066 float_raise(float_flag_invalid, &env->fp_status); 2067 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2068 } else if (floatx80_invalid_encoding(ST0) || 2069 floatx80_invalid_encoding(ST1)) { 2070 float_raise(float_flag_invalid, &env->fp_status); 2071 ST1 = floatx80_default_nan(&env->fp_status); 2072 } else if (floatx80_is_any_nan(ST0)) { 2073 ST1 = ST0; 2074 } else if (floatx80_is_any_nan(ST1)) { 2075 /* Pass this NaN through. */ 2076 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2077 float_raise(float_flag_invalid, &env->fp_status); 2078 ST1 = floatx80_default_nan(&env->fp_status); 2079 } else if (floatx80_is_infinity(ST1)) { 2080 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2081 &env->fp_status); 2082 switch (cmp) { 2083 case float_relation_less: 2084 ST1 = floatx80_chs(ST1); 2085 break; 2086 case float_relation_greater: 2087 /* Result is infinity of the same sign as ST1. 
             */
            break;
        default:
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            /* 0 * log2(+Inf) is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            /* 0 * log2(0) is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result.
         */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; the zero result takes ST1's sign. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split ST0 as m * 2^int_exp with m close to 1, so that
         * log2(ST0) = int_exp + log2(m).
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            /* Significand above sqrt(2): round int_exp up instead. */
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Fold the integer part int_exp into the fractional
                   log2(m) computed above, in 128-bit fixed point. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact.  */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* x87 FSQRT: ST0 := sqrt(ST0). */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/*
 * x87 FSINCOS: push one entry so that ST1 = sin(old ST0) and
 * ST0 = cos(old ST0).  Computed via the host's double-precision
 * libm, so this only approximates the 80-bit hardware result.
 */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* Argument out of range: set C2 and leave ST0 unchanged. */
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* x87 FRNDINT: round ST0 to an integer in the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* x87 FSCALE: ST0 := ST0 * 2^trunc(ST1). */
void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        /* NaN in ST1 is propagated (quieted if signaling). */
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by -Inf sends finite values to signed zero;
               Inf * 2^-Inf is invalid. */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by +Inf sends nonzero values to signed infinity;
               0 * 2^+Inf is invalid. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        /* Truncate ST1 to an int without leaking the conversion's
           exception flags into the architectural state. */
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

/* x87 FSIN, via the host libm (double-precision approximation). */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* Argument out of range: set C2 and leave ST0 unchanged. */
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/* x87 FCOS, via the host libm (double-precision approximation). */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* x87 FXAM: classify ST0 into the C3..C0 condition-code bits. */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* An empty register classifies as Empty regardless of contents. */
    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400; /* Normal */
    }
}

/*
 * Store the x87 environment (control/status/tag words plus zeroed
 * instruction/operand pointers) at ptr, in the 32-bit (28-byte) or
 * 16-bit (14-byte) layout selected by data32.
 */
static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Fold the current TOP pointer into the status word image. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            /* Recompute the architectural 2-bit tag from the value. */
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}

/* Install a new FPU status word: extract TOP, recompute the busy bit. */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
2407 */ 2408 cpu_clear_ignne(); 2409 } 2410 #endif 2411 } 2412 2413 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2414 uintptr_t retaddr) 2415 { 2416 int i, fpus, fptag; 2417 2418 if (data32) { 2419 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2420 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2421 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2422 } else { 2423 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2424 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2425 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2426 } 2427 cpu_set_fpus(env, fpus); 2428 for (i = 0; i < 8; i++) { 2429 env->fptags[i] = ((fptag & 3) == 3); 2430 fptag >>= 2; 2431 } 2432 } 2433 2434 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2435 { 2436 do_fldenv(env, ptr, data32, GETPC()); 2437 } 2438 2439 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2440 uintptr_t retaddr) 2441 { 2442 floatx80 tmp; 2443 int i; 2444 2445 do_fstenv(env, ptr, data32, retaddr); 2446 2447 ptr += (14 << data32); 2448 for (i = 0; i < 8; i++) { 2449 tmp = ST(i); 2450 do_fstt(env, tmp, ptr, retaddr); 2451 ptr += 10; 2452 } 2453 2454 /* fninit */ 2455 env->fpus = 0; 2456 env->fpstt = 0; 2457 cpu_set_fpuc(env, 0x37f); 2458 env->fptags[0] = 1; 2459 env->fptags[1] = 1; 2460 env->fptags[2] = 1; 2461 env->fptags[3] = 1; 2462 env->fptags[4] = 1; 2463 env->fptags[5] = 1; 2464 env->fptags[6] = 1; 2465 env->fptags[7] = 1; 2466 } 2467 2468 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2469 { 2470 do_fsave(env, ptr, data32, GETPC()); 2471 } 2472 2473 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2474 uintptr_t retaddr) 2475 { 2476 floatx80 tmp; 2477 int i; 2478 2479 do_fldenv(env, ptr, data32, retaddr); 2480 ptr += (14 << data32); 2481 2482 for (i = 0; i < 8; i++) { 2483 tmp = do_fldt(env, ptr, retaddr); 2484 ST(i) = tmp; 2485 ptr += 10; 2486 } 2487 } 2488 2489 void helper_frstor(CPUX86State *env, 
target_ulong ptr, int data32) 2490 { 2491 do_frstor(env, ptr, data32, GETPC()); 2492 } 2493 2494 #if defined(CONFIG_USER_ONLY) 2495 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2496 { 2497 do_fsave(env, ptr, data32, 0); 2498 } 2499 2500 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2501 { 2502 do_frstor(env, ptr, data32, 0); 2503 } 2504 #endif 2505 2506 #define XO(X) offsetof(X86XSaveArea, X) 2507 2508 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2509 { 2510 int fpus, fptag, i; 2511 target_ulong addr; 2512 2513 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2514 fptag = 0; 2515 for (i = 0; i < 8; i++) { 2516 fptag |= (env->fptags[i] << i); 2517 } 2518 2519 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2520 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2521 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2522 2523 /* In 32-bit mode this is eip, sel, dp, sel. 2524 In 64-bit mode this is rip, rdp. 2525 But in either case we don't write actual data, just zeros. 
*/ 2526 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2527 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2528 2529 addr = ptr + XO(legacy.fpregs); 2530 for (i = 0; i < 8; i++) { 2531 floatx80 tmp = ST(i); 2532 do_fstt(env, tmp, addr, ra); 2533 addr += 16; 2534 } 2535 } 2536 2537 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2538 { 2539 update_mxcsr_from_sse_status(env); 2540 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2541 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2542 } 2543 2544 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2545 { 2546 int i, nb_xmm_regs; 2547 target_ulong addr; 2548 2549 if (env->hflags & HF_CS64_MASK) { 2550 nb_xmm_regs = 16; 2551 } else { 2552 nb_xmm_regs = 8; 2553 } 2554 2555 addr = ptr + XO(legacy.xmm_regs); 2556 for (i = 0; i < nb_xmm_regs; i++) { 2557 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2558 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2559 addr += 16; 2560 } 2561 } 2562 2563 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2564 { 2565 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2566 int i; 2567 2568 for (i = 0; i < 4; i++, addr += 16) { 2569 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2570 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2571 } 2572 } 2573 2574 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2575 { 2576 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2577 env->bndcs_regs.cfgu, ra); 2578 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2579 env->bndcs_regs.sts, ra); 2580 } 2581 2582 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2583 { 2584 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2585 } 2586 2587 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2588 { 2589 /* 
     The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, GETPC());
}

/* Return the XINUSE bitmap (which components hold non-initial state). */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Common XSAVE/XSAVEOPT implementation: store the components selected
 * by rfbm (masked by XCR0) and update the header's XSTATE_BV field.
 * "opt" further restricts which components are actually written
 * (XSAVEOPT passes the in-use set; XSAVE passes all bits).
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0. */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.
         */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
    }

    /* Update the XSTATE_BV field. */
    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/* Load the legacy x87 portion of the XSAVE/FXRSTOR area. */
static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);
    /* Undo the inversion applied when the tag byte was stored. */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
}

/* Load the XMM registers: 16 in a 64-bit code segment, else 8. */
static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

/* Load the four MPX bound registers. */
static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
    env->bndcs_regs.sts
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
}

static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
}

/* FXRSTOR: the inverse of do_fxsave(). */
static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, GETPC());
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, 0);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, 0);
}
#endif

/*
 * XRSTOR (standard form only): validate the header, then load or
 * reinitialize each state component selected by rfbm.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field.  */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            /* Component marked not-present: reset to initial state. */
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.
         */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            /* A protection-key change invalidates cached translations. */
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}

#undef XO

uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        /* XGETBV with ECX=1 returns XCR0 AND XINUSE when supported. */
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    /* Any other sub-leaf: #GP. */
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.
     */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features. */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX. */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */

#define SSE_DAZ             0x0040
#define SSE_RC_MASK         0x6000
#define SSE_RC_NEAR         0x0000
#define SSE_RC_DOWN         0x2000
#define SSE_RC_UP           0x4000
#define SSE_RC_CHOP         0x6000
#define SSE_FZ              0x8000

/* Propagate MXCSR rounding/flag/DAZ/FTZ settings into sse_status. */
void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int rnd_type;

    /* set rounding mode */
    switch (mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /* Set exception flags.  */
    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
                              &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

/* Fold accumulated softfloat exception flags back into MXCSR. */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
     */
    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                   (flags & float_flag_overflow ? FPUS_OE : 0) |
                   (flags & float_flag_underflow ? FPUS_UE : 0) |
                   (flags & float_flag_inexact ? FPUS_PE : 0) |
                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
                    0));
}

void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}

void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}

/* Mark the FP stack in MMX use: TOP = 0 and all tags valid (0). */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    /* NOTE(review): fptags is written as two 32-bit words — assumes
       it is 8 contiguous byte-sized entries; confirm CPUX86State
       layout and aliasing rules if this is touched. */
    *(uint32_t *)(env->fptags) = 0;
    *(uint32_t *)(env->fptags + 4) = 0;
}

/* EMMS: mark every x87/MMX register tag as empty (1). */
void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    *(uint32_t *)(env->fptags) = 0x01010101;
    *(uint32_t *)(env->fptags + 4) = 0x01010101;
}

/* XXX: suppress */
void helper_movq(CPUX86State *env, void *d, void *s)
{
    *(uint64_t *)d = *(uint64_t *)s;
}

/* Instantiate the SSE op templates for MMX (SHIFT 0) and SSE (SHIFT 1). */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"