/*
 *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include <math.h>
#include "cpu.h"
#include "exec/helper-proto.h"
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"

/* float macros */
#define FT0 (env->ft0)
#define ST0 (env->fpregs[env->fpstt].d)
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP   0x800
#define FPU_RC_CHOP 0xc00

#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) (fp.l.upper & 0x7fff)
#define SIGND(fp) ((fp.l.upper) & 0x8000)
#define MANTD(fp) (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
    temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    return temp.d;
}

static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
                    uintptr_t retaddr)
{
    CPU_LDoubleU temp;

    temp.d = f;
    cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
    cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
}

/* x87 FPU helpers */

static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}
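
/*
 * Note: floatx80_to_double()/double_to_floatx80() round-trip through the
 * host 'double', so they keep only the 53-bit precision of float64.  They
 * are used by helpers that still go through the host libm (e.g.
 * helper_fptan() below); the remaining helpers work on floatx80 directly.
 */
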
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
}
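
/*
 * Most helpers below follow the same pattern: save_exception_flags()
 * snapshots and clears the accumulated softfloat flags on entry, and
 * merge_exception_flags() then re-raises the saved flags and converts
 * only the flags raised by this helper into the corresponding FSW
 * exception bits via fpu_set_exception().  This keeps the status word
 * in sync with what each individual instruction actually raised.
 */
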
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}
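
/*
 * The fist* helpers above round according to the current rounding mode
 * while the fistt* helpers always truncate (FISTTP).  On an out-of-range
 * or NaN source the x87 stores the "integer indefinite" value: the 16-bit
 * store helpers therefore force -32768 when the result does not fit in
 * int16, and the 32/64-bit variants force 0x80000000 / 0x8000000000000000
 * whenever the conversion raised the invalid flag.
 */
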
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    do_fstt(env, ST0, ptr, GETPC());
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
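
/*
 * fcom_ccval is indexed by FloatRelation + 1 (less, equal, greater,
 * unordered) and encodes the x87 condition bits C0 (bit 8), C2 (bit 10)
 * and C3 (bit 14) of the status word: less -> C0, equal -> C3,
 * greater -> none, unordered -> C3|C2|C0.  fcomi_ccval below applies the
 * same mapping to the EFLAGS bits (CF, ZF, PF) used by FCOMI/FUCOMI.
 */
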
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env, CC_OP);
    eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
    CC_SRC = eflags;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}
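
/*
 * The constant-load helpers above pick between two encodings of each
 * constant (e.g. floatx80_l2t vs floatx80_l2t_u) depending on the
 * rounding-control field of the control word; this follows the x87
 * behaviour of rounding its internal, higher-precision constants
 * according to RC when they are pushed onto the stack.
 */
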
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

void update_fp_status(CPUX86State *env)
{
    int rnd_type;

    /* set rounding mode */
    switch (env->fpuc & FPU_RC_MASK) {
    default:
    case FPU_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case FPU_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case FPU_RC_UP:
        rnd_type = float_round_up;
        break;
    case FPU_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->fp_status);
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_type = 32;
        break;
    case 2:
        rnd_type = 64;
        break;
    case 3:
    default:
        rnd_type = 80;
        break;
    }
    set_floatx80_rounding_precision(rnd_type, &env->fp_status);
}
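
/*
 * helper_fnstsw() rebuilds the TOP field: bits 11-13 of the status word
 * hold the current top-of-stack index, so the stale field is masked out
 * with ~0x3800 and env->fpstt is shifted back in.  update_fp_status()
 * likewise maps the control word's RC field onto a softfloat rounding
 * mode and its PC field (bits 8-9: 0 = single, 2 = double, 3 = extended)
 * onto the floatx80 rounding precision.
 */
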
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

void helper_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

/* BCD ops */

void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    val = 0;
    for (i = 8; i >= 0; i--) {
        v = cpu_ldub_data_ra(env, ptr + i, GETPC());
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;

    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
        }
        cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    if (SIGND(temp)) {
        cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
        val = -val;
    } else {
        cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
    }
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        cpu_stb_data_ra(env, mem_ref++, v, GETPC());
    }
    while (mem_ref < mem_end) {
        cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
    }
    merge_exception_flags(env, old_flags);
}
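
/*
 * The packed BCD format handled above stores 18 decimal digits, two per
 * byte in bytes 0-8 (least significant first), with the sign in bit 7 of
 * byte 9.  Values that do not fit in 18 digits make helper_fbst write the
 * BCD "indefinite" pattern (0xff 0xff 0xc0 in the top bytes) and raise
 * the invalid-operation flag, as the early-return path above does.
 */
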
/* 128-bit significand of log(2).  */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)

struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};

static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
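
/*
 * F2XM1 (2^x - 1 for x in [-1, 1]) is evaluated as follows: very small
 * arguments are simply multiplied by an extra-precision log(2); otherwise
 * the argument is split as x = t + y, where t is the nearest multiple of
 * 1/32 (looked up in f2xm1_table above together with precomputed values
 * of 2^t and 2^t - 1), y is small, and (2^y - 1) / y is approximated by
 * the f2xm1_coeff_* polynomial, giving 2^x - 1 = (2^y - 1) * 2^t +
 * (2^t - 1).  Out-of-range, NaN and invalid-encoding inputs are handled
 * up front as special cases.
 */
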
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid.  */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5.  */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(80, sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        /* Find the nearest multiple of 1/32 to the argument.  */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1.  */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result.  */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition.  */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(80, asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}
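
/*
 * Unlike helper_f2xm1() and helper_fpatan(), helper_fptan() still goes
 * through the host libm: the operand is narrowed to a host double,
 * tan() is applied, and the result is widened back, so at most double
 * precision is delivered.  MAXTAN is 2^63; the hardware only reduces
 * arguments with |x| < 2^63 and otherwise sets C2 to signal that no
 * reduction was done, which is what the early-out above models.
 */
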
/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.  */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};

static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
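
/*
 * FPATAN computes atan2(ST1, ST0), leaving the result in ST1 and popping
 * the stack.  After the special cases (NaNs, zeros, infinities, operands
 * with very different magnitudes), the general path reduces the ratio
 * x = min/max of the two magnitudes to x = t + y with t = n/8 (atan(t)
 * taken from fpatan_table above), computes z = y/(1 + t*x) so that
 * atan(x) = atan(t) + atan(z), evaluates atan(z) with the fpatan_coeff_*
 * polynomial, and finally adds the result to or subtracts it from 0,
 * pi/2 or pi to account for the signs and ordering of the operands.
 */
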
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = 80;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(80, sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            signed char save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = 80;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(80, false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(80, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
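
/*
 * FXTRACT decomposes ST0 into an unbiased exponent and a significand in
 * [1, 2): ST0 is replaced by the exponent and the significand is pushed,
 * so it ends up in the new ST0 with the exponent in ST1.  A zero source
 * is special-cased above to produce -inf as the exponent together with a
 * divide-by-zero exception, and a denormal source is normalized first,
 * raising the denormal-operand flag.
 */
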
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}
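
/*
 * FPREM truncates the quotient (mod == true) while FPREM1 computes the
 * IEEE remainder with a round-to-nearest quotient (mod == false).  When
 * the exponents differ by 64 or more, only a partial reduction is done
 * and C2 is left set so that software loops re-execute the instruction;
 * once the reduction is complete, C0/C3/C1 receive the low three bits of
 * the quotient, as the bit shuffling above implements.
 */
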
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400;  /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e).  */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
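    /*
     * Note (added for clarity): with u = arg/(2+arg) we have
     * (1+u)/(1-u) = 1+arg, so log2(1+arg) = log2((1+u)/(1-u)) and the
     * odd-powers-only polynomial above can be evaluated at u.
     */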
    if (arg0_sign) {
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(80, false, texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result. */
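    /*
     * Note (added for clarity): "t" here is the extra-precision
     * approximation to arg/(2+arg), held as exponent texp with
     * significand tsig0:tsig1; only the top 128 bits of the 256-bit
     * product are kept, which is ample given the roughly 70-bit
     * accuracy of the expansion.
     */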
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
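        /*
         * Note (added for clarity): a subnormal arg1 has no explicit
         * integer bit in its extracted significand, so normalize it
         * first and let the 128x64-bit multiply below operate on a
         * full-width significand.
         */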
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(80, arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
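        /*
         * Note (added for clarity): with y == +/-0 the result is a
         * zero whose sign is the XOR of the sign of y and the sign of
         * log2(x); flipping ST1 only when 0 < ST0 < 1 implements
         * exactly that.
         */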
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        signed char save_prec = env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = 80;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact. */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(80, asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        signed char save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = 80;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        env->fpus |= 0x400;
    }
}

static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
        cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
        cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
        cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
        cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
        cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
    } else {
        /* 16 bit */
        cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
        cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
        cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
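        /*
         * The remaining words of the 16-bit layout hold the FPU
         * instruction/operand pointers and selectors, which are not
         * tracked here; store zeros, as in the 32-bit branch above.
         */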
        cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
        cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
    }
}

void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fstenv(env, ptr, data32, GETPC());
}

static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}

static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    int i, fpus, fptag;

    if (data32) {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
    } else {
        cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
        fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
        fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    }
    cpu_set_fpus(env, fpus);
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag & 3) == 3);
        fptag >>= 2;
    }
}

void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fldenv(env, ptr, data32, GETPC());
}

static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
                     uintptr_t retaddr)
{
    floatx80 tmp;
    int i;

    do_fstenv(env, ptr, data32, retaddr);

    ptr += (14 << data32);
    for (i = 0; i < 8; i++) {
        tmp = ST(i);
        do_fstt(env, tmp, ptr, retaddr);
        ptr += 10;
    }

    /* fninit */
    env->fpus = 0;
    env->fpstt = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fsave(env, ptr, data32, GETPC());
}

static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
                      uintptr_t retaddr)
{
    floatx80 tmp;
    int i;

    do_fldenv(env, ptr, data32, retaddr);
    ptr += (14 << data32);

    for (i = 0; i < 8; i++) {
        tmp = do_fldt(env, ptr, retaddr);
        ST(i) = tmp;
        ptr += 10;
    }
}

void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    do_frstor(env, ptr, data32, GETPC());
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    do_fsave(env, ptr, data32, 0);
}

void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    do_frstor(env, ptr, data32, 0);
}
#endif

#define XO(X) offsetof(X86XSaveArea, X)

static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i;
    target_ulong addr;

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
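    /*
     * FXSAVE/XSAVE store the abridged tag word: one bit per register,
     * 1 = valid, 0 = empty.  env->fptags uses the opposite sense
     * (1 = empty), hence the XOR with 0xff below.
     */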
    cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    update_mxcsr_from_sse_status(env);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
    cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
}

static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
                    env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
                    env->bndcs_regs.sts, ra);
}

static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stq_data_ra(env, ptr, env->pkru, ra);
}

static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, GETPC());
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}
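
/*
 * Note (added for clarity): in do_xsave below, rfbm is the
 * requested-feature bitmap (EDX:EAX of the instruction), inuse marks
 * components whose state is not the initial configuration and is
 * merged into XSTATE_BV, and opt selects which components are
 * actually written (XSAVEOPT passes its in-use set so untouched
 * components are skipped; plain XSAVE passes -1).
 */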
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(env, ptr + XO(pkru_state), ra);
    }

    /* Update the XSTATE_BV field.  */
    old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
    fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
    fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);
    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
}

static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
    env->bndcs_regs.sts
        = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
}

static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    env->pkru = cpu_ldq_data_ra(env, ptr, ra);
}

static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, GETPC());
}

#if defined(CONFIG_USER_ONLY)
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    do_fxsave(env, ptr, 0);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    do_fxrstor(env, ptr, 0);
}
#endif

void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv, reserve0;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP_BV field must be zero.  Note that, as of the April 2016
       revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
       describes only XCOMP_BV, but the description of the standard form
       of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
       includes the next 64-bit field.  */
    xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
    reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
    if (xcomp_bv || reserve0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
        } else {
            env->pkru = 0;
        }
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}

#undef XO

uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features.  */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }
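
    /*
     * Note (added for clarity): XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK
     * is the factor that shifts the BNDREGS bit up to the BNDCSR
     * position, so the XOR below is nonzero in the BNDCSR bit exactly
     * when only one of the two MPX bits is set.
     */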
    /* Disallow enabling only half of MPX.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */

#define SSE_DAZ             0x0040
#define SSE_RC_MASK         0x6000
#define SSE_RC_NEAR         0x0000
#define SSE_RC_DOWN         0x2000
#define SSE_RC_UP           0x4000
#define SSE_RC_CHOP         0x6000
#define SSE_FZ              0x8000

void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int rnd_type;

    /* set rounding mode */
    switch (mxcsr & SSE_RC_MASK) {
    default:
    case SSE_RC_NEAR:
        rnd_type = float_round_nearest_even;
        break;
    case SSE_RC_DOWN:
        rnd_type = float_round_down;
        break;
    case SSE_RC_UP:
        rnd_type = float_round_up;
        break;
    case SSE_RC_CHOP:
        rnd_type = float_round_to_zero;
        break;
    }
    set_float_rounding_mode(rnd_type, &env->sse_status);

    /* Set exception flags.  */
    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
                              &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
     */
    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                   (flags & float_flag_overflow ? FPUS_OE : 0) |
                   (flags & float_flag_underflow ? FPUS_UE : 0) |
                   (flags & float_flag_inexact ? FPUS_PE : 0) |
                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
                    0));
}

void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}

void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}

void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    *(uint32_t *)(env->fptags) = 0;
    *(uint32_t *)(env->fptags + 4) = 0;
}

void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    *(uint32_t *)(env->fptags) = 0x01010101;
    *(uint32_t *)(env->fptags + 4) = 0x01010101;
}

/* XXX: suppress */
void helper_movq(CPUX86State *env, void *d, void *s)
{
    *(uint64_t *)d = *(uint64_t *)s;
}

#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"
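
/*
 * Note (added for clarity): the two inclusions above instantiate the
 * vector helpers twice from the same template header, once for the
 * 64-bit MMX register format (SHIFT 0) and once for the 128-bit SSE
 * format (SHIFT 1); ops_sse.h is expected to undefine SHIFT and its
 * other per-instantiation macros at the end of each pass.
 */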