1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "fpu/softfloat-macros.h" 29 #include "helper-tcg.h" 30 31 /* float macros */ 32 #define FT0 (env->ft0) 33 #define ST0 (env->fpregs[env->fpstt].d) 34 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 35 #define ST1 ST(1) 36 37 #define FPU_RC_SHIFT 10 38 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 39 #define FPU_RC_NEAR 0x000 40 #define FPU_RC_DOWN 0x400 41 #define FPU_RC_UP 0x800 42 #define FPU_RC_CHOP 0xc00 43 44 #define MAXTAN 9223372036854775808.0 45 46 /* the following deal with x86 long double-precision numbers */ 47 #define MAXEXPD 0x7fff 48 #define EXPBIAS 16383 49 #define EXPD(fp) (fp.l.upper & 0x7fff) 50 #define SIGND(fp) ((fp.l.upper) & 0x8000) 51 #define MANTD(fp) (fp.l.lower) 52 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 53 54 #define FPUS_IE (1 << 0) 55 #define FPUS_DE (1 << 1) 56 #define FPUS_ZE (1 << 2) 57 #define FPUS_OE (1 << 3) 58 #define FPUS_UE (1 << 4) 59 #define FPUS_PE (1 << 5) 60 #define FPUS_SF (1 << 6) 61 #define FPUS_SE (1 << 7) 62 #define FPUS_B (1 << 15) 63 64 #define FPUC_EM 0x3f 65 66 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 67 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 68 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 69 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 70 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 71 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 72 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 73 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 74 75 static inline void fpush(CPUX86State *env) 76 { 77 env->fpstt = (env->fpstt - 1) & 7; 78 env->fptags[env->fpstt] = 0; /* validate stack entry */ 79 } 80 81 static inline void fpop(CPUX86State *env) 82 { 83 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 84 env->fpstt = (env->fpstt + 1) & 7; 85 } 86 87 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 88 { 89 CPU_LDoubleU temp; 90 91 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 92 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 93 return temp.d; 94 } 95 96 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 97 uintptr_t retaddr) 98 { 99 CPU_LDoubleU temp; 100 101 temp.d = f; 102 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 103 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 104 } 105 106 /* x87 FPU helpers */ 107 108 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 109 { 110 union { 111 float64 f64; 112 double d; 113 } u; 114 115 u.f64 = floatx80_to_float64(a, &env->fp_status); 116 return u.d; 117 } 118 119 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 120 { 121 union { 122 float64 f64; 123 double d; 124 } u; 125 126 u.d = a; 127 return float64_to_floatx80(u.f64, &env->fp_status); 128 } 129 130 static void fpu_set_exception(CPUX86State *env, int mask) 131 { 132 env->fpus |= mask; 133 if (env->fpus & (~env->fpuc & FPUC_EM)) { 134 env->fpus |= FPUS_SE | FPUS_B; 135 } 136 } 137 138 static inline uint8_t save_exception_flags(CPUX86State *env) 139 { 140 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 141 set_float_exception_flags(0, &env->fp_status); 142 return old_flags; 143 } 144 145 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 146 { 147 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 148 float_raise(old_flags, &env->fp_status); 149 fpu_set_exception(env, 150 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 151 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 152 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 153 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 154 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 155 (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 156 } 157 158 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 159 { 160 uint8_t old_flags = save_exception_flags(env); 161 floatx80 ret = floatx80_div(a, b, &env->fp_status); 162 merge_exception_flags(env, old_flags); 163 return ret; 164 } 165 166 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 167 { 168 if (env->cr[0] & CR0_NE_MASK) { 169 raise_exception_ra(env, EXCP10_COPR, retaddr); 170 } 171 #if !defined(CONFIG_USER_ONLY) 172 else { 173 fpu_check_raise_ferr_irq(env); 174 } 175 #endif 176 } 177 178 void helper_flds_FT0(CPUX86State *env, uint32_t val) 179 { 180 uint8_t old_flags = save_exception_flags(env); 181 union { 182 float32 f; 183 uint32_t i; 184 } u; 185 186 u.i = val; 187 FT0 = float32_to_floatx80(u.f, &env->fp_status); 188 merge_exception_flags(env, old_flags); 189 } 190 191 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 192 { 193 uint8_t old_flags = save_exception_flags(env); 194 union { 195 float64 f; 196 uint64_t i; 197 } u; 198 199 u.i = val; 200 FT0 = float64_to_floatx80(u.f, &env->fp_status); 201 merge_exception_flags(env, old_flags); 202 } 203 204 void helper_fildl_FT0(CPUX86State *env, int32_t val) 205 { 206 FT0 = int32_to_floatx80(val, &env->fp_status); 207 } 208 209 void helper_flds_ST0(CPUX86State *env, uint32_t val) 210 { 211 uint8_t old_flags = save_exception_flags(env); 212 int new_fpstt; 213 union { 214 float32 f; 215 uint32_t i; 216 } u; 217 218 new_fpstt = (env->fpstt - 1) & 7; 219 u.i = val; 220 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 221 env->fpstt = new_fpstt; 222 env->fptags[new_fpstt] = 0; /* validate stack entry */ 223 merge_exception_flags(env, old_flags); 224 } 225 226 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 227 { 228 uint8_t old_flags = save_exception_flags(env); 229 int new_fpstt; 230 union { 231 float64 f; 232 uint64_t i; 233 } u; 234 235 new_fpstt = (env->fpstt - 1) & 7; 236 u.i = val; 237 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 238 env->fpstt = new_fpstt; 239 env->fptags[new_fpstt] = 0; /* validate stack entry */ 240 merge_exception_flags(env, old_flags); 241 } 242 243 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 244 { 245 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 246 set_floatx80_rounding_precision(floatx80_precision_x, st); 247 return old; 248 } 249 250 void helper_fildl_ST0(CPUX86State *env, int32_t val) 251 { 252 int new_fpstt; 253 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 254 255 new_fpstt = (env->fpstt - 1) & 7; 256 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 257 env->fpstt = new_fpstt; 258 env->fptags[new_fpstt] = 0; /* validate stack entry */ 259 260 set_floatx80_rounding_precision(old, &env->fp_status); 261 } 262 263 void helper_fildll_ST0(CPUX86State *env, int64_t val) 264 { 265 int new_fpstt; 266 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 267 268 new_fpstt = (env->fpstt - 1) & 7; 269 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 270 env->fpstt = new_fpstt; 271 env->fptags[new_fpstt] = 0; /* validate stack entry */ 272 273 set_floatx80_rounding_precision(old, &env->fp_status); 274 } 275 276 uint32_t helper_fsts_ST0(CPUX86State *env) 277 { 278 uint8_t old_flags = save_exception_flags(env); 279 union { 280 float32 f; 281 uint32_t i; 282 } u; 283 284 u.f = floatx80_to_float32(ST0, &env->fp_status); 285 merge_exception_flags(env, old_flags); 286 return u.i; 287 } 288 289 uint64_t helper_fstl_ST0(CPUX86State *env) 290 { 291 uint8_t old_flags = save_exception_flags(env); 292 union { 293 float64 f; 294 uint64_t i; 295 } u; 296 297 u.f = floatx80_to_float64(ST0, &env->fp_status); 298 merge_exception_flags(env, old_flags); 299 return u.i; 300 } 301 302 int32_t helper_fist_ST0(CPUX86State *env) 303 { 304 uint8_t old_flags = save_exception_flags(env); 305 int32_t val; 306 307 val = floatx80_to_int32(ST0, &env->fp_status); 308 if (val != (int16_t)val) { 309 set_float_exception_flags(float_flag_invalid, &env->fp_status); 310 val = -32768; 311 } 312 merge_exception_flags(env, old_flags); 313 return val; 314 } 315 316 int32_t helper_fistl_ST0(CPUX86State *env) 317 { 318 uint8_t old_flags = save_exception_flags(env); 319 int32_t val; 320 321 val = floatx80_to_int32(ST0, &env->fp_status); 322 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 323 val = 0x80000000; 324 } 325 merge_exception_flags(env, old_flags); 326 return val; 327 } 328 329 int64_t helper_fistll_ST0(CPUX86State *env) 330 { 331 uint8_t old_flags = save_exception_flags(env); 332 int64_t val; 333 334 val = floatx80_to_int64(ST0, &env->fp_status); 335 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 336 val = 0x8000000000000000ULL; 337 } 338 merge_exception_flags(env, old_flags); 339 return val; 340 } 341 342 int32_t helper_fistt_ST0(CPUX86State *env) 343 { 344 uint8_t old_flags = save_exception_flags(env); 345 int32_t val; 346 347 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 348 if (val != (int16_t)val) { 349 set_float_exception_flags(float_flag_invalid, &env->fp_status); 350 val = -32768; 351 } 352 merge_exception_flags(env, old_flags); 353 return val; 354 } 355 356 int32_t helper_fisttl_ST0(CPUX86State *env) 357 { 358 uint8_t old_flags = save_exception_flags(env); 359 int32_t val; 360 361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 362 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 363 val = 0x80000000; 364 } 365 merge_exception_flags(env, old_flags); 366 return val; 367 } 368 369 int64_t helper_fisttll_ST0(CPUX86State *env) 370 { 371 uint8_t old_flags = save_exception_flags(env); 372 int64_t val; 373 374 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 375 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 376 val = 0x8000000000000000ULL; 377 } 378 merge_exception_flags(env, old_flags); 379 return val; 380 } 381 382 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 383 { 384 int new_fpstt; 385 386 new_fpstt = (env->fpstt - 1) & 7; 387 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 388 env->fpstt = new_fpstt; 389 env->fptags[new_fpstt] = 0; /* validate stack entry */ 390 } 391 392 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 393 { 394 do_fstt(env, ST0, ptr, GETPC()); 395 } 396 397 void helper_fpush(CPUX86State *env) 398 { 399 fpush(env); 400 } 401 402 void helper_fpop(CPUX86State *env) 403 { 404 fpop(env); 405 } 406 407 void helper_fdecstp(CPUX86State *env) 408 { 409 env->fpstt = (env->fpstt - 1) & 7; 410 env->fpus &= ~0x4700; 411 } 412 413 void helper_fincstp(CPUX86State *env) 414 { 415 env->fpstt = (env->fpstt + 1) & 7; 416 env->fpus &= ~0x4700; 417 } 418 419 /* FPU move */ 420 421 void helper_ffree_STN(CPUX86State *env, int st_index) 422 { 423 env->fptags[(env->fpstt + st_index) & 7] = 1; 424 } 425 426 void helper_fmov_ST0_FT0(CPUX86State *env) 427 { 428 ST0 = FT0; 429 } 430 431 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 432 { 433 FT0 = ST(st_index); 434 } 435 436 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 437 { 438 ST0 = ST(st_index); 439 } 440 441 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 442 { 443 ST(st_index) = ST0; 444 } 445 446 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 447 { 448 floatx80 tmp; 449 450 tmp = ST(st_index); 451 ST(st_index) = ST0; 452 ST0 = tmp; 453 } 454 455 /* FPU operations */ 456 457 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 458 459 void helper_fcom_ST0_FT0(CPUX86State *env) 460 { 461 uint8_t old_flags = save_exception_flags(env); 462 FloatRelation ret; 463 464 ret = floatx80_compare(ST0, FT0, &env->fp_status); 465 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 466 merge_exception_flags(env, old_flags); 467 } 468 469 void helper_fucom_ST0_FT0(CPUX86State *env) 470 { 471 uint8_t old_flags = save_exception_flags(env); 472 FloatRelation ret; 473 474 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 475 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 476 merge_exception_flags(env, old_flags); 477 } 478 479 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 480 481 void helper_fcomi_ST0_FT0(CPUX86State *env) 482 { 483 uint8_t old_flags = save_exception_flags(env); 484 int eflags; 485 FloatRelation ret; 486 487 ret = floatx80_compare(ST0, FT0, &env->fp_status); 488 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 489 CC_SRC = eflags | fcomi_ccval[ret + 1]; 490 merge_exception_flags(env, old_flags); 491 } 492 493 void helper_fucomi_ST0_FT0(CPUX86State *env) 494 { 495 uint8_t old_flags = save_exception_flags(env); 496 int eflags; 497 FloatRelation ret; 498 499 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 500 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 501 CC_SRC = eflags | fcomi_ccval[ret + 1]; 502 merge_exception_flags(env, old_flags); 503 } 504 505 void helper_fadd_ST0_FT0(CPUX86State *env) 506 { 507 uint8_t old_flags = save_exception_flags(env); 508 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 509 merge_exception_flags(env, old_flags); 510 } 511 512 void helper_fmul_ST0_FT0(CPUX86State *env) 513 { 514 uint8_t old_flags = save_exception_flags(env); 515 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 516 merge_exception_flags(env, old_flags); 517 } 518 519 void helper_fsub_ST0_FT0(CPUX86State *env) 520 { 521 uint8_t old_flags = save_exception_flags(env); 522 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 523 merge_exception_flags(env, old_flags); 524 } 525 526 void helper_fsubr_ST0_FT0(CPUX86State *env) 527 { 528 uint8_t old_flags = save_exception_flags(env); 529 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 530 merge_exception_flags(env, old_flags); 531 } 532 533 void helper_fdiv_ST0_FT0(CPUX86State *env) 534 { 535 ST0 = helper_fdiv(env, ST0, FT0); 536 } 537 538 void helper_fdivr_ST0_FT0(CPUX86State *env) 539 { 540 ST0 = helper_fdiv(env, FT0, ST0); 541 } 542 543 /* fp operations between STN and ST0 */ 544 545 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 546 { 547 uint8_t old_flags = save_exception_flags(env); 548 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 549 merge_exception_flags(env, old_flags); 550 } 551 552 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 553 { 554 uint8_t old_flags = save_exception_flags(env); 555 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 556 merge_exception_flags(env, old_flags); 557 } 558 559 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 560 { 561 uint8_t old_flags = save_exception_flags(env); 562 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 563 merge_exception_flags(env, old_flags); 564 } 565 566 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 567 { 568 uint8_t old_flags = save_exception_flags(env); 569 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 570 merge_exception_flags(env, old_flags); 571 } 572 573 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 574 { 575 floatx80 *p; 576 577 p = &ST(st_index); 578 *p = helper_fdiv(env, *p, ST0); 579 } 580 581 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 582 { 583 floatx80 *p; 584 585 p = &ST(st_index); 586 *p = helper_fdiv(env, ST0, *p); 587 } 588 589 /* misc FPU operations */ 590 void helper_fchs_ST0(CPUX86State *env) 591 { 592 ST0 = floatx80_chs(ST0); 593 } 594 595 void helper_fabs_ST0(CPUX86State *env) 596 { 597 ST0 = floatx80_abs(ST0); 598 } 599 600 void helper_fld1_ST0(CPUX86State *env) 601 { 602 ST0 = floatx80_one; 603 } 604 605 void helper_fldl2t_ST0(CPUX86State *env) 606 { 607 switch (env->fpuc & FPU_RC_MASK) { 608 case FPU_RC_UP: 609 ST0 = floatx80_l2t_u; 610 break; 611 default: 612 ST0 = floatx80_l2t; 613 break; 614 } 615 } 616 617 void helper_fldl2e_ST0(CPUX86State *env) 618 { 619 switch (env->fpuc & FPU_RC_MASK) { 620 case FPU_RC_DOWN: 621 case FPU_RC_CHOP: 622 ST0 = floatx80_l2e_d; 623 break; 624 default: 625 ST0 = floatx80_l2e; 626 break; 627 } 628 } 629 630 void helper_fldpi_ST0(CPUX86State *env) 631 { 632 switch (env->fpuc & FPU_RC_MASK) { 633 case FPU_RC_DOWN: 634 case FPU_RC_CHOP: 635 ST0 = floatx80_pi_d; 636 break; 637 default: 638 ST0 = floatx80_pi; 639 break; 640 } 641 } 642 643 void helper_fldlg2_ST0(CPUX86State *env) 644 { 645 switch (env->fpuc & FPU_RC_MASK) { 646 case FPU_RC_DOWN: 647 case FPU_RC_CHOP: 648 ST0 = floatx80_lg2_d; 649 break; 650 default: 651 ST0 = floatx80_lg2; 652 break; 653 } 654 } 655 656 void helper_fldln2_ST0(CPUX86State *env) 657 { 658 switch (env->fpuc & FPU_RC_MASK) { 659 case FPU_RC_DOWN: 660 case FPU_RC_CHOP: 661 ST0 = floatx80_ln2_d; 662 break; 663 default: 664 ST0 = floatx80_ln2; 665 break; 666 } 667 } 668 669 void helper_fldz_ST0(CPUX86State *env) 670 { 671 ST0 = floatx80_zero; 672 } 673 674 void helper_fldz_FT0(CPUX86State *env) 675 { 676 FT0 = floatx80_zero; 677 } 678 679 uint32_t helper_fnstsw(CPUX86State *env) 680 { 681 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 682 } 683 684 uint32_t helper_fnstcw(CPUX86State *env) 685 { 686 return env->fpuc; 687 } 688 689 static void set_x86_rounding_mode(unsigned mode, float_status *status) 690 { 691 static FloatRoundMode x86_round_mode[4] = { 692 float_round_nearest_even, 693 float_round_down, 694 float_round_up, 695 float_round_to_zero 696 }; 697 assert(mode < ARRAY_SIZE(x86_round_mode)); 698 set_float_rounding_mode(x86_round_mode[mode], status); 699 } 700 701 void update_fp_status(CPUX86State *env) 702 { 703 int rnd_mode; 704 FloatX80RoundPrec rnd_prec; 705 706 /* set rounding mode */ 707 rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 708 set_x86_rounding_mode(rnd_mode, &env->fp_status); 709 710 switch ((env->fpuc >> 8) & 3) { 711 case 0: 712 rnd_prec = floatx80_precision_s; 713 break; 714 case 2: 715 rnd_prec = floatx80_precision_d; 716 break; 717 case 3: 718 default: 719 rnd_prec = floatx80_precision_x; 720 break; 721 } 722 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 723 } 724 725 void helper_fldcw(CPUX86State *env, uint32_t val) 726 { 727 cpu_set_fpuc(env, val); 728 } 729 730 void helper_fclex(CPUX86State *env) 731 { 732 env->fpus &= 0x7f00; 733 } 734 735 void helper_fwait(CPUX86State *env) 736 { 737 if (env->fpus & FPUS_SE) { 738 fpu_raise_exception(env, GETPC()); 739 } 740 } 741 742 static void do_fninit(CPUX86State *env) 743 { 744 env->fpus = 0; 745 env->fpstt = 0; 746 env->fpcs = 0; 747 env->fpds = 0; 748 env->fpip = 0; 749 env->fpdp = 0; 750 cpu_set_fpuc(env, 0x37f); 751 env->fptags[0] = 1; 752 env->fptags[1] = 1; 753 env->fptags[2] = 1; 754 env->fptags[3] = 1; 755 env->fptags[4] = 1; 756 env->fptags[5] = 1; 757 env->fptags[6] = 1; 758 env->fptags[7] = 1; 759 } 760 761 void helper_fninit(CPUX86State *env) 762 { 763 do_fninit(env); 764 } 765 766 /* BCD ops */ 767 768 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 769 { 770 floatx80 tmp; 771 uint64_t val; 772 unsigned int v; 773 int i; 774 775 val = 0; 776 for (i = 8; i >= 0; i--) { 777 v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 778 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 779 } 780 tmp = int64_to_floatx80(val, &env->fp_status); 781 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 782 tmp = floatx80_chs(tmp); 783 } 784 fpush(env); 785 ST0 = tmp; 786 } 787 788 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 789 { 790 uint8_t old_flags = save_exception_flags(env); 791 int v; 792 target_ulong mem_ref, mem_end; 793 int64_t val; 794 CPU_LDoubleU temp; 795 796 temp.d = ST0; 797 798 val = floatx80_to_int64(ST0, &env->fp_status); 799 mem_ref = ptr; 800 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 801 set_float_exception_flags(float_flag_invalid, &env->fp_status); 802 while (mem_ref < ptr + 7) { 803 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 804 } 805 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 806 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 807 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 808 merge_exception_flags(env, old_flags); 809 return; 810 } 811 mem_end = mem_ref + 9; 812 if (SIGND(temp)) { 813 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 814 val = -val; 815 } else { 816 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 817 } 818 while (mem_ref < mem_end) { 819 if (val == 0) { 820 break; 821 } 822 v = val % 100; 823 val = val / 100; 824 v = ((v / 10) << 4) | (v % 10); 825 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 826 } 827 while (mem_ref < mem_end) { 828 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 829 } 830 merge_exception_flags(env, old_flags); 831 } 832 833 /* 128-bit significand of log(2). */ 834 #define ln2_sig_high 0xb17217f7d1cf79abULL 835 #define ln2_sig_low 0xc9e3b39803f2f6afULL 836 837 /* 838 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 839 * the interval [-1/64, 1/64]. 840 */ 841 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 842 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 843 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 844 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 845 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 846 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 847 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 848 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 849 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 850 851 struct f2xm1_data { 852 /* 853 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 854 * are very close to exact floatx80 values. 855 */ 856 floatx80 t; 857 /* The value of 2^t. */ 858 floatx80 exp2; 859 /* The value of 2^t - 1. */ 860 floatx80 exp2m1; 861 }; 862 863 static const struct f2xm1_data f2xm1_table[65] = { 864 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 865 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 866 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 867 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 868 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 869 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 870 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 871 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 872 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 873 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 874 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 875 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 876 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 877 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 878 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 879 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 880 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 881 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 882 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 883 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 884 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 885 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 886 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 887 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 888 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 889 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 890 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 891 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 892 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 893 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 894 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 895 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 896 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 897 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 898 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 899 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 900 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 901 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 902 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 903 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 904 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 905 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 906 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 907 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 908 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 909 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 910 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 911 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 912 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 913 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 914 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 915 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 916 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 917 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 918 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 919 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 920 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 921 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 922 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 923 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 924 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 925 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 926 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 927 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 928 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 929 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 930 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 931 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 932 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 933 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 934 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 935 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 936 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 937 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 938 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 939 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 940 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 941 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 942 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 943 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 944 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 945 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 946 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 947 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 948 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 949 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 950 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 951 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 952 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 953 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 954 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 955 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 956 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 957 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 958 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 959 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 960 { floatx80_zero_init, 961 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 962 floatx80_zero_init }, 963 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 964 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 965 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 966 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 967 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 968 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 969 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 970 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 971 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 972 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 973 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 974 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 975 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 976 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 977 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 978 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 979 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 980 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 981 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 982 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 983 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 984 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 985 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 986 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 987 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 988 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 989 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 990 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 991 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 992 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 993 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 994 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 995 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 996 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 997 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 998 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 999 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1000 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1001 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1002 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1003 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1004 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1005 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1006 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1007 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1008 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1009 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1010 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1011 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1012 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1013 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1014 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1015 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1016 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1017 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1018 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1019 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1020 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1021 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1022 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1023 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1024 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1025 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1026 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1027 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1028 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1029 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1030 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1031 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1032 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1033 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1034 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1035 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1036 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1037 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1038 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1039 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1040 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1041 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1042 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1043 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1044 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1045 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1046 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1047 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1048 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1049 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1050 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1051 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1052 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1053 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1054 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1055 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1056 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1057 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1058 make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1059 }; 1060 1061 void helper_f2xm1(CPUX86State *env) 1062 { 1063 uint8_t old_flags = save_exception_flags(env); 1064 uint64_t sig = extractFloatx80Frac(ST0); 1065 int32_t exp = extractFloatx80Exp(ST0); 1066 bool sign = extractFloatx80Sign(ST0); 1067 1068 if (floatx80_invalid_encoding(ST0)) { 1069 float_raise(float_flag_invalid, &env->fp_status); 1070 ST0 = floatx80_default_nan(&env->fp_status); 1071 } else if (floatx80_is_any_nan(ST0)) { 1072 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1073 float_raise(float_flag_invalid, &env->fp_status); 1074 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1075 } 1076 } else if (exp > 0x3fff || 1077 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1078 /* Out of range for the instruction, treat as invalid. */ 1079 float_raise(float_flag_invalid, &env->fp_status); 1080 ST0 = floatx80_default_nan(&env->fp_status); 1081 } else if (exp == 0x3fff) { 1082 /* Argument 1 or -1, exact result 1 or -0.5. */ 1083 if (sign) { 1084 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1085 } 1086 } else if (exp < 0x3fb0) { 1087 if (!floatx80_is_zero(ST0)) { 1088 /* 1089 * Multiplying the argument by an extra-precision version 1090 * of log(2) is sufficiently precise. Zero arguments are 1091 * returned unchanged. 1092 */ 1093 uint64_t sig0, sig1, sig2; 1094 if (exp == 0) { 1095 normalizeFloatx80Subnormal(sig, &exp, &sig); 1096 } 1097 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1098 &sig2); 1099 /* This result is inexact. */ 1100 sig1 |= 1; 1101 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1102 sign, exp, sig0, sig1, 1103 &env->fp_status); 1104 } 1105 } else { 1106 floatx80 tmp, y, accum; 1107 bool asign, bsign; 1108 int32_t n, aexp, bexp; 1109 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1110 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1111 FloatX80RoundPrec save_prec = 1112 env->fp_status.floatx80_rounding_precision; 1113 env->fp_status.float_rounding_mode = float_round_nearest_even; 1114 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1115 1116 /* Find the nearest multiple of 1/32 to the argument. */ 1117 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1118 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1119 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1120 1121 if (floatx80_is_zero(y)) { 1122 /* 1123 * Use the value of 2^t - 1 from the table, to avoid 1124 * needing to special-case zero as a result of 1125 * multiplication below. 1126 */ 1127 ST0 = f2xm1_table[n].t; 1128 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1129 env->fp_status.float_rounding_mode = save_mode; 1130 } else { 1131 /* 1132 * Compute the lower parts of a polynomial expansion for 1133 * (2^y - 1) / y. 1134 */ 1135 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1136 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1137 accum = floatx80_mul(accum, y, &env->fp_status); 1138 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1139 accum = floatx80_mul(accum, y, &env->fp_status); 1140 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1141 accum = floatx80_mul(accum, y, &env->fp_status); 1142 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1143 accum = floatx80_mul(accum, y, &env->fp_status); 1144 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1145 accum = floatx80_mul(accum, y, &env->fp_status); 1146 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1147 accum = floatx80_mul(accum, y, &env->fp_status); 1148 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1149 1150 /* 1151 * The full polynomial expansion is f2xm1_coeff_0 + accum 1152 * (where accum has much lower magnitude, and so, in 1153 * particular, carry out of the addition is not possible). 1154 * (This expansion is only accurate to about 70 bits, not 1155 * 128 bits.) 1156 */ 1157 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1158 asign = extractFloatx80Sign(f2xm1_coeff_0); 1159 shift128RightJamming(extractFloatx80Frac(accum), 0, 1160 aexp - extractFloatx80Exp(accum), 1161 &asig0, &asig1); 1162 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1163 bsig1 = 0; 1164 if (asign == extractFloatx80Sign(accum)) { 1165 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1166 } else { 1167 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1168 } 1169 /* And thus compute an approximation to 2^y - 1. */ 1170 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1171 &asig0, &asig1, &asig2); 1172 aexp += extractFloatx80Exp(y) - 0x3ffe; 1173 asign ^= extractFloatx80Sign(y); 1174 if (n != 32) { 1175 /* 1176 * Multiply this by the precomputed value of 2^t and 1177 * add that of 2^t - 1. 1178 */ 1179 mul128By64To192(asig0, asig1, 1180 extractFloatx80Frac(f2xm1_table[n].exp2), 1181 &asig0, &asig1, &asig2); 1182 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1183 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1184 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1185 bsig1 = 0; 1186 if (bexp < aexp) { 1187 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1188 &bsig0, &bsig1); 1189 } else if (aexp < bexp) { 1190 shift128RightJamming(asig0, asig1, bexp - aexp, 1191 &asig0, &asig1); 1192 aexp = bexp; 1193 } 1194 /* The sign of 2^t - 1 is always that of the result. */ 1195 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1196 if (asign == bsign) { 1197 /* Avoid possible carry out of the addition. */ 1198 shift128RightJamming(asig0, asig1, 1, 1199 &asig0, &asig1); 1200 shift128RightJamming(bsig0, bsig1, 1, 1201 &bsig0, &bsig1); 1202 ++aexp; 1203 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1204 } else { 1205 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1206 asign = bsign; 1207 } 1208 } 1209 env->fp_status.float_rounding_mode = save_mode; 1210 /* This result is inexact. */ 1211 asig1 |= 1; 1212 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1213 asign, aexp, asig0, asig1, 1214 &env->fp_status); 1215 } 1216 1217 env->fp_status.floatx80_rounding_precision = save_prec; 1218 } 1219 merge_exception_flags(env, old_flags); 1220 } 1221 1222 void helper_fptan(CPUX86State *env) 1223 { 1224 double fptemp = floatx80_to_double(env, ST0); 1225 1226 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1227 env->fpus |= 0x400; 1228 } else { 1229 fptemp = tan(fptemp); 1230 ST0 = double_to_floatx80(env, fptemp); 1231 fpush(env); 1232 ST0 = floatx80_one; 1233 env->fpus &= ~0x400; /* C2 <-- 0 */ 1234 /* the above code is for |arg| < 2**52 only */ 1235 } 1236 } 1237 1238 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */ 1239 #define pi_4_exp 0x3ffe 1240 #define pi_4_sig_high 0xc90fdaa22168c234ULL 1241 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL 1242 #define pi_2_exp 0x3fff 1243 #define pi_2_sig_high 0xc90fdaa22168c234ULL 1244 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL 1245 #define pi_34_exp 0x4000 1246 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL 1247 #define pi_34_sig_low 0x9394c9e8a0a5159dULL 1248 #define pi_exp 0x4000 1249 #define pi_sig_high 0xc90fdaa22168c234ULL 1250 #define pi_sig_low 0xc4c6628b80dc1cd1ULL 1251 1252 /* 1253 * Polynomial coefficients for an approximation to atan(x), with only 1254 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike 1255 * for some other approximations, no low part is needed for the first 1256 * coefficient here to achieve a sufficiently accurate result, because 1257 * the coefficient in this minimax approximation is very close to 1258 * exactly 1.) 1259 */ 1260 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) 1261 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) 1262 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) 1263 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) 1264 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) 1265 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) 1266 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) 1267 1268 struct fpatan_data { 1269 /* High and low parts of atan(x). */ 1270 floatx80 atan_high, atan_low; 1271 }; 1272 1273 static const struct fpatan_data fpatan_table[9] = { 1274 { floatx80_zero_init, 1275 floatx80_zero_init }, 1276 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), 1277 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, 1278 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), 1279 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, 1280 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), 1281 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, 1282 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), 1283 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, 1284 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), 1285 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, 1286 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), 1287 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, 1288 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), 1289 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, 1290 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), 1291 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, 1292 }; 1293 1294 void helper_fpatan(CPUX86State *env) 1295 { 1296 uint8_t old_flags = save_exception_flags(env); 1297 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1298 int32_t arg0_exp = extractFloatx80Exp(ST0); 1299 bool arg0_sign = extractFloatx80Sign(ST0); 1300 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1301 int32_t arg1_exp = extractFloatx80Exp(ST1); 1302 bool arg1_sign = extractFloatx80Sign(ST1); 1303 1304 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1305 float_raise(float_flag_invalid, &env->fp_status); 1306 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1307 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1308 float_raise(float_flag_invalid, &env->fp_status); 1309 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1310 } else if (floatx80_invalid_encoding(ST0) || 1311 floatx80_invalid_encoding(ST1)) { 1312 float_raise(float_flag_invalid, &env->fp_status); 1313 ST1 = floatx80_default_nan(&env->fp_status); 1314 } else if (floatx80_is_any_nan(ST0)) { 1315 ST1 = ST0; 1316 } else if (floatx80_is_any_nan(ST1)) { 1317 /* Pass this NaN through. */ 1318 } else if (floatx80_is_zero(ST1) && !arg0_sign) { 1319 /* Pass this zero through. */ 1320 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || 1321 arg0_exp - arg1_exp >= 80) && 1322 !arg0_sign) { 1323 /* 1324 * Dividing ST1 by ST0 gives the correct result up to 1325 * rounding, and avoids spurious underflow exceptions that 1326 * might result from passing some small values through the 1327 * polynomial approximation, but if a finite nonzero result of 1328 * division is exact, the result of fpatan is still inexact 1329 * (and underflowing where appropriate). 1330 */ 1331 FloatX80RoundPrec save_prec = 1332 env->fp_status.floatx80_rounding_precision; 1333 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1334 ST1 = floatx80_div(ST1, ST0, &env->fp_status); 1335 env->fp_status.floatx80_rounding_precision = save_prec; 1336 if (!floatx80_is_zero(ST1) && 1337 !(get_float_exception_flags(&env->fp_status) & 1338 float_flag_inexact)) { 1339 /* 1340 * The mathematical result is very slightly closer to zero 1341 * than this exact result. Round a value with the 1342 * significand adjusted accordingly to get the correct 1343 * exceptions, and possibly an adjusted result depending 1344 * on the rounding mode. 1345 */ 1346 uint64_t sig = extractFloatx80Frac(ST1); 1347 int32_t exp = extractFloatx80Exp(ST1); 1348 bool sign = extractFloatx80Sign(ST1); 1349 if (exp == 0) { 1350 normalizeFloatx80Subnormal(sig, &exp, &sig); 1351 } 1352 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1353 sign, exp, sig - 1, 1354 -1, &env->fp_status); 1355 } 1356 } else { 1357 /* The result is inexact. */ 1358 bool rsign = arg1_sign; 1359 int32_t rexp; 1360 uint64_t rsig0, rsig1; 1361 if (floatx80_is_zero(ST1)) { 1362 /* 1363 * ST0 is negative. The result is pi with the sign of 1364 * ST1. 1365 */ 1366 rexp = pi_exp; 1367 rsig0 = pi_sig_high; 1368 rsig1 = pi_sig_low; 1369 } else if (floatx80_is_infinity(ST1)) { 1370 if (floatx80_is_infinity(ST0)) { 1371 if (arg0_sign) { 1372 rexp = pi_34_exp; 1373 rsig0 = pi_34_sig_high; 1374 rsig1 = pi_34_sig_low; 1375 } else { 1376 rexp = pi_4_exp; 1377 rsig0 = pi_4_sig_high; 1378 rsig1 = pi_4_sig_low; 1379 } 1380 } else { 1381 rexp = pi_2_exp; 1382 rsig0 = pi_2_sig_high; 1383 rsig1 = pi_2_sig_low; 1384 } 1385 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { 1386 rexp = pi_2_exp; 1387 rsig0 = pi_2_sig_high; 1388 rsig1 = pi_2_sig_low; 1389 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { 1390 /* ST0 is negative. */ 1391 rexp = pi_exp; 1392 rsig0 = pi_sig_high; 1393 rsig1 = pi_sig_low; 1394 } else { 1395 /* 1396 * ST0 and ST1 are finite, nonzero and with exponents not 1397 * too far apart. 1398 */ 1399 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; 1400 int32_t azexp, axexp; 1401 bool adj_sub, ysign, zsign; 1402 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; 1403 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; 1404 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; 1405 uint64_t azsig0, azsig1; 1406 uint64_t azsig2, azsig3, axsig0, axsig1; 1407 floatx80 x8; 1408 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1409 FloatX80RoundPrec save_prec = 1410 env->fp_status.floatx80_rounding_precision; 1411 env->fp_status.float_rounding_mode = float_round_nearest_even; 1412 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1413 1414 if (arg0_exp == 0) { 1415 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 1416 } 1417 if (arg1_exp == 0) { 1418 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 1419 } 1420 if (arg0_exp > arg1_exp || 1421 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { 1422 /* Work with abs(ST1) / abs(ST0). */ 1423 num_exp = arg1_exp; 1424 num_sig = arg1_sig; 1425 den_exp = arg0_exp; 1426 den_sig = arg0_sig; 1427 if (arg0_sign) { 1428 /* The result is subtracted from pi. */ 1429 adj_exp = pi_exp; 1430 adj_sig0 = pi_sig_high; 1431 adj_sig1 = pi_sig_low; 1432 adj_sub = true; 1433 } else { 1434 /* The result is used as-is. */ 1435 adj_exp = 0; 1436 adj_sig0 = 0; 1437 adj_sig1 = 0; 1438 adj_sub = false; 1439 } 1440 } else { 1441 /* Work with abs(ST0) / abs(ST1). */ 1442 num_exp = arg0_exp; 1443 num_sig = arg0_sig; 1444 den_exp = arg1_exp; 1445 den_sig = arg1_sig; 1446 /* The result is added to or subtracted from pi/2. */ 1447 adj_exp = pi_2_exp; 1448 adj_sig0 = pi_2_sig_high; 1449 adj_sig1 = pi_2_sig_low; 1450 adj_sub = !arg0_sign; 1451 } 1452 1453 /* 1454 * Compute x = num/den, where 0 < x <= 1 and x is not too 1455 * small. 1456 */ 1457 xexp = num_exp - den_exp + 0x3ffe; 1458 remsig0 = num_sig; 1459 remsig1 = 0; 1460 if (den_sig <= remsig0) { 1461 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1462 ++xexp; 1463 } 1464 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); 1465 mul64To128(den_sig, xsig0, &msig0, &msig1); 1466 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); 1467 while ((int64_t) remsig0 < 0) { 1468 --xsig0; 1469 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); 1470 } 1471 xsig1 = estimateDiv128To64(remsig1, 0, den_sig); 1472 /* 1473 * No need to correct any estimation error in xsig1; even 1474 * with such error, it is accurate enough. 1475 */ 1476 1477 /* 1478 * Split x as x = t + y, where t = n/8 is the nearest 1479 * multiple of 1/8 to x. 1480 */ 1481 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1482 false, xexp + 3, xsig0, 1483 xsig1, &env->fp_status); 1484 n = floatx80_to_int32(x8, &env->fp_status); 1485 if (n == 0) { 1486 ysign = false; 1487 yexp = xexp; 1488 ysig0 = xsig0; 1489 ysig1 = xsig1; 1490 texp = 0; 1491 tsig = 0; 1492 } else { 1493 int shift = clz32(n) + 32; 1494 texp = 0x403b - shift; 1495 tsig = n; 1496 tsig <<= shift; 1497 if (texp == xexp) { 1498 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); 1499 if ((int64_t) ysig0 >= 0) { 1500 ysign = false; 1501 if (ysig0 == 0) { 1502 if (ysig1 == 0) { 1503 yexp = 0; 1504 } else { 1505 shift = clz64(ysig1) + 64; 1506 yexp = xexp - shift; 1507 shift128Left(ysig0, ysig1, shift, 1508 &ysig0, &ysig1); 1509 } 1510 } else { 1511 shift = clz64(ysig0); 1512 yexp = xexp - shift; 1513 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1514 } 1515 } else { 1516 ysign = true; 1517 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); 1518 if (ysig0 == 0) { 1519 shift = clz64(ysig1) + 64; 1520 } else { 1521 shift = clz64(ysig0); 1522 } 1523 yexp = xexp - shift; 1524 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1525 } 1526 } else { 1527 /* 1528 * t's exponent must be greater than x's because t 1529 * is positive and the nearest multiple of 1/8 to 1530 * x, and if x has a greater exponent, the power 1531 * of 2 with that exponent is also a multiple of 1532 * 1/8. 1533 */ 1534 uint64_t usig0, usig1; 1535 shift128RightJamming(xsig0, xsig1, texp - xexp, 1536 &usig0, &usig1); 1537 ysign = true; 1538 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); 1539 if (ysig0 == 0) { 1540 shift = clz64(ysig1) + 64; 1541 } else { 1542 shift = clz64(ysig0); 1543 } 1544 yexp = texp - shift; 1545 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1546 } 1547 } 1548 1549 /* 1550 * Compute z = y/(1+tx), so arctan(x) = arctan(t) + 1551 * arctan(z). 1552 */ 1553 zsign = ysign; 1554 if (texp == 0 || yexp == 0) { 1555 zexp = yexp; 1556 zsig0 = ysig0; 1557 zsig1 = ysig1; 1558 } else { 1559 /* 1560 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 1561 */ 1562 int32_t dexp = texp + xexp - 0x3ffe; 1563 uint64_t dsig0, dsig1, dsig2; 1564 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); 1565 /* 1566 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 1567 * bit). Add 1 to produce the denominator 1+tx. 1568 */ 1569 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, 1570 &dsig0, &dsig1); 1571 dsig0 |= 0x8000000000000000ULL; 1572 zexp = yexp - 1; 1573 remsig0 = ysig0; 1574 remsig1 = ysig1; 1575 remsig2 = 0; 1576 if (dsig0 <= remsig0) { 1577 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1578 ++zexp; 1579 } 1580 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); 1581 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); 1582 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, 1583 &remsig0, &remsig1, &remsig2); 1584 while ((int64_t) remsig0 < 0) { 1585 --zsig0; 1586 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, 1587 &remsig0, &remsig1, &remsig2); 1588 } 1589 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); 1590 /* No need to correct any estimation error in zsig1. */ 1591 } 1592 1593 if (zexp == 0) { 1594 azexp = 0; 1595 azsig0 = 0; 1596 azsig1 = 0; 1597 } else { 1598 floatx80 z2, accum; 1599 uint64_t z2sig0, z2sig1, z2sig2, z2sig3; 1600 /* Compute z^2. */ 1601 mul128To256(zsig0, zsig1, zsig0, zsig1, 1602 &z2sig0, &z2sig1, &z2sig2, &z2sig3); 1603 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1604 zexp + zexp - 0x3ffe, 1605 z2sig0, z2sig1, 1606 &env->fp_status); 1607 1608 /* Compute the lower parts of the polynomial expansion. */ 1609 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); 1610 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); 1611 accum = floatx80_mul(accum, z2, &env->fp_status); 1612 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); 1613 accum = floatx80_mul(accum, z2, &env->fp_status); 1614 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); 1615 accum = floatx80_mul(accum, z2, &env->fp_status); 1616 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); 1617 accum = floatx80_mul(accum, z2, &env->fp_status); 1618 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); 1619 accum = floatx80_mul(accum, z2, &env->fp_status); 1620 1621 /* 1622 * The full polynomial expansion is z*(fpatan_coeff_0 + accum). 1623 * fpatan_coeff_0 is 1, and accum is negative and much smaller. 1624 */ 1625 aexp = extractFloatx80Exp(fpatan_coeff_0); 1626 shift128RightJamming(extractFloatx80Frac(accum), 0, 1627 aexp - extractFloatx80Exp(accum), 1628 &asig0, &asig1); 1629 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, 1630 &asig0, &asig1); 1631 /* Multiply by z to compute arctan(z). */ 1632 azexp = aexp + zexp - 0x3ffe; 1633 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, 1634 &azsig2, &azsig3); 1635 } 1636 1637 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ 1638 if (texp == 0) { 1639 /* z is positive. */ 1640 axexp = azexp; 1641 axsig0 = azsig0; 1642 axsig1 = azsig1; 1643 } else { 1644 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); 1645 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); 1646 uint64_t low_sig0 = 1647 extractFloatx80Frac(fpatan_table[n].atan_low); 1648 uint64_t low_sig1 = 0; 1649 axexp = extractFloatx80Exp(fpatan_table[n].atan_high); 1650 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); 1651 axsig1 = 0; 1652 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, 1653 &low_sig0, &low_sig1); 1654 if (low_sign) { 1655 sub128(axsig0, axsig1, low_sig0, low_sig1, 1656 &axsig0, &axsig1); 1657 } else { 1658 add128(axsig0, axsig1, low_sig0, low_sig1, 1659 &axsig0, &axsig1); 1660 } 1661 if (azexp >= axexp) { 1662 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, 1663 &axsig0, &axsig1); 1664 axexp = azexp + 1; 1665 shift128RightJamming(azsig0, azsig1, 1, 1666 &azsig0, &azsig1); 1667 } else { 1668 shift128RightJamming(axsig0, axsig1, 1, 1669 &axsig0, &axsig1); 1670 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, 1671 &azsig0, &azsig1); 1672 ++axexp; 1673 } 1674 if (zsign) { 1675 sub128(axsig0, axsig1, azsig0, azsig1, 1676 &axsig0, &axsig1); 1677 } else { 1678 add128(axsig0, axsig1, azsig0, azsig1, 1679 &axsig0, &axsig1); 1680 } 1681 } 1682 1683 if (adj_exp == 0) { 1684 rexp = axexp; 1685 rsig0 = axsig0; 1686 rsig1 = axsig1; 1687 } else { 1688 /* 1689 * Add or subtract arctan(x) (exponent axexp, 1690 * significand axsig0 and axsig1, positive, not 1691 * necessarily normalized) to the number given by 1692 * adj_exp, adj_sig0 and adj_sig1, according to 1693 * adj_sub. 1694 */ 1695 if (adj_exp >= axexp) { 1696 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1697 &axsig0, &axsig1); 1698 rexp = adj_exp + 1; 1699 shift128RightJamming(adj_sig0, adj_sig1, 1, 1700 &adj_sig0, &adj_sig1); 1701 } else { 1702 shift128RightJamming(axsig0, axsig1, 1, 1703 &axsig0, &axsig1); 1704 shift128RightJamming(adj_sig0, adj_sig1, 1705 axexp - adj_exp + 1, 1706 &adj_sig0, &adj_sig1); 1707 rexp = axexp + 1; 1708 } 1709 if (adj_sub) { 1710 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1711 &rsig0, &rsig1); 1712 } else { 1713 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1714 &rsig0, &rsig1); 1715 } 1716 } 1717 1718 env->fp_status.float_rounding_mode = save_mode; 1719 env->fp_status.floatx80_rounding_precision = save_prec; 1720 } 1721 /* This result is inexact. */ 1722 rsig1 |= 1; 1723 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1724 rsig0, rsig1, &env->fp_status); 1725 } 1726 1727 fpop(env); 1728 merge_exception_flags(env, old_flags); 1729 } 1730 1731 void helper_fxtract(CPUX86State *env) 1732 { 1733 uint8_t old_flags = save_exception_flags(env); 1734 CPU_LDoubleU temp; 1735 1736 temp.d = ST0; 1737 1738 if (floatx80_is_zero(ST0)) { 1739 /* Easy way to generate -inf and raising division by 0 exception */ 1740 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1741 &env->fp_status); 1742 fpush(env); 1743 ST0 = temp.d; 1744 } else if (floatx80_invalid_encoding(ST0)) { 1745 float_raise(float_flag_invalid, &env->fp_status); 1746 ST0 = floatx80_default_nan(&env->fp_status); 1747 fpush(env); 1748 ST0 = ST1; 1749 } else if (floatx80_is_any_nan(ST0)) { 1750 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1751 float_raise(float_flag_invalid, &env->fp_status); 1752 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1753 } 1754 fpush(env); 1755 ST0 = ST1; 1756 } else if (floatx80_is_infinity(ST0)) { 1757 fpush(env); 1758 ST0 = ST1; 1759 ST1 = floatx80_infinity; 1760 } else { 1761 int expdif; 1762 1763 if (EXPD(temp) == 0) { 1764 int shift = clz64(temp.l.lower); 1765 temp.l.lower <<= shift; 1766 expdif = 1 - EXPBIAS - shift; 1767 float_raise(float_flag_input_denormal, &env->fp_status); 1768 } else { 1769 expdif = EXPD(temp) - EXPBIAS; 1770 } 1771 /* DP exponent bias */ 1772 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1773 fpush(env); 1774 BIASEXPONENT(temp); 1775 ST0 = temp.d; 1776 } 1777 merge_exception_flags(env, old_flags); 1778 } 1779 1780 static void helper_fprem_common(CPUX86State *env, bool mod) 1781 { 1782 uint8_t old_flags = save_exception_flags(env); 1783 uint64_t quotient; 1784 CPU_LDoubleU temp0, temp1; 1785 int exp0, exp1, expdiff; 1786 1787 temp0.d = ST0; 1788 temp1.d = ST1; 1789 exp0 = EXPD(temp0); 1790 exp1 = EXPD(temp1); 1791 1792 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1793 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1794 exp0 == 0x7fff || exp1 == 0x7fff || 1795 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1796 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1797 } else { 1798 if (exp0 == 0) { 1799 exp0 = 1 - clz64(temp0.l.lower); 1800 } 1801 if (exp1 == 0) { 1802 exp1 = 1 - clz64(temp1.l.lower); 1803 } 1804 expdiff = exp0 - exp1; 1805 if (expdiff < 64) { 1806 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1807 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1808 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1809 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1810 } else { 1811 /* 1812 * Partial remainder. This choice of how many bits to 1813 * process at once is specified in AMD instruction set 1814 * manuals, and empirically is followed by Intel 1815 * processors as well; it ensures that the final remainder 1816 * operation in a loop does produce the correct low three 1817 * bits of the quotient. AMD manuals specify that the 1818 * flags other than C2 are cleared, and empirically Intel 1819 * processors clear them as well. 1820 */ 1821 int n = 32 + (expdiff % 32); 1822 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1823 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1824 env->fpus |= 0x400; /* C2 <-- 1 */ 1825 } 1826 } 1827 merge_exception_flags(env, old_flags); 1828 } 1829 1830 void helper_fprem1(CPUX86State *env) 1831 { 1832 helper_fprem_common(env, false); 1833 } 1834 1835 void helper_fprem(CPUX86State *env) 1836 { 1837 helper_fprem_common(env, true); 1838 } 1839 1840 /* 128-bit significand of log2(e). */ 1841 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1842 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1843 1844 /* 1845 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1846 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1847 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1848 * interval [sqrt(2)/2, sqrt(2)]. 1849 */ 1850 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1851 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1852 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1853 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1854 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1855 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1856 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1857 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1858 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1859 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1860 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1861 1862 /* 1863 * Compute an approximation of log2(1+arg), where 1+arg is in the 1864 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1865 * function is called, rounding precision is set to 80 and the 1866 * round-to-nearest mode is in effect. arg must not be exactly zero, 1867 * and must not be so close to zero that underflow might occur. 1868 */ 1869 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1870 uint64_t *sig0, uint64_t *sig1) 1871 { 1872 uint64_t arg0_sig = extractFloatx80Frac(arg); 1873 int32_t arg0_exp = extractFloatx80Exp(arg); 1874 bool arg0_sign = extractFloatx80Sign(arg); 1875 bool asign; 1876 int32_t dexp, texp, aexp; 1877 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1878 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1879 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1880 floatx80 t2, accum; 1881 1882 /* 1883 * Compute an approximation of arg/(2+arg), with extra precision, 1884 * as the argument to a polynomial approximation. The extra 1885 * precision is only needed for the first term of the 1886 * approximation, with subsequent terms being significantly 1887 * smaller; the approximation only uses odd exponents, and the 1888 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 1889 */ 1890 if (arg0_sign) { 1891 dexp = 0x3fff; 1892 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1893 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1894 } else { 1895 dexp = 0x4000; 1896 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1897 dsig0 |= 0x8000000000000000ULL; 1898 } 1899 texp = arg0_exp - dexp + 0x3ffe; 1900 rsig0 = arg0_sig; 1901 rsig1 = 0; 1902 rsig2 = 0; 1903 if (dsig0 <= rsig0) { 1904 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1905 ++texp; 1906 } 1907 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1908 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1909 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1910 &rsig0, &rsig1, &rsig2); 1911 while ((int64_t) rsig0 < 0) { 1912 --tsig0; 1913 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1914 &rsig0, &rsig1, &rsig2); 1915 } 1916 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1917 /* 1918 * No need to correct any estimation error in tsig1; even with 1919 * such error, it is accurate enough. Now compute the square of 1920 * that approximation. 1921 */ 1922 mul128To256(tsig0, tsig1, tsig0, tsig1, 1923 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1924 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1925 texp + texp - 0x3ffe, 1926 t2sig0, t2sig1, &env->fp_status); 1927 1928 /* Compute the lower parts of the polynomial expansion. */ 1929 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1930 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1931 accum = floatx80_mul(accum, t2, &env->fp_status); 1932 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1933 accum = floatx80_mul(accum, t2, &env->fp_status); 1934 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1935 accum = floatx80_mul(accum, t2, &env->fp_status); 1936 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1937 accum = floatx80_mul(accum, t2, &env->fp_status); 1938 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1939 accum = floatx80_mul(accum, t2, &env->fp_status); 1940 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1941 accum = floatx80_mul(accum, t2, &env->fp_status); 1942 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1943 accum = floatx80_mul(accum, t2, &env->fp_status); 1944 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1945 accum = floatx80_mul(accum, t2, &env->fp_status); 1946 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 1947 1948 /* 1949 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1950 * accum has much lower magnitude, and so, in particular, carry 1951 * out of the addition is not possible), multiplied by t. (This 1952 * expansion is only accurate to about 70 bits, not 128 bits.) 1953 */ 1954 aexp = extractFloatx80Exp(fyl2x_coeff_0); 1955 asign = extractFloatx80Sign(fyl2x_coeff_0); 1956 shift128RightJamming(extractFloatx80Frac(accum), 0, 1957 aexp - extractFloatx80Exp(accum), 1958 &asig0, &asig1); 1959 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1960 bsig1 = 0; 1961 if (asign == extractFloatx80Sign(accum)) { 1962 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1963 } else { 1964 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1965 } 1966 /* Multiply by t to compute the required result. */ 1967 mul128To256(asig0, asig1, tsig0, tsig1, 1968 &asig0, &asig1, &asig2, &asig3); 1969 aexp += texp - 0x3ffe; 1970 *exp = aexp; 1971 *sig0 = asig0; 1972 *sig1 = asig1; 1973 } 1974 1975 void helper_fyl2xp1(CPUX86State *env) 1976 { 1977 uint8_t old_flags = save_exception_flags(env); 1978 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1979 int32_t arg0_exp = extractFloatx80Exp(ST0); 1980 bool arg0_sign = extractFloatx80Sign(ST0); 1981 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1982 int32_t arg1_exp = extractFloatx80Exp(ST1); 1983 bool arg1_sign = extractFloatx80Sign(ST1); 1984 1985 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1986 float_raise(float_flag_invalid, &env->fp_status); 1987 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1988 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1989 float_raise(float_flag_invalid, &env->fp_status); 1990 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1991 } else if (floatx80_invalid_encoding(ST0) || 1992 floatx80_invalid_encoding(ST1)) { 1993 float_raise(float_flag_invalid, &env->fp_status); 1994 ST1 = floatx80_default_nan(&env->fp_status); 1995 } else if (floatx80_is_any_nan(ST0)) { 1996 ST1 = ST0; 1997 } else if (floatx80_is_any_nan(ST1)) { 1998 /* Pass this NaN through. */ 1999 } else if (arg0_exp > 0x3ffd || 2000 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2001 0x95f619980c4336f7ULL : 2002 0xd413cccfe7799211ULL))) { 2003 /* 2004 * Out of range for the instruction (ST0 must have absolute 2005 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2006 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2007 * to sqrt(2) - 1, which we allow here), treat as invalid. 2008 */ 2009 float_raise(float_flag_invalid, &env->fp_status); 2010 ST1 = floatx80_default_nan(&env->fp_status); 2011 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2012 arg1_exp == 0x7fff) { 2013 /* 2014 * One argument is zero, or multiplying by infinity; correct 2015 * result is exact and can be obtained by multiplying the 2016 * arguments. 2017 */ 2018 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2019 } else if (arg0_exp < 0x3fb0) { 2020 /* 2021 * Multiplying both arguments and an extra-precision version 2022 * of log2(e) is sufficiently precise. 2023 */ 2024 uint64_t sig0, sig1, sig2; 2025 int32_t exp; 2026 if (arg0_exp == 0) { 2027 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2028 } 2029 if (arg1_exp == 0) { 2030 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2031 } 2032 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2033 &sig0, &sig1, &sig2); 2034 exp = arg0_exp + 1; 2035 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2036 exp += arg1_exp - 0x3ffe; 2037 /* This result is inexact. */ 2038 sig1 |= 1; 2039 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2040 arg0_sign ^ arg1_sign, exp, 2041 sig0, sig1, &env->fp_status); 2042 } else { 2043 int32_t aexp; 2044 uint64_t asig0, asig1, asig2; 2045 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2046 FloatX80RoundPrec save_prec = 2047 env->fp_status.floatx80_rounding_precision; 2048 env->fp_status.float_rounding_mode = float_round_nearest_even; 2049 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2050 2051 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2052 /* 2053 * Multiply by the second argument to compute the required 2054 * result. 2055 */ 2056 if (arg1_exp == 0) { 2057 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2058 } 2059 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2060 aexp += arg1_exp - 0x3ffe; 2061 /* This result is inexact. */ 2062 asig1 |= 1; 2063 env->fp_status.float_rounding_mode = save_mode; 2064 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2065 arg0_sign ^ arg1_sign, aexp, 2066 asig0, asig1, &env->fp_status); 2067 env->fp_status.floatx80_rounding_precision = save_prec; 2068 } 2069 fpop(env); 2070 merge_exception_flags(env, old_flags); 2071 } 2072 2073 void helper_fyl2x(CPUX86State *env) 2074 { 2075 uint8_t old_flags = save_exception_flags(env); 2076 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2077 int32_t arg0_exp = extractFloatx80Exp(ST0); 2078 bool arg0_sign = extractFloatx80Sign(ST0); 2079 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2080 int32_t arg1_exp = extractFloatx80Exp(ST1); 2081 bool arg1_sign = extractFloatx80Sign(ST1); 2082 2083 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2084 float_raise(float_flag_invalid, &env->fp_status); 2085 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2086 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2087 float_raise(float_flag_invalid, &env->fp_status); 2088 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2089 } else if (floatx80_invalid_encoding(ST0) || 2090 floatx80_invalid_encoding(ST1)) { 2091 float_raise(float_flag_invalid, &env->fp_status); 2092 ST1 = floatx80_default_nan(&env->fp_status); 2093 } else if (floatx80_is_any_nan(ST0)) { 2094 ST1 = ST0; 2095 } else if (floatx80_is_any_nan(ST1)) { 2096 /* Pass this NaN through. */ 2097 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2098 float_raise(float_flag_invalid, &env->fp_status); 2099 ST1 = floatx80_default_nan(&env->fp_status); 2100 } else if (floatx80_is_infinity(ST1)) { 2101 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2102 &env->fp_status); 2103 switch (cmp) { 2104 case float_relation_less: 2105 ST1 = floatx80_chs(ST1); 2106 break; 2107 case float_relation_greater: 2108 /* Result is infinity of the same sign as ST1. */ 2109 break; 2110 default: 2111 float_raise(float_flag_invalid, &env->fp_status); 2112 ST1 = floatx80_default_nan(&env->fp_status); 2113 break; 2114 } 2115 } else if (floatx80_is_infinity(ST0)) { 2116 if (floatx80_is_zero(ST1)) { 2117 float_raise(float_flag_invalid, &env->fp_status); 2118 ST1 = floatx80_default_nan(&env->fp_status); 2119 } else if (arg1_sign) { 2120 ST1 = floatx80_chs(ST0); 2121 } else { 2122 ST1 = ST0; 2123 } 2124 } else if (floatx80_is_zero(ST0)) { 2125 if (floatx80_is_zero(ST1)) { 2126 float_raise(float_flag_invalid, &env->fp_status); 2127 ST1 = floatx80_default_nan(&env->fp_status); 2128 } else { 2129 /* Result is infinity with opposite sign to ST1. */ 2130 float_raise(float_flag_divbyzero, &env->fp_status); 2131 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2132 0x8000000000000000ULL); 2133 } 2134 } else if (floatx80_is_zero(ST1)) { 2135 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2136 ST1 = floatx80_chs(ST1); 2137 } 2138 /* Otherwise, ST1 is already the correct result. */ 2139 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2140 if (arg1_sign) { 2141 ST1 = floatx80_chs(floatx80_zero); 2142 } else { 2143 ST1 = floatx80_zero; 2144 } 2145 } else { 2146 int32_t int_exp; 2147 floatx80 arg0_m1; 2148 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2149 FloatX80RoundPrec save_prec = 2150 env->fp_status.floatx80_rounding_precision; 2151 env->fp_status.float_rounding_mode = float_round_nearest_even; 2152 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2153 2154 if (arg0_exp == 0) { 2155 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2156 } 2157 if (arg1_exp == 0) { 2158 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2159 } 2160 int_exp = arg0_exp - 0x3fff; 2161 if (arg0_sig > 0xb504f333f9de6484ULL) { 2162 ++int_exp; 2163 } 2164 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2165 &env->fp_status), 2166 floatx80_one, &env->fp_status); 2167 if (floatx80_is_zero(arg0_m1)) { 2168 /* Exact power of 2; multiply by ST1. */ 2169 env->fp_status.float_rounding_mode = save_mode; 2170 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2171 ST1, &env->fp_status); 2172 } else { 2173 bool asign = extractFloatx80Sign(arg0_m1); 2174 int32_t aexp; 2175 uint64_t asig0, asig1, asig2; 2176 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2177 if (int_exp != 0) { 2178 bool isign = (int_exp < 0); 2179 int32_t iexp; 2180 uint64_t isig; 2181 int shift; 2182 int_exp = isign ? -int_exp : int_exp; 2183 shift = clz32(int_exp) + 32; 2184 isig = int_exp; 2185 isig <<= shift; 2186 iexp = 0x403e - shift; 2187 shift128RightJamming(asig0, asig1, iexp - aexp, 2188 &asig0, &asig1); 2189 if (asign == isign) { 2190 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2191 } else { 2192 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2193 } 2194 aexp = iexp; 2195 asign = isign; 2196 } 2197 /* 2198 * Multiply by the second argument to compute the required 2199 * result. 2200 */ 2201 if (arg1_exp == 0) { 2202 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2203 } 2204 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2205 aexp += arg1_exp - 0x3ffe; 2206 /* This result is inexact. */ 2207 asig1 |= 1; 2208 env->fp_status.float_rounding_mode = save_mode; 2209 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2210 asign ^ arg1_sign, aexp, 2211 asig0, asig1, &env->fp_status); 2212 } 2213 2214 env->fp_status.floatx80_rounding_precision = save_prec; 2215 } 2216 fpop(env); 2217 merge_exception_flags(env, old_flags); 2218 } 2219 2220 void helper_fsqrt(CPUX86State *env) 2221 { 2222 uint8_t old_flags = save_exception_flags(env); 2223 if (floatx80_is_neg(ST0)) { 2224 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2225 env->fpus |= 0x400; 2226 } 2227 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2228 merge_exception_flags(env, old_flags); 2229 } 2230 2231 void helper_fsincos(CPUX86State *env) 2232 { 2233 double fptemp = floatx80_to_double(env, ST0); 2234 2235 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2236 env->fpus |= 0x400; 2237 } else { 2238 ST0 = double_to_floatx80(env, sin(fptemp)); 2239 fpush(env); 2240 ST0 = double_to_floatx80(env, cos(fptemp)); 2241 env->fpus &= ~0x400; /* C2 <-- 0 */ 2242 /* the above code is for |arg| < 2**63 only */ 2243 } 2244 } 2245 2246 void helper_frndint(CPUX86State *env) 2247 { 2248 uint8_t old_flags = save_exception_flags(env); 2249 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2250 merge_exception_flags(env, old_flags); 2251 } 2252 2253 void helper_fscale(CPUX86State *env) 2254 { 2255 uint8_t old_flags = save_exception_flags(env); 2256 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2257 float_raise(float_flag_invalid, &env->fp_status); 2258 ST0 = floatx80_default_nan(&env->fp_status); 2259 } else if (floatx80_is_any_nan(ST1)) { 2260 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2261 float_raise(float_flag_invalid, &env->fp_status); 2262 } 2263 ST0 = ST1; 2264 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2265 float_raise(float_flag_invalid, &env->fp_status); 2266 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2267 } 2268 } else if (floatx80_is_infinity(ST1) && 2269 !floatx80_invalid_encoding(ST0) && 2270 !floatx80_is_any_nan(ST0)) { 2271 if (floatx80_is_neg(ST1)) { 2272 if (floatx80_is_infinity(ST0)) { 2273 float_raise(float_flag_invalid, &env->fp_status); 2274 ST0 = floatx80_default_nan(&env->fp_status); 2275 } else { 2276 ST0 = (floatx80_is_neg(ST0) ? 2277 floatx80_chs(floatx80_zero) : 2278 floatx80_zero); 2279 } 2280 } else { 2281 if (floatx80_is_zero(ST0)) { 2282 float_raise(float_flag_invalid, &env->fp_status); 2283 ST0 = floatx80_default_nan(&env->fp_status); 2284 } else { 2285 ST0 = (floatx80_is_neg(ST0) ? 2286 floatx80_chs(floatx80_infinity) : 2287 floatx80_infinity); 2288 } 2289 } 2290 } else { 2291 int n; 2292 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2293 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2294 set_float_exception_flags(0, &env->fp_status); 2295 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2296 set_float_exception_flags(save_flags, &env->fp_status); 2297 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2298 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2299 env->fp_status.floatx80_rounding_precision = save; 2300 } 2301 merge_exception_flags(env, old_flags); 2302 } 2303 2304 void helper_fsin(CPUX86State *env) 2305 { 2306 double fptemp = floatx80_to_double(env, ST0); 2307 2308 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2309 env->fpus |= 0x400; 2310 } else { 2311 ST0 = double_to_floatx80(env, sin(fptemp)); 2312 env->fpus &= ~0x400; /* C2 <-- 0 */ 2313 /* the above code is for |arg| < 2**53 only */ 2314 } 2315 } 2316 2317 void helper_fcos(CPUX86State *env) 2318 { 2319 double fptemp = floatx80_to_double(env, ST0); 2320 2321 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2322 env->fpus |= 0x400; 2323 } else { 2324 ST0 = double_to_floatx80(env, cos(fptemp)); 2325 env->fpus &= ~0x400; /* C2 <-- 0 */ 2326 /* the above code is for |arg| < 2**63 only */ 2327 } 2328 } 2329 2330 void helper_fxam_ST0(CPUX86State *env) 2331 { 2332 CPU_LDoubleU temp; 2333 int expdif; 2334 2335 temp.d = ST0; 2336 2337 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2338 if (SIGND(temp)) { 2339 env->fpus |= 0x200; /* C1 <-- 1 */ 2340 } 2341 2342 if (env->fptags[env->fpstt]) { 2343 env->fpus |= 0x4100; /* Empty */ 2344 return; 2345 } 2346 2347 expdif = EXPD(temp); 2348 if (expdif == MAXEXPD) { 2349 if (MANTD(temp) == 0x8000000000000000ULL) { 2350 env->fpus |= 0x500; /* Infinity */ 2351 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2352 env->fpus |= 0x100; /* NaN */ 2353 } 2354 } else if (expdif == 0) { 2355 if (MANTD(temp) == 0) { 2356 env->fpus |= 0x4000; /* Zero */ 2357 } else { 2358 env->fpus |= 0x4400; /* Denormal */ 2359 } 2360 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2361 env->fpus |= 0x400; 2362 } 2363 } 2364 2365 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2366 uintptr_t retaddr) 2367 { 2368 int fpus, fptag, exp, i; 2369 uint64_t mant; 2370 CPU_LDoubleU tmp; 2371 2372 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2373 fptag = 0; 2374 for (i = 7; i >= 0; i--) { 2375 fptag <<= 2; 2376 if (env->fptags[i]) { 2377 fptag |= 3; 2378 } else { 2379 tmp.d = env->fpregs[i].d; 2380 exp = EXPD(tmp); 2381 mant = MANTD(tmp); 2382 if (exp == 0 && mant == 0) { 2383 /* zero */ 2384 fptag |= 1; 2385 } else if (exp == 0 || exp == MAXEXPD 2386 || (mant & (1LL << 63)) == 0) { 2387 /* NaNs, infinity, denormal */ 2388 fptag |= 2; 2389 } 2390 } 2391 } 2392 if (data32) { 2393 /* 32 bit */ 2394 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2395 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2396 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2397 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 2398 cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 2399 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 2400 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 2401 } else { 2402 /* 16 bit */ 2403 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2404 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2405 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2406 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 2407 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 2408 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 2409 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 2410 } 2411 } 2412 2413 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2414 { 2415 do_fstenv(env, ptr, data32, GETPC()); 2416 } 2417 2418 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2419 { 2420 env->fpstt = (fpus >> 11) & 7; 2421 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2422 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2423 #if !defined(CONFIG_USER_ONLY) 2424 if (!(env->fpus & FPUS_SE)) { 2425 /* 2426 * Here the processor deasserts FERR#; in response, the chipset deasserts 2427 * IGNNE#. 2428 */ 2429 cpu_clear_ignne(); 2430 } 2431 #endif 2432 } 2433 2434 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2435 uintptr_t retaddr) 2436 { 2437 int i, fpus, fptag; 2438 2439 if (data32) { 2440 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2441 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2442 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2443 } else { 2444 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2445 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2446 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2447 } 2448 cpu_set_fpus(env, fpus); 2449 for (i = 0; i < 8; i++) { 2450 env->fptags[i] = ((fptag & 3) == 3); 2451 fptag >>= 2; 2452 } 2453 } 2454 2455 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2456 { 2457 do_fldenv(env, ptr, data32, GETPC()); 2458 } 2459 2460 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2461 uintptr_t retaddr) 2462 { 2463 floatx80 tmp; 2464 int i; 2465 2466 do_fstenv(env, ptr, data32, retaddr); 2467 2468 ptr += (target_ulong)14 << data32; 2469 for (i = 0; i < 8; i++) { 2470 tmp = ST(i); 2471 do_fstt(env, tmp, ptr, retaddr); 2472 ptr += 10; 2473 } 2474 2475 do_fninit(env); 2476 } 2477 2478 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2479 { 2480 do_fsave(env, ptr, data32, GETPC()); 2481 } 2482 2483 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2484 uintptr_t retaddr) 2485 { 2486 floatx80 tmp; 2487 int i; 2488 2489 do_fldenv(env, ptr, data32, retaddr); 2490 ptr += (target_ulong)14 << data32; 2491 2492 for (i = 0; i < 8; i++) { 2493 tmp = do_fldt(env, ptr, retaddr); 2494 ST(i) = tmp; 2495 ptr += 10; 2496 } 2497 } 2498 2499 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2500 { 2501 do_frstor(env, ptr, data32, GETPC()); 2502 } 2503 2504 #define XO(X) offsetof(X86XSaveArea, X) 2505 2506 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2507 { 2508 int fpus, fptag, i; 2509 target_ulong addr; 2510 2511 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2512 fptag = 0; 2513 for (i = 0; i < 8; i++) { 2514 fptag |= (env->fptags[i] << i); 2515 } 2516 2517 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2518 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2519 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2520 2521 /* In 32-bit mode this is eip, sel, dp, sel. 2522 In 64-bit mode this is rip, rdp. 2523 But in either case we don't write actual data, just zeros. */ 2524 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2525 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2526 2527 addr = ptr + XO(legacy.fpregs); 2528 for (i = 0; i < 8; i++) { 2529 floatx80 tmp = ST(i); 2530 do_fstt(env, tmp, addr, ra); 2531 addr += 16; 2532 } 2533 } 2534 2535 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2536 { 2537 update_mxcsr_from_sse_status(env); 2538 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2539 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2540 } 2541 2542 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2543 { 2544 int i, nb_xmm_regs; 2545 target_ulong addr; 2546 2547 if (env->hflags & HF_CS64_MASK) { 2548 nb_xmm_regs = 16; 2549 } else { 2550 nb_xmm_regs = 8; 2551 } 2552 2553 addr = ptr + XO(legacy.xmm_regs); 2554 for (i = 0; i < nb_xmm_regs; i++) { 2555 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2556 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2557 addr += 16; 2558 } 2559 } 2560 2561 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2562 { 2563 int i, nb_xmm_regs; 2564 2565 if (env->hflags & HF_CS64_MASK) { 2566 nb_xmm_regs = 16; 2567 } else { 2568 nb_xmm_regs = 8; 2569 } 2570 2571 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2572 cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 2573 cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 2574 } 2575 } 2576 2577 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2578 { 2579 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2580 int i; 2581 2582 for (i = 0; i < 4; i++, addr += 16) { 2583 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2584 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2585 } 2586 } 2587 2588 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2589 { 2590 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2591 env->bndcs_regs.cfgu, ra); 2592 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2593 env->bndcs_regs.sts, ra); 2594 } 2595 2596 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2597 { 2598 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2599 } 2600 2601 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2602 { 2603 /* The operand must be 16 byte aligned */ 2604 if (ptr & 0xf) { 2605 raise_exception_ra(env, EXCP0D_GPF, ra); 2606 } 2607 2608 do_xsave_fpu(env, ptr, ra); 2609 2610 if (env->cr[4] & CR4_OSFXSR_MASK) { 2611 do_xsave_mxcsr(env, ptr, ra); 2612 /* Fast FXSAVE leaves out the XMM registers */ 2613 if (!(env->efer & MSR_EFER_FFXSR) 2614 || (env->hflags & HF_CPL_MASK) 2615 || !(env->hflags & HF_LMA_MASK)) { 2616 do_xsave_sse(env, ptr, ra); 2617 } 2618 } 2619 } 2620 2621 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2622 { 2623 do_fxsave(env, ptr, GETPC()); 2624 } 2625 2626 static uint64_t get_xinuse(CPUX86State *env) 2627 { 2628 uint64_t inuse = -1; 2629 2630 /* For the most part, we don't track XINUSE. We could calculate it 2631 here for all components, but it's probably less work to simply 2632 indicate in use. That said, the state of BNDREGS is important 2633 enough to track in HFLAGS, so we might as well use that here. */ 2634 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2635 inuse &= ~XSTATE_BNDREGS_MASK; 2636 } 2637 return inuse; 2638 } 2639 2640 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2641 uint64_t inuse, uint64_t opt, uintptr_t ra) 2642 { 2643 uint64_t old_bv, new_bv; 2644 2645 /* The OS must have enabled XSAVE. */ 2646 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2647 raise_exception_ra(env, EXCP06_ILLOP, ra); 2648 } 2649 2650 /* The operand must be 64 byte aligned. */ 2651 if (ptr & 63) { 2652 raise_exception_ra(env, EXCP0D_GPF, ra); 2653 } 2654 2655 /* Never save anything not enabled by XCR0. */ 2656 rfbm &= env->xcr0; 2657 opt &= rfbm; 2658 2659 if (opt & XSTATE_FP_MASK) { 2660 do_xsave_fpu(env, ptr, ra); 2661 } 2662 if (rfbm & XSTATE_SSE_MASK) { 2663 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2664 do_xsave_mxcsr(env, ptr, ra); 2665 } 2666 if (opt & XSTATE_SSE_MASK) { 2667 do_xsave_sse(env, ptr, ra); 2668 } 2669 if (opt & XSTATE_YMM_MASK) { 2670 do_xsave_ymmh(env, ptr + XO(avx_state), ra); 2671 } 2672 if (opt & XSTATE_BNDREGS_MASK) { 2673 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2674 } 2675 if (opt & XSTATE_BNDCSR_MASK) { 2676 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2677 } 2678 if (opt & XSTATE_PKRU_MASK) { 2679 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2680 } 2681 2682 /* Update the XSTATE_BV field. */ 2683 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2684 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2685 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2686 } 2687 2688 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2689 { 2690 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2691 } 2692 2693 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2694 { 2695 uint64_t inuse = get_xinuse(env); 2696 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2697 } 2698 2699 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2700 { 2701 int i, fpuc, fpus, fptag; 2702 target_ulong addr; 2703 2704 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2705 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2706 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2707 cpu_set_fpuc(env, fpuc); 2708 cpu_set_fpus(env, fpus); 2709 fptag ^= 0xff; 2710 for (i = 0; i < 8; i++) { 2711 env->fptags[i] = ((fptag >> i) & 1); 2712 } 2713 2714 addr = ptr + XO(legacy.fpregs); 2715 for (i = 0; i < 8; i++) { 2716 floatx80 tmp = do_fldt(env, addr, ra); 2717 ST(i) = tmp; 2718 addr += 16; 2719 } 2720 } 2721 2722 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2723 { 2724 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2725 } 2726 2727 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2728 { 2729 int i, nb_xmm_regs; 2730 target_ulong addr; 2731 2732 if (env->hflags & HF_CS64_MASK) { 2733 nb_xmm_regs = 16; 2734 } else { 2735 nb_xmm_regs = 8; 2736 } 2737 2738 addr = ptr + XO(legacy.xmm_regs); 2739 for (i = 0; i < nb_xmm_regs; i++) { 2740 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2741 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2742 addr += 16; 2743 } 2744 } 2745 2746 static void do_clear_sse(CPUX86State *env) 2747 { 2748 int i, nb_xmm_regs; 2749 2750 if (env->hflags & HF_CS64_MASK) { 2751 nb_xmm_regs = 16; 2752 } else { 2753 nb_xmm_regs = 8; 2754 } 2755 2756 for (i = 0; i < nb_xmm_regs; i++) { 2757 env->xmm_regs[i].ZMM_Q(0) = 0; 2758 env->xmm_regs[i].ZMM_Q(1) = 0; 2759 } 2760 } 2761 2762 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2763 { 2764 int i, nb_xmm_regs; 2765 2766 if (env->hflags & HF_CS64_MASK) { 2767 nb_xmm_regs = 16; 2768 } else { 2769 nb_xmm_regs = 8; 2770 } 2771 2772 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2773 env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 2774 env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 2775 } 2776 } 2777 2778 static void do_clear_ymmh(CPUX86State *env) 2779 { 2780 int i, nb_xmm_regs; 2781 2782 if (env->hflags & HF_CS64_MASK) { 2783 nb_xmm_regs = 16; 2784 } else { 2785 nb_xmm_regs = 8; 2786 } 2787 2788 for (i = 0; i < nb_xmm_regs; i++) { 2789 env->xmm_regs[i].ZMM_Q(2) = 0; 2790 env->xmm_regs[i].ZMM_Q(3) = 0; 2791 } 2792 } 2793 2794 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2795 { 2796 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2797 int i; 2798 2799 for (i = 0; i < 4; i++, addr += 16) { 2800 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2801 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2802 } 2803 } 2804 2805 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2806 { 2807 /* FIXME: Extend highest implemented bit of linear address. */ 2808 env->bndcs_regs.cfgu 2809 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2810 env->bndcs_regs.sts 2811 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2812 } 2813 2814 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2815 { 2816 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2817 } 2818 2819 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2820 { 2821 /* The operand must be 16 byte aligned */ 2822 if (ptr & 0xf) { 2823 raise_exception_ra(env, EXCP0D_GPF, ra); 2824 } 2825 2826 do_xrstor_fpu(env, ptr, ra); 2827 2828 if (env->cr[4] & CR4_OSFXSR_MASK) { 2829 do_xrstor_mxcsr(env, ptr, ra); 2830 /* Fast FXRSTOR leaves out the XMM registers */ 2831 if (!(env->efer & MSR_EFER_FFXSR) 2832 || (env->hflags & HF_CPL_MASK) 2833 || !(env->hflags & HF_LMA_MASK)) { 2834 do_xrstor_sse(env, ptr, ra); 2835 } 2836 } 2837 } 2838 2839 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2840 { 2841 do_fxrstor(env, ptr, GETPC()); 2842 } 2843 2844 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) 2845 { 2846 uint64_t xstate_bv, xcomp_bv, reserve0; 2847 2848 rfbm &= env->xcr0; 2849 2850 /* The OS must have enabled XSAVE. */ 2851 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2852 raise_exception_ra(env, EXCP06_ILLOP, ra); 2853 } 2854 2855 /* The operand must be 64 byte aligned. */ 2856 if (ptr & 63) { 2857 raise_exception_ra(env, EXCP0D_GPF, ra); 2858 } 2859 2860 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2861 2862 if ((int64_t)xstate_bv < 0) { 2863 /* FIXME: Compact form. */ 2864 raise_exception_ra(env, EXCP0D_GPF, ra); 2865 } 2866 2867 /* Standard form. */ 2868 2869 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2870 if (xstate_bv & ~env->xcr0) { 2871 raise_exception_ra(env, EXCP0D_GPF, ra); 2872 } 2873 2874 /* The XCOMP_BV field must be zero. Note that, as of the April 2016 2875 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2876 describes only XCOMP_BV, but the description of the standard form 2877 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2878 includes the next 64-bit field. */ 2879 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2880 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2881 if (xcomp_bv || reserve0) { 2882 raise_exception_ra(env, EXCP0D_GPF, ra); 2883 } 2884 2885 if (rfbm & XSTATE_FP_MASK) { 2886 if (xstate_bv & XSTATE_FP_MASK) { 2887 do_xrstor_fpu(env, ptr, ra); 2888 } else { 2889 do_fninit(env); 2890 memset(env->fpregs, 0, sizeof(env->fpregs)); 2891 } 2892 } 2893 if (rfbm & XSTATE_SSE_MASK) { 2894 /* Note that the standard form of XRSTOR loads MXCSR from memory 2895 whether or not the XSTATE_BV bit is set. */ 2896 do_xrstor_mxcsr(env, ptr, ra); 2897 if (xstate_bv & XSTATE_SSE_MASK) { 2898 do_xrstor_sse(env, ptr, ra); 2899 } else { 2900 do_clear_sse(env); 2901 } 2902 } 2903 if (rfbm & XSTATE_YMM_MASK) { 2904 if (xstate_bv & XSTATE_YMM_MASK) { 2905 do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 2906 } else { 2907 do_clear_ymmh(env); 2908 } 2909 } 2910 if (rfbm & XSTATE_BNDREGS_MASK) { 2911 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2912 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2913 env->hflags |= HF_MPX_IU_MASK; 2914 } else { 2915 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2916 env->hflags &= ~HF_MPX_IU_MASK; 2917 } 2918 } 2919 if (rfbm & XSTATE_BNDCSR_MASK) { 2920 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2921 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2922 } else { 2923 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2924 } 2925 cpu_sync_bndcs_hflags(env); 2926 } 2927 if (rfbm & XSTATE_PKRU_MASK) { 2928 uint64_t old_pkru = env->pkru; 2929 if (xstate_bv & XSTATE_PKRU_MASK) { 2930 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2931 } else { 2932 env->pkru = 0; 2933 } 2934 if (env->pkru != old_pkru) { 2935 CPUState *cs = env_cpu(env); 2936 tlb_flush(cs); 2937 } 2938 } 2939 } 2940 2941 #undef XO 2942 2943 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2944 { 2945 do_xrstor(env, ptr, rfbm, GETPC()); 2946 } 2947 2948 #if defined(CONFIG_USER_ONLY) 2949 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2950 { 2951 do_fsave(env, ptr, data32, 0); 2952 } 2953 2954 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2955 { 2956 do_frstor(env, ptr, data32, 0); 2957 } 2958 2959 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2960 { 2961 do_fxsave(env, ptr, 0); 2962 } 2963 2964 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2965 { 2966 do_fxrstor(env, ptr, 0); 2967 } 2968 2969 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 2970 { 2971 do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 2972 } 2973 2974 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 2975 { 2976 do_xrstor(env, ptr, -1, 0); 2977 } 2978 #endif 2979 2980 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2981 { 2982 /* The OS must have enabled XSAVE. */ 2983 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2984 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2985 } 2986 2987 switch (ecx) { 2988 case 0: 2989 return env->xcr0; 2990 case 1: 2991 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2992 return env->xcr0 & get_xinuse(env); 2993 } 2994 break; 2995 } 2996 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2997 } 2998 2999 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3000 { 3001 uint32_t dummy, ena_lo, ena_hi; 3002 uint64_t ena; 3003 3004 /* The OS must have enabled XSAVE. */ 3005 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3006 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3007 } 3008 3009 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3010 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3011 goto do_gpf; 3012 } 3013 3014 /* Disallow enabling unimplemented features. */ 3015 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3016 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3017 if (mask & ~ena) { 3018 goto do_gpf; 3019 } 3020 3021 /* Disallow enabling only half of MPX. */ 3022 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3023 & XSTATE_BNDCSR_MASK) { 3024 goto do_gpf; 3025 } 3026 3027 env->xcr0 = mask; 3028 cpu_sync_bndcs_hflags(env); 3029 cpu_sync_avx_hflag(env); 3030 return; 3031 3032 do_gpf: 3033 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3034 } 3035 3036 /* MMX/SSE */ 3037 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3038 3039 #define SSE_DAZ 0x0040 3040 #define SSE_RC_SHIFT 13 3041 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3042 #define SSE_FZ 0x8000 3043 3044 void update_mxcsr_status(CPUX86State *env) 3045 { 3046 uint32_t mxcsr = env->mxcsr; 3047 int rnd_type; 3048 3049 /* set rounding mode */ 3050 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3051 set_x86_rounding_mode(rnd_type, &env->sse_status); 3052 3053 /* Set exception flags. */ 3054 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3055 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3056 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3057 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3058 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3059 &env->sse_status); 3060 3061 /* set denormals are zero */ 3062 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3063 3064 /* set flush to zero */ 3065 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3066 } 3067 3068 void update_mxcsr_from_sse_status(CPUX86State *env) 3069 { 3070 uint8_t flags = get_float_exception_flags(&env->sse_status); 3071 /* 3072 * The MXCSR denormal flag has opposite semantics to 3073 * float_flag_input_denormal (the softfloat code sets that flag 3074 * only when flushing input denormals to zero, but SSE sets it 3075 * only when not flushing them to zero), so is not converted 3076 * here. 3077 */ 3078 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3079 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3080 (flags & float_flag_overflow ? FPUS_OE : 0) | 3081 (flags & float_flag_underflow ? FPUS_UE : 0) | 3082 (flags & float_flag_inexact ? FPUS_PE : 0) | 3083 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3084 0)); 3085 } 3086 3087 void helper_update_mxcsr(CPUX86State *env) 3088 { 3089 update_mxcsr_from_sse_status(env); 3090 } 3091 3092 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3093 { 3094 cpu_set_mxcsr(env, val); 3095 } 3096 3097 void helper_enter_mmx(CPUX86State *env) 3098 { 3099 env->fpstt = 0; 3100 *(uint32_t *)(env->fptags) = 0; 3101 *(uint32_t *)(env->fptags + 4) = 0; 3102 } 3103 3104 void helper_emms(CPUX86State *env) 3105 { 3106 /* set to empty state */ 3107 *(uint32_t *)(env->fptags) = 0x01010101; 3108 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3109 } 3110 3111 #define SHIFT 0 3112 #include "ops_sse.h" 3113 3114 #define SHIFT 1 3115 #include "ops_sse.h" 3116 3117 #define SHIFT 2 3118 #include "ops_sse.h" 3119