1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "fpu/softfloat-macros.h" 29 #include "helper-tcg.h" 30 31 /* float macros */ 32 #define FT0 (env->ft0) 33 #define ST0 (env->fpregs[env->fpstt].d) 34 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 35 #define ST1 ST(1) 36 37 #define FPU_RC_SHIFT 10 38 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 39 #define FPU_RC_NEAR 0x000 40 #define FPU_RC_DOWN 0x400 41 #define FPU_RC_UP 0x800 42 #define FPU_RC_CHOP 0xc00 43 44 #define MAXTAN 9223372036854775808.0 45 46 /* the following deal with x86 long double-precision numbers */ 47 #define MAXEXPD 0x7fff 48 #define EXPBIAS 16383 49 #define EXPD(fp) (fp.l.upper & 0x7fff) 50 #define SIGND(fp) ((fp.l.upper) & 0x8000) 51 #define MANTD(fp) (fp.l.lower) 52 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 53 54 #define FPUS_IE (1 << 0) 55 #define FPUS_DE (1 << 1) 56 #define FPUS_ZE (1 << 2) 57 #define FPUS_OE (1 << 3) 58 #define FPUS_UE (1 << 4) 59 #define FPUS_PE (1 << 5) 60 #define FPUS_SF (1 << 6) 61 #define FPUS_SE (1 << 7) 62 #define FPUS_B (1 << 15) 63 64 #define FPUC_EM 0x3f 65 66 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 67 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 68 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 69 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 70 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 71 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 72 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 73 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 74 75 static inline void fpush(CPUX86State *env) 76 { 77 env->fpstt = (env->fpstt - 1) & 7; 78 env->fptags[env->fpstt] = 0; /* validate stack entry */ 79 } 80 81 static inline void fpop(CPUX86State *env) 82 { 83 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 84 env->fpstt = (env->fpstt + 1) & 7; 85 } 86 87 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 88 { 89 CPU_LDoubleU temp; 90 91 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 92 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 93 return temp.d; 94 } 95 96 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 97 uintptr_t retaddr) 98 { 99 CPU_LDoubleU temp; 100 101 temp.d = f; 102 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 103 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 104 } 105 106 /* x87 FPU helpers */ 107 108 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 109 { 110 union { 111 float64 f64; 112 double d; 113 } u; 114 115 u.f64 = floatx80_to_float64(a, &env->fp_status); 116 return u.d; 117 } 118 119 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 120 { 121 union { 122 float64 f64; 123 double d; 124 } u; 125 126 u.d = a; 127 return float64_to_floatx80(u.f64, &env->fp_status); 128 } 129 130 static void fpu_set_exception(CPUX86State *env, int mask) 131 { 132 env->fpus |= mask; 133 if (env->fpus & (~env->fpuc & FPUC_EM)) { 134 env->fpus |= FPUS_SE | FPUS_B; 135 } 136 } 137 138 static inline uint8_t save_exception_flags(CPUX86State *env) 139 { 140 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 141 set_float_exception_flags(0, &env->fp_status); 142 return old_flags; 143 } 144 145 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 146 { 147 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 148 float_raise(old_flags, &env->fp_status); 149 fpu_set_exception(env, 150 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 151 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 152 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 153 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 154 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 155 (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 156 } 157 158 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 159 { 160 uint8_t old_flags = save_exception_flags(env); 161 floatx80 ret = floatx80_div(a, b, &env->fp_status); 162 merge_exception_flags(env, old_flags); 163 return ret; 164 } 165 166 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 167 { 168 if (env->cr[0] & CR0_NE_MASK) { 169 raise_exception_ra(env, EXCP10_COPR, retaddr); 170 } 171 #if !defined(CONFIG_USER_ONLY) 172 else { 173 fpu_check_raise_ferr_irq(env); 174 } 175 #endif 176 } 177 178 void helper_flds_FT0(CPUX86State *env, uint32_t val) 179 { 180 uint8_t old_flags = save_exception_flags(env); 181 union { 182 float32 f; 183 uint32_t i; 184 } u; 185 186 u.i = val; 187 FT0 = float32_to_floatx80(u.f, &env->fp_status); 188 merge_exception_flags(env, old_flags); 189 } 190 191 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 192 { 193 uint8_t old_flags = save_exception_flags(env); 194 union { 195 float64 f; 196 uint64_t i; 197 } u; 198 199 u.i = val; 200 FT0 = float64_to_floatx80(u.f, &env->fp_status); 201 merge_exception_flags(env, old_flags); 202 } 203 204 void helper_fildl_FT0(CPUX86State *env, int32_t val) 205 { 206 FT0 = int32_to_floatx80(val, &env->fp_status); 207 } 208 209 void helper_flds_ST0(CPUX86State *env, uint32_t val) 210 { 211 uint8_t old_flags = save_exception_flags(env); 212 int new_fpstt; 213 union { 214 float32 f; 215 uint32_t i; 216 } u; 217 218 new_fpstt = (env->fpstt - 1) & 7; 219 u.i = val; 220 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 221 env->fpstt = new_fpstt; 222 env->fptags[new_fpstt] = 0; /* validate stack entry */ 223 merge_exception_flags(env, old_flags); 224 } 225 226 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 227 { 228 uint8_t old_flags = save_exception_flags(env); 229 int new_fpstt; 230 union { 231 float64 f; 232 uint64_t i; 233 } u; 234 235 new_fpstt = (env->fpstt - 1) & 7; 236 u.i = val; 237 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 238 env->fpstt = new_fpstt; 239 env->fptags[new_fpstt] = 0; /* validate stack entry */ 240 merge_exception_flags(env, old_flags); 241 } 242 243 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 244 { 245 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 246 set_floatx80_rounding_precision(floatx80_precision_x, st); 247 return old; 248 } 249 250 void helper_fildl_ST0(CPUX86State *env, int32_t val) 251 { 252 int new_fpstt; 253 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 254 255 new_fpstt = (env->fpstt - 1) & 7; 256 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 257 env->fpstt = new_fpstt; 258 env->fptags[new_fpstt] = 0; /* validate stack entry */ 259 260 set_floatx80_rounding_precision(old, &env->fp_status); 261 } 262 263 void helper_fildll_ST0(CPUX86State *env, int64_t val) 264 { 265 int new_fpstt; 266 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 267 268 new_fpstt = (env->fpstt - 1) & 7; 269 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 270 env->fpstt = new_fpstt; 271 env->fptags[new_fpstt] = 0; /* validate stack entry */ 272 273 set_floatx80_rounding_precision(old, &env->fp_status); 274 } 275 276 uint32_t helper_fsts_ST0(CPUX86State *env) 277 { 278 uint8_t old_flags = save_exception_flags(env); 279 union { 280 float32 f; 281 uint32_t i; 282 } u; 283 284 u.f = floatx80_to_float32(ST0, &env->fp_status); 285 merge_exception_flags(env, old_flags); 286 return u.i; 287 } 288 289 uint64_t helper_fstl_ST0(CPUX86State *env) 290 { 291 uint8_t old_flags = save_exception_flags(env); 292 union { 293 float64 f; 294 uint64_t i; 295 } u; 296 297 u.f = floatx80_to_float64(ST0, &env->fp_status); 298 merge_exception_flags(env, old_flags); 299 return u.i; 300 } 301 302 int32_t helper_fist_ST0(CPUX86State *env) 303 { 304 uint8_t old_flags = save_exception_flags(env); 305 int32_t val; 306 307 val = floatx80_to_int32(ST0, &env->fp_status); 308 if (val != (int16_t)val) { 309 set_float_exception_flags(float_flag_invalid, &env->fp_status); 310 val = -32768; 311 } 312 merge_exception_flags(env, old_flags); 313 return val; 314 } 315 316 int32_t helper_fistl_ST0(CPUX86State *env) 317 { 318 uint8_t old_flags = save_exception_flags(env); 319 int32_t val; 320 321 val = floatx80_to_int32(ST0, &env->fp_status); 322 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 323 val = 0x80000000; 324 } 325 merge_exception_flags(env, old_flags); 326 return val; 327 } 328 329 int64_t helper_fistll_ST0(CPUX86State *env) 330 { 331 uint8_t old_flags = save_exception_flags(env); 332 int64_t val; 333 334 val = floatx80_to_int64(ST0, &env->fp_status); 335 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 336 val = 0x8000000000000000ULL; 337 } 338 merge_exception_flags(env, old_flags); 339 return val; 340 } 341 342 int32_t helper_fistt_ST0(CPUX86State *env) 343 { 344 uint8_t old_flags = save_exception_flags(env); 345 int32_t val; 346 347 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 348 if (val != (int16_t)val) { 349 set_float_exception_flags(float_flag_invalid, &env->fp_status); 350 val = -32768; 351 } 352 merge_exception_flags(env, old_flags); 353 return val; 354 } 355 356 int32_t helper_fisttl_ST0(CPUX86State *env) 357 { 358 uint8_t old_flags = save_exception_flags(env); 359 int32_t val; 360 361 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 362 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 363 val = 0x80000000; 364 } 365 merge_exception_flags(env, old_flags); 366 return val; 367 } 368 369 int64_t helper_fisttll_ST0(CPUX86State *env) 370 { 371 uint8_t old_flags = save_exception_flags(env); 372 int64_t val; 373 374 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 375 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 376 val = 0x8000000000000000ULL; 377 } 378 merge_exception_flags(env, old_flags); 379 return val; 380 } 381 382 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 383 { 384 int new_fpstt; 385 386 new_fpstt = (env->fpstt - 1) & 7; 387 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 388 env->fpstt = new_fpstt; 389 env->fptags[new_fpstt] = 0; /* validate stack entry */ 390 } 391 392 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 393 { 394 do_fstt(env, ST0, ptr, GETPC()); 395 } 396 397 void helper_fpush(CPUX86State *env) 398 { 399 fpush(env); 400 } 401 402 void helper_fpop(CPUX86State *env) 403 { 404 fpop(env); 405 } 406 407 void helper_fdecstp(CPUX86State *env) 408 { 409 env->fpstt = (env->fpstt - 1) & 7; 410 env->fpus &= ~0x4700; 411 } 412 413 void helper_fincstp(CPUX86State *env) 414 { 415 env->fpstt = (env->fpstt + 1) & 7; 416 env->fpus &= ~0x4700; 417 } 418 419 /* FPU move */ 420 421 void helper_ffree_STN(CPUX86State *env, int st_index) 422 { 423 env->fptags[(env->fpstt + st_index) & 7] = 1; 424 } 425 426 void helper_fmov_ST0_FT0(CPUX86State *env) 427 { 428 ST0 = FT0; 429 } 430 431 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 432 { 433 FT0 = ST(st_index); 434 } 435 436 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 437 { 438 ST0 = ST(st_index); 439 } 440 441 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 442 { 443 ST(st_index) = ST0; 444 } 445 446 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 447 { 448 floatx80 tmp; 449 450 tmp = ST(st_index); 451 ST(st_index) = ST0; 452 ST0 = tmp; 453 } 454 455 /* FPU operations */ 456 457 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 458 459 void helper_fcom_ST0_FT0(CPUX86State *env) 460 { 461 uint8_t old_flags = save_exception_flags(env); 462 FloatRelation ret; 463 464 ret = floatx80_compare(ST0, FT0, &env->fp_status); 465 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 466 merge_exception_flags(env, old_flags); 467 } 468 469 void helper_fucom_ST0_FT0(CPUX86State *env) 470 { 471 uint8_t old_flags = save_exception_flags(env); 472 FloatRelation ret; 473 474 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 475 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 476 merge_exception_flags(env, old_flags); 477 } 478 479 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 480 481 void helper_fcomi_ST0_FT0(CPUX86State *env) 482 { 483 uint8_t old_flags = save_exception_flags(env); 484 int eflags; 485 FloatRelation ret; 486 487 ret = floatx80_compare(ST0, FT0, &env->fp_status); 488 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 489 CC_SRC = eflags | fcomi_ccval[ret + 1]; 490 CC_OP = CC_OP_EFLAGS; 491 merge_exception_flags(env, old_flags); 492 } 493 494 void helper_fucomi_ST0_FT0(CPUX86State *env) 495 { 496 uint8_t old_flags = save_exception_flags(env); 497 int eflags; 498 FloatRelation ret; 499 500 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 501 eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); 502 CC_SRC = eflags | fcomi_ccval[ret + 1]; 503 CC_OP = CC_OP_EFLAGS; 504 merge_exception_flags(env, old_flags); 505 } 506 507 void helper_fadd_ST0_FT0(CPUX86State *env) 508 { 509 uint8_t old_flags = save_exception_flags(env); 510 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 511 merge_exception_flags(env, old_flags); 512 } 513 514 void helper_fmul_ST0_FT0(CPUX86State *env) 515 { 516 uint8_t old_flags = save_exception_flags(env); 517 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 518 merge_exception_flags(env, old_flags); 519 } 520 521 void helper_fsub_ST0_FT0(CPUX86State *env) 522 { 523 uint8_t old_flags = save_exception_flags(env); 524 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 525 merge_exception_flags(env, old_flags); 526 } 527 528 void helper_fsubr_ST0_FT0(CPUX86State *env) 529 { 530 uint8_t old_flags = save_exception_flags(env); 531 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 532 merge_exception_flags(env, old_flags); 533 } 534 535 void helper_fdiv_ST0_FT0(CPUX86State *env) 536 { 537 ST0 = helper_fdiv(env, ST0, FT0); 538 } 539 540 void helper_fdivr_ST0_FT0(CPUX86State *env) 541 { 542 ST0 = helper_fdiv(env, FT0, ST0); 543 } 544 545 /* fp operations between STN and ST0 */ 546 547 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 548 { 549 uint8_t old_flags = save_exception_flags(env); 550 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 551 merge_exception_flags(env, old_flags); 552 } 553 554 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 555 { 556 uint8_t old_flags = save_exception_flags(env); 557 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 558 merge_exception_flags(env, old_flags); 559 } 560 561 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 562 { 563 uint8_t old_flags = save_exception_flags(env); 564 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 565 merge_exception_flags(env, old_flags); 566 } 567 568 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 569 { 570 uint8_t old_flags = save_exception_flags(env); 571 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 572 merge_exception_flags(env, old_flags); 573 } 574 575 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 576 { 577 floatx80 *p; 578 579 p = &ST(st_index); 580 *p = helper_fdiv(env, *p, ST0); 581 } 582 583 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 584 { 585 floatx80 *p; 586 587 p = &ST(st_index); 588 *p = helper_fdiv(env, ST0, *p); 589 } 590 591 /* misc FPU operations */ 592 void helper_fchs_ST0(CPUX86State *env) 593 { 594 ST0 = floatx80_chs(ST0); 595 } 596 597 void helper_fabs_ST0(CPUX86State *env) 598 { 599 ST0 = floatx80_abs(ST0); 600 } 601 602 void helper_fld1_ST0(CPUX86State *env) 603 { 604 ST0 = floatx80_one; 605 } 606 607 void helper_fldl2t_ST0(CPUX86State *env) 608 { 609 switch (env->fpuc & FPU_RC_MASK) { 610 case FPU_RC_UP: 611 ST0 = floatx80_l2t_u; 612 break; 613 default: 614 ST0 = floatx80_l2t; 615 break; 616 } 617 } 618 619 void helper_fldl2e_ST0(CPUX86State *env) 620 { 621 switch (env->fpuc & FPU_RC_MASK) { 622 case FPU_RC_DOWN: 623 case FPU_RC_CHOP: 624 ST0 = floatx80_l2e_d; 625 break; 626 default: 627 ST0 = floatx80_l2e; 628 break; 629 } 630 } 631 632 void helper_fldpi_ST0(CPUX86State *env) 633 { 634 switch (env->fpuc & FPU_RC_MASK) { 635 case FPU_RC_DOWN: 636 case FPU_RC_CHOP: 637 ST0 = floatx80_pi_d; 638 break; 639 default: 640 ST0 = floatx80_pi; 641 break; 642 } 643 } 644 645 void helper_fldlg2_ST0(CPUX86State *env) 646 { 647 switch (env->fpuc & FPU_RC_MASK) { 648 case FPU_RC_DOWN: 649 case FPU_RC_CHOP: 650 ST0 = floatx80_lg2_d; 651 break; 652 default: 653 ST0 = floatx80_lg2; 654 break; 655 } 656 } 657 658 void helper_fldln2_ST0(CPUX86State *env) 659 { 660 switch (env->fpuc & FPU_RC_MASK) { 661 case FPU_RC_DOWN: 662 case FPU_RC_CHOP: 663 ST0 = floatx80_ln2_d; 664 break; 665 default: 666 ST0 = floatx80_ln2; 667 break; 668 } 669 } 670 671 void helper_fldz_ST0(CPUX86State *env) 672 { 673 ST0 = floatx80_zero; 674 } 675 676 void helper_fldz_FT0(CPUX86State *env) 677 { 678 FT0 = floatx80_zero; 679 } 680 681 uint32_t helper_fnstsw(CPUX86State *env) 682 { 683 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 684 } 685 686 uint32_t helper_fnstcw(CPUX86State *env) 687 { 688 return env->fpuc; 689 } 690 691 static void set_x86_rounding_mode(unsigned mode, float_status *status) 692 { 693 static FloatRoundMode x86_round_mode[4] = { 694 float_round_nearest_even, 695 float_round_down, 696 float_round_up, 697 float_round_to_zero 698 }; 699 assert(mode < ARRAY_SIZE(x86_round_mode)); 700 set_float_rounding_mode(x86_round_mode[mode], status); 701 } 702 703 void update_fp_status(CPUX86State *env) 704 { 705 int rnd_mode; 706 FloatX80RoundPrec rnd_prec; 707 708 /* set rounding mode */ 709 rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 710 set_x86_rounding_mode(rnd_mode, &env->fp_status); 711 712 switch ((env->fpuc >> 8) & 3) { 713 case 0: 714 rnd_prec = floatx80_precision_s; 715 break; 716 case 2: 717 rnd_prec = floatx80_precision_d; 718 break; 719 case 3: 720 default: 721 rnd_prec = floatx80_precision_x; 722 break; 723 } 724 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 725 } 726 727 void helper_fldcw(CPUX86State *env, uint32_t val) 728 { 729 cpu_set_fpuc(env, val); 730 } 731 732 void helper_fclex(CPUX86State *env) 733 { 734 env->fpus &= 0x7f00; 735 } 736 737 void helper_fwait(CPUX86State *env) 738 { 739 if (env->fpus & FPUS_SE) { 740 fpu_raise_exception(env, GETPC()); 741 } 742 } 743 744 static void do_fninit(CPUX86State *env) 745 { 746 env->fpus = 0; 747 env->fpstt = 0; 748 env->fpcs = 0; 749 env->fpds = 0; 750 env->fpip = 0; 751 env->fpdp = 0; 752 cpu_set_fpuc(env, 0x37f); 753 env->fptags[0] = 1; 754 env->fptags[1] = 1; 755 env->fptags[2] = 1; 756 env->fptags[3] = 1; 757 env->fptags[4] = 1; 758 env->fptags[5] = 1; 759 env->fptags[6] = 1; 760 env->fptags[7] = 1; 761 } 762 763 void helper_fninit(CPUX86State *env) 764 { 765 do_fninit(env); 766 } 767 768 /* BCD ops */ 769 770 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 771 { 772 floatx80 tmp; 773 uint64_t val; 774 unsigned int v; 775 int i; 776 777 val = 0; 778 for (i = 8; i >= 0; i--) { 779 v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 780 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 781 } 782 tmp = int64_to_floatx80(val, &env->fp_status); 783 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 784 tmp = floatx80_chs(tmp); 785 } 786 fpush(env); 787 ST0 = tmp; 788 } 789 790 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 791 { 792 uint8_t old_flags = save_exception_flags(env); 793 int v; 794 target_ulong mem_ref, mem_end; 795 int64_t val; 796 CPU_LDoubleU temp; 797 798 temp.d = ST0; 799 800 val = floatx80_to_int64(ST0, &env->fp_status); 801 mem_ref = ptr; 802 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 803 set_float_exception_flags(float_flag_invalid, &env->fp_status); 804 while (mem_ref < ptr + 7) { 805 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 806 } 807 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 808 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 809 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 810 merge_exception_flags(env, old_flags); 811 return; 812 } 813 mem_end = mem_ref + 9; 814 if (SIGND(temp)) { 815 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 816 val = -val; 817 } else { 818 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 819 } 820 while (mem_ref < mem_end) { 821 if (val == 0) { 822 break; 823 } 824 v = val % 100; 825 val = val / 100; 826 v = ((v / 10) << 4) | (v % 10); 827 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 828 } 829 while (mem_ref < mem_end) { 830 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 831 } 832 merge_exception_flags(env, old_flags); 833 } 834 835 /* 128-bit significand of log(2). */ 836 #define ln2_sig_high 0xb17217f7d1cf79abULL 837 #define ln2_sig_low 0xc9e3b39803f2f6afULL 838 839 /* 840 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 841 * the interval [-1/64, 1/64]. 842 */ 843 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 844 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 845 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 846 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 847 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 848 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 849 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 850 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 851 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 852 853 struct f2xm1_data { 854 /* 855 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 856 * are very close to exact floatx80 values. 857 */ 858 floatx80 t; 859 /* The value of 2^t. */ 860 floatx80 exp2; 861 /* The value of 2^t - 1. */ 862 floatx80 exp2m1; 863 }; 864 865 static const struct f2xm1_data f2xm1_table[65] = { 866 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 867 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 868 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 869 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 870 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 871 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 872 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 873 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 874 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 875 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 876 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 877 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 878 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 879 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 880 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 881 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 882 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 883 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 884 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 885 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 886 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 887 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 888 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 889 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 890 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 891 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 892 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 893 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 894 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 895 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 896 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 897 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 898 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 899 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 900 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 901 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 902 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 903 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 904 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 905 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 906 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 907 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 908 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 909 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 910 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 911 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 912 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 913 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 914 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 915 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 916 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 917 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 918 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 919 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 920 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 921 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 922 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 923 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 924 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 925 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 926 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 927 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 928 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 929 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 930 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 931 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 932 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 933 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 934 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 935 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 936 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 937 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 938 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 939 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 940 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 941 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 942 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 943 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 944 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 945 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 946 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 947 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 948 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 949 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 950 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 951 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 952 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 953 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 954 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 955 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 956 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 957 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 958 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 959 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 960 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 961 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 962 { floatx80_zero_init, 963 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 964 floatx80_zero_init }, 965 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 966 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 967 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 968 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 969 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 970 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 971 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 972 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 973 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 974 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 975 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 976 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 977 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 978 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 979 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 980 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 981 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 982 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 983 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 984 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 985 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 986 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 987 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 988 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 989 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 990 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 991 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 992 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 993 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 994 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 995 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 996 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 997 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 998 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 999 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1000 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1001 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1002 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1003 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1004 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1005 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1006 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1007 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1008 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1009 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1010 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1011 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1012 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1013 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1014 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1015 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1016 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1017 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1018 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1019 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1020 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1021 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1022 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1023 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1024 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1025 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1026 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1027 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1028 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1029 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1030 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1031 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1032 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1033 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1034 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1035 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1036 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1037 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1038 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1039 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1040 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1041 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1042 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1043 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1044 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1045 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1046 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1047 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1048 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1049 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1050 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1051 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1052 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1053 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1054 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1055 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1056 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1057 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1058 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1059 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1060 make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1061 }; 1062 1063 void helper_f2xm1(CPUX86State *env) 1064 { 1065 uint8_t old_flags = save_exception_flags(env); 1066 uint64_t sig = extractFloatx80Frac(ST0); 1067 int32_t exp = extractFloatx80Exp(ST0); 1068 bool sign = extractFloatx80Sign(ST0); 1069 1070 if (floatx80_invalid_encoding(ST0)) { 1071 float_raise(float_flag_invalid, &env->fp_status); 1072 ST0 = floatx80_default_nan(&env->fp_status); 1073 } else if (floatx80_is_any_nan(ST0)) { 1074 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1075 float_raise(float_flag_invalid, &env->fp_status); 1076 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1077 } 1078 } else if (exp > 0x3fff || 1079 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1080 /* Out of range for the instruction, treat as invalid. */ 1081 float_raise(float_flag_invalid, &env->fp_status); 1082 ST0 = floatx80_default_nan(&env->fp_status); 1083 } else if (exp == 0x3fff) { 1084 /* Argument 1 or -1, exact result 1 or -0.5. */ 1085 if (sign) { 1086 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1087 } 1088 } else if (exp < 0x3fb0) { 1089 if (!floatx80_is_zero(ST0)) { 1090 /* 1091 * Multiplying the argument by an extra-precision version 1092 * of log(2) is sufficiently precise. Zero arguments are 1093 * returned unchanged. 1094 */ 1095 uint64_t sig0, sig1, sig2; 1096 if (exp == 0) { 1097 normalizeFloatx80Subnormal(sig, &exp, &sig); 1098 } 1099 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1100 &sig2); 1101 /* This result is inexact. */ 1102 sig1 |= 1; 1103 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1104 sign, exp, sig0, sig1, 1105 &env->fp_status); 1106 } 1107 } else { 1108 floatx80 tmp, y, accum; 1109 bool asign, bsign; 1110 int32_t n, aexp, bexp; 1111 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1112 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1113 FloatX80RoundPrec save_prec = 1114 env->fp_status.floatx80_rounding_precision; 1115 env->fp_status.float_rounding_mode = float_round_nearest_even; 1116 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1117 1118 /* Find the nearest multiple of 1/32 to the argument. */ 1119 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1120 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1121 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1122 1123 if (floatx80_is_zero(y)) { 1124 /* 1125 * Use the value of 2^t - 1 from the table, to avoid 1126 * needing to special-case zero as a result of 1127 * multiplication below. 1128 */ 1129 ST0 = f2xm1_table[n].t; 1130 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1131 env->fp_status.float_rounding_mode = save_mode; 1132 } else { 1133 /* 1134 * Compute the lower parts of a polynomial expansion for 1135 * (2^y - 1) / y. 1136 */ 1137 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1138 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1139 accum = floatx80_mul(accum, y, &env->fp_status); 1140 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1141 accum = floatx80_mul(accum, y, &env->fp_status); 1142 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1143 accum = floatx80_mul(accum, y, &env->fp_status); 1144 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1145 accum = floatx80_mul(accum, y, &env->fp_status); 1146 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1147 accum = floatx80_mul(accum, y, &env->fp_status); 1148 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1149 accum = floatx80_mul(accum, y, &env->fp_status); 1150 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1151 1152 /* 1153 * The full polynomial expansion is f2xm1_coeff_0 + accum 1154 * (where accum has much lower magnitude, and so, in 1155 * particular, carry out of the addition is not possible). 1156 * (This expansion is only accurate to about 70 bits, not 1157 * 128 bits.) 1158 */ 1159 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1160 asign = extractFloatx80Sign(f2xm1_coeff_0); 1161 shift128RightJamming(extractFloatx80Frac(accum), 0, 1162 aexp - extractFloatx80Exp(accum), 1163 &asig0, &asig1); 1164 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1165 bsig1 = 0; 1166 if (asign == extractFloatx80Sign(accum)) { 1167 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1168 } else { 1169 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1170 } 1171 /* And thus compute an approximation to 2^y - 1. */ 1172 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1173 &asig0, &asig1, &asig2); 1174 aexp += extractFloatx80Exp(y) - 0x3ffe; 1175 asign ^= extractFloatx80Sign(y); 1176 if (n != 32) { 1177 /* 1178 * Multiply this by the precomputed value of 2^t and 1179 * add that of 2^t - 1. 1180 */ 1181 mul128By64To192(asig0, asig1, 1182 extractFloatx80Frac(f2xm1_table[n].exp2), 1183 &asig0, &asig1, &asig2); 1184 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1185 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1186 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1187 bsig1 = 0; 1188 if (bexp < aexp) { 1189 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1190 &bsig0, &bsig1); 1191 } else if (aexp < bexp) { 1192 shift128RightJamming(asig0, asig1, bexp - aexp, 1193 &asig0, &asig1); 1194 aexp = bexp; 1195 } 1196 /* The sign of 2^t - 1 is always that of the result. */ 1197 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1198 if (asign == bsign) { 1199 /* Avoid possible carry out of the addition. */ 1200 shift128RightJamming(asig0, asig1, 1, 1201 &asig0, &asig1); 1202 shift128RightJamming(bsig0, bsig1, 1, 1203 &bsig0, &bsig1); 1204 ++aexp; 1205 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1206 } else { 1207 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1208 asign = bsign; 1209 } 1210 } 1211 env->fp_status.float_rounding_mode = save_mode; 1212 /* This result is inexact. */ 1213 asig1 |= 1; 1214 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1215 asign, aexp, asig0, asig1, 1216 &env->fp_status); 1217 } 1218 1219 env->fp_status.floatx80_rounding_precision = save_prec; 1220 } 1221 merge_exception_flags(env, old_flags); 1222 } 1223 1224 void helper_fptan(CPUX86State *env) 1225 { 1226 double fptemp = floatx80_to_double(env, ST0); 1227 1228 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1229 env->fpus |= 0x400; 1230 } else { 1231 fptemp = tan(fptemp); 1232 ST0 = double_to_floatx80(env, fptemp); 1233 fpush(env); 1234 ST0 = floatx80_one; 1235 env->fpus &= ~0x400; /* C2 <-- 0 */ 1236 /* the above code is for |arg| < 2**52 only */ 1237 } 1238 } 1239 1240 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */ 1241 #define pi_4_exp 0x3ffe 1242 #define pi_4_sig_high 0xc90fdaa22168c234ULL 1243 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL 1244 #define pi_2_exp 0x3fff 1245 #define pi_2_sig_high 0xc90fdaa22168c234ULL 1246 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL 1247 #define pi_34_exp 0x4000 1248 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL 1249 #define pi_34_sig_low 0x9394c9e8a0a5159dULL 1250 #define pi_exp 0x4000 1251 #define pi_sig_high 0xc90fdaa22168c234ULL 1252 #define pi_sig_low 0xc4c6628b80dc1cd1ULL 1253 1254 /* 1255 * Polynomial coefficients for an approximation to atan(x), with only 1256 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike 1257 * for some other approximations, no low part is needed for the first 1258 * coefficient here to achieve a sufficiently accurate result, because 1259 * the coefficient in this minimax approximation is very close to 1260 * exactly 1.) 1261 */ 1262 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) 1263 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) 1264 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) 1265 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) 1266 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) 1267 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) 1268 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) 1269 1270 struct fpatan_data { 1271 /* High and low parts of atan(x). */ 1272 floatx80 atan_high, atan_low; 1273 }; 1274 1275 static const struct fpatan_data fpatan_table[9] = { 1276 { floatx80_zero_init, 1277 floatx80_zero_init }, 1278 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), 1279 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, 1280 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), 1281 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, 1282 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), 1283 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, 1284 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), 1285 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, 1286 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), 1287 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, 1288 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), 1289 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, 1290 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), 1291 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, 1292 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), 1293 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, 1294 }; 1295 1296 void helper_fpatan(CPUX86State *env) 1297 { 1298 uint8_t old_flags = save_exception_flags(env); 1299 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1300 int32_t arg0_exp = extractFloatx80Exp(ST0); 1301 bool arg0_sign = extractFloatx80Sign(ST0); 1302 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1303 int32_t arg1_exp = extractFloatx80Exp(ST1); 1304 bool arg1_sign = extractFloatx80Sign(ST1); 1305 1306 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1307 float_raise(float_flag_invalid, &env->fp_status); 1308 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1309 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1310 float_raise(float_flag_invalid, &env->fp_status); 1311 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1312 } else if (floatx80_invalid_encoding(ST0) || 1313 floatx80_invalid_encoding(ST1)) { 1314 float_raise(float_flag_invalid, &env->fp_status); 1315 ST1 = floatx80_default_nan(&env->fp_status); 1316 } else if (floatx80_is_any_nan(ST0)) { 1317 ST1 = ST0; 1318 } else if (floatx80_is_any_nan(ST1)) { 1319 /* Pass this NaN through. */ 1320 } else if (floatx80_is_zero(ST1) && !arg0_sign) { 1321 /* Pass this zero through. */ 1322 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || 1323 arg0_exp - arg1_exp >= 80) && 1324 !arg0_sign) { 1325 /* 1326 * Dividing ST1 by ST0 gives the correct result up to 1327 * rounding, and avoids spurious underflow exceptions that 1328 * might result from passing some small values through the 1329 * polynomial approximation, but if a finite nonzero result of 1330 * division is exact, the result of fpatan is still inexact 1331 * (and underflowing where appropriate). 1332 */ 1333 FloatX80RoundPrec save_prec = 1334 env->fp_status.floatx80_rounding_precision; 1335 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1336 ST1 = floatx80_div(ST1, ST0, &env->fp_status); 1337 env->fp_status.floatx80_rounding_precision = save_prec; 1338 if (!floatx80_is_zero(ST1) && 1339 !(get_float_exception_flags(&env->fp_status) & 1340 float_flag_inexact)) { 1341 /* 1342 * The mathematical result is very slightly closer to zero 1343 * than this exact result. Round a value with the 1344 * significand adjusted accordingly to get the correct 1345 * exceptions, and possibly an adjusted result depending 1346 * on the rounding mode. 1347 */ 1348 uint64_t sig = extractFloatx80Frac(ST1); 1349 int32_t exp = extractFloatx80Exp(ST1); 1350 bool sign = extractFloatx80Sign(ST1); 1351 if (exp == 0) { 1352 normalizeFloatx80Subnormal(sig, &exp, &sig); 1353 } 1354 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1355 sign, exp, sig - 1, 1356 -1, &env->fp_status); 1357 } 1358 } else { 1359 /* The result is inexact. */ 1360 bool rsign = arg1_sign; 1361 int32_t rexp; 1362 uint64_t rsig0, rsig1; 1363 if (floatx80_is_zero(ST1)) { 1364 /* 1365 * ST0 is negative. The result is pi with the sign of 1366 * ST1. 1367 */ 1368 rexp = pi_exp; 1369 rsig0 = pi_sig_high; 1370 rsig1 = pi_sig_low; 1371 } else if (floatx80_is_infinity(ST1)) { 1372 if (floatx80_is_infinity(ST0)) { 1373 if (arg0_sign) { 1374 rexp = pi_34_exp; 1375 rsig0 = pi_34_sig_high; 1376 rsig1 = pi_34_sig_low; 1377 } else { 1378 rexp = pi_4_exp; 1379 rsig0 = pi_4_sig_high; 1380 rsig1 = pi_4_sig_low; 1381 } 1382 } else { 1383 rexp = pi_2_exp; 1384 rsig0 = pi_2_sig_high; 1385 rsig1 = pi_2_sig_low; 1386 } 1387 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { 1388 rexp = pi_2_exp; 1389 rsig0 = pi_2_sig_high; 1390 rsig1 = pi_2_sig_low; 1391 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { 1392 /* ST0 is negative. */ 1393 rexp = pi_exp; 1394 rsig0 = pi_sig_high; 1395 rsig1 = pi_sig_low; 1396 } else { 1397 /* 1398 * ST0 and ST1 are finite, nonzero and with exponents not 1399 * too far apart. 1400 */ 1401 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; 1402 int32_t azexp, axexp; 1403 bool adj_sub, ysign, zsign; 1404 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; 1405 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; 1406 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; 1407 uint64_t azsig0, azsig1; 1408 uint64_t azsig2, azsig3, axsig0, axsig1; 1409 floatx80 x8; 1410 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1411 FloatX80RoundPrec save_prec = 1412 env->fp_status.floatx80_rounding_precision; 1413 env->fp_status.float_rounding_mode = float_round_nearest_even; 1414 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1415 1416 if (arg0_exp == 0) { 1417 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 1418 } 1419 if (arg1_exp == 0) { 1420 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 1421 } 1422 if (arg0_exp > arg1_exp || 1423 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { 1424 /* Work with abs(ST1) / abs(ST0). */ 1425 num_exp = arg1_exp; 1426 num_sig = arg1_sig; 1427 den_exp = arg0_exp; 1428 den_sig = arg0_sig; 1429 if (arg0_sign) { 1430 /* The result is subtracted from pi. */ 1431 adj_exp = pi_exp; 1432 adj_sig0 = pi_sig_high; 1433 adj_sig1 = pi_sig_low; 1434 adj_sub = true; 1435 } else { 1436 /* The result is used as-is. */ 1437 adj_exp = 0; 1438 adj_sig0 = 0; 1439 adj_sig1 = 0; 1440 adj_sub = false; 1441 } 1442 } else { 1443 /* Work with abs(ST0) / abs(ST1). */ 1444 num_exp = arg0_exp; 1445 num_sig = arg0_sig; 1446 den_exp = arg1_exp; 1447 den_sig = arg1_sig; 1448 /* The result is added to or subtracted from pi/2. */ 1449 adj_exp = pi_2_exp; 1450 adj_sig0 = pi_2_sig_high; 1451 adj_sig1 = pi_2_sig_low; 1452 adj_sub = !arg0_sign; 1453 } 1454 1455 /* 1456 * Compute x = num/den, where 0 < x <= 1 and x is not too 1457 * small. 1458 */ 1459 xexp = num_exp - den_exp + 0x3ffe; 1460 remsig0 = num_sig; 1461 remsig1 = 0; 1462 if (den_sig <= remsig0) { 1463 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1464 ++xexp; 1465 } 1466 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); 1467 mul64To128(den_sig, xsig0, &msig0, &msig1); 1468 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); 1469 while ((int64_t) remsig0 < 0) { 1470 --xsig0; 1471 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); 1472 } 1473 xsig1 = estimateDiv128To64(remsig1, 0, den_sig); 1474 /* 1475 * No need to correct any estimation error in xsig1; even 1476 * with such error, it is accurate enough. 1477 */ 1478 1479 /* 1480 * Split x as x = t + y, where t = n/8 is the nearest 1481 * multiple of 1/8 to x. 1482 */ 1483 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1484 false, xexp + 3, xsig0, 1485 xsig1, &env->fp_status); 1486 n = floatx80_to_int32(x8, &env->fp_status); 1487 if (n == 0) { 1488 ysign = false; 1489 yexp = xexp; 1490 ysig0 = xsig0; 1491 ysig1 = xsig1; 1492 texp = 0; 1493 tsig = 0; 1494 } else { 1495 int shift = clz32(n) + 32; 1496 texp = 0x403b - shift; 1497 tsig = n; 1498 tsig <<= shift; 1499 if (texp == xexp) { 1500 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); 1501 if ((int64_t) ysig0 >= 0) { 1502 ysign = false; 1503 if (ysig0 == 0) { 1504 if (ysig1 == 0) { 1505 yexp = 0; 1506 } else { 1507 shift = clz64(ysig1) + 64; 1508 yexp = xexp - shift; 1509 shift128Left(ysig0, ysig1, shift, 1510 &ysig0, &ysig1); 1511 } 1512 } else { 1513 shift = clz64(ysig0); 1514 yexp = xexp - shift; 1515 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1516 } 1517 } else { 1518 ysign = true; 1519 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); 1520 if (ysig0 == 0) { 1521 shift = clz64(ysig1) + 64; 1522 } else { 1523 shift = clz64(ysig0); 1524 } 1525 yexp = xexp - shift; 1526 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1527 } 1528 } else { 1529 /* 1530 * t's exponent must be greater than x's because t 1531 * is positive and the nearest multiple of 1/8 to 1532 * x, and if x has a greater exponent, the power 1533 * of 2 with that exponent is also a multiple of 1534 * 1/8. 1535 */ 1536 uint64_t usig0, usig1; 1537 shift128RightJamming(xsig0, xsig1, texp - xexp, 1538 &usig0, &usig1); 1539 ysign = true; 1540 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); 1541 if (ysig0 == 0) { 1542 shift = clz64(ysig1) + 64; 1543 } else { 1544 shift = clz64(ysig0); 1545 } 1546 yexp = texp - shift; 1547 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1548 } 1549 } 1550 1551 /* 1552 * Compute z = y/(1+tx), so arctan(x) = arctan(t) + 1553 * arctan(z). 1554 */ 1555 zsign = ysign; 1556 if (texp == 0 || yexp == 0) { 1557 zexp = yexp; 1558 zsig0 = ysig0; 1559 zsig1 = ysig1; 1560 } else { 1561 /* 1562 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 1563 */ 1564 int32_t dexp = texp + xexp - 0x3ffe; 1565 uint64_t dsig0, dsig1, dsig2; 1566 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); 1567 /* 1568 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 1569 * bit). Add 1 to produce the denominator 1+tx. 1570 */ 1571 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, 1572 &dsig0, &dsig1); 1573 dsig0 |= 0x8000000000000000ULL; 1574 zexp = yexp - 1; 1575 remsig0 = ysig0; 1576 remsig1 = ysig1; 1577 remsig2 = 0; 1578 if (dsig0 <= remsig0) { 1579 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1580 ++zexp; 1581 } 1582 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); 1583 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); 1584 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, 1585 &remsig0, &remsig1, &remsig2); 1586 while ((int64_t) remsig0 < 0) { 1587 --zsig0; 1588 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, 1589 &remsig0, &remsig1, &remsig2); 1590 } 1591 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); 1592 /* No need to correct any estimation error in zsig1. */ 1593 } 1594 1595 if (zexp == 0) { 1596 azexp = 0; 1597 azsig0 = 0; 1598 azsig1 = 0; 1599 } else { 1600 floatx80 z2, accum; 1601 uint64_t z2sig0, z2sig1, z2sig2, z2sig3; 1602 /* Compute z^2. */ 1603 mul128To256(zsig0, zsig1, zsig0, zsig1, 1604 &z2sig0, &z2sig1, &z2sig2, &z2sig3); 1605 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1606 zexp + zexp - 0x3ffe, 1607 z2sig0, z2sig1, 1608 &env->fp_status); 1609 1610 /* Compute the lower parts of the polynomial expansion. */ 1611 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); 1612 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); 1613 accum = floatx80_mul(accum, z2, &env->fp_status); 1614 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); 1615 accum = floatx80_mul(accum, z2, &env->fp_status); 1616 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); 1617 accum = floatx80_mul(accum, z2, &env->fp_status); 1618 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); 1619 accum = floatx80_mul(accum, z2, &env->fp_status); 1620 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); 1621 accum = floatx80_mul(accum, z2, &env->fp_status); 1622 1623 /* 1624 * The full polynomial expansion is z*(fpatan_coeff_0 + accum). 1625 * fpatan_coeff_0 is 1, and accum is negative and much smaller. 1626 */ 1627 aexp = extractFloatx80Exp(fpatan_coeff_0); 1628 shift128RightJamming(extractFloatx80Frac(accum), 0, 1629 aexp - extractFloatx80Exp(accum), 1630 &asig0, &asig1); 1631 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, 1632 &asig0, &asig1); 1633 /* Multiply by z to compute arctan(z). */ 1634 azexp = aexp + zexp - 0x3ffe; 1635 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, 1636 &azsig2, &azsig3); 1637 } 1638 1639 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ 1640 if (texp == 0) { 1641 /* z is positive. */ 1642 axexp = azexp; 1643 axsig0 = azsig0; 1644 axsig1 = azsig1; 1645 } else { 1646 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); 1647 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); 1648 uint64_t low_sig0 = 1649 extractFloatx80Frac(fpatan_table[n].atan_low); 1650 uint64_t low_sig1 = 0; 1651 axexp = extractFloatx80Exp(fpatan_table[n].atan_high); 1652 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); 1653 axsig1 = 0; 1654 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, 1655 &low_sig0, &low_sig1); 1656 if (low_sign) { 1657 sub128(axsig0, axsig1, low_sig0, low_sig1, 1658 &axsig0, &axsig1); 1659 } else { 1660 add128(axsig0, axsig1, low_sig0, low_sig1, 1661 &axsig0, &axsig1); 1662 } 1663 if (azexp >= axexp) { 1664 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, 1665 &axsig0, &axsig1); 1666 axexp = azexp + 1; 1667 shift128RightJamming(azsig0, azsig1, 1, 1668 &azsig0, &azsig1); 1669 } else { 1670 shift128RightJamming(axsig0, axsig1, 1, 1671 &axsig0, &axsig1); 1672 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, 1673 &azsig0, &azsig1); 1674 ++axexp; 1675 } 1676 if (zsign) { 1677 sub128(axsig0, axsig1, azsig0, azsig1, 1678 &axsig0, &axsig1); 1679 } else { 1680 add128(axsig0, axsig1, azsig0, azsig1, 1681 &axsig0, &axsig1); 1682 } 1683 } 1684 1685 if (adj_exp == 0) { 1686 rexp = axexp; 1687 rsig0 = axsig0; 1688 rsig1 = axsig1; 1689 } else { 1690 /* 1691 * Add or subtract arctan(x) (exponent axexp, 1692 * significand axsig0 and axsig1, positive, not 1693 * necessarily normalized) to the number given by 1694 * adj_exp, adj_sig0 and adj_sig1, according to 1695 * adj_sub. 1696 */ 1697 if (adj_exp >= axexp) { 1698 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1699 &axsig0, &axsig1); 1700 rexp = adj_exp + 1; 1701 shift128RightJamming(adj_sig0, adj_sig1, 1, 1702 &adj_sig0, &adj_sig1); 1703 } else { 1704 shift128RightJamming(axsig0, axsig1, 1, 1705 &axsig0, &axsig1); 1706 shift128RightJamming(adj_sig0, adj_sig1, 1707 axexp - adj_exp + 1, 1708 &adj_sig0, &adj_sig1); 1709 rexp = axexp + 1; 1710 } 1711 if (adj_sub) { 1712 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1713 &rsig0, &rsig1); 1714 } else { 1715 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1716 &rsig0, &rsig1); 1717 } 1718 } 1719 1720 env->fp_status.float_rounding_mode = save_mode; 1721 env->fp_status.floatx80_rounding_precision = save_prec; 1722 } 1723 /* This result is inexact. */ 1724 rsig1 |= 1; 1725 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1726 rsig0, rsig1, &env->fp_status); 1727 } 1728 1729 fpop(env); 1730 merge_exception_flags(env, old_flags); 1731 } 1732 1733 void helper_fxtract(CPUX86State *env) 1734 { 1735 uint8_t old_flags = save_exception_flags(env); 1736 CPU_LDoubleU temp; 1737 1738 temp.d = ST0; 1739 1740 if (floatx80_is_zero(ST0)) { 1741 /* Easy way to generate -inf and raising division by 0 exception */ 1742 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1743 &env->fp_status); 1744 fpush(env); 1745 ST0 = temp.d; 1746 } else if (floatx80_invalid_encoding(ST0)) { 1747 float_raise(float_flag_invalid, &env->fp_status); 1748 ST0 = floatx80_default_nan(&env->fp_status); 1749 fpush(env); 1750 ST0 = ST1; 1751 } else if (floatx80_is_any_nan(ST0)) { 1752 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1753 float_raise(float_flag_invalid, &env->fp_status); 1754 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1755 } 1756 fpush(env); 1757 ST0 = ST1; 1758 } else if (floatx80_is_infinity(ST0)) { 1759 fpush(env); 1760 ST0 = ST1; 1761 ST1 = floatx80_infinity; 1762 } else { 1763 int expdif; 1764 1765 if (EXPD(temp) == 0) { 1766 int shift = clz64(temp.l.lower); 1767 temp.l.lower <<= shift; 1768 expdif = 1 - EXPBIAS - shift; 1769 float_raise(float_flag_input_denormal, &env->fp_status); 1770 } else { 1771 expdif = EXPD(temp) - EXPBIAS; 1772 } 1773 /* DP exponent bias */ 1774 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1775 fpush(env); 1776 BIASEXPONENT(temp); 1777 ST0 = temp.d; 1778 } 1779 merge_exception_flags(env, old_flags); 1780 } 1781 1782 static void helper_fprem_common(CPUX86State *env, bool mod) 1783 { 1784 uint8_t old_flags = save_exception_flags(env); 1785 uint64_t quotient; 1786 CPU_LDoubleU temp0, temp1; 1787 int exp0, exp1, expdiff; 1788 1789 temp0.d = ST0; 1790 temp1.d = ST1; 1791 exp0 = EXPD(temp0); 1792 exp1 = EXPD(temp1); 1793 1794 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1795 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1796 exp0 == 0x7fff || exp1 == 0x7fff || 1797 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1798 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1799 } else { 1800 if (exp0 == 0) { 1801 exp0 = 1 - clz64(temp0.l.lower); 1802 } 1803 if (exp1 == 0) { 1804 exp1 = 1 - clz64(temp1.l.lower); 1805 } 1806 expdiff = exp0 - exp1; 1807 if (expdiff < 64) { 1808 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1809 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1810 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1811 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1812 } else { 1813 /* 1814 * Partial remainder. This choice of how many bits to 1815 * process at once is specified in AMD instruction set 1816 * manuals, and empirically is followed by Intel 1817 * processors as well; it ensures that the final remainder 1818 * operation in a loop does produce the correct low three 1819 * bits of the quotient. AMD manuals specify that the 1820 * flags other than C2 are cleared, and empirically Intel 1821 * processors clear them as well. 1822 */ 1823 int n = 32 + (expdiff % 32); 1824 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1825 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1826 env->fpus |= 0x400; /* C2 <-- 1 */ 1827 } 1828 } 1829 merge_exception_flags(env, old_flags); 1830 } 1831 1832 void helper_fprem1(CPUX86State *env) 1833 { 1834 helper_fprem_common(env, false); 1835 } 1836 1837 void helper_fprem(CPUX86State *env) 1838 { 1839 helper_fprem_common(env, true); 1840 } 1841 1842 /* 128-bit significand of log2(e). */ 1843 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1844 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1845 1846 /* 1847 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1848 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1849 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1850 * interval [sqrt(2)/2, sqrt(2)]. 1851 */ 1852 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1853 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1854 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1855 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1856 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1857 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1858 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1859 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1860 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1861 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1862 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1863 1864 /* 1865 * Compute an approximation of log2(1+arg), where 1+arg is in the 1866 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1867 * function is called, rounding precision is set to 80 and the 1868 * round-to-nearest mode is in effect. arg must not be exactly zero, 1869 * and must not be so close to zero that underflow might occur. 1870 */ 1871 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1872 uint64_t *sig0, uint64_t *sig1) 1873 { 1874 uint64_t arg0_sig = extractFloatx80Frac(arg); 1875 int32_t arg0_exp = extractFloatx80Exp(arg); 1876 bool arg0_sign = extractFloatx80Sign(arg); 1877 bool asign; 1878 int32_t dexp, texp, aexp; 1879 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1880 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1881 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1882 floatx80 t2, accum; 1883 1884 /* 1885 * Compute an approximation of arg/(2+arg), with extra precision, 1886 * as the argument to a polynomial approximation. The extra 1887 * precision is only needed for the first term of the 1888 * approximation, with subsequent terms being significantly 1889 * smaller; the approximation only uses odd exponents, and the 1890 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 1891 */ 1892 if (arg0_sign) { 1893 dexp = 0x3fff; 1894 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1895 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1896 } else { 1897 dexp = 0x4000; 1898 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1899 dsig0 |= 0x8000000000000000ULL; 1900 } 1901 texp = arg0_exp - dexp + 0x3ffe; 1902 rsig0 = arg0_sig; 1903 rsig1 = 0; 1904 rsig2 = 0; 1905 if (dsig0 <= rsig0) { 1906 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1907 ++texp; 1908 } 1909 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1910 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1911 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1912 &rsig0, &rsig1, &rsig2); 1913 while ((int64_t) rsig0 < 0) { 1914 --tsig0; 1915 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1916 &rsig0, &rsig1, &rsig2); 1917 } 1918 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1919 /* 1920 * No need to correct any estimation error in tsig1; even with 1921 * such error, it is accurate enough. Now compute the square of 1922 * that approximation. 1923 */ 1924 mul128To256(tsig0, tsig1, tsig0, tsig1, 1925 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1926 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1927 texp + texp - 0x3ffe, 1928 t2sig0, t2sig1, &env->fp_status); 1929 1930 /* Compute the lower parts of the polynomial expansion. */ 1931 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1932 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1933 accum = floatx80_mul(accum, t2, &env->fp_status); 1934 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1935 accum = floatx80_mul(accum, t2, &env->fp_status); 1936 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1937 accum = floatx80_mul(accum, t2, &env->fp_status); 1938 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1939 accum = floatx80_mul(accum, t2, &env->fp_status); 1940 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1941 accum = floatx80_mul(accum, t2, &env->fp_status); 1942 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1943 accum = floatx80_mul(accum, t2, &env->fp_status); 1944 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1945 accum = floatx80_mul(accum, t2, &env->fp_status); 1946 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1947 accum = floatx80_mul(accum, t2, &env->fp_status); 1948 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 1949 1950 /* 1951 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1952 * accum has much lower magnitude, and so, in particular, carry 1953 * out of the addition is not possible), multiplied by t. (This 1954 * expansion is only accurate to about 70 bits, not 128 bits.) 1955 */ 1956 aexp = extractFloatx80Exp(fyl2x_coeff_0); 1957 asign = extractFloatx80Sign(fyl2x_coeff_0); 1958 shift128RightJamming(extractFloatx80Frac(accum), 0, 1959 aexp - extractFloatx80Exp(accum), 1960 &asig0, &asig1); 1961 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1962 bsig1 = 0; 1963 if (asign == extractFloatx80Sign(accum)) { 1964 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1965 } else { 1966 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1967 } 1968 /* Multiply by t to compute the required result. */ 1969 mul128To256(asig0, asig1, tsig0, tsig1, 1970 &asig0, &asig1, &asig2, &asig3); 1971 aexp += texp - 0x3ffe; 1972 *exp = aexp; 1973 *sig0 = asig0; 1974 *sig1 = asig1; 1975 } 1976 1977 void helper_fyl2xp1(CPUX86State *env) 1978 { 1979 uint8_t old_flags = save_exception_flags(env); 1980 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1981 int32_t arg0_exp = extractFloatx80Exp(ST0); 1982 bool arg0_sign = extractFloatx80Sign(ST0); 1983 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1984 int32_t arg1_exp = extractFloatx80Exp(ST1); 1985 bool arg1_sign = extractFloatx80Sign(ST1); 1986 1987 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1988 float_raise(float_flag_invalid, &env->fp_status); 1989 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1990 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1991 float_raise(float_flag_invalid, &env->fp_status); 1992 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1993 } else if (floatx80_invalid_encoding(ST0) || 1994 floatx80_invalid_encoding(ST1)) { 1995 float_raise(float_flag_invalid, &env->fp_status); 1996 ST1 = floatx80_default_nan(&env->fp_status); 1997 } else if (floatx80_is_any_nan(ST0)) { 1998 ST1 = ST0; 1999 } else if (floatx80_is_any_nan(ST1)) { 2000 /* Pass this NaN through. */ 2001 } else if (arg0_exp > 0x3ffd || 2002 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2003 0x95f619980c4336f7ULL : 2004 0xd413cccfe7799211ULL))) { 2005 /* 2006 * Out of range for the instruction (ST0 must have absolute 2007 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2008 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2009 * to sqrt(2) - 1, which we allow here), treat as invalid. 2010 */ 2011 float_raise(float_flag_invalid, &env->fp_status); 2012 ST1 = floatx80_default_nan(&env->fp_status); 2013 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2014 arg1_exp == 0x7fff) { 2015 /* 2016 * One argument is zero, or multiplying by infinity; correct 2017 * result is exact and can be obtained by multiplying the 2018 * arguments. 2019 */ 2020 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2021 } else if (arg0_exp < 0x3fb0) { 2022 /* 2023 * Multiplying both arguments and an extra-precision version 2024 * of log2(e) is sufficiently precise. 2025 */ 2026 uint64_t sig0, sig1, sig2; 2027 int32_t exp; 2028 if (arg0_exp == 0) { 2029 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2030 } 2031 if (arg1_exp == 0) { 2032 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2033 } 2034 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2035 &sig0, &sig1, &sig2); 2036 exp = arg0_exp + 1; 2037 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2038 exp += arg1_exp - 0x3ffe; 2039 /* This result is inexact. */ 2040 sig1 |= 1; 2041 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2042 arg0_sign ^ arg1_sign, exp, 2043 sig0, sig1, &env->fp_status); 2044 } else { 2045 int32_t aexp; 2046 uint64_t asig0, asig1, asig2; 2047 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2048 FloatX80RoundPrec save_prec = 2049 env->fp_status.floatx80_rounding_precision; 2050 env->fp_status.float_rounding_mode = float_round_nearest_even; 2051 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2052 2053 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2054 /* 2055 * Multiply by the second argument to compute the required 2056 * result. 2057 */ 2058 if (arg1_exp == 0) { 2059 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2060 } 2061 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2062 aexp += arg1_exp - 0x3ffe; 2063 /* This result is inexact. */ 2064 asig1 |= 1; 2065 env->fp_status.float_rounding_mode = save_mode; 2066 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2067 arg0_sign ^ arg1_sign, aexp, 2068 asig0, asig1, &env->fp_status); 2069 env->fp_status.floatx80_rounding_precision = save_prec; 2070 } 2071 fpop(env); 2072 merge_exception_flags(env, old_flags); 2073 } 2074 2075 void helper_fyl2x(CPUX86State *env) 2076 { 2077 uint8_t old_flags = save_exception_flags(env); 2078 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2079 int32_t arg0_exp = extractFloatx80Exp(ST0); 2080 bool arg0_sign = extractFloatx80Sign(ST0); 2081 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2082 int32_t arg1_exp = extractFloatx80Exp(ST1); 2083 bool arg1_sign = extractFloatx80Sign(ST1); 2084 2085 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2086 float_raise(float_flag_invalid, &env->fp_status); 2087 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2088 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2089 float_raise(float_flag_invalid, &env->fp_status); 2090 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2091 } else if (floatx80_invalid_encoding(ST0) || 2092 floatx80_invalid_encoding(ST1)) { 2093 float_raise(float_flag_invalid, &env->fp_status); 2094 ST1 = floatx80_default_nan(&env->fp_status); 2095 } else if (floatx80_is_any_nan(ST0)) { 2096 ST1 = ST0; 2097 } else if (floatx80_is_any_nan(ST1)) { 2098 /* Pass this NaN through. */ 2099 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2100 float_raise(float_flag_invalid, &env->fp_status); 2101 ST1 = floatx80_default_nan(&env->fp_status); 2102 } else if (floatx80_is_infinity(ST1)) { 2103 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2104 &env->fp_status); 2105 switch (cmp) { 2106 case float_relation_less: 2107 ST1 = floatx80_chs(ST1); 2108 break; 2109 case float_relation_greater: 2110 /* Result is infinity of the same sign as ST1. */ 2111 break; 2112 default: 2113 float_raise(float_flag_invalid, &env->fp_status); 2114 ST1 = floatx80_default_nan(&env->fp_status); 2115 break; 2116 } 2117 } else if (floatx80_is_infinity(ST0)) { 2118 if (floatx80_is_zero(ST1)) { 2119 float_raise(float_flag_invalid, &env->fp_status); 2120 ST1 = floatx80_default_nan(&env->fp_status); 2121 } else if (arg1_sign) { 2122 ST1 = floatx80_chs(ST0); 2123 } else { 2124 ST1 = ST0; 2125 } 2126 } else if (floatx80_is_zero(ST0)) { 2127 if (floatx80_is_zero(ST1)) { 2128 float_raise(float_flag_invalid, &env->fp_status); 2129 ST1 = floatx80_default_nan(&env->fp_status); 2130 } else { 2131 /* Result is infinity with opposite sign to ST1. */ 2132 float_raise(float_flag_divbyzero, &env->fp_status); 2133 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2134 0x8000000000000000ULL); 2135 } 2136 } else if (floatx80_is_zero(ST1)) { 2137 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2138 ST1 = floatx80_chs(ST1); 2139 } 2140 /* Otherwise, ST1 is already the correct result. */ 2141 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2142 if (arg1_sign) { 2143 ST1 = floatx80_chs(floatx80_zero); 2144 } else { 2145 ST1 = floatx80_zero; 2146 } 2147 } else { 2148 int32_t int_exp; 2149 floatx80 arg0_m1; 2150 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2151 FloatX80RoundPrec save_prec = 2152 env->fp_status.floatx80_rounding_precision; 2153 env->fp_status.float_rounding_mode = float_round_nearest_even; 2154 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2155 2156 if (arg0_exp == 0) { 2157 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2158 } 2159 if (arg1_exp == 0) { 2160 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2161 } 2162 int_exp = arg0_exp - 0x3fff; 2163 if (arg0_sig > 0xb504f333f9de6484ULL) { 2164 ++int_exp; 2165 } 2166 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2167 &env->fp_status), 2168 floatx80_one, &env->fp_status); 2169 if (floatx80_is_zero(arg0_m1)) { 2170 /* Exact power of 2; multiply by ST1. */ 2171 env->fp_status.float_rounding_mode = save_mode; 2172 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2173 ST1, &env->fp_status); 2174 } else { 2175 bool asign = extractFloatx80Sign(arg0_m1); 2176 int32_t aexp; 2177 uint64_t asig0, asig1, asig2; 2178 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2179 if (int_exp != 0) { 2180 bool isign = (int_exp < 0); 2181 int32_t iexp; 2182 uint64_t isig; 2183 int shift; 2184 int_exp = isign ? -int_exp : int_exp; 2185 shift = clz32(int_exp) + 32; 2186 isig = int_exp; 2187 isig <<= shift; 2188 iexp = 0x403e - shift; 2189 shift128RightJamming(asig0, asig1, iexp - aexp, 2190 &asig0, &asig1); 2191 if (asign == isign) { 2192 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2193 } else { 2194 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2195 } 2196 aexp = iexp; 2197 asign = isign; 2198 } 2199 /* 2200 * Multiply by the second argument to compute the required 2201 * result. 2202 */ 2203 if (arg1_exp == 0) { 2204 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2205 } 2206 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2207 aexp += arg1_exp - 0x3ffe; 2208 /* This result is inexact. */ 2209 asig1 |= 1; 2210 env->fp_status.float_rounding_mode = save_mode; 2211 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2212 asign ^ arg1_sign, aexp, 2213 asig0, asig1, &env->fp_status); 2214 } 2215 2216 env->fp_status.floatx80_rounding_precision = save_prec; 2217 } 2218 fpop(env); 2219 merge_exception_flags(env, old_flags); 2220 } 2221 2222 void helper_fsqrt(CPUX86State *env) 2223 { 2224 uint8_t old_flags = save_exception_flags(env); 2225 if (floatx80_is_neg(ST0)) { 2226 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2227 env->fpus |= 0x400; 2228 } 2229 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2230 merge_exception_flags(env, old_flags); 2231 } 2232 2233 void helper_fsincos(CPUX86State *env) 2234 { 2235 double fptemp = floatx80_to_double(env, ST0); 2236 2237 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2238 env->fpus |= 0x400; 2239 } else { 2240 ST0 = double_to_floatx80(env, sin(fptemp)); 2241 fpush(env); 2242 ST0 = double_to_floatx80(env, cos(fptemp)); 2243 env->fpus &= ~0x400; /* C2 <-- 0 */ 2244 /* the above code is for |arg| < 2**63 only */ 2245 } 2246 } 2247 2248 void helper_frndint(CPUX86State *env) 2249 { 2250 uint8_t old_flags = save_exception_flags(env); 2251 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2252 merge_exception_flags(env, old_flags); 2253 } 2254 2255 void helper_fscale(CPUX86State *env) 2256 { 2257 uint8_t old_flags = save_exception_flags(env); 2258 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2259 float_raise(float_flag_invalid, &env->fp_status); 2260 ST0 = floatx80_default_nan(&env->fp_status); 2261 } else if (floatx80_is_any_nan(ST1)) { 2262 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2263 float_raise(float_flag_invalid, &env->fp_status); 2264 } 2265 ST0 = ST1; 2266 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2267 float_raise(float_flag_invalid, &env->fp_status); 2268 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2269 } 2270 } else if (floatx80_is_infinity(ST1) && 2271 !floatx80_invalid_encoding(ST0) && 2272 !floatx80_is_any_nan(ST0)) { 2273 if (floatx80_is_neg(ST1)) { 2274 if (floatx80_is_infinity(ST0)) { 2275 float_raise(float_flag_invalid, &env->fp_status); 2276 ST0 = floatx80_default_nan(&env->fp_status); 2277 } else { 2278 ST0 = (floatx80_is_neg(ST0) ? 2279 floatx80_chs(floatx80_zero) : 2280 floatx80_zero); 2281 } 2282 } else { 2283 if (floatx80_is_zero(ST0)) { 2284 float_raise(float_flag_invalid, &env->fp_status); 2285 ST0 = floatx80_default_nan(&env->fp_status); 2286 } else { 2287 ST0 = (floatx80_is_neg(ST0) ? 2288 floatx80_chs(floatx80_infinity) : 2289 floatx80_infinity); 2290 } 2291 } 2292 } else { 2293 int n; 2294 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2295 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2296 set_float_exception_flags(0, &env->fp_status); 2297 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2298 set_float_exception_flags(save_flags, &env->fp_status); 2299 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2300 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2301 env->fp_status.floatx80_rounding_precision = save; 2302 } 2303 merge_exception_flags(env, old_flags); 2304 } 2305 2306 void helper_fsin(CPUX86State *env) 2307 { 2308 double fptemp = floatx80_to_double(env, ST0); 2309 2310 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2311 env->fpus |= 0x400; 2312 } else { 2313 ST0 = double_to_floatx80(env, sin(fptemp)); 2314 env->fpus &= ~0x400; /* C2 <-- 0 */ 2315 /* the above code is for |arg| < 2**53 only */ 2316 } 2317 } 2318 2319 void helper_fcos(CPUX86State *env) 2320 { 2321 double fptemp = floatx80_to_double(env, ST0); 2322 2323 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2324 env->fpus |= 0x400; 2325 } else { 2326 ST0 = double_to_floatx80(env, cos(fptemp)); 2327 env->fpus &= ~0x400; /* C2 <-- 0 */ 2328 /* the above code is for |arg| < 2**63 only */ 2329 } 2330 } 2331 2332 void helper_fxam_ST0(CPUX86State *env) 2333 { 2334 CPU_LDoubleU temp; 2335 int expdif; 2336 2337 temp.d = ST0; 2338 2339 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2340 if (SIGND(temp)) { 2341 env->fpus |= 0x200; /* C1 <-- 1 */ 2342 } 2343 2344 if (env->fptags[env->fpstt]) { 2345 env->fpus |= 0x4100; /* Empty */ 2346 return; 2347 } 2348 2349 expdif = EXPD(temp); 2350 if (expdif == MAXEXPD) { 2351 if (MANTD(temp) == 0x8000000000000000ULL) { 2352 env->fpus |= 0x500; /* Infinity */ 2353 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2354 env->fpus |= 0x100; /* NaN */ 2355 } 2356 } else if (expdif == 0) { 2357 if (MANTD(temp) == 0) { 2358 env->fpus |= 0x4000; /* Zero */ 2359 } else { 2360 env->fpus |= 0x4400; /* Denormal */ 2361 } 2362 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2363 env->fpus |= 0x400; 2364 } 2365 } 2366 2367 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2368 uintptr_t retaddr) 2369 { 2370 int fpus, fptag, exp, i; 2371 uint64_t mant; 2372 CPU_LDoubleU tmp; 2373 2374 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2375 fptag = 0; 2376 for (i = 7; i >= 0; i--) { 2377 fptag <<= 2; 2378 if (env->fptags[i]) { 2379 fptag |= 3; 2380 } else { 2381 tmp.d = env->fpregs[i].d; 2382 exp = EXPD(tmp); 2383 mant = MANTD(tmp); 2384 if (exp == 0 && mant == 0) { 2385 /* zero */ 2386 fptag |= 1; 2387 } else if (exp == 0 || exp == MAXEXPD 2388 || (mant & (1LL << 63)) == 0) { 2389 /* NaNs, infinity, denormal */ 2390 fptag |= 2; 2391 } 2392 } 2393 } 2394 if (data32) { 2395 /* 32 bit */ 2396 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2397 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2398 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2399 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 2400 cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 2401 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 2402 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 2403 } else { 2404 /* 16 bit */ 2405 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2406 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2407 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2408 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 2409 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 2410 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 2411 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 2412 } 2413 } 2414 2415 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2416 { 2417 do_fstenv(env, ptr, data32, GETPC()); 2418 } 2419 2420 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2421 { 2422 env->fpstt = (fpus >> 11) & 7; 2423 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2424 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2425 #if !defined(CONFIG_USER_ONLY) 2426 if (!(env->fpus & FPUS_SE)) { 2427 /* 2428 * Here the processor deasserts FERR#; in response, the chipset deasserts 2429 * IGNNE#. 2430 */ 2431 cpu_clear_ignne(); 2432 } 2433 #endif 2434 } 2435 2436 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2437 uintptr_t retaddr) 2438 { 2439 int i, fpus, fptag; 2440 2441 if (data32) { 2442 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2443 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2444 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2445 } else { 2446 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2447 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2448 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2449 } 2450 cpu_set_fpus(env, fpus); 2451 for (i = 0; i < 8; i++) { 2452 env->fptags[i] = ((fptag & 3) == 3); 2453 fptag >>= 2; 2454 } 2455 } 2456 2457 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2458 { 2459 do_fldenv(env, ptr, data32, GETPC()); 2460 } 2461 2462 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2463 uintptr_t retaddr) 2464 { 2465 floatx80 tmp; 2466 int i; 2467 2468 do_fstenv(env, ptr, data32, retaddr); 2469 2470 ptr += (target_ulong)14 << data32; 2471 for (i = 0; i < 8; i++) { 2472 tmp = ST(i); 2473 do_fstt(env, tmp, ptr, retaddr); 2474 ptr += 10; 2475 } 2476 2477 do_fninit(env); 2478 } 2479 2480 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2481 { 2482 do_fsave(env, ptr, data32, GETPC()); 2483 } 2484 2485 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2486 uintptr_t retaddr) 2487 { 2488 floatx80 tmp; 2489 int i; 2490 2491 do_fldenv(env, ptr, data32, retaddr); 2492 ptr += (target_ulong)14 << data32; 2493 2494 for (i = 0; i < 8; i++) { 2495 tmp = do_fldt(env, ptr, retaddr); 2496 ST(i) = tmp; 2497 ptr += 10; 2498 } 2499 } 2500 2501 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2502 { 2503 do_frstor(env, ptr, data32, GETPC()); 2504 } 2505 2506 #define XO(X) offsetof(X86XSaveArea, X) 2507 2508 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2509 { 2510 int fpus, fptag, i; 2511 target_ulong addr; 2512 2513 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2514 fptag = 0; 2515 for (i = 0; i < 8; i++) { 2516 fptag |= (env->fptags[i] << i); 2517 } 2518 2519 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2520 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2521 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2522 2523 /* In 32-bit mode this is eip, sel, dp, sel. 2524 In 64-bit mode this is rip, rdp. 2525 But in either case we don't write actual data, just zeros. */ 2526 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2527 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2528 2529 addr = ptr + XO(legacy.fpregs); 2530 for (i = 0; i < 8; i++) { 2531 floatx80 tmp = ST(i); 2532 do_fstt(env, tmp, addr, ra); 2533 addr += 16; 2534 } 2535 } 2536 2537 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2538 { 2539 update_mxcsr_from_sse_status(env); 2540 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2541 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2542 } 2543 2544 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2545 { 2546 int i, nb_xmm_regs; 2547 target_ulong addr; 2548 2549 if (env->hflags & HF_CS64_MASK) { 2550 nb_xmm_regs = 16; 2551 } else { 2552 nb_xmm_regs = 8; 2553 } 2554 2555 addr = ptr + XO(legacy.xmm_regs); 2556 for (i = 0; i < nb_xmm_regs; i++) { 2557 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2558 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2559 addr += 16; 2560 } 2561 } 2562 2563 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2564 { 2565 int i, nb_xmm_regs; 2566 2567 if (env->hflags & HF_CS64_MASK) { 2568 nb_xmm_regs = 16; 2569 } else { 2570 nb_xmm_regs = 8; 2571 } 2572 2573 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2574 cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 2575 cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 2576 } 2577 } 2578 2579 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2580 { 2581 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2582 int i; 2583 2584 for (i = 0; i < 4; i++, addr += 16) { 2585 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2586 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2587 } 2588 } 2589 2590 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2591 { 2592 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2593 env->bndcs_regs.cfgu, ra); 2594 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2595 env->bndcs_regs.sts, ra); 2596 } 2597 2598 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2599 { 2600 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2601 } 2602 2603 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2604 { 2605 /* The operand must be 16 byte aligned */ 2606 if (ptr & 0xf) { 2607 raise_exception_ra(env, EXCP0D_GPF, ra); 2608 } 2609 2610 do_xsave_fpu(env, ptr, ra); 2611 2612 if (env->cr[4] & CR4_OSFXSR_MASK) { 2613 do_xsave_mxcsr(env, ptr, ra); 2614 /* Fast FXSAVE leaves out the XMM registers */ 2615 if (!(env->efer & MSR_EFER_FFXSR) 2616 || (env->hflags & HF_CPL_MASK) 2617 || !(env->hflags & HF_LMA_MASK)) { 2618 do_xsave_sse(env, ptr, ra); 2619 } 2620 } 2621 } 2622 2623 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2624 { 2625 do_fxsave(env, ptr, GETPC()); 2626 } 2627 2628 static uint64_t get_xinuse(CPUX86State *env) 2629 { 2630 uint64_t inuse = -1; 2631 2632 /* For the most part, we don't track XINUSE. We could calculate it 2633 here for all components, but it's probably less work to simply 2634 indicate in use. That said, the state of BNDREGS is important 2635 enough to track in HFLAGS, so we might as well use that here. */ 2636 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2637 inuse &= ~XSTATE_BNDREGS_MASK; 2638 } 2639 return inuse; 2640 } 2641 2642 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2643 uint64_t inuse, uint64_t opt, uintptr_t ra) 2644 { 2645 uint64_t old_bv, new_bv; 2646 2647 /* The OS must have enabled XSAVE. */ 2648 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2649 raise_exception_ra(env, EXCP06_ILLOP, ra); 2650 } 2651 2652 /* The operand must be 64 byte aligned. */ 2653 if (ptr & 63) { 2654 raise_exception_ra(env, EXCP0D_GPF, ra); 2655 } 2656 2657 /* Never save anything not enabled by XCR0. */ 2658 rfbm &= env->xcr0; 2659 opt &= rfbm; 2660 2661 if (opt & XSTATE_FP_MASK) { 2662 do_xsave_fpu(env, ptr, ra); 2663 } 2664 if (rfbm & XSTATE_SSE_MASK) { 2665 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2666 do_xsave_mxcsr(env, ptr, ra); 2667 } 2668 if (opt & XSTATE_SSE_MASK) { 2669 do_xsave_sse(env, ptr, ra); 2670 } 2671 if (opt & XSTATE_YMM_MASK) { 2672 do_xsave_ymmh(env, ptr + XO(avx_state), ra); 2673 } 2674 if (opt & XSTATE_BNDREGS_MASK) { 2675 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2676 } 2677 if (opt & XSTATE_BNDCSR_MASK) { 2678 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2679 } 2680 if (opt & XSTATE_PKRU_MASK) { 2681 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2682 } 2683 2684 /* Update the XSTATE_BV field. */ 2685 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2686 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2687 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2688 } 2689 2690 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2691 { 2692 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2693 } 2694 2695 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2696 { 2697 uint64_t inuse = get_xinuse(env); 2698 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2699 } 2700 2701 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2702 { 2703 int i, fpuc, fpus, fptag; 2704 target_ulong addr; 2705 2706 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2707 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2708 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2709 cpu_set_fpuc(env, fpuc); 2710 cpu_set_fpus(env, fpus); 2711 fptag ^= 0xff; 2712 for (i = 0; i < 8; i++) { 2713 env->fptags[i] = ((fptag >> i) & 1); 2714 } 2715 2716 addr = ptr + XO(legacy.fpregs); 2717 for (i = 0; i < 8; i++) { 2718 floatx80 tmp = do_fldt(env, addr, ra); 2719 ST(i) = tmp; 2720 addr += 16; 2721 } 2722 } 2723 2724 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2725 { 2726 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2727 } 2728 2729 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2730 { 2731 int i, nb_xmm_regs; 2732 target_ulong addr; 2733 2734 if (env->hflags & HF_CS64_MASK) { 2735 nb_xmm_regs = 16; 2736 } else { 2737 nb_xmm_regs = 8; 2738 } 2739 2740 addr = ptr + XO(legacy.xmm_regs); 2741 for (i = 0; i < nb_xmm_regs; i++) { 2742 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2743 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2744 addr += 16; 2745 } 2746 } 2747 2748 static void do_clear_sse(CPUX86State *env) 2749 { 2750 int i, nb_xmm_regs; 2751 2752 if (env->hflags & HF_CS64_MASK) { 2753 nb_xmm_regs = 16; 2754 } else { 2755 nb_xmm_regs = 8; 2756 } 2757 2758 for (i = 0; i < nb_xmm_regs; i++) { 2759 env->xmm_regs[i].ZMM_Q(0) = 0; 2760 env->xmm_regs[i].ZMM_Q(1) = 0; 2761 } 2762 } 2763 2764 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2765 { 2766 int i, nb_xmm_regs; 2767 2768 if (env->hflags & HF_CS64_MASK) { 2769 nb_xmm_regs = 16; 2770 } else { 2771 nb_xmm_regs = 8; 2772 } 2773 2774 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2775 env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 2776 env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 2777 } 2778 } 2779 2780 static void do_clear_ymmh(CPUX86State *env) 2781 { 2782 int i, nb_xmm_regs; 2783 2784 if (env->hflags & HF_CS64_MASK) { 2785 nb_xmm_regs = 16; 2786 } else { 2787 nb_xmm_regs = 8; 2788 } 2789 2790 for (i = 0; i < nb_xmm_regs; i++) { 2791 env->xmm_regs[i].ZMM_Q(2) = 0; 2792 env->xmm_regs[i].ZMM_Q(3) = 0; 2793 } 2794 } 2795 2796 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2797 { 2798 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2799 int i; 2800 2801 for (i = 0; i < 4; i++, addr += 16) { 2802 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2803 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2804 } 2805 } 2806 2807 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2808 { 2809 /* FIXME: Extend highest implemented bit of linear address. */ 2810 env->bndcs_regs.cfgu 2811 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2812 env->bndcs_regs.sts 2813 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2814 } 2815 2816 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2817 { 2818 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2819 } 2820 2821 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2822 { 2823 /* The operand must be 16 byte aligned */ 2824 if (ptr & 0xf) { 2825 raise_exception_ra(env, EXCP0D_GPF, ra); 2826 } 2827 2828 do_xrstor_fpu(env, ptr, ra); 2829 2830 if (env->cr[4] & CR4_OSFXSR_MASK) { 2831 do_xrstor_mxcsr(env, ptr, ra); 2832 /* Fast FXRSTOR leaves out the XMM registers */ 2833 if (!(env->efer & MSR_EFER_FFXSR) 2834 || (env->hflags & HF_CPL_MASK) 2835 || !(env->hflags & HF_LMA_MASK)) { 2836 do_xrstor_sse(env, ptr, ra); 2837 } 2838 } 2839 } 2840 2841 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2842 { 2843 do_fxrstor(env, ptr, GETPC()); 2844 } 2845 2846 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) 2847 { 2848 uint64_t xstate_bv, xcomp_bv, reserve0; 2849 2850 rfbm &= env->xcr0; 2851 2852 /* The OS must have enabled XSAVE. */ 2853 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2854 raise_exception_ra(env, EXCP06_ILLOP, ra); 2855 } 2856 2857 /* The operand must be 64 byte aligned. */ 2858 if (ptr & 63) { 2859 raise_exception_ra(env, EXCP0D_GPF, ra); 2860 } 2861 2862 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2863 2864 if ((int64_t)xstate_bv < 0) { 2865 /* FIXME: Compact form. */ 2866 raise_exception_ra(env, EXCP0D_GPF, ra); 2867 } 2868 2869 /* Standard form. */ 2870 2871 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2872 if (xstate_bv & ~env->xcr0) { 2873 raise_exception_ra(env, EXCP0D_GPF, ra); 2874 } 2875 2876 /* The XCOMP_BV field must be zero. Note that, as of the April 2016 2877 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2878 describes only XCOMP_BV, but the description of the standard form 2879 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2880 includes the next 64-bit field. */ 2881 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2882 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2883 if (xcomp_bv || reserve0) { 2884 raise_exception_ra(env, EXCP0D_GPF, ra); 2885 } 2886 2887 if (rfbm & XSTATE_FP_MASK) { 2888 if (xstate_bv & XSTATE_FP_MASK) { 2889 do_xrstor_fpu(env, ptr, ra); 2890 } else { 2891 do_fninit(env); 2892 memset(env->fpregs, 0, sizeof(env->fpregs)); 2893 } 2894 } 2895 if (rfbm & XSTATE_SSE_MASK) { 2896 /* Note that the standard form of XRSTOR loads MXCSR from memory 2897 whether or not the XSTATE_BV bit is set. */ 2898 do_xrstor_mxcsr(env, ptr, ra); 2899 if (xstate_bv & XSTATE_SSE_MASK) { 2900 do_xrstor_sse(env, ptr, ra); 2901 } else { 2902 do_clear_sse(env); 2903 } 2904 } 2905 if (rfbm & XSTATE_YMM_MASK) { 2906 if (xstate_bv & XSTATE_YMM_MASK) { 2907 do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 2908 } else { 2909 do_clear_ymmh(env); 2910 } 2911 } 2912 if (rfbm & XSTATE_BNDREGS_MASK) { 2913 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2914 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2915 env->hflags |= HF_MPX_IU_MASK; 2916 } else { 2917 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2918 env->hflags &= ~HF_MPX_IU_MASK; 2919 } 2920 } 2921 if (rfbm & XSTATE_BNDCSR_MASK) { 2922 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2923 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2924 } else { 2925 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2926 } 2927 cpu_sync_bndcs_hflags(env); 2928 } 2929 if (rfbm & XSTATE_PKRU_MASK) { 2930 uint64_t old_pkru = env->pkru; 2931 if (xstate_bv & XSTATE_PKRU_MASK) { 2932 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2933 } else { 2934 env->pkru = 0; 2935 } 2936 if (env->pkru != old_pkru) { 2937 CPUState *cs = env_cpu(env); 2938 tlb_flush(cs); 2939 } 2940 } 2941 } 2942 2943 #undef XO 2944 2945 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2946 { 2947 do_xrstor(env, ptr, rfbm, GETPC()); 2948 } 2949 2950 #if defined(CONFIG_USER_ONLY) 2951 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2952 { 2953 do_fsave(env, ptr, data32, 0); 2954 } 2955 2956 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2957 { 2958 do_frstor(env, ptr, data32, 0); 2959 } 2960 2961 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2962 { 2963 do_fxsave(env, ptr, 0); 2964 } 2965 2966 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2967 { 2968 do_fxrstor(env, ptr, 0); 2969 } 2970 2971 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 2972 { 2973 do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 2974 } 2975 2976 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 2977 { 2978 do_xrstor(env, ptr, -1, 0); 2979 } 2980 #endif 2981 2982 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2983 { 2984 /* The OS must have enabled XSAVE. */ 2985 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2986 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2987 } 2988 2989 switch (ecx) { 2990 case 0: 2991 return env->xcr0; 2992 case 1: 2993 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2994 return env->xcr0 & get_xinuse(env); 2995 } 2996 break; 2997 } 2998 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2999 } 3000 3001 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3002 { 3003 uint32_t dummy, ena_lo, ena_hi; 3004 uint64_t ena; 3005 3006 /* The OS must have enabled XSAVE. */ 3007 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3008 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3009 } 3010 3011 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3012 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3013 goto do_gpf; 3014 } 3015 3016 /* Disallow enabling unimplemented features. */ 3017 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3018 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3019 if (mask & ~ena) { 3020 goto do_gpf; 3021 } 3022 3023 /* Disallow enabling only half of MPX. */ 3024 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3025 & XSTATE_BNDCSR_MASK) { 3026 goto do_gpf; 3027 } 3028 3029 env->xcr0 = mask; 3030 cpu_sync_bndcs_hflags(env); 3031 cpu_sync_avx_hflag(env); 3032 return; 3033 3034 do_gpf: 3035 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3036 } 3037 3038 /* MMX/SSE */ 3039 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3040 3041 #define SSE_DAZ 0x0040 3042 #define SSE_RC_SHIFT 13 3043 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3044 #define SSE_FZ 0x8000 3045 3046 void update_mxcsr_status(CPUX86State *env) 3047 { 3048 uint32_t mxcsr = env->mxcsr; 3049 int rnd_type; 3050 3051 /* set rounding mode */ 3052 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3053 set_x86_rounding_mode(rnd_type, &env->sse_status); 3054 3055 /* Set exception flags. */ 3056 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3057 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3058 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3059 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3060 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3061 &env->sse_status); 3062 3063 /* set denormals are zero */ 3064 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3065 3066 /* set flush to zero */ 3067 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3068 } 3069 3070 void update_mxcsr_from_sse_status(CPUX86State *env) 3071 { 3072 uint8_t flags = get_float_exception_flags(&env->sse_status); 3073 /* 3074 * The MXCSR denormal flag has opposite semantics to 3075 * float_flag_input_denormal (the softfloat code sets that flag 3076 * only when flushing input denormals to zero, but SSE sets it 3077 * only when not flushing them to zero), so is not converted 3078 * here. 3079 */ 3080 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3081 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3082 (flags & float_flag_overflow ? FPUS_OE : 0) | 3083 (flags & float_flag_underflow ? FPUS_UE : 0) | 3084 (flags & float_flag_inexact ? FPUS_PE : 0) | 3085 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3086 0)); 3087 } 3088 3089 void helper_update_mxcsr(CPUX86State *env) 3090 { 3091 update_mxcsr_from_sse_status(env); 3092 } 3093 3094 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3095 { 3096 cpu_set_mxcsr(env, val); 3097 } 3098 3099 void helper_enter_mmx(CPUX86State *env) 3100 { 3101 env->fpstt = 0; 3102 *(uint32_t *)(env->fptags) = 0; 3103 *(uint32_t *)(env->fptags + 4) = 0; 3104 } 3105 3106 void helper_emms(CPUX86State *env) 3107 { 3108 /* set to empty state */ 3109 *(uint32_t *)(env->fptags) = 0x01010101; 3110 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3111 } 3112 3113 #define SHIFT 0 3114 #include "ops_sse.h" 3115 3116 #define SHIFT 1 3117 #include "ops_sse.h" 3118 3119 #define SHIFT 2 3120 #include "ops_sse.h" 3121