1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/cpu_ldst.h" 25 #include "exec/helper-proto.h" 26 #include "fpu/softfloat.h" 27 #include "fpu/softfloat-macros.h" 28 #include "helper-tcg.h" 29 30 /* float macros */ 31 #define FT0 (env->ft0) 32 #define ST0 (env->fpregs[env->fpstt].d) 33 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 34 #define ST1 ST(1) 35 36 #define FPU_RC_SHIFT 10 37 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 38 #define FPU_RC_NEAR 0x000 39 #define FPU_RC_DOWN 0x400 40 #define FPU_RC_UP 0x800 41 #define FPU_RC_CHOP 0xc00 42 43 #define MAXTAN 9223372036854775808.0 44 45 /* the following deal with x86 long double-precision numbers */ 46 #define MAXEXPD 0x7fff 47 #define EXPBIAS 16383 48 #define EXPD(fp) (fp.l.upper & 0x7fff) 49 #define SIGND(fp) ((fp.l.upper) & 0x8000) 50 #define MANTD(fp) (fp.l.lower) 51 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 52 53 #define FPUS_IE (1 << 0) 54 #define FPUS_DE (1 << 1) 55 #define FPUS_ZE (1 << 2) 56 #define FPUS_OE (1 << 3) 57 #define FPUS_UE (1 << 4) 58 #define FPUS_PE (1 << 5) 59 #define FPUS_SF (1 << 6) 60 #define FPUS_SE (1 << 7) 61 #define FPUS_B (1 << 15) 62 63 #define FPUC_EM 0x3f 64 65 #define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 66 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 67 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 68 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 69 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 70 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 71 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 72 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 73 74 static inline void fpush(CPUX86State *env) 75 { 76 env->fpstt = (env->fpstt - 1) & 7; 77 env->fptags[env->fpstt] = 0; /* validate stack entry */ 78 } 79 80 static inline void fpop(CPUX86State *env) 81 { 82 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 83 env->fpstt = (env->fpstt + 1) & 7; 84 } 85 86 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 87 { 88 CPU_LDoubleU temp; 89 90 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 91 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 92 return temp.d; 93 } 94 95 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 96 uintptr_t retaddr) 97 { 98 CPU_LDoubleU temp; 99 100 temp.d = f; 101 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 102 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 103 } 104 105 /* x87 FPU helpers */ 106 107 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 108 { 109 union { 110 float64 f64; 111 double d; 112 } u; 113 114 u.f64 = floatx80_to_float64(a, &env->fp_status); 115 return u.d; 116 } 117 118 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 119 { 120 union { 121 float64 f64; 122 double d; 123 } u; 124 125 u.d = a; 126 return float64_to_floatx80(u.f64, &env->fp_status); 127 } 128 129 static void fpu_set_exception(CPUX86State *env, int mask) 130 { 131 env->fpus |= mask; 132 if (env->fpus & (~env->fpuc & FPUC_EM)) { 133 env->fpus |= FPUS_SE | FPUS_B; 134 } 135 } 136 137 static inline uint8_t save_exception_flags(CPUX86State *env) 138 { 139 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 140 set_float_exception_flags(0, &env->fp_status); 141 return old_flags; 142 } 143 144 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 145 { 146 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 147 float_raise(old_flags, &env->fp_status); 148 fpu_set_exception(env, 149 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 150 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 151 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 152 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 153 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 154 (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 155 } 156 157 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 158 { 159 uint8_t old_flags = save_exception_flags(env); 160 floatx80 ret = floatx80_div(a, b, &env->fp_status); 161 merge_exception_flags(env, old_flags); 162 return ret; 163 } 164 165 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 166 { 167 if (env->cr[0] & CR0_NE_MASK) { 168 raise_exception_ra(env, EXCP10_COPR, retaddr); 169 } 170 #if !defined(CONFIG_USER_ONLY) 171 else { 172 fpu_check_raise_ferr_irq(env); 173 } 174 #endif 175 } 176 177 void helper_flds_FT0(CPUX86State *env, uint32_t val) 178 { 179 uint8_t old_flags = save_exception_flags(env); 180 union { 181 float32 f; 182 uint32_t i; 183 } u; 184 185 u.i = val; 186 FT0 = float32_to_floatx80(u.f, &env->fp_status); 187 merge_exception_flags(env, old_flags); 188 } 189 190 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 191 { 192 uint8_t old_flags = save_exception_flags(env); 193 union { 194 float64 f; 195 uint64_t i; 196 } u; 197 198 u.i = val; 199 FT0 = float64_to_floatx80(u.f, &env->fp_status); 200 merge_exception_flags(env, old_flags); 201 } 202 203 void helper_fildl_FT0(CPUX86State *env, int32_t val) 204 { 205 FT0 = int32_to_floatx80(val, &env->fp_status); 206 } 207 208 void helper_flds_ST0(CPUX86State *env, uint32_t val) 209 { 210 uint8_t old_flags = save_exception_flags(env); 211 int new_fpstt; 212 union { 213 float32 f; 214 uint32_t i; 215 } u; 216 217 new_fpstt = (env->fpstt - 1) & 7; 218 u.i = val; 219 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 220 env->fpstt = new_fpstt; 221 env->fptags[new_fpstt] = 0; /* validate stack entry */ 222 merge_exception_flags(env, old_flags); 223 } 224 225 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 226 { 227 uint8_t old_flags = save_exception_flags(env); 228 int new_fpstt; 229 union { 230 float64 f; 231 uint64_t i; 232 } u; 233 234 new_fpstt = (env->fpstt - 1) & 7; 235 u.i = val; 236 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 237 env->fpstt = new_fpstt; 238 env->fptags[new_fpstt] = 0; /* validate stack entry */ 239 merge_exception_flags(env, old_flags); 240 } 241 242 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 243 { 244 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 245 set_floatx80_rounding_precision(floatx80_precision_x, st); 246 return old; 247 } 248 249 void helper_fildl_ST0(CPUX86State *env, int32_t val) 250 { 251 int new_fpstt; 252 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 253 254 new_fpstt = (env->fpstt - 1) & 7; 255 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 256 env->fpstt = new_fpstt; 257 env->fptags[new_fpstt] = 0; /* validate stack entry */ 258 259 set_floatx80_rounding_precision(old, &env->fp_status); 260 } 261 262 void helper_fildll_ST0(CPUX86State *env, int64_t val) 263 { 264 int new_fpstt; 265 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 266 267 new_fpstt = (env->fpstt - 1) & 7; 268 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 269 env->fpstt = new_fpstt; 270 env->fptags[new_fpstt] = 0; /* validate stack entry */ 271 272 set_floatx80_rounding_precision(old, &env->fp_status); 273 } 274 275 uint32_t helper_fsts_ST0(CPUX86State *env) 276 { 277 uint8_t old_flags = save_exception_flags(env); 278 union { 279 float32 f; 280 uint32_t i; 281 } u; 282 283 u.f = floatx80_to_float32(ST0, &env->fp_status); 284 merge_exception_flags(env, old_flags); 285 return u.i; 286 } 287 288 uint64_t helper_fstl_ST0(CPUX86State *env) 289 { 290 uint8_t old_flags = save_exception_flags(env); 291 union { 292 float64 f; 293 uint64_t i; 294 } u; 295 296 u.f = floatx80_to_float64(ST0, &env->fp_status); 297 merge_exception_flags(env, old_flags); 298 return u.i; 299 } 300 301 int32_t helper_fist_ST0(CPUX86State *env) 302 { 303 uint8_t old_flags = save_exception_flags(env); 304 int32_t val; 305 306 val = floatx80_to_int32(ST0, &env->fp_status); 307 if (val != (int16_t)val) { 308 set_float_exception_flags(float_flag_invalid, &env->fp_status); 309 val = -32768; 310 } 311 merge_exception_flags(env, old_flags); 312 return val; 313 } 314 315 int32_t helper_fistl_ST0(CPUX86State *env) 316 { 317 uint8_t old_flags = save_exception_flags(env); 318 int32_t val; 319 320 val = floatx80_to_int32(ST0, &env->fp_status); 321 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 322 val = 0x80000000; 323 } 324 merge_exception_flags(env, old_flags); 325 return val; 326 } 327 328 int64_t helper_fistll_ST0(CPUX86State *env) 329 { 330 uint8_t old_flags = save_exception_flags(env); 331 int64_t val; 332 333 val = floatx80_to_int64(ST0, &env->fp_status); 334 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 335 val = 0x8000000000000000ULL; 336 } 337 merge_exception_flags(env, old_flags); 338 return val; 339 } 340 341 int32_t helper_fistt_ST0(CPUX86State *env) 342 { 343 uint8_t old_flags = save_exception_flags(env); 344 int32_t val; 345 346 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 347 if (val != (int16_t)val) { 348 set_float_exception_flags(float_flag_invalid, &env->fp_status); 349 val = -32768; 350 } 351 merge_exception_flags(env, old_flags); 352 return val; 353 } 354 355 int32_t helper_fisttl_ST0(CPUX86State *env) 356 { 357 uint8_t old_flags = save_exception_flags(env); 358 int32_t val; 359 360 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 361 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 362 val = 0x80000000; 363 } 364 merge_exception_flags(env, old_flags); 365 return val; 366 } 367 368 int64_t helper_fisttll_ST0(CPUX86State *env) 369 { 370 uint8_t old_flags = save_exception_flags(env); 371 int64_t val; 372 373 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 374 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 375 val = 0x8000000000000000ULL; 376 } 377 merge_exception_flags(env, old_flags); 378 return val; 379 } 380 381 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 382 { 383 int new_fpstt; 384 385 new_fpstt = (env->fpstt - 1) & 7; 386 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 387 env->fpstt = new_fpstt; 388 env->fptags[new_fpstt] = 0; /* validate stack entry */ 389 } 390 391 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 392 { 393 do_fstt(env, ST0, ptr, GETPC()); 394 } 395 396 void helper_fpush(CPUX86State *env) 397 { 398 fpush(env); 399 } 400 401 void helper_fpop(CPUX86State *env) 402 { 403 fpop(env); 404 } 405 406 void helper_fdecstp(CPUX86State *env) 407 { 408 env->fpstt = (env->fpstt - 1) & 7; 409 env->fpus &= ~0x4700; 410 } 411 412 void helper_fincstp(CPUX86State *env) 413 { 414 env->fpstt = (env->fpstt + 1) & 7; 415 env->fpus &= ~0x4700; 416 } 417 418 /* FPU move */ 419 420 void helper_ffree_STN(CPUX86State *env, int st_index) 421 { 422 env->fptags[(env->fpstt + st_index) & 7] = 1; 423 } 424 425 void helper_fmov_ST0_FT0(CPUX86State *env) 426 { 427 ST0 = FT0; 428 } 429 430 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 431 { 432 FT0 = ST(st_index); 433 } 434 435 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 436 { 437 ST0 = ST(st_index); 438 } 439 440 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 441 { 442 ST(st_index) = ST0; 443 } 444 445 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 446 { 447 floatx80 tmp; 448 449 tmp = ST(st_index); 450 ST(st_index) = ST0; 451 ST0 = tmp; 452 } 453 454 /* FPU operations */ 455 456 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 457 458 void helper_fcom_ST0_FT0(CPUX86State *env) 459 { 460 uint8_t old_flags = save_exception_flags(env); 461 FloatRelation ret; 462 463 ret = floatx80_compare(ST0, FT0, &env->fp_status); 464 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 465 merge_exception_flags(env, old_flags); 466 } 467 468 void helper_fucom_ST0_FT0(CPUX86State *env) 469 { 470 uint8_t old_flags = save_exception_flags(env); 471 FloatRelation ret; 472 473 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 474 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 475 merge_exception_flags(env, old_flags); 476 } 477 478 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 479 480 void helper_fcomi_ST0_FT0(CPUX86State *env) 481 { 482 uint8_t old_flags = save_exception_flags(env); 483 int eflags; 484 FloatRelation ret; 485 486 ret = floatx80_compare(ST0, FT0, &env->fp_status); 487 eflags = cpu_cc_compute_all(env, CC_OP); 488 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 489 CC_SRC = eflags; 490 merge_exception_flags(env, old_flags); 491 } 492 493 void helper_fucomi_ST0_FT0(CPUX86State *env) 494 { 495 uint8_t old_flags = save_exception_flags(env); 496 int eflags; 497 FloatRelation ret; 498 499 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 500 eflags = cpu_cc_compute_all(env, CC_OP); 501 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 502 CC_SRC = eflags; 503 merge_exception_flags(env, old_flags); 504 } 505 506 void helper_fadd_ST0_FT0(CPUX86State *env) 507 { 508 uint8_t old_flags = save_exception_flags(env); 509 ST0 = floatx80_add(ST0, FT0, &env->fp_status); 510 merge_exception_flags(env, old_flags); 511 } 512 513 void helper_fmul_ST0_FT0(CPUX86State *env) 514 { 515 uint8_t old_flags = save_exception_flags(env); 516 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 517 merge_exception_flags(env, old_flags); 518 } 519 520 void helper_fsub_ST0_FT0(CPUX86State *env) 521 { 522 uint8_t old_flags = save_exception_flags(env); 523 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 524 merge_exception_flags(env, old_flags); 525 } 526 527 void helper_fsubr_ST0_FT0(CPUX86State *env) 528 { 529 uint8_t old_flags = save_exception_flags(env); 530 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 531 merge_exception_flags(env, old_flags); 532 } 533 534 void helper_fdiv_ST0_FT0(CPUX86State *env) 535 { 536 ST0 = helper_fdiv(env, ST0, FT0); 537 } 538 539 void helper_fdivr_ST0_FT0(CPUX86State *env) 540 { 541 ST0 = helper_fdiv(env, FT0, ST0); 542 } 543 544 /* fp operations between STN and ST0 */ 545 546 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 547 { 548 uint8_t old_flags = save_exception_flags(env); 549 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 550 merge_exception_flags(env, old_flags); 551 } 552 553 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 554 { 555 uint8_t old_flags = save_exception_flags(env); 556 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 557 merge_exception_flags(env, old_flags); 558 } 559 560 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 561 { 562 uint8_t old_flags = save_exception_flags(env); 563 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 564 merge_exception_flags(env, old_flags); 565 } 566 567 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 568 { 569 uint8_t old_flags = save_exception_flags(env); 570 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 571 merge_exception_flags(env, old_flags); 572 } 573 574 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 575 { 576 floatx80 *p; 577 578 p = &ST(st_index); 579 *p = helper_fdiv(env, *p, ST0); 580 } 581 582 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 583 { 584 floatx80 *p; 585 586 p = &ST(st_index); 587 *p = helper_fdiv(env, ST0, *p); 588 } 589 590 /* misc FPU operations */ 591 void helper_fchs_ST0(CPUX86State *env) 592 { 593 ST0 = floatx80_chs(ST0); 594 } 595 596 void helper_fabs_ST0(CPUX86State *env) 597 { 598 ST0 = floatx80_abs(ST0); 599 } 600 601 void helper_fld1_ST0(CPUX86State *env) 602 { 603 ST0 = floatx80_one; 604 } 605 606 void helper_fldl2t_ST0(CPUX86State *env) 607 { 608 switch (env->fpuc & FPU_RC_MASK) { 609 case FPU_RC_UP: 610 ST0 = floatx80_l2t_u; 611 break; 612 default: 613 ST0 = floatx80_l2t; 614 break; 615 } 616 } 617 618 void helper_fldl2e_ST0(CPUX86State *env) 619 { 620 switch (env->fpuc & FPU_RC_MASK) { 621 case FPU_RC_DOWN: 622 case FPU_RC_CHOP: 623 ST0 = floatx80_l2e_d; 624 break; 625 default: 626 ST0 = floatx80_l2e; 627 break; 628 } 629 } 630 631 void helper_fldpi_ST0(CPUX86State *env) 632 { 633 switch (env->fpuc & FPU_RC_MASK) { 634 case FPU_RC_DOWN: 635 case FPU_RC_CHOP: 636 ST0 = floatx80_pi_d; 637 break; 638 default: 639 ST0 = floatx80_pi; 640 break; 641 } 642 } 643 644 void helper_fldlg2_ST0(CPUX86State *env) 645 { 646 switch (env->fpuc & FPU_RC_MASK) { 647 case FPU_RC_DOWN: 648 case FPU_RC_CHOP: 649 ST0 = floatx80_lg2_d; 650 break; 651 default: 652 ST0 = floatx80_lg2; 653 break; 654 } 655 } 656 657 void helper_fldln2_ST0(CPUX86State *env) 658 { 659 switch (env->fpuc & FPU_RC_MASK) { 660 case FPU_RC_DOWN: 661 case FPU_RC_CHOP: 662 ST0 = floatx80_ln2_d; 663 break; 664 default: 665 ST0 = floatx80_ln2; 666 break; 667 } 668 } 669 670 void helper_fldz_ST0(CPUX86State *env) 671 { 672 ST0 = floatx80_zero; 673 } 674 675 void helper_fldz_FT0(CPUX86State *env) 676 { 677 FT0 = floatx80_zero; 678 } 679 680 uint32_t helper_fnstsw(CPUX86State *env) 681 { 682 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 683 } 684 685 uint32_t helper_fnstcw(CPUX86State *env) 686 { 687 return env->fpuc; 688 } 689 690 static void set_x86_rounding_mode(unsigned mode, float_status *status) 691 { 692 static FloatRoundMode x86_round_mode[4] = { 693 float_round_nearest_even, 694 float_round_down, 695 float_round_up, 696 float_round_to_zero 697 }; 698 assert(mode < ARRAY_SIZE(x86_round_mode)); 699 set_float_rounding_mode(x86_round_mode[mode], status); 700 } 701 702 void update_fp_status(CPUX86State *env) 703 { 704 int rnd_mode; 705 FloatX80RoundPrec rnd_prec; 706 707 /* set rounding mode */ 708 rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; 709 set_x86_rounding_mode(rnd_mode, &env->fp_status); 710 711 switch ((env->fpuc >> 8) & 3) { 712 case 0: 713 rnd_prec = floatx80_precision_s; 714 break; 715 case 2: 716 rnd_prec = floatx80_precision_d; 717 break; 718 case 3: 719 default: 720 rnd_prec = floatx80_precision_x; 721 break; 722 } 723 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 724 } 725 726 void helper_fldcw(CPUX86State *env, uint32_t val) 727 { 728 cpu_set_fpuc(env, val); 729 } 730 731 void helper_fclex(CPUX86State *env) 732 { 733 env->fpus &= 0x7f00; 734 } 735 736 void helper_fwait(CPUX86State *env) 737 { 738 if (env->fpus & FPUS_SE) { 739 fpu_raise_exception(env, GETPC()); 740 } 741 } 742 743 static void do_fninit(CPUX86State *env) 744 { 745 env->fpus = 0; 746 env->fpstt = 0; 747 env->fpcs = 0; 748 env->fpds = 0; 749 env->fpip = 0; 750 env->fpdp = 0; 751 cpu_set_fpuc(env, 0x37f); 752 env->fptags[0] = 1; 753 env->fptags[1] = 1; 754 env->fptags[2] = 1; 755 env->fptags[3] = 1; 756 env->fptags[4] = 1; 757 env->fptags[5] = 1; 758 env->fptags[6] = 1; 759 env->fptags[7] = 1; 760 } 761 762 void helper_fninit(CPUX86State *env) 763 { 764 do_fninit(env); 765 } 766 767 /* BCD ops */ 768 769 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 770 { 771 floatx80 tmp; 772 uint64_t val; 773 unsigned int v; 774 int i; 775 776 val = 0; 777 for (i = 8; i >= 0; i--) { 778 v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 779 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); 780 } 781 tmp = int64_to_floatx80(val, &env->fp_status); 782 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 783 tmp = floatx80_chs(tmp); 784 } 785 fpush(env); 786 ST0 = tmp; 787 } 788 789 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 790 { 791 uint8_t old_flags = save_exception_flags(env); 792 int v; 793 target_ulong mem_ref, mem_end; 794 int64_t val; 795 CPU_LDoubleU temp; 796 797 temp.d = ST0; 798 799 val = floatx80_to_int64(ST0, &env->fp_status); 800 mem_ref = ptr; 801 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 802 set_float_exception_flags(float_flag_invalid, &env->fp_status); 803 while (mem_ref < ptr + 7) { 804 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 805 } 806 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 807 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 808 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 809 merge_exception_flags(env, old_flags); 810 return; 811 } 812 mem_end = mem_ref + 9; 813 if (SIGND(temp)) { 814 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 815 val = -val; 816 } else { 817 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 818 } 819 while (mem_ref < mem_end) { 820 if (val == 0) { 821 break; 822 } 823 v = val % 100; 824 val = val / 100; 825 v = ((v / 10) << 4) | (v % 10); 826 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 827 } 828 while (mem_ref < mem_end) { 829 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 830 } 831 merge_exception_flags(env, old_flags); 832 } 833 834 /* 128-bit significand of log(2). */ 835 #define ln2_sig_high 0xb17217f7d1cf79abULL 836 #define ln2_sig_low 0xc9e3b39803f2f6afULL 837 838 /* 839 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 840 * the interval [-1/64, 1/64]. 841 */ 842 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 843 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 844 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 845 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 846 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 847 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 848 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 849 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 850 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 851 852 struct f2xm1_data { 853 /* 854 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 855 * are very close to exact floatx80 values. 856 */ 857 floatx80 t; 858 /* The value of 2^t. */ 859 floatx80 exp2; 860 /* The value of 2^t - 1. */ 861 floatx80 exp2m1; 862 }; 863 864 static const struct f2xm1_data f2xm1_table[65] = { 865 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 866 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 867 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 868 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 869 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 870 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 871 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 872 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 873 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 874 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 875 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 876 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 877 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 878 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 879 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 880 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 881 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 882 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) }, 883 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 884 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 885 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 886 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 887 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 888 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 889 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 890 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 891 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 892 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 893 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 894 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 895 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 896 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 897 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 898 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 899 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 900 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 901 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 902 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 903 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 904 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 905 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 906 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 907 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 908 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 909 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 910 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 911 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 912 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 913 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 914 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 915 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 916 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 917 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 918 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 919 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 920 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 921 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 922 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 923 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 924 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 925 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 926 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 927 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 928 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 929 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 930 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 931 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 932 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 933 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 934 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 935 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 936 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 937 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 938 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 939 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 940 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 941 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 942 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 943 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 944 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 945 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 946 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 947 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 948 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 949 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 950 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 951 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 952 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 953 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 954 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 955 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 956 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 957 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 958 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 959 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 960 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 961 { floatx80_zero_init, 962 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 963 floatx80_zero_init }, 964 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 965 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 966 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 967 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 968 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 969 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 970 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 971 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 972 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 973 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 974 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 975 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 976 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 977 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 978 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 979 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 980 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 981 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 982 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 983 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 984 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 985 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 986 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 987 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 988 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 989 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL), 990 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 991 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 992 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 993 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 994 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 995 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 996 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 997 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 998 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 999 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1000 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1001 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1002 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1003 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1004 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1005 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1006 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1007 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1008 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1009 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1010 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1011 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1012 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1013 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1014 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1015 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1016 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1017 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1018 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1019 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1020 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1021 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1022 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1023 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1024 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1025 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1026 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1027 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1028 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1029 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1030 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1031 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1032 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1033 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1034 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1035 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1036 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1037 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1038 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1039 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1040 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1041 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1042 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1043 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1044 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1045 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1046 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1047 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1048 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1049 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1050 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1051 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1052 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1053 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1054 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1055 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1056 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1057 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1058 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1059 make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1060 }; 1061 1062 void helper_f2xm1(CPUX86State *env) 1063 { 1064 uint8_t old_flags = save_exception_flags(env); 1065 uint64_t sig = extractFloatx80Frac(ST0); 1066 int32_t exp = extractFloatx80Exp(ST0); 1067 bool sign = extractFloatx80Sign(ST0); 1068 1069 if (floatx80_invalid_encoding(ST0)) { 1070 float_raise(float_flag_invalid, &env->fp_status); 1071 ST0 = floatx80_default_nan(&env->fp_status); 1072 } else if (floatx80_is_any_nan(ST0)) { 1073 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1074 float_raise(float_flag_invalid, &env->fp_status); 1075 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1076 } 1077 } else if (exp > 0x3fff || 1078 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1079 /* Out of range for the instruction, treat as invalid. */ 1080 float_raise(float_flag_invalid, &env->fp_status); 1081 ST0 = floatx80_default_nan(&env->fp_status); 1082 } else if (exp == 0x3fff) { 1083 /* Argument 1 or -1, exact result 1 or -0.5. */ 1084 if (sign) { 1085 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1086 } 1087 } else if (exp < 0x3fb0) { 1088 if (!floatx80_is_zero(ST0)) { 1089 /* 1090 * Multiplying the argument by an extra-precision version 1091 * of log(2) is sufficiently precise. Zero arguments are 1092 * returned unchanged. 1093 */ 1094 uint64_t sig0, sig1, sig2; 1095 if (exp == 0) { 1096 normalizeFloatx80Subnormal(sig, &exp, &sig); 1097 } 1098 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1099 &sig2); 1100 /* This result is inexact. */ 1101 sig1 |= 1; 1102 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1103 sign, exp, sig0, sig1, 1104 &env->fp_status); 1105 } 1106 } else { 1107 floatx80 tmp, y, accum; 1108 bool asign, bsign; 1109 int32_t n, aexp, bexp; 1110 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1111 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1112 FloatX80RoundPrec save_prec = 1113 env->fp_status.floatx80_rounding_precision; 1114 env->fp_status.float_rounding_mode = float_round_nearest_even; 1115 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1116 1117 /* Find the nearest multiple of 1/32 to the argument. */ 1118 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1119 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1120 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1121 1122 if (floatx80_is_zero(y)) { 1123 /* 1124 * Use the value of 2^t - 1 from the table, to avoid 1125 * needing to special-case zero as a result of 1126 * multiplication below. 1127 */ 1128 ST0 = f2xm1_table[n].t; 1129 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1130 env->fp_status.float_rounding_mode = save_mode; 1131 } else { 1132 /* 1133 * Compute the lower parts of a polynomial expansion for 1134 * (2^y - 1) / y. 1135 */ 1136 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1137 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1138 accum = floatx80_mul(accum, y, &env->fp_status); 1139 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1140 accum = floatx80_mul(accum, y, &env->fp_status); 1141 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1142 accum = floatx80_mul(accum, y, &env->fp_status); 1143 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1144 accum = floatx80_mul(accum, y, &env->fp_status); 1145 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1146 accum = floatx80_mul(accum, y, &env->fp_status); 1147 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1148 accum = floatx80_mul(accum, y, &env->fp_status); 1149 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1150 1151 /* 1152 * The full polynomial expansion is f2xm1_coeff_0 + accum 1153 * (where accum has much lower magnitude, and so, in 1154 * particular, carry out of the addition is not possible). 1155 * (This expansion is only accurate to about 70 bits, not 1156 * 128 bits.) 1157 */ 1158 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1159 asign = extractFloatx80Sign(f2xm1_coeff_0); 1160 shift128RightJamming(extractFloatx80Frac(accum), 0, 1161 aexp - extractFloatx80Exp(accum), 1162 &asig0, &asig1); 1163 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1164 bsig1 = 0; 1165 if (asign == extractFloatx80Sign(accum)) { 1166 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1167 } else { 1168 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1169 } 1170 /* And thus compute an approximation to 2^y - 1. */ 1171 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1172 &asig0, &asig1, &asig2); 1173 aexp += extractFloatx80Exp(y) - 0x3ffe; 1174 asign ^= extractFloatx80Sign(y); 1175 if (n != 32) { 1176 /* 1177 * Multiply this by the precomputed value of 2^t and 1178 * add that of 2^t - 1. 1179 */ 1180 mul128By64To192(asig0, asig1, 1181 extractFloatx80Frac(f2xm1_table[n].exp2), 1182 &asig0, &asig1, &asig2); 1183 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1184 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1185 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1186 bsig1 = 0; 1187 if (bexp < aexp) { 1188 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1189 &bsig0, &bsig1); 1190 } else if (aexp < bexp) { 1191 shift128RightJamming(asig0, asig1, bexp - aexp, 1192 &asig0, &asig1); 1193 aexp = bexp; 1194 } 1195 /* The sign of 2^t - 1 is always that of the result. */ 1196 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1197 if (asign == bsign) { 1198 /* Avoid possible carry out of the addition. */ 1199 shift128RightJamming(asig0, asig1, 1, 1200 &asig0, &asig1); 1201 shift128RightJamming(bsig0, bsig1, 1, 1202 &bsig0, &bsig1); 1203 ++aexp; 1204 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1205 } else { 1206 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1207 asign = bsign; 1208 } 1209 } 1210 env->fp_status.float_rounding_mode = save_mode; 1211 /* This result is inexact. */ 1212 asig1 |= 1; 1213 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1214 asign, aexp, asig0, asig1, 1215 &env->fp_status); 1216 } 1217 1218 env->fp_status.floatx80_rounding_precision = save_prec; 1219 } 1220 merge_exception_flags(env, old_flags); 1221 } 1222 1223 void helper_fptan(CPUX86State *env) 1224 { 1225 double fptemp = floatx80_to_double(env, ST0); 1226 1227 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1228 env->fpus |= 0x400; 1229 } else { 1230 fptemp = tan(fptemp); 1231 ST0 = double_to_floatx80(env, fptemp); 1232 fpush(env); 1233 ST0 = floatx80_one; 1234 env->fpus &= ~0x400; /* C2 <-- 0 */ 1235 /* the above code is for |arg| < 2**52 only */ 1236 } 1237 } 1238 1239 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. */ 1240 #define pi_4_exp 0x3ffe 1241 #define pi_4_sig_high 0xc90fdaa22168c234ULL 1242 #define pi_4_sig_low 0xc4c6628b80dc1cd1ULL 1243 #define pi_2_exp 0x3fff 1244 #define pi_2_sig_high 0xc90fdaa22168c234ULL 1245 #define pi_2_sig_low 0xc4c6628b80dc1cd1ULL 1246 #define pi_34_exp 0x4000 1247 #define pi_34_sig_high 0x96cbe3f9990e91a7ULL 1248 #define pi_34_sig_low 0x9394c9e8a0a5159dULL 1249 #define pi_exp 0x4000 1250 #define pi_sig_high 0xc90fdaa22168c234ULL 1251 #define pi_sig_low 0xc4c6628b80dc1cd1ULL 1252 1253 /* 1254 * Polynomial coefficients for an approximation to atan(x), with only 1255 * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike 1256 * for some other approximations, no low part is needed for the first 1257 * coefficient here to achieve a sufficiently accurate result, because 1258 * the coefficient in this minimax approximation is very close to 1259 * exactly 1.) 1260 */ 1261 #define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) 1262 #define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) 1263 #define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) 1264 #define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) 1265 #define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) 1266 #define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) 1267 #define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) 1268 1269 struct fpatan_data { 1270 /* High and low parts of atan(x). */ 1271 floatx80 atan_high, atan_low; 1272 }; 1273 1274 static const struct fpatan_data fpatan_table[9] = { 1275 { floatx80_zero_init, 1276 floatx80_zero_init }, 1277 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), 1278 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, 1279 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), 1280 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, 1281 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), 1282 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, 1283 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), 1284 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, 1285 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), 1286 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, 1287 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), 1288 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, 1289 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), 1290 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, 1291 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), 1292 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, 1293 }; 1294 1295 void helper_fpatan(CPUX86State *env) 1296 { 1297 uint8_t old_flags = save_exception_flags(env); 1298 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1299 int32_t arg0_exp = extractFloatx80Exp(ST0); 1300 bool arg0_sign = extractFloatx80Sign(ST0); 1301 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1302 int32_t arg1_exp = extractFloatx80Exp(ST1); 1303 bool arg1_sign = extractFloatx80Sign(ST1); 1304 1305 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1306 float_raise(float_flag_invalid, &env->fp_status); 1307 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1308 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1309 float_raise(float_flag_invalid, &env->fp_status); 1310 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1311 } else if (floatx80_invalid_encoding(ST0) || 1312 floatx80_invalid_encoding(ST1)) { 1313 float_raise(float_flag_invalid, &env->fp_status); 1314 ST1 = floatx80_default_nan(&env->fp_status); 1315 } else if (floatx80_is_any_nan(ST0)) { 1316 ST1 = ST0; 1317 } else if (floatx80_is_any_nan(ST1)) { 1318 /* Pass this NaN through. */ 1319 } else if (floatx80_is_zero(ST1) && !arg0_sign) { 1320 /* Pass this zero through. */ 1321 } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || 1322 arg0_exp - arg1_exp >= 80) && 1323 !arg0_sign) { 1324 /* 1325 * Dividing ST1 by ST0 gives the correct result up to 1326 * rounding, and avoids spurious underflow exceptions that 1327 * might result from passing some small values through the 1328 * polynomial approximation, but if a finite nonzero result of 1329 * division is exact, the result of fpatan is still inexact 1330 * (and underflowing where appropriate). 1331 */ 1332 FloatX80RoundPrec save_prec = 1333 env->fp_status.floatx80_rounding_precision; 1334 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1335 ST1 = floatx80_div(ST1, ST0, &env->fp_status); 1336 env->fp_status.floatx80_rounding_precision = save_prec; 1337 if (!floatx80_is_zero(ST1) && 1338 !(get_float_exception_flags(&env->fp_status) & 1339 float_flag_inexact)) { 1340 /* 1341 * The mathematical result is very slightly closer to zero 1342 * than this exact result. Round a value with the 1343 * significand adjusted accordingly to get the correct 1344 * exceptions, and possibly an adjusted result depending 1345 * on the rounding mode. 1346 */ 1347 uint64_t sig = extractFloatx80Frac(ST1); 1348 int32_t exp = extractFloatx80Exp(ST1); 1349 bool sign = extractFloatx80Sign(ST1); 1350 if (exp == 0) { 1351 normalizeFloatx80Subnormal(sig, &exp, &sig); 1352 } 1353 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1354 sign, exp, sig - 1, 1355 -1, &env->fp_status); 1356 } 1357 } else { 1358 /* The result is inexact. */ 1359 bool rsign = arg1_sign; 1360 int32_t rexp; 1361 uint64_t rsig0, rsig1; 1362 if (floatx80_is_zero(ST1)) { 1363 /* 1364 * ST0 is negative. The result is pi with the sign of 1365 * ST1. 1366 */ 1367 rexp = pi_exp; 1368 rsig0 = pi_sig_high; 1369 rsig1 = pi_sig_low; 1370 } else if (floatx80_is_infinity(ST1)) { 1371 if (floatx80_is_infinity(ST0)) { 1372 if (arg0_sign) { 1373 rexp = pi_34_exp; 1374 rsig0 = pi_34_sig_high; 1375 rsig1 = pi_34_sig_low; 1376 } else { 1377 rexp = pi_4_exp; 1378 rsig0 = pi_4_sig_high; 1379 rsig1 = pi_4_sig_low; 1380 } 1381 } else { 1382 rexp = pi_2_exp; 1383 rsig0 = pi_2_sig_high; 1384 rsig1 = pi_2_sig_low; 1385 } 1386 } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { 1387 rexp = pi_2_exp; 1388 rsig0 = pi_2_sig_high; 1389 rsig1 = pi_2_sig_low; 1390 } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { 1391 /* ST0 is negative. */ 1392 rexp = pi_exp; 1393 rsig0 = pi_sig_high; 1394 rsig1 = pi_sig_low; 1395 } else { 1396 /* 1397 * ST0 and ST1 are finite, nonzero and with exponents not 1398 * too far apart. 1399 */ 1400 int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; 1401 int32_t azexp, axexp; 1402 bool adj_sub, ysign, zsign; 1403 uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; 1404 uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; 1405 uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; 1406 uint64_t azsig0, azsig1; 1407 uint64_t azsig2, azsig3, axsig0, axsig1; 1408 floatx80 x8; 1409 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1410 FloatX80RoundPrec save_prec = 1411 env->fp_status.floatx80_rounding_precision; 1412 env->fp_status.float_rounding_mode = float_round_nearest_even; 1413 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1414 1415 if (arg0_exp == 0) { 1416 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 1417 } 1418 if (arg1_exp == 0) { 1419 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 1420 } 1421 if (arg0_exp > arg1_exp || 1422 (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { 1423 /* Work with abs(ST1) / abs(ST0). */ 1424 num_exp = arg1_exp; 1425 num_sig = arg1_sig; 1426 den_exp = arg0_exp; 1427 den_sig = arg0_sig; 1428 if (arg0_sign) { 1429 /* The result is subtracted from pi. */ 1430 adj_exp = pi_exp; 1431 adj_sig0 = pi_sig_high; 1432 adj_sig1 = pi_sig_low; 1433 adj_sub = true; 1434 } else { 1435 /* The result is used as-is. */ 1436 adj_exp = 0; 1437 adj_sig0 = 0; 1438 adj_sig1 = 0; 1439 adj_sub = false; 1440 } 1441 } else { 1442 /* Work with abs(ST0) / abs(ST1). */ 1443 num_exp = arg0_exp; 1444 num_sig = arg0_sig; 1445 den_exp = arg1_exp; 1446 den_sig = arg1_sig; 1447 /* The result is added to or subtracted from pi/2. */ 1448 adj_exp = pi_2_exp; 1449 adj_sig0 = pi_2_sig_high; 1450 adj_sig1 = pi_2_sig_low; 1451 adj_sub = !arg0_sign; 1452 } 1453 1454 /* 1455 * Compute x = num/den, where 0 < x <= 1 and x is not too 1456 * small. 1457 */ 1458 xexp = num_exp - den_exp + 0x3ffe; 1459 remsig0 = num_sig; 1460 remsig1 = 0; 1461 if (den_sig <= remsig0) { 1462 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1463 ++xexp; 1464 } 1465 xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); 1466 mul64To128(den_sig, xsig0, &msig0, &msig1); 1467 sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); 1468 while ((int64_t) remsig0 < 0) { 1469 --xsig0; 1470 add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); 1471 } 1472 xsig1 = estimateDiv128To64(remsig1, 0, den_sig); 1473 /* 1474 * No need to correct any estimation error in xsig1; even 1475 * with such error, it is accurate enough. 1476 */ 1477 1478 /* 1479 * Split x as x = t + y, where t = n/8 is the nearest 1480 * multiple of 1/8 to x. 1481 */ 1482 x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1483 false, xexp + 3, xsig0, 1484 xsig1, &env->fp_status); 1485 n = floatx80_to_int32(x8, &env->fp_status); 1486 if (n == 0) { 1487 ysign = false; 1488 yexp = xexp; 1489 ysig0 = xsig0; 1490 ysig1 = xsig1; 1491 texp = 0; 1492 tsig = 0; 1493 } else { 1494 int shift = clz32(n) + 32; 1495 texp = 0x403b - shift; 1496 tsig = n; 1497 tsig <<= shift; 1498 if (texp == xexp) { 1499 sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); 1500 if ((int64_t) ysig0 >= 0) { 1501 ysign = false; 1502 if (ysig0 == 0) { 1503 if (ysig1 == 0) { 1504 yexp = 0; 1505 } else { 1506 shift = clz64(ysig1) + 64; 1507 yexp = xexp - shift; 1508 shift128Left(ysig0, ysig1, shift, 1509 &ysig0, &ysig1); 1510 } 1511 } else { 1512 shift = clz64(ysig0); 1513 yexp = xexp - shift; 1514 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1515 } 1516 } else { 1517 ysign = true; 1518 sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); 1519 if (ysig0 == 0) { 1520 shift = clz64(ysig1) + 64; 1521 } else { 1522 shift = clz64(ysig0); 1523 } 1524 yexp = xexp - shift; 1525 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1526 } 1527 } else { 1528 /* 1529 * t's exponent must be greater than x's because t 1530 * is positive and the nearest multiple of 1/8 to 1531 * x, and if x has a greater exponent, the power 1532 * of 2 with that exponent is also a multiple of 1533 * 1/8. 1534 */ 1535 uint64_t usig0, usig1; 1536 shift128RightJamming(xsig0, xsig1, texp - xexp, 1537 &usig0, &usig1); 1538 ysign = true; 1539 sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); 1540 if (ysig0 == 0) { 1541 shift = clz64(ysig1) + 64; 1542 } else { 1543 shift = clz64(ysig0); 1544 } 1545 yexp = texp - shift; 1546 shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); 1547 } 1548 } 1549 1550 /* 1551 * Compute z = y/(1+tx), so arctan(x) = arctan(t) + 1552 * arctan(z). 1553 */ 1554 zsign = ysign; 1555 if (texp == 0 || yexp == 0) { 1556 zexp = yexp; 1557 zsig0 = ysig0; 1558 zsig1 = ysig1; 1559 } else { 1560 /* 1561 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 1562 */ 1563 int32_t dexp = texp + xexp - 0x3ffe; 1564 uint64_t dsig0, dsig1, dsig2; 1565 mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); 1566 /* 1567 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 1568 * bit). Add 1 to produce the denominator 1+tx. 1569 */ 1570 shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, 1571 &dsig0, &dsig1); 1572 dsig0 |= 0x8000000000000000ULL; 1573 zexp = yexp - 1; 1574 remsig0 = ysig0; 1575 remsig1 = ysig1; 1576 remsig2 = 0; 1577 if (dsig0 <= remsig0) { 1578 shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); 1579 ++zexp; 1580 } 1581 zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); 1582 mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); 1583 sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, 1584 &remsig0, &remsig1, &remsig2); 1585 while ((int64_t) remsig0 < 0) { 1586 --zsig0; 1587 add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, 1588 &remsig0, &remsig1, &remsig2); 1589 } 1590 zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); 1591 /* No need to correct any estimation error in zsig1. */ 1592 } 1593 1594 if (zexp == 0) { 1595 azexp = 0; 1596 azsig0 = 0; 1597 azsig1 = 0; 1598 } else { 1599 floatx80 z2, accum; 1600 uint64_t z2sig0, z2sig1, z2sig2, z2sig3; 1601 /* Compute z^2. */ 1602 mul128To256(zsig0, zsig1, zsig0, zsig1, 1603 &z2sig0, &z2sig1, &z2sig2, &z2sig3); 1604 z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1605 zexp + zexp - 0x3ffe, 1606 z2sig0, z2sig1, 1607 &env->fp_status); 1608 1609 /* Compute the lower parts of the polynomial expansion. */ 1610 accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); 1611 accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); 1612 accum = floatx80_mul(accum, z2, &env->fp_status); 1613 accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); 1614 accum = floatx80_mul(accum, z2, &env->fp_status); 1615 accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); 1616 accum = floatx80_mul(accum, z2, &env->fp_status); 1617 accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); 1618 accum = floatx80_mul(accum, z2, &env->fp_status); 1619 accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); 1620 accum = floatx80_mul(accum, z2, &env->fp_status); 1621 1622 /* 1623 * The full polynomial expansion is z*(fpatan_coeff_0 + accum). 1624 * fpatan_coeff_0 is 1, and accum is negative and much smaller. 1625 */ 1626 aexp = extractFloatx80Exp(fpatan_coeff_0); 1627 shift128RightJamming(extractFloatx80Frac(accum), 0, 1628 aexp - extractFloatx80Exp(accum), 1629 &asig0, &asig1); 1630 sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, 1631 &asig0, &asig1); 1632 /* Multiply by z to compute arctan(z). */ 1633 azexp = aexp + zexp - 0x3ffe; 1634 mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, 1635 &azsig2, &azsig3); 1636 } 1637 1638 /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ 1639 if (texp == 0) { 1640 /* z is positive. */ 1641 axexp = azexp; 1642 axsig0 = azsig0; 1643 axsig1 = azsig1; 1644 } else { 1645 bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); 1646 int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); 1647 uint64_t low_sig0 = 1648 extractFloatx80Frac(fpatan_table[n].atan_low); 1649 uint64_t low_sig1 = 0; 1650 axexp = extractFloatx80Exp(fpatan_table[n].atan_high); 1651 axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); 1652 axsig1 = 0; 1653 shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, 1654 &low_sig0, &low_sig1); 1655 if (low_sign) { 1656 sub128(axsig0, axsig1, low_sig0, low_sig1, 1657 &axsig0, &axsig1); 1658 } else { 1659 add128(axsig0, axsig1, low_sig0, low_sig1, 1660 &axsig0, &axsig1); 1661 } 1662 if (azexp >= axexp) { 1663 shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, 1664 &axsig0, &axsig1); 1665 axexp = azexp + 1; 1666 shift128RightJamming(azsig0, azsig1, 1, 1667 &azsig0, &azsig1); 1668 } else { 1669 shift128RightJamming(axsig0, axsig1, 1, 1670 &axsig0, &axsig1); 1671 shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, 1672 &azsig0, &azsig1); 1673 ++axexp; 1674 } 1675 if (zsign) { 1676 sub128(axsig0, axsig1, azsig0, azsig1, 1677 &axsig0, &axsig1); 1678 } else { 1679 add128(axsig0, axsig1, azsig0, azsig1, 1680 &axsig0, &axsig1); 1681 } 1682 } 1683 1684 if (adj_exp == 0) { 1685 rexp = axexp; 1686 rsig0 = axsig0; 1687 rsig1 = axsig1; 1688 } else { 1689 /* 1690 * Add or subtract arctan(x) (exponent axexp, 1691 * significand axsig0 and axsig1, positive, not 1692 * necessarily normalized) to the number given by 1693 * adj_exp, adj_sig0 and adj_sig1, according to 1694 * adj_sub. 1695 */ 1696 if (adj_exp >= axexp) { 1697 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1698 &axsig0, &axsig1); 1699 rexp = adj_exp + 1; 1700 shift128RightJamming(adj_sig0, adj_sig1, 1, 1701 &adj_sig0, &adj_sig1); 1702 } else { 1703 shift128RightJamming(axsig0, axsig1, 1, 1704 &axsig0, &axsig1); 1705 shift128RightJamming(adj_sig0, adj_sig1, 1706 axexp - adj_exp + 1, 1707 &adj_sig0, &adj_sig1); 1708 rexp = axexp + 1; 1709 } 1710 if (adj_sub) { 1711 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1712 &rsig0, &rsig1); 1713 } else { 1714 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1715 &rsig0, &rsig1); 1716 } 1717 } 1718 1719 env->fp_status.float_rounding_mode = save_mode; 1720 env->fp_status.floatx80_rounding_precision = save_prec; 1721 } 1722 /* This result is inexact. */ 1723 rsig1 |= 1; 1724 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1725 rsig0, rsig1, &env->fp_status); 1726 } 1727 1728 fpop(env); 1729 merge_exception_flags(env, old_flags); 1730 } 1731 1732 void helper_fxtract(CPUX86State *env) 1733 { 1734 uint8_t old_flags = save_exception_flags(env); 1735 CPU_LDoubleU temp; 1736 1737 temp.d = ST0; 1738 1739 if (floatx80_is_zero(ST0)) { 1740 /* Easy way to generate -inf and raising division by 0 exception */ 1741 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1742 &env->fp_status); 1743 fpush(env); 1744 ST0 = temp.d; 1745 } else if (floatx80_invalid_encoding(ST0)) { 1746 float_raise(float_flag_invalid, &env->fp_status); 1747 ST0 = floatx80_default_nan(&env->fp_status); 1748 fpush(env); 1749 ST0 = ST1; 1750 } else if (floatx80_is_any_nan(ST0)) { 1751 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1752 float_raise(float_flag_invalid, &env->fp_status); 1753 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1754 } 1755 fpush(env); 1756 ST0 = ST1; 1757 } else if (floatx80_is_infinity(ST0)) { 1758 fpush(env); 1759 ST0 = ST1; 1760 ST1 = floatx80_infinity; 1761 } else { 1762 int expdif; 1763 1764 if (EXPD(temp) == 0) { 1765 int shift = clz64(temp.l.lower); 1766 temp.l.lower <<= shift; 1767 expdif = 1 - EXPBIAS - shift; 1768 float_raise(float_flag_input_denormal, &env->fp_status); 1769 } else { 1770 expdif = EXPD(temp) - EXPBIAS; 1771 } 1772 /* DP exponent bias */ 1773 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1774 fpush(env); 1775 BIASEXPONENT(temp); 1776 ST0 = temp.d; 1777 } 1778 merge_exception_flags(env, old_flags); 1779 } 1780 1781 static void helper_fprem_common(CPUX86State *env, bool mod) 1782 { 1783 uint8_t old_flags = save_exception_flags(env); 1784 uint64_t quotient; 1785 CPU_LDoubleU temp0, temp1; 1786 int exp0, exp1, expdiff; 1787 1788 temp0.d = ST0; 1789 temp1.d = ST1; 1790 exp0 = EXPD(temp0); 1791 exp1 = EXPD(temp1); 1792 1793 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1794 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1795 exp0 == 0x7fff || exp1 == 0x7fff || 1796 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1797 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1798 } else { 1799 if (exp0 == 0) { 1800 exp0 = 1 - clz64(temp0.l.lower); 1801 } 1802 if (exp1 == 0) { 1803 exp1 = 1 - clz64(temp1.l.lower); 1804 } 1805 expdiff = exp0 - exp1; 1806 if (expdiff < 64) { 1807 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1808 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1809 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1810 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1811 } else { 1812 /* 1813 * Partial remainder. This choice of how many bits to 1814 * process at once is specified in AMD instruction set 1815 * manuals, and empirically is followed by Intel 1816 * processors as well; it ensures that the final remainder 1817 * operation in a loop does produce the correct low three 1818 * bits of the quotient. AMD manuals specify that the 1819 * flags other than C2 are cleared, and empirically Intel 1820 * processors clear them as well. 1821 */ 1822 int n = 32 + (expdiff % 32); 1823 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1824 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1825 env->fpus |= 0x400; /* C2 <-- 1 */ 1826 } 1827 } 1828 merge_exception_flags(env, old_flags); 1829 } 1830 1831 void helper_fprem1(CPUX86State *env) 1832 { 1833 helper_fprem_common(env, false); 1834 } 1835 1836 void helper_fprem(CPUX86State *env) 1837 { 1838 helper_fprem_common(env, true); 1839 } 1840 1841 /* 128-bit significand of log2(e). */ 1842 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1843 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1844 1845 /* 1846 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1847 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1848 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1849 * interval [sqrt(2)/2, sqrt(2)]. 1850 */ 1851 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1852 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1853 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1854 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1855 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1856 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1857 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1858 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1859 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1860 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1861 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1862 1863 /* 1864 * Compute an approximation of log2(1+arg), where 1+arg is in the 1865 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1866 * function is called, rounding precision is set to 80 and the 1867 * round-to-nearest mode is in effect. arg must not be exactly zero, 1868 * and must not be so close to zero that underflow might occur. 1869 */ 1870 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1871 uint64_t *sig0, uint64_t *sig1) 1872 { 1873 uint64_t arg0_sig = extractFloatx80Frac(arg); 1874 int32_t arg0_exp = extractFloatx80Exp(arg); 1875 bool arg0_sign = extractFloatx80Sign(arg); 1876 bool asign; 1877 int32_t dexp, texp, aexp; 1878 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1879 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1880 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1881 floatx80 t2, accum; 1882 1883 /* 1884 * Compute an approximation of arg/(2+arg), with extra precision, 1885 * as the argument to a polynomial approximation. The extra 1886 * precision is only needed for the first term of the 1887 * approximation, with subsequent terms being significantly 1888 * smaller; the approximation only uses odd exponents, and the 1889 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 1890 */ 1891 if (arg0_sign) { 1892 dexp = 0x3fff; 1893 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1894 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1895 } else { 1896 dexp = 0x4000; 1897 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1898 dsig0 |= 0x8000000000000000ULL; 1899 } 1900 texp = arg0_exp - dexp + 0x3ffe; 1901 rsig0 = arg0_sig; 1902 rsig1 = 0; 1903 rsig2 = 0; 1904 if (dsig0 <= rsig0) { 1905 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1906 ++texp; 1907 } 1908 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1909 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1910 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1911 &rsig0, &rsig1, &rsig2); 1912 while ((int64_t) rsig0 < 0) { 1913 --tsig0; 1914 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1915 &rsig0, &rsig1, &rsig2); 1916 } 1917 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1918 /* 1919 * No need to correct any estimation error in tsig1; even with 1920 * such error, it is accurate enough. Now compute the square of 1921 * that approximation. 1922 */ 1923 mul128To256(tsig0, tsig1, tsig0, tsig1, 1924 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1925 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1926 texp + texp - 0x3ffe, 1927 t2sig0, t2sig1, &env->fp_status); 1928 1929 /* Compute the lower parts of the polynomial expansion. */ 1930 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1931 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1932 accum = floatx80_mul(accum, t2, &env->fp_status); 1933 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1934 accum = floatx80_mul(accum, t2, &env->fp_status); 1935 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1936 accum = floatx80_mul(accum, t2, &env->fp_status); 1937 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1938 accum = floatx80_mul(accum, t2, &env->fp_status); 1939 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1940 accum = floatx80_mul(accum, t2, &env->fp_status); 1941 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1942 accum = floatx80_mul(accum, t2, &env->fp_status); 1943 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1944 accum = floatx80_mul(accum, t2, &env->fp_status); 1945 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1946 accum = floatx80_mul(accum, t2, &env->fp_status); 1947 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 1948 1949 /* 1950 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1951 * accum has much lower magnitude, and so, in particular, carry 1952 * out of the addition is not possible), multiplied by t. (This 1953 * expansion is only accurate to about 70 bits, not 128 bits.) 1954 */ 1955 aexp = extractFloatx80Exp(fyl2x_coeff_0); 1956 asign = extractFloatx80Sign(fyl2x_coeff_0); 1957 shift128RightJamming(extractFloatx80Frac(accum), 0, 1958 aexp - extractFloatx80Exp(accum), 1959 &asig0, &asig1); 1960 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1961 bsig1 = 0; 1962 if (asign == extractFloatx80Sign(accum)) { 1963 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1964 } else { 1965 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1966 } 1967 /* Multiply by t to compute the required result. */ 1968 mul128To256(asig0, asig1, tsig0, tsig1, 1969 &asig0, &asig1, &asig2, &asig3); 1970 aexp += texp - 0x3ffe; 1971 *exp = aexp; 1972 *sig0 = asig0; 1973 *sig1 = asig1; 1974 } 1975 1976 void helper_fyl2xp1(CPUX86State *env) 1977 { 1978 uint8_t old_flags = save_exception_flags(env); 1979 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1980 int32_t arg0_exp = extractFloatx80Exp(ST0); 1981 bool arg0_sign = extractFloatx80Sign(ST0); 1982 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1983 int32_t arg1_exp = extractFloatx80Exp(ST1); 1984 bool arg1_sign = extractFloatx80Sign(ST1); 1985 1986 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1987 float_raise(float_flag_invalid, &env->fp_status); 1988 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 1989 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 1990 float_raise(float_flag_invalid, &env->fp_status); 1991 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 1992 } else if (floatx80_invalid_encoding(ST0) || 1993 floatx80_invalid_encoding(ST1)) { 1994 float_raise(float_flag_invalid, &env->fp_status); 1995 ST1 = floatx80_default_nan(&env->fp_status); 1996 } else if (floatx80_is_any_nan(ST0)) { 1997 ST1 = ST0; 1998 } else if (floatx80_is_any_nan(ST1)) { 1999 /* Pass this NaN through. */ 2000 } else if (arg0_exp > 0x3ffd || 2001 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2002 0x95f619980c4336f7ULL : 2003 0xd413cccfe7799211ULL))) { 2004 /* 2005 * Out of range for the instruction (ST0 must have absolute 2006 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2007 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2008 * to sqrt(2) - 1, which we allow here), treat as invalid. 2009 */ 2010 float_raise(float_flag_invalid, &env->fp_status); 2011 ST1 = floatx80_default_nan(&env->fp_status); 2012 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2013 arg1_exp == 0x7fff) { 2014 /* 2015 * One argument is zero, or multiplying by infinity; correct 2016 * result is exact and can be obtained by multiplying the 2017 * arguments. 2018 */ 2019 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2020 } else if (arg0_exp < 0x3fb0) { 2021 /* 2022 * Multiplying both arguments and an extra-precision version 2023 * of log2(e) is sufficiently precise. 2024 */ 2025 uint64_t sig0, sig1, sig2; 2026 int32_t exp; 2027 if (arg0_exp == 0) { 2028 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2029 } 2030 if (arg1_exp == 0) { 2031 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2032 } 2033 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2034 &sig0, &sig1, &sig2); 2035 exp = arg0_exp + 1; 2036 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2037 exp += arg1_exp - 0x3ffe; 2038 /* This result is inexact. */ 2039 sig1 |= 1; 2040 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2041 arg0_sign ^ arg1_sign, exp, 2042 sig0, sig1, &env->fp_status); 2043 } else { 2044 int32_t aexp; 2045 uint64_t asig0, asig1, asig2; 2046 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2047 FloatX80RoundPrec save_prec = 2048 env->fp_status.floatx80_rounding_precision; 2049 env->fp_status.float_rounding_mode = float_round_nearest_even; 2050 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2051 2052 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2053 /* 2054 * Multiply by the second argument to compute the required 2055 * result. 2056 */ 2057 if (arg1_exp == 0) { 2058 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2059 } 2060 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2061 aexp += arg1_exp - 0x3ffe; 2062 /* This result is inexact. */ 2063 asig1 |= 1; 2064 env->fp_status.float_rounding_mode = save_mode; 2065 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2066 arg0_sign ^ arg1_sign, aexp, 2067 asig0, asig1, &env->fp_status); 2068 env->fp_status.floatx80_rounding_precision = save_prec; 2069 } 2070 fpop(env); 2071 merge_exception_flags(env, old_flags); 2072 } 2073 2074 void helper_fyl2x(CPUX86State *env) 2075 { 2076 uint8_t old_flags = save_exception_flags(env); 2077 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2078 int32_t arg0_exp = extractFloatx80Exp(ST0); 2079 bool arg0_sign = extractFloatx80Sign(ST0); 2080 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2081 int32_t arg1_exp = extractFloatx80Exp(ST1); 2082 bool arg1_sign = extractFloatx80Sign(ST1); 2083 2084 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2085 float_raise(float_flag_invalid, &env->fp_status); 2086 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2087 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2088 float_raise(float_flag_invalid, &env->fp_status); 2089 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2090 } else if (floatx80_invalid_encoding(ST0) || 2091 floatx80_invalid_encoding(ST1)) { 2092 float_raise(float_flag_invalid, &env->fp_status); 2093 ST1 = floatx80_default_nan(&env->fp_status); 2094 } else if (floatx80_is_any_nan(ST0)) { 2095 ST1 = ST0; 2096 } else if (floatx80_is_any_nan(ST1)) { 2097 /* Pass this NaN through. */ 2098 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2099 float_raise(float_flag_invalid, &env->fp_status); 2100 ST1 = floatx80_default_nan(&env->fp_status); 2101 } else if (floatx80_is_infinity(ST1)) { 2102 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2103 &env->fp_status); 2104 switch (cmp) { 2105 case float_relation_less: 2106 ST1 = floatx80_chs(ST1); 2107 break; 2108 case float_relation_greater: 2109 /* Result is infinity of the same sign as ST1. */ 2110 break; 2111 default: 2112 float_raise(float_flag_invalid, &env->fp_status); 2113 ST1 = floatx80_default_nan(&env->fp_status); 2114 break; 2115 } 2116 } else if (floatx80_is_infinity(ST0)) { 2117 if (floatx80_is_zero(ST1)) { 2118 float_raise(float_flag_invalid, &env->fp_status); 2119 ST1 = floatx80_default_nan(&env->fp_status); 2120 } else if (arg1_sign) { 2121 ST1 = floatx80_chs(ST0); 2122 } else { 2123 ST1 = ST0; 2124 } 2125 } else if (floatx80_is_zero(ST0)) { 2126 if (floatx80_is_zero(ST1)) { 2127 float_raise(float_flag_invalid, &env->fp_status); 2128 ST1 = floatx80_default_nan(&env->fp_status); 2129 } else { 2130 /* Result is infinity with opposite sign to ST1. */ 2131 float_raise(float_flag_divbyzero, &env->fp_status); 2132 ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, 2133 0x8000000000000000ULL); 2134 } 2135 } else if (floatx80_is_zero(ST1)) { 2136 if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { 2137 ST1 = floatx80_chs(ST1); 2138 } 2139 /* Otherwise, ST1 is already the correct result. */ 2140 } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { 2141 if (arg1_sign) { 2142 ST1 = floatx80_chs(floatx80_zero); 2143 } else { 2144 ST1 = floatx80_zero; 2145 } 2146 } else { 2147 int32_t int_exp; 2148 floatx80 arg0_m1; 2149 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2150 FloatX80RoundPrec save_prec = 2151 env->fp_status.floatx80_rounding_precision; 2152 env->fp_status.float_rounding_mode = float_round_nearest_even; 2153 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2154 2155 if (arg0_exp == 0) { 2156 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2157 } 2158 if (arg1_exp == 0) { 2159 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2160 } 2161 int_exp = arg0_exp - 0x3fff; 2162 if (arg0_sig > 0xb504f333f9de6484ULL) { 2163 ++int_exp; 2164 } 2165 arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, 2166 &env->fp_status), 2167 floatx80_one, &env->fp_status); 2168 if (floatx80_is_zero(arg0_m1)) { 2169 /* Exact power of 2; multiply by ST1. */ 2170 env->fp_status.float_rounding_mode = save_mode; 2171 ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), 2172 ST1, &env->fp_status); 2173 } else { 2174 bool asign = extractFloatx80Sign(arg0_m1); 2175 int32_t aexp; 2176 uint64_t asig0, asig1, asig2; 2177 helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); 2178 if (int_exp != 0) { 2179 bool isign = (int_exp < 0); 2180 int32_t iexp; 2181 uint64_t isig; 2182 int shift; 2183 int_exp = isign ? -int_exp : int_exp; 2184 shift = clz32(int_exp) + 32; 2185 isig = int_exp; 2186 isig <<= shift; 2187 iexp = 0x403e - shift; 2188 shift128RightJamming(asig0, asig1, iexp - aexp, 2189 &asig0, &asig1); 2190 if (asign == isign) { 2191 add128(isig, 0, asig0, asig1, &asig0, &asig1); 2192 } else { 2193 sub128(isig, 0, asig0, asig1, &asig0, &asig1); 2194 } 2195 aexp = iexp; 2196 asign = isign; 2197 } 2198 /* 2199 * Multiply by the second argument to compute the required 2200 * result. 2201 */ 2202 if (arg1_exp == 0) { 2203 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2204 } 2205 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2206 aexp += arg1_exp - 0x3ffe; 2207 /* This result is inexact. */ 2208 asig1 |= 1; 2209 env->fp_status.float_rounding_mode = save_mode; 2210 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2211 asign ^ arg1_sign, aexp, 2212 asig0, asig1, &env->fp_status); 2213 } 2214 2215 env->fp_status.floatx80_rounding_precision = save_prec; 2216 } 2217 fpop(env); 2218 merge_exception_flags(env, old_flags); 2219 } 2220 2221 void helper_fsqrt(CPUX86State *env) 2222 { 2223 uint8_t old_flags = save_exception_flags(env); 2224 if (floatx80_is_neg(ST0)) { 2225 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2226 env->fpus |= 0x400; 2227 } 2228 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2229 merge_exception_flags(env, old_flags); 2230 } 2231 2232 void helper_fsincos(CPUX86State *env) 2233 { 2234 double fptemp = floatx80_to_double(env, ST0); 2235 2236 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2237 env->fpus |= 0x400; 2238 } else { 2239 ST0 = double_to_floatx80(env, sin(fptemp)); 2240 fpush(env); 2241 ST0 = double_to_floatx80(env, cos(fptemp)); 2242 env->fpus &= ~0x400; /* C2 <-- 0 */ 2243 /* the above code is for |arg| < 2**63 only */ 2244 } 2245 } 2246 2247 void helper_frndint(CPUX86State *env) 2248 { 2249 uint8_t old_flags = save_exception_flags(env); 2250 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2251 merge_exception_flags(env, old_flags); 2252 } 2253 2254 void helper_fscale(CPUX86State *env) 2255 { 2256 uint8_t old_flags = save_exception_flags(env); 2257 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2258 float_raise(float_flag_invalid, &env->fp_status); 2259 ST0 = floatx80_default_nan(&env->fp_status); 2260 } else if (floatx80_is_any_nan(ST1)) { 2261 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2262 float_raise(float_flag_invalid, &env->fp_status); 2263 } 2264 ST0 = ST1; 2265 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2266 float_raise(float_flag_invalid, &env->fp_status); 2267 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2268 } 2269 } else if (floatx80_is_infinity(ST1) && 2270 !floatx80_invalid_encoding(ST0) && 2271 !floatx80_is_any_nan(ST0)) { 2272 if (floatx80_is_neg(ST1)) { 2273 if (floatx80_is_infinity(ST0)) { 2274 float_raise(float_flag_invalid, &env->fp_status); 2275 ST0 = floatx80_default_nan(&env->fp_status); 2276 } else { 2277 ST0 = (floatx80_is_neg(ST0) ? 2278 floatx80_chs(floatx80_zero) : 2279 floatx80_zero); 2280 } 2281 } else { 2282 if (floatx80_is_zero(ST0)) { 2283 float_raise(float_flag_invalid, &env->fp_status); 2284 ST0 = floatx80_default_nan(&env->fp_status); 2285 } else { 2286 ST0 = (floatx80_is_neg(ST0) ? 2287 floatx80_chs(floatx80_infinity) : 2288 floatx80_infinity); 2289 } 2290 } 2291 } else { 2292 int n; 2293 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2294 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2295 set_float_exception_flags(0, &env->fp_status); 2296 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2297 set_float_exception_flags(save_flags, &env->fp_status); 2298 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2299 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2300 env->fp_status.floatx80_rounding_precision = save; 2301 } 2302 merge_exception_flags(env, old_flags); 2303 } 2304 2305 void helper_fsin(CPUX86State *env) 2306 { 2307 double fptemp = floatx80_to_double(env, ST0); 2308 2309 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2310 env->fpus |= 0x400; 2311 } else { 2312 ST0 = double_to_floatx80(env, sin(fptemp)); 2313 env->fpus &= ~0x400; /* C2 <-- 0 */ 2314 /* the above code is for |arg| < 2**53 only */ 2315 } 2316 } 2317 2318 void helper_fcos(CPUX86State *env) 2319 { 2320 double fptemp = floatx80_to_double(env, ST0); 2321 2322 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2323 env->fpus |= 0x400; 2324 } else { 2325 ST0 = double_to_floatx80(env, cos(fptemp)); 2326 env->fpus &= ~0x400; /* C2 <-- 0 */ 2327 /* the above code is for |arg| < 2**63 only */ 2328 } 2329 } 2330 2331 void helper_fxam_ST0(CPUX86State *env) 2332 { 2333 CPU_LDoubleU temp; 2334 int expdif; 2335 2336 temp.d = ST0; 2337 2338 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2339 if (SIGND(temp)) { 2340 env->fpus |= 0x200; /* C1 <-- 1 */ 2341 } 2342 2343 if (env->fptags[env->fpstt]) { 2344 env->fpus |= 0x4100; /* Empty */ 2345 return; 2346 } 2347 2348 expdif = EXPD(temp); 2349 if (expdif == MAXEXPD) { 2350 if (MANTD(temp) == 0x8000000000000000ULL) { 2351 env->fpus |= 0x500; /* Infinity */ 2352 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2353 env->fpus |= 0x100; /* NaN */ 2354 } 2355 } else if (expdif == 0) { 2356 if (MANTD(temp) == 0) { 2357 env->fpus |= 0x4000; /* Zero */ 2358 } else { 2359 env->fpus |= 0x4400; /* Denormal */ 2360 } 2361 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2362 env->fpus |= 0x400; 2363 } 2364 } 2365 2366 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2367 uintptr_t retaddr) 2368 { 2369 int fpus, fptag, exp, i; 2370 uint64_t mant; 2371 CPU_LDoubleU tmp; 2372 2373 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2374 fptag = 0; 2375 for (i = 7; i >= 0; i--) { 2376 fptag <<= 2; 2377 if (env->fptags[i]) { 2378 fptag |= 3; 2379 } else { 2380 tmp.d = env->fpregs[i].d; 2381 exp = EXPD(tmp); 2382 mant = MANTD(tmp); 2383 if (exp == 0 && mant == 0) { 2384 /* zero */ 2385 fptag |= 1; 2386 } else if (exp == 0 || exp == MAXEXPD 2387 || (mant & (1LL << 63)) == 0) { 2388 /* NaNs, infinity, denormal */ 2389 fptag |= 2; 2390 } 2391 } 2392 } 2393 if (data32) { 2394 /* 32 bit */ 2395 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2396 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2397 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2398 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 2399 cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 2400 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 2401 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 2402 } else { 2403 /* 16 bit */ 2404 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2405 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2406 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2407 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 2408 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 2409 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 2410 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 2411 } 2412 } 2413 2414 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2415 { 2416 do_fstenv(env, ptr, data32, GETPC()); 2417 } 2418 2419 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2420 { 2421 env->fpstt = (fpus >> 11) & 7; 2422 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2423 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2424 #if !defined(CONFIG_USER_ONLY) 2425 if (!(env->fpus & FPUS_SE)) { 2426 /* 2427 * Here the processor deasserts FERR#; in response, the chipset deasserts 2428 * IGNNE#. 2429 */ 2430 cpu_clear_ignne(); 2431 } 2432 #endif 2433 } 2434 2435 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2436 uintptr_t retaddr) 2437 { 2438 int i, fpus, fptag; 2439 2440 if (data32) { 2441 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2442 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2443 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2444 } else { 2445 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2446 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2447 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2448 } 2449 cpu_set_fpus(env, fpus); 2450 for (i = 0; i < 8; i++) { 2451 env->fptags[i] = ((fptag & 3) == 3); 2452 fptag >>= 2; 2453 } 2454 } 2455 2456 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2457 { 2458 do_fldenv(env, ptr, data32, GETPC()); 2459 } 2460 2461 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2462 uintptr_t retaddr) 2463 { 2464 floatx80 tmp; 2465 int i; 2466 2467 do_fstenv(env, ptr, data32, retaddr); 2468 2469 ptr += (target_ulong)14 << data32; 2470 for (i = 0; i < 8; i++) { 2471 tmp = ST(i); 2472 do_fstt(env, tmp, ptr, retaddr); 2473 ptr += 10; 2474 } 2475 2476 do_fninit(env); 2477 } 2478 2479 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2480 { 2481 do_fsave(env, ptr, data32, GETPC()); 2482 } 2483 2484 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2485 uintptr_t retaddr) 2486 { 2487 floatx80 tmp; 2488 int i; 2489 2490 do_fldenv(env, ptr, data32, retaddr); 2491 ptr += (target_ulong)14 << data32; 2492 2493 for (i = 0; i < 8; i++) { 2494 tmp = do_fldt(env, ptr, retaddr); 2495 ST(i) = tmp; 2496 ptr += 10; 2497 } 2498 } 2499 2500 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2501 { 2502 do_frstor(env, ptr, data32, GETPC()); 2503 } 2504 2505 #define XO(X) offsetof(X86XSaveArea, X) 2506 2507 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2508 { 2509 int fpus, fptag, i; 2510 target_ulong addr; 2511 2512 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2513 fptag = 0; 2514 for (i = 0; i < 8; i++) { 2515 fptag |= (env->fptags[i] << i); 2516 } 2517 2518 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2519 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2520 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2521 2522 /* In 32-bit mode this is eip, sel, dp, sel. 2523 In 64-bit mode this is rip, rdp. 2524 But in either case we don't write actual data, just zeros. */ 2525 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2526 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2527 2528 addr = ptr + XO(legacy.fpregs); 2529 for (i = 0; i < 8; i++) { 2530 floatx80 tmp = ST(i); 2531 do_fstt(env, tmp, addr, ra); 2532 addr += 16; 2533 } 2534 } 2535 2536 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2537 { 2538 update_mxcsr_from_sse_status(env); 2539 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2540 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2541 } 2542 2543 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2544 { 2545 int i, nb_xmm_regs; 2546 target_ulong addr; 2547 2548 if (env->hflags & HF_CS64_MASK) { 2549 nb_xmm_regs = 16; 2550 } else { 2551 nb_xmm_regs = 8; 2552 } 2553 2554 addr = ptr + XO(legacy.xmm_regs); 2555 for (i = 0; i < nb_xmm_regs; i++) { 2556 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2557 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2558 addr += 16; 2559 } 2560 } 2561 2562 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2563 { 2564 int i, nb_xmm_regs; 2565 2566 if (env->hflags & HF_CS64_MASK) { 2567 nb_xmm_regs = 16; 2568 } else { 2569 nb_xmm_regs = 8; 2570 } 2571 2572 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2573 cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 2574 cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 2575 } 2576 } 2577 2578 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2579 { 2580 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2581 int i; 2582 2583 for (i = 0; i < 4; i++, addr += 16) { 2584 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2585 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2586 } 2587 } 2588 2589 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2590 { 2591 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2592 env->bndcs_regs.cfgu, ra); 2593 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2594 env->bndcs_regs.sts, ra); 2595 } 2596 2597 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2598 { 2599 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2600 } 2601 2602 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2603 { 2604 /* The operand must be 16 byte aligned */ 2605 if (ptr & 0xf) { 2606 raise_exception_ra(env, EXCP0D_GPF, ra); 2607 } 2608 2609 do_xsave_fpu(env, ptr, ra); 2610 2611 if (env->cr[4] & CR4_OSFXSR_MASK) { 2612 do_xsave_mxcsr(env, ptr, ra); 2613 /* Fast FXSAVE leaves out the XMM registers */ 2614 if (!(env->efer & MSR_EFER_FFXSR) 2615 || (env->hflags & HF_CPL_MASK) 2616 || !(env->hflags & HF_LMA_MASK)) { 2617 do_xsave_sse(env, ptr, ra); 2618 } 2619 } 2620 } 2621 2622 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2623 { 2624 do_fxsave(env, ptr, GETPC()); 2625 } 2626 2627 static uint64_t get_xinuse(CPUX86State *env) 2628 { 2629 uint64_t inuse = -1; 2630 2631 /* For the most part, we don't track XINUSE. We could calculate it 2632 here for all components, but it's probably less work to simply 2633 indicate in use. That said, the state of BNDREGS is important 2634 enough to track in HFLAGS, so we might as well use that here. */ 2635 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2636 inuse &= ~XSTATE_BNDREGS_MASK; 2637 } 2638 return inuse; 2639 } 2640 2641 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2642 uint64_t inuse, uint64_t opt, uintptr_t ra) 2643 { 2644 uint64_t old_bv, new_bv; 2645 2646 /* The OS must have enabled XSAVE. */ 2647 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2648 raise_exception_ra(env, EXCP06_ILLOP, ra); 2649 } 2650 2651 /* The operand must be 64 byte aligned. */ 2652 if (ptr & 63) { 2653 raise_exception_ra(env, EXCP0D_GPF, ra); 2654 } 2655 2656 /* Never save anything not enabled by XCR0. */ 2657 rfbm &= env->xcr0; 2658 opt &= rfbm; 2659 2660 if (opt & XSTATE_FP_MASK) { 2661 do_xsave_fpu(env, ptr, ra); 2662 } 2663 if (rfbm & XSTATE_SSE_MASK) { 2664 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2665 do_xsave_mxcsr(env, ptr, ra); 2666 } 2667 if (opt & XSTATE_SSE_MASK) { 2668 do_xsave_sse(env, ptr, ra); 2669 } 2670 if (opt & XSTATE_YMM_MASK) { 2671 do_xsave_ymmh(env, ptr + XO(avx_state), ra); 2672 } 2673 if (opt & XSTATE_BNDREGS_MASK) { 2674 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2675 } 2676 if (opt & XSTATE_BNDCSR_MASK) { 2677 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2678 } 2679 if (opt & XSTATE_PKRU_MASK) { 2680 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2681 } 2682 2683 /* Update the XSTATE_BV field. */ 2684 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2685 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2686 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2687 } 2688 2689 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2690 { 2691 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2692 } 2693 2694 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2695 { 2696 uint64_t inuse = get_xinuse(env); 2697 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2698 } 2699 2700 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2701 { 2702 int i, fpuc, fpus, fptag; 2703 target_ulong addr; 2704 2705 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2706 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2707 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2708 cpu_set_fpuc(env, fpuc); 2709 cpu_set_fpus(env, fpus); 2710 fptag ^= 0xff; 2711 for (i = 0; i < 8; i++) { 2712 env->fptags[i] = ((fptag >> i) & 1); 2713 } 2714 2715 addr = ptr + XO(legacy.fpregs); 2716 for (i = 0; i < 8; i++) { 2717 floatx80 tmp = do_fldt(env, addr, ra); 2718 ST(i) = tmp; 2719 addr += 16; 2720 } 2721 } 2722 2723 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2724 { 2725 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2726 } 2727 2728 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2729 { 2730 int i, nb_xmm_regs; 2731 target_ulong addr; 2732 2733 if (env->hflags & HF_CS64_MASK) { 2734 nb_xmm_regs = 16; 2735 } else { 2736 nb_xmm_regs = 8; 2737 } 2738 2739 addr = ptr + XO(legacy.xmm_regs); 2740 for (i = 0; i < nb_xmm_regs; i++) { 2741 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2742 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2743 addr += 16; 2744 } 2745 } 2746 2747 static void do_clear_sse(CPUX86State *env) 2748 { 2749 int i, nb_xmm_regs; 2750 2751 if (env->hflags & HF_CS64_MASK) { 2752 nb_xmm_regs = 16; 2753 } else { 2754 nb_xmm_regs = 8; 2755 } 2756 2757 for (i = 0; i < nb_xmm_regs; i++) { 2758 env->xmm_regs[i].ZMM_Q(0) = 0; 2759 env->xmm_regs[i].ZMM_Q(1) = 0; 2760 } 2761 } 2762 2763 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2764 { 2765 int i, nb_xmm_regs; 2766 2767 if (env->hflags & HF_CS64_MASK) { 2768 nb_xmm_regs = 16; 2769 } else { 2770 nb_xmm_regs = 8; 2771 } 2772 2773 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2774 env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 2775 env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 2776 } 2777 } 2778 2779 static void do_clear_ymmh(CPUX86State *env) 2780 { 2781 int i, nb_xmm_regs; 2782 2783 if (env->hflags & HF_CS64_MASK) { 2784 nb_xmm_regs = 16; 2785 } else { 2786 nb_xmm_regs = 8; 2787 } 2788 2789 for (i = 0; i < nb_xmm_regs; i++) { 2790 env->xmm_regs[i].ZMM_Q(2) = 0; 2791 env->xmm_regs[i].ZMM_Q(3) = 0; 2792 } 2793 } 2794 2795 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2796 { 2797 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2798 int i; 2799 2800 for (i = 0; i < 4; i++, addr += 16) { 2801 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2802 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2803 } 2804 } 2805 2806 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2807 { 2808 /* FIXME: Extend highest implemented bit of linear address. */ 2809 env->bndcs_regs.cfgu 2810 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2811 env->bndcs_regs.sts 2812 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2813 } 2814 2815 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2816 { 2817 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2818 } 2819 2820 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2821 { 2822 /* The operand must be 16 byte aligned */ 2823 if (ptr & 0xf) { 2824 raise_exception_ra(env, EXCP0D_GPF, ra); 2825 } 2826 2827 do_xrstor_fpu(env, ptr, ra); 2828 2829 if (env->cr[4] & CR4_OSFXSR_MASK) { 2830 do_xrstor_mxcsr(env, ptr, ra); 2831 /* Fast FXRSTOR leaves out the XMM registers */ 2832 if (!(env->efer & MSR_EFER_FFXSR) 2833 || (env->hflags & HF_CPL_MASK) 2834 || !(env->hflags & HF_LMA_MASK)) { 2835 do_xrstor_sse(env, ptr, ra); 2836 } 2837 } 2838 } 2839 2840 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2841 { 2842 do_fxrstor(env, ptr, GETPC()); 2843 } 2844 2845 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) 2846 { 2847 uint64_t xstate_bv, xcomp_bv, reserve0; 2848 2849 rfbm &= env->xcr0; 2850 2851 /* The OS must have enabled XSAVE. */ 2852 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2853 raise_exception_ra(env, EXCP06_ILLOP, ra); 2854 } 2855 2856 /* The operand must be 64 byte aligned. */ 2857 if (ptr & 63) { 2858 raise_exception_ra(env, EXCP0D_GPF, ra); 2859 } 2860 2861 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2862 2863 if ((int64_t)xstate_bv < 0) { 2864 /* FIXME: Compact form. */ 2865 raise_exception_ra(env, EXCP0D_GPF, ra); 2866 } 2867 2868 /* Standard form. */ 2869 2870 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2871 if (xstate_bv & ~env->xcr0) { 2872 raise_exception_ra(env, EXCP0D_GPF, ra); 2873 } 2874 2875 /* The XCOMP_BV field must be zero. Note that, as of the April 2016 2876 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2877 describes only XCOMP_BV, but the description of the standard form 2878 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2879 includes the next 64-bit field. */ 2880 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2881 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2882 if (xcomp_bv || reserve0) { 2883 raise_exception_ra(env, EXCP0D_GPF, ra); 2884 } 2885 2886 if (rfbm & XSTATE_FP_MASK) { 2887 if (xstate_bv & XSTATE_FP_MASK) { 2888 do_xrstor_fpu(env, ptr, ra); 2889 } else { 2890 do_fninit(env); 2891 memset(env->fpregs, 0, sizeof(env->fpregs)); 2892 } 2893 } 2894 if (rfbm & XSTATE_SSE_MASK) { 2895 /* Note that the standard form of XRSTOR loads MXCSR from memory 2896 whether or not the XSTATE_BV bit is set. */ 2897 do_xrstor_mxcsr(env, ptr, ra); 2898 if (xstate_bv & XSTATE_SSE_MASK) { 2899 do_xrstor_sse(env, ptr, ra); 2900 } else { 2901 do_clear_sse(env); 2902 } 2903 } 2904 if (rfbm & XSTATE_YMM_MASK) { 2905 if (xstate_bv & XSTATE_YMM_MASK) { 2906 do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 2907 } else { 2908 do_clear_ymmh(env); 2909 } 2910 } 2911 if (rfbm & XSTATE_BNDREGS_MASK) { 2912 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2913 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2914 env->hflags |= HF_MPX_IU_MASK; 2915 } else { 2916 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2917 env->hflags &= ~HF_MPX_IU_MASK; 2918 } 2919 } 2920 if (rfbm & XSTATE_BNDCSR_MASK) { 2921 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2922 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2923 } else { 2924 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2925 } 2926 cpu_sync_bndcs_hflags(env); 2927 } 2928 if (rfbm & XSTATE_PKRU_MASK) { 2929 uint64_t old_pkru = env->pkru; 2930 if (xstate_bv & XSTATE_PKRU_MASK) { 2931 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2932 } else { 2933 env->pkru = 0; 2934 } 2935 if (env->pkru != old_pkru) { 2936 CPUState *cs = env_cpu(env); 2937 tlb_flush(cs); 2938 } 2939 } 2940 } 2941 2942 #undef XO 2943 2944 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2945 { 2946 do_xrstor(env, ptr, rfbm, GETPC()); 2947 } 2948 2949 #if defined(CONFIG_USER_ONLY) 2950 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2951 { 2952 do_fsave(env, ptr, data32, 0); 2953 } 2954 2955 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2956 { 2957 do_frstor(env, ptr, data32, 0); 2958 } 2959 2960 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2961 { 2962 do_fxsave(env, ptr, 0); 2963 } 2964 2965 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2966 { 2967 do_fxrstor(env, ptr, 0); 2968 } 2969 2970 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 2971 { 2972 do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 2973 } 2974 2975 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 2976 { 2977 do_xrstor(env, ptr, -1, 0); 2978 } 2979 #endif 2980 2981 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2982 { 2983 /* The OS must have enabled XSAVE. */ 2984 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2985 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2986 } 2987 2988 switch (ecx) { 2989 case 0: 2990 return env->xcr0; 2991 case 1: 2992 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2993 return env->xcr0 & get_xinuse(env); 2994 } 2995 break; 2996 } 2997 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2998 } 2999 3000 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3001 { 3002 uint32_t dummy, ena_lo, ena_hi; 3003 uint64_t ena; 3004 3005 /* The OS must have enabled XSAVE. */ 3006 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3007 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3008 } 3009 3010 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3011 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3012 goto do_gpf; 3013 } 3014 3015 /* Disallow enabling unimplemented features. */ 3016 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3017 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3018 if (mask & ~ena) { 3019 goto do_gpf; 3020 } 3021 3022 /* Disallow enabling only half of MPX. */ 3023 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3024 & XSTATE_BNDCSR_MASK) { 3025 goto do_gpf; 3026 } 3027 3028 env->xcr0 = mask; 3029 cpu_sync_bndcs_hflags(env); 3030 cpu_sync_avx_hflag(env); 3031 return; 3032 3033 do_gpf: 3034 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3035 } 3036 3037 /* MMX/SSE */ 3038 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3039 3040 #define SSE_DAZ 0x0040 3041 #define SSE_RC_SHIFT 13 3042 #define SSE_RC_MASK (3 << SSE_RC_SHIFT) 3043 #define SSE_FZ 0x8000 3044 3045 void update_mxcsr_status(CPUX86State *env) 3046 { 3047 uint32_t mxcsr = env->mxcsr; 3048 int rnd_type; 3049 3050 /* set rounding mode */ 3051 rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; 3052 set_x86_rounding_mode(rnd_type, &env->sse_status); 3053 3054 /* Set exception flags. */ 3055 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3056 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3057 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3058 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3059 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3060 &env->sse_status); 3061 3062 /* set denormals are zero */ 3063 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3064 3065 /* set flush to zero */ 3066 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); 3067 } 3068 3069 void update_mxcsr_from_sse_status(CPUX86State *env) 3070 { 3071 uint8_t flags = get_float_exception_flags(&env->sse_status); 3072 /* 3073 * The MXCSR denormal flag has opposite semantics to 3074 * float_flag_input_denormal (the softfloat code sets that flag 3075 * only when flushing input denormals to zero, but SSE sets it 3076 * only when not flushing them to zero), so is not converted 3077 * here. 3078 */ 3079 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3080 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3081 (flags & float_flag_overflow ? FPUS_OE : 0) | 3082 (flags & float_flag_underflow ? FPUS_UE : 0) | 3083 (flags & float_flag_inexact ? FPUS_PE : 0) | 3084 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3085 0)); 3086 } 3087 3088 void helper_update_mxcsr(CPUX86State *env) 3089 { 3090 update_mxcsr_from_sse_status(env); 3091 } 3092 3093 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3094 { 3095 cpu_set_mxcsr(env, val); 3096 } 3097 3098 void helper_enter_mmx(CPUX86State *env) 3099 { 3100 env->fpstt = 0; 3101 *(uint32_t *)(env->fptags) = 0; 3102 *(uint32_t *)(env->fptags + 4) = 0; 3103 } 3104 3105 void helper_emms(CPUX86State *env) 3106 { 3107 /* set to empty state */ 3108 *(uint32_t *)(env->fptags) = 0x01010101; 3109 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3110 } 3111 3112 #define SHIFT 0 3113 #include "ops_sse.h" 3114 3115 #define SHIFT 1 3116 #include "ops_sse.h" 3117 3118 #define SHIFT 2 3119 #include "ops_sse.h" 3120