1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/helper-proto.h" 25 #include "fpu/softfloat.h" 26 #include "fpu/softfloat-macros.h" 27 #include "helper-tcg.h" 28 29 /* float macros */ 30 #define FT0 (env->ft0) 31 #define ST0 (env->fpregs[env->fpstt].d) 32 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 33 #define ST1 ST(1) 34 35 #define FPU_RC_MASK 0xc00 36 #define FPU_RC_NEAR 0x000 37 #define FPU_RC_DOWN 0x400 38 #define FPU_RC_UP 0x800 39 #define FPU_RC_CHOP 0xc00 40 41 #define MAXTAN 9223372036854775808.0 42 43 /* the following deal with x86 long double-precision numbers */ 44 #define MAXEXPD 0x7fff 45 #define EXPBIAS 16383 46 #define EXPD(fp) (fp.l.upper & 0x7fff) 47 #define SIGND(fp) ((fp.l.upper) & 0x8000) 48 #define MANTD(fp) (fp.l.lower) 49 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 50 51 #define FPUS_IE (1 << 0) 52 #define FPUS_DE (1 << 1) 53 #define FPUS_ZE (1 << 2) 54 #define FPUS_OE (1 << 3) 55 #define FPUS_UE (1 << 4) 56 #define FPUS_PE (1 << 5) 57 #define FPUS_SF (1 << 6) 58 #define FPUS_SE (1 << 7) 59 #define FPUS_B (1 << 15) 60 61 #define FPUC_EM 0x3f 62 63 
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) 64 #define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) 65 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) 66 #define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) 67 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) 68 #define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) 69 #define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) 70 #define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) 71 72 static inline void fpush(CPUX86State *env) 73 { 74 env->fpstt = (env->fpstt - 1) & 7; 75 env->fptags[env->fpstt] = 0; /* validate stack entry */ 76 } 77 78 static inline void fpop(CPUX86State *env) 79 { 80 env->fptags[env->fpstt] = 1; /* invalidate stack entry */ 81 env->fpstt = (env->fpstt + 1) & 7; 82 } 83 84 static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) 85 { 86 CPU_LDoubleU temp; 87 88 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); 89 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); 90 return temp.d; 91 } 92 93 static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, 94 uintptr_t retaddr) 95 { 96 CPU_LDoubleU temp; 97 98 temp.d = f; 99 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); 100 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); 101 } 102 103 /* x87 FPU helpers */ 104 105 static inline double floatx80_to_double(CPUX86State *env, floatx80 a) 106 { 107 union { 108 float64 f64; 109 double d; 110 } u; 111 112 u.f64 = floatx80_to_float64(a, &env->fp_status); 113 return u.d; 114 } 115 116 static inline floatx80 double_to_floatx80(CPUX86State *env, double a) 117 { 118 union { 119 float64 f64; 120 double d; 121 } u; 122 123 u.d = a; 124 return float64_to_floatx80(u.f64, &env->fp_status); 125 } 126 127 static void fpu_set_exception(CPUX86State *env, int mask) 128 { 129 env->fpus |= mask; 130 if (env->fpus & (~env->fpuc & FPUC_EM)) { 131 env->fpus |= FPUS_SE | 
FPUS_B; 132 } 133 } 134 135 static inline uint8_t save_exception_flags(CPUX86State *env) 136 { 137 uint8_t old_flags = get_float_exception_flags(&env->fp_status); 138 set_float_exception_flags(0, &env->fp_status); 139 return old_flags; 140 } 141 142 static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) 143 { 144 uint8_t new_flags = get_float_exception_flags(&env->fp_status); 145 float_raise(old_flags, &env->fp_status); 146 fpu_set_exception(env, 147 ((new_flags & float_flag_invalid ? FPUS_IE : 0) | 148 (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | 149 (new_flags & float_flag_overflow ? FPUS_OE : 0) | 150 (new_flags & float_flag_underflow ? FPUS_UE : 0) | 151 (new_flags & float_flag_inexact ? FPUS_PE : 0) | 152 (new_flags & float_flag_input_denormal ? FPUS_DE : 0))); 153 } 154 155 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) 156 { 157 uint8_t old_flags = save_exception_flags(env); 158 floatx80 ret = floatx80_div(a, b, &env->fp_status); 159 merge_exception_flags(env, old_flags); 160 return ret; 161 } 162 163 static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) 164 { 165 if (env->cr[0] & CR0_NE_MASK) { 166 raise_exception_ra(env, EXCP10_COPR, retaddr); 167 } 168 #if !defined(CONFIG_USER_ONLY) 169 else { 170 fpu_check_raise_ferr_irq(env); 171 } 172 #endif 173 } 174 175 void helper_flds_FT0(CPUX86State *env, uint32_t val) 176 { 177 uint8_t old_flags = save_exception_flags(env); 178 union { 179 float32 f; 180 uint32_t i; 181 } u; 182 183 u.i = val; 184 FT0 = float32_to_floatx80(u.f, &env->fp_status); 185 merge_exception_flags(env, old_flags); 186 } 187 188 void helper_fldl_FT0(CPUX86State *env, uint64_t val) 189 { 190 uint8_t old_flags = save_exception_flags(env); 191 union { 192 float64 f; 193 uint64_t i; 194 } u; 195 196 u.i = val; 197 FT0 = float64_to_floatx80(u.f, &env->fp_status); 198 merge_exception_flags(env, old_flags); 199 } 200 201 void helper_fildl_FT0(CPUX86State *env, int32_t val) 202 { 
203 FT0 = int32_to_floatx80(val, &env->fp_status); 204 } 205 206 void helper_flds_ST0(CPUX86State *env, uint32_t val) 207 { 208 uint8_t old_flags = save_exception_flags(env); 209 int new_fpstt; 210 union { 211 float32 f; 212 uint32_t i; 213 } u; 214 215 new_fpstt = (env->fpstt - 1) & 7; 216 u.i = val; 217 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); 218 env->fpstt = new_fpstt; 219 env->fptags[new_fpstt] = 0; /* validate stack entry */ 220 merge_exception_flags(env, old_flags); 221 } 222 223 void helper_fldl_ST0(CPUX86State *env, uint64_t val) 224 { 225 uint8_t old_flags = save_exception_flags(env); 226 int new_fpstt; 227 union { 228 float64 f; 229 uint64_t i; 230 } u; 231 232 new_fpstt = (env->fpstt - 1) & 7; 233 u.i = val; 234 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); 235 env->fpstt = new_fpstt; 236 env->fptags[new_fpstt] = 0; /* validate stack entry */ 237 merge_exception_flags(env, old_flags); 238 } 239 240 static FloatX80RoundPrec tmp_maximise_precision(float_status *st) 241 { 242 FloatX80RoundPrec old = get_floatx80_rounding_precision(st); 243 set_floatx80_rounding_precision(floatx80_precision_x, st); 244 return old; 245 } 246 247 void helper_fildl_ST0(CPUX86State *env, int32_t val) 248 { 249 int new_fpstt; 250 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 251 252 new_fpstt = (env->fpstt - 1) & 7; 253 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); 254 env->fpstt = new_fpstt; 255 env->fptags[new_fpstt] = 0; /* validate stack entry */ 256 257 set_floatx80_rounding_precision(old, &env->fp_status); 258 } 259 260 void helper_fildll_ST0(CPUX86State *env, int64_t val) 261 { 262 int new_fpstt; 263 FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); 264 265 new_fpstt = (env->fpstt - 1) & 7; 266 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); 267 env->fpstt = new_fpstt; 268 env->fptags[new_fpstt] = 0; /* validate stack entry */ 269 270 
set_floatx80_rounding_precision(old, &env->fp_status); 271 } 272 273 uint32_t helper_fsts_ST0(CPUX86State *env) 274 { 275 uint8_t old_flags = save_exception_flags(env); 276 union { 277 float32 f; 278 uint32_t i; 279 } u; 280 281 u.f = floatx80_to_float32(ST0, &env->fp_status); 282 merge_exception_flags(env, old_flags); 283 return u.i; 284 } 285 286 uint64_t helper_fstl_ST0(CPUX86State *env) 287 { 288 uint8_t old_flags = save_exception_flags(env); 289 union { 290 float64 f; 291 uint64_t i; 292 } u; 293 294 u.f = floatx80_to_float64(ST0, &env->fp_status); 295 merge_exception_flags(env, old_flags); 296 return u.i; 297 } 298 299 int32_t helper_fist_ST0(CPUX86State *env) 300 { 301 uint8_t old_flags = save_exception_flags(env); 302 int32_t val; 303 304 val = floatx80_to_int32(ST0, &env->fp_status); 305 if (val != (int16_t)val) { 306 set_float_exception_flags(float_flag_invalid, &env->fp_status); 307 val = -32768; 308 } 309 merge_exception_flags(env, old_flags); 310 return val; 311 } 312 313 int32_t helper_fistl_ST0(CPUX86State *env) 314 { 315 uint8_t old_flags = save_exception_flags(env); 316 int32_t val; 317 318 val = floatx80_to_int32(ST0, &env->fp_status); 319 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 320 val = 0x80000000; 321 } 322 merge_exception_flags(env, old_flags); 323 return val; 324 } 325 326 int64_t helper_fistll_ST0(CPUX86State *env) 327 { 328 uint8_t old_flags = save_exception_flags(env); 329 int64_t val; 330 331 val = floatx80_to_int64(ST0, &env->fp_status); 332 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 333 val = 0x8000000000000000ULL; 334 } 335 merge_exception_flags(env, old_flags); 336 return val; 337 } 338 339 int32_t helper_fistt_ST0(CPUX86State *env) 340 { 341 uint8_t old_flags = save_exception_flags(env); 342 int32_t val; 343 344 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 345 if (val != (int16_t)val) { 346 set_float_exception_flags(float_flag_invalid, &env->fp_status); 347 
val = -32768; 348 } 349 merge_exception_flags(env, old_flags); 350 return val; 351 } 352 353 int32_t helper_fisttl_ST0(CPUX86State *env) 354 { 355 uint8_t old_flags = save_exception_flags(env); 356 int32_t val; 357 358 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); 359 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 360 val = 0x80000000; 361 } 362 merge_exception_flags(env, old_flags); 363 return val; 364 } 365 366 int64_t helper_fisttll_ST0(CPUX86State *env) 367 { 368 uint8_t old_flags = save_exception_flags(env); 369 int64_t val; 370 371 val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); 372 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { 373 val = 0x8000000000000000ULL; 374 } 375 merge_exception_flags(env, old_flags); 376 return val; 377 } 378 379 void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) 380 { 381 int new_fpstt; 382 383 new_fpstt = (env->fpstt - 1) & 7; 384 env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); 385 env->fpstt = new_fpstt; 386 env->fptags[new_fpstt] = 0; /* validate stack entry */ 387 } 388 389 void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) 390 { 391 do_fstt(env, ST0, ptr, GETPC()); 392 } 393 394 void helper_fpush(CPUX86State *env) 395 { 396 fpush(env); 397 } 398 399 void helper_fpop(CPUX86State *env) 400 { 401 fpop(env); 402 } 403 404 void helper_fdecstp(CPUX86State *env) 405 { 406 env->fpstt = (env->fpstt - 1) & 7; 407 env->fpus &= ~0x4700; 408 } 409 410 void helper_fincstp(CPUX86State *env) 411 { 412 env->fpstt = (env->fpstt + 1) & 7; 413 env->fpus &= ~0x4700; 414 } 415 416 /* FPU move */ 417 418 void helper_ffree_STN(CPUX86State *env, int st_index) 419 { 420 env->fptags[(env->fpstt + st_index) & 7] = 1; 421 } 422 423 void helper_fmov_ST0_FT0(CPUX86State *env) 424 { 425 ST0 = FT0; 426 } 427 428 void helper_fmov_FT0_STN(CPUX86State *env, int st_index) 429 { 430 FT0 = ST(st_index); 431 } 432 433 void helper_fmov_ST0_STN(CPUX86State *env, int st_index) 
434 { 435 ST0 = ST(st_index); 436 } 437 438 void helper_fmov_STN_ST0(CPUX86State *env, int st_index) 439 { 440 ST(st_index) = ST0; 441 } 442 443 void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) 444 { 445 floatx80 tmp; 446 447 tmp = ST(st_index); 448 ST(st_index) = ST0; 449 ST0 = tmp; 450 } 451 452 /* FPU operations */ 453 454 static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; 455 456 void helper_fcom_ST0_FT0(CPUX86State *env) 457 { 458 uint8_t old_flags = save_exception_flags(env); 459 FloatRelation ret; 460 461 ret = floatx80_compare(ST0, FT0, &env->fp_status); 462 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 463 merge_exception_flags(env, old_flags); 464 } 465 466 void helper_fucom_ST0_FT0(CPUX86State *env) 467 { 468 uint8_t old_flags = save_exception_flags(env); 469 FloatRelation ret; 470 471 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 472 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; 473 merge_exception_flags(env, old_flags); 474 } 475 476 static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; 477 478 void helper_fcomi_ST0_FT0(CPUX86State *env) 479 { 480 uint8_t old_flags = save_exception_flags(env); 481 int eflags; 482 FloatRelation ret; 483 484 ret = floatx80_compare(ST0, FT0, &env->fp_status); 485 eflags = cpu_cc_compute_all(env, CC_OP); 486 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 487 CC_SRC = eflags; 488 merge_exception_flags(env, old_flags); 489 } 490 491 void helper_fucomi_ST0_FT0(CPUX86State *env) 492 { 493 uint8_t old_flags = save_exception_flags(env); 494 int eflags; 495 FloatRelation ret; 496 497 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); 498 eflags = cpu_cc_compute_all(env, CC_OP); 499 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; 500 CC_SRC = eflags; 501 merge_exception_flags(env, old_flags); 502 } 503 504 void helper_fadd_ST0_FT0(CPUX86State *env) 505 { 506 uint8_t old_flags = save_exception_flags(env); 507 ST0 = 
floatx80_add(ST0, FT0, &env->fp_status); 508 merge_exception_flags(env, old_flags); 509 } 510 511 void helper_fmul_ST0_FT0(CPUX86State *env) 512 { 513 uint8_t old_flags = save_exception_flags(env); 514 ST0 = floatx80_mul(ST0, FT0, &env->fp_status); 515 merge_exception_flags(env, old_flags); 516 } 517 518 void helper_fsub_ST0_FT0(CPUX86State *env) 519 { 520 uint8_t old_flags = save_exception_flags(env); 521 ST0 = floatx80_sub(ST0, FT0, &env->fp_status); 522 merge_exception_flags(env, old_flags); 523 } 524 525 void helper_fsubr_ST0_FT0(CPUX86State *env) 526 { 527 uint8_t old_flags = save_exception_flags(env); 528 ST0 = floatx80_sub(FT0, ST0, &env->fp_status); 529 merge_exception_flags(env, old_flags); 530 } 531 532 void helper_fdiv_ST0_FT0(CPUX86State *env) 533 { 534 ST0 = helper_fdiv(env, ST0, FT0); 535 } 536 537 void helper_fdivr_ST0_FT0(CPUX86State *env) 538 { 539 ST0 = helper_fdiv(env, FT0, ST0); 540 } 541 542 /* fp operations between STN and ST0 */ 543 544 void helper_fadd_STN_ST0(CPUX86State *env, int st_index) 545 { 546 uint8_t old_flags = save_exception_flags(env); 547 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); 548 merge_exception_flags(env, old_flags); 549 } 550 551 void helper_fmul_STN_ST0(CPUX86State *env, int st_index) 552 { 553 uint8_t old_flags = save_exception_flags(env); 554 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); 555 merge_exception_flags(env, old_flags); 556 } 557 558 void helper_fsub_STN_ST0(CPUX86State *env, int st_index) 559 { 560 uint8_t old_flags = save_exception_flags(env); 561 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); 562 merge_exception_flags(env, old_flags); 563 } 564 565 void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) 566 { 567 uint8_t old_flags = save_exception_flags(env); 568 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); 569 merge_exception_flags(env, old_flags); 570 } 571 572 void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) 573 { 
574 floatx80 *p; 575 576 p = &ST(st_index); 577 *p = helper_fdiv(env, *p, ST0); 578 } 579 580 void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) 581 { 582 floatx80 *p; 583 584 p = &ST(st_index); 585 *p = helper_fdiv(env, ST0, *p); 586 } 587 588 /* misc FPU operations */ 589 void helper_fchs_ST0(CPUX86State *env) 590 { 591 ST0 = floatx80_chs(ST0); 592 } 593 594 void helper_fabs_ST0(CPUX86State *env) 595 { 596 ST0 = floatx80_abs(ST0); 597 } 598 599 void helper_fld1_ST0(CPUX86State *env) 600 { 601 ST0 = floatx80_one; 602 } 603 604 void helper_fldl2t_ST0(CPUX86State *env) 605 { 606 switch (env->fpuc & FPU_RC_MASK) { 607 case FPU_RC_UP: 608 ST0 = floatx80_l2t_u; 609 break; 610 default: 611 ST0 = floatx80_l2t; 612 break; 613 } 614 } 615 616 void helper_fldl2e_ST0(CPUX86State *env) 617 { 618 switch (env->fpuc & FPU_RC_MASK) { 619 case FPU_RC_DOWN: 620 case FPU_RC_CHOP: 621 ST0 = floatx80_l2e_d; 622 break; 623 default: 624 ST0 = floatx80_l2e; 625 break; 626 } 627 } 628 629 void helper_fldpi_ST0(CPUX86State *env) 630 { 631 switch (env->fpuc & FPU_RC_MASK) { 632 case FPU_RC_DOWN: 633 case FPU_RC_CHOP: 634 ST0 = floatx80_pi_d; 635 break; 636 default: 637 ST0 = floatx80_pi; 638 break; 639 } 640 } 641 642 void helper_fldlg2_ST0(CPUX86State *env) 643 { 644 switch (env->fpuc & FPU_RC_MASK) { 645 case FPU_RC_DOWN: 646 case FPU_RC_CHOP: 647 ST0 = floatx80_lg2_d; 648 break; 649 default: 650 ST0 = floatx80_lg2; 651 break; 652 } 653 } 654 655 void helper_fldln2_ST0(CPUX86State *env) 656 { 657 switch (env->fpuc & FPU_RC_MASK) { 658 case FPU_RC_DOWN: 659 case FPU_RC_CHOP: 660 ST0 = floatx80_ln2_d; 661 break; 662 default: 663 ST0 = floatx80_ln2; 664 break; 665 } 666 } 667 668 void helper_fldz_ST0(CPUX86State *env) 669 { 670 ST0 = floatx80_zero; 671 } 672 673 void helper_fldz_FT0(CPUX86State *env) 674 { 675 FT0 = floatx80_zero; 676 } 677 678 uint32_t helper_fnstsw(CPUX86State *env) 679 { 680 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 681 } 682 683 uint32_t 
helper_fnstcw(CPUX86State *env) 684 { 685 return env->fpuc; 686 } 687 688 void update_fp_status(CPUX86State *env) 689 { 690 FloatRoundMode rnd_mode; 691 FloatX80RoundPrec rnd_prec; 692 693 /* set rounding mode */ 694 switch (env->fpuc & FPU_RC_MASK) { 695 default: 696 case FPU_RC_NEAR: 697 rnd_mode = float_round_nearest_even; 698 break; 699 case FPU_RC_DOWN: 700 rnd_mode = float_round_down; 701 break; 702 case FPU_RC_UP: 703 rnd_mode = float_round_up; 704 break; 705 case FPU_RC_CHOP: 706 rnd_mode = float_round_to_zero; 707 break; 708 } 709 set_float_rounding_mode(rnd_mode, &env->fp_status); 710 711 switch ((env->fpuc >> 8) & 3) { 712 case 0: 713 rnd_prec = floatx80_precision_s; 714 break; 715 case 2: 716 rnd_prec = floatx80_precision_d; 717 break; 718 case 3: 719 default: 720 rnd_prec = floatx80_precision_x; 721 break; 722 } 723 set_floatx80_rounding_precision(rnd_prec, &env->fp_status); 724 } 725 726 void helper_fldcw(CPUX86State *env, uint32_t val) 727 { 728 cpu_set_fpuc(env, val); 729 } 730 731 void helper_fclex(CPUX86State *env) 732 { 733 env->fpus &= 0x7f00; 734 } 735 736 void helper_fwait(CPUX86State *env) 737 { 738 if (env->fpus & FPUS_SE) { 739 fpu_raise_exception(env, GETPC()); 740 } 741 } 742 743 static void do_fninit(CPUX86State *env) 744 { 745 env->fpus = 0; 746 env->fpstt = 0; 747 env->fpcs = 0; 748 env->fpds = 0; 749 env->fpip = 0; 750 env->fpdp = 0; 751 cpu_set_fpuc(env, 0x37f); 752 env->fptags[0] = 1; 753 env->fptags[1] = 1; 754 env->fptags[2] = 1; 755 env->fptags[3] = 1; 756 env->fptags[4] = 1; 757 env->fptags[5] = 1; 758 env->fptags[6] = 1; 759 env->fptags[7] = 1; 760 } 761 762 void helper_fninit(CPUX86State *env) 763 { 764 do_fninit(env); 765 } 766 767 /* BCD ops */ 768 769 void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) 770 { 771 floatx80 tmp; 772 uint64_t val; 773 unsigned int v; 774 int i; 775 776 val = 0; 777 for (i = 8; i >= 0; i--) { 778 v = cpu_ldub_data_ra(env, ptr + i, GETPC()); 779 val = (val * 100) + ((v >> 4) * 10) + (v & 
0xf); 780 } 781 tmp = int64_to_floatx80(val, &env->fp_status); 782 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { 783 tmp = floatx80_chs(tmp); 784 } 785 fpush(env); 786 ST0 = tmp; 787 } 788 789 void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) 790 { 791 uint8_t old_flags = save_exception_flags(env); 792 int v; 793 target_ulong mem_ref, mem_end; 794 int64_t val; 795 CPU_LDoubleU temp; 796 797 temp.d = ST0; 798 799 val = floatx80_to_int64(ST0, &env->fp_status); 800 mem_ref = ptr; 801 if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { 802 set_float_exception_flags(float_flag_invalid, &env->fp_status); 803 while (mem_ref < ptr + 7) { 804 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 805 } 806 cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); 807 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 808 cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); 809 merge_exception_flags(env, old_flags); 810 return; 811 } 812 mem_end = mem_ref + 9; 813 if (SIGND(temp)) { 814 cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); 815 val = -val; 816 } else { 817 cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); 818 } 819 while (mem_ref < mem_end) { 820 if (val == 0) { 821 break; 822 } 823 v = val % 100; 824 val = val / 100; 825 v = ((v / 10) << 4) | (v % 10); 826 cpu_stb_data_ra(env, mem_ref++, v, GETPC()); 827 } 828 while (mem_ref < mem_end) { 829 cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); 830 } 831 merge_exception_flags(env, old_flags); 832 } 833 834 /* 128-bit significand of log(2). */ 835 #define ln2_sig_high 0xb17217f7d1cf79abULL 836 #define ln2_sig_low 0xc9e3b39803f2f6afULL 837 838 /* 839 * Polynomial coefficients for an approximation to (2^x - 1) / x, on 840 * the interval [-1/64, 1/64]. 
841 */ 842 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 843 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 844 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 845 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 846 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 847 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 848 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 849 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 850 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 851 852 struct f2xm1_data { 853 /* 854 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 855 * are very close to exact floatx80 values. 856 */ 857 floatx80 t; 858 /* The value of 2^t. */ 859 floatx80 exp2; 860 /* The value of 2^t - 1. */ 861 floatx80 exp2m1; 862 }; 863 864 static const struct f2xm1_data f2xm1_table[65] = { 865 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 866 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 867 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 868 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 869 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 870 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 871 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 872 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 873 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 874 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 875 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 876 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 877 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 878 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 879 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 880 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 881 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 882 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 883 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 884 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 885 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 886 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 887 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 888 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 889 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 890 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 891 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 892 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 893 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 894 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 895 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 896 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 897 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 898 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 899 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 900 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 901 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 902 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 903 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 904 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 905 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 906 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 907 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 908 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 909 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 910 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 911 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 912 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 913 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 914 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 915 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 916 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 917 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
918 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 919 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 920 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 921 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 922 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 923 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 924 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 925 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 926 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 927 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 928 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 929 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 930 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 931 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 932 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 933 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 934 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 935 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 936 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 937 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 938 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 939 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 940 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 941 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 942 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 943 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 944 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 945 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 946 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 947 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 948 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 949 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 950 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 951 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 952 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 953 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 954 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 955 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 956 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 957 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 958 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 959 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 960 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 961 { floatx80_zero_init, 962 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 963 floatx80_zero_init }, 964 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 965 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 966 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 967 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 968 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 969 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 970 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 971 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 972 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 973 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 974 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 975 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 976 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 977 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 978 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 979 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 980 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 981 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 982 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 983 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 984 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 985 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 986 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 987 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 988 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 989 make_floatx80_init(0x3fff, 
0x9b8d39b9d54e3a79ULL), 990 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 991 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 992 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 993 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 994 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 995 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 996 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 997 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 998 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 999 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1000 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1001 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1002 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1003 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1004 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1005 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1006 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1007 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1008 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1009 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1010 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1011 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1012 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1013 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1014 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1015 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1016 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1017 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1018 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1019 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1020 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1021 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1022 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1023 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1024 { make_floatx80_init(0x3ffe, 
0xa7fffffffffff80dULL), 1025 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1026 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1027 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1028 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1029 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1030 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1031 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1032 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1033 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1034 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1035 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1036 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1037 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1038 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1039 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1040 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1041 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1042 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1043 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1044 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1045 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1046 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1047 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1048 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1049 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1050 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1051 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1052 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1053 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1054 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1055 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1056 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1057 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1058 make_floatx80_init(0x4000, 0x8000000000000000ULL), 1059 
make_floatx80_init(0x3fff, 0x8000000000000000ULL) }, 1060 }; 1061 1062 void helper_f2xm1(CPUX86State *env) 1063 { 1064 uint8_t old_flags = save_exception_flags(env); 1065 uint64_t sig = extractFloatx80Frac(ST0); 1066 int32_t exp = extractFloatx80Exp(ST0); 1067 bool sign = extractFloatx80Sign(ST0); 1068 1069 if (floatx80_invalid_encoding(ST0)) { 1070 float_raise(float_flag_invalid, &env->fp_status); 1071 ST0 = floatx80_default_nan(&env->fp_status); 1072 } else if (floatx80_is_any_nan(ST0)) { 1073 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1074 float_raise(float_flag_invalid, &env->fp_status); 1075 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1076 } 1077 } else if (exp > 0x3fff || 1078 (exp == 0x3fff && sig != (0x8000000000000000ULL))) { 1079 /* Out of range for the instruction, treat as invalid. */ 1080 float_raise(float_flag_invalid, &env->fp_status); 1081 ST0 = floatx80_default_nan(&env->fp_status); 1082 } else if (exp == 0x3fff) { 1083 /* Argument 1 or -1, exact result 1 or -0.5. */ 1084 if (sign) { 1085 ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); 1086 } 1087 } else if (exp < 0x3fb0) { 1088 if (!floatx80_is_zero(ST0)) { 1089 /* 1090 * Multiplying the argument by an extra-precision version 1091 * of log(2) is sufficiently precise. Zero arguments are 1092 * returned unchanged. 1093 */ 1094 uint64_t sig0, sig1, sig2; 1095 if (exp == 0) { 1096 normalizeFloatx80Subnormal(sig, &exp, &sig); 1097 } 1098 mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, 1099 &sig2); 1100 /* This result is inexact. 
*/ 1101 sig1 |= 1; 1102 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1103 sign, exp, sig0, sig1, 1104 &env->fp_status); 1105 } 1106 } else { 1107 floatx80 tmp, y, accum; 1108 bool asign, bsign; 1109 int32_t n, aexp, bexp; 1110 uint64_t asig0, asig1, asig2, bsig0, bsig1; 1111 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 1112 FloatX80RoundPrec save_prec = 1113 env->fp_status.floatx80_rounding_precision; 1114 env->fp_status.float_rounding_mode = float_round_nearest_even; 1115 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 1116 1117 /* Find the nearest multiple of 1/32 to the argument. */ 1118 tmp = floatx80_scalbn(ST0, 5, &env->fp_status); 1119 n = 32 + floatx80_to_int32(tmp, &env->fp_status); 1120 y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); 1121 1122 if (floatx80_is_zero(y)) { 1123 /* 1124 * Use the value of 2^t - 1 from the table, to avoid 1125 * needing to special-case zero as a result of 1126 * multiplication below. 1127 */ 1128 ST0 = f2xm1_table[n].t; 1129 set_float_exception_flags(float_flag_inexact, &env->fp_status); 1130 env->fp_status.float_rounding_mode = save_mode; 1131 } else { 1132 /* 1133 * Compute the lower parts of a polynomial expansion for 1134 * (2^y - 1) / y. 
1135 */ 1136 accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); 1137 accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); 1138 accum = floatx80_mul(accum, y, &env->fp_status); 1139 accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); 1140 accum = floatx80_mul(accum, y, &env->fp_status); 1141 accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); 1142 accum = floatx80_mul(accum, y, &env->fp_status); 1143 accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); 1144 accum = floatx80_mul(accum, y, &env->fp_status); 1145 accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); 1146 accum = floatx80_mul(accum, y, &env->fp_status); 1147 accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); 1148 accum = floatx80_mul(accum, y, &env->fp_status); 1149 accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); 1150 1151 /* 1152 * The full polynomial expansion is f2xm1_coeff_0 + accum 1153 * (where accum has much lower magnitude, and so, in 1154 * particular, carry out of the addition is not possible). 1155 * (This expansion is only accurate to about 70 bits, not 1156 * 128 bits.) 1157 */ 1158 aexp = extractFloatx80Exp(f2xm1_coeff_0); 1159 asign = extractFloatx80Sign(f2xm1_coeff_0); 1160 shift128RightJamming(extractFloatx80Frac(accum), 0, 1161 aexp - extractFloatx80Exp(accum), 1162 &asig0, &asig1); 1163 bsig0 = extractFloatx80Frac(f2xm1_coeff_0); 1164 bsig1 = 0; 1165 if (asign == extractFloatx80Sign(accum)) { 1166 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1167 } else { 1168 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1169 } 1170 /* And thus compute an approximation to 2^y - 1. */ 1171 mul128By64To192(asig0, asig1, extractFloatx80Frac(y), 1172 &asig0, &asig1, &asig2); 1173 aexp += extractFloatx80Exp(y) - 0x3ffe; 1174 asign ^= extractFloatx80Sign(y); 1175 if (n != 32) { 1176 /* 1177 * Multiply this by the precomputed value of 2^t and 1178 * add that of 2^t - 1. 
1179 */ 1180 mul128By64To192(asig0, asig1, 1181 extractFloatx80Frac(f2xm1_table[n].exp2), 1182 &asig0, &asig1, &asig2); 1183 aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; 1184 bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); 1185 bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); 1186 bsig1 = 0; 1187 if (bexp < aexp) { 1188 shift128RightJamming(bsig0, bsig1, aexp - bexp, 1189 &bsig0, &bsig1); 1190 } else if (aexp < bexp) { 1191 shift128RightJamming(asig0, asig1, bexp - aexp, 1192 &asig0, &asig1); 1193 aexp = bexp; 1194 } 1195 /* The sign of 2^t - 1 is always that of the result. */ 1196 bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); 1197 if (asign == bsign) { 1198 /* Avoid possible carry out of the addition. */ 1199 shift128RightJamming(asig0, asig1, 1, 1200 &asig0, &asig1); 1201 shift128RightJamming(bsig0, bsig1, 1, 1202 &bsig0, &bsig1); 1203 ++aexp; 1204 add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); 1205 } else { 1206 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1207 asign = bsign; 1208 } 1209 } 1210 env->fp_status.float_rounding_mode = save_mode; 1211 /* This result is inexact. */ 1212 asig1 |= 1; 1213 ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 1214 asign, aexp, asig0, asig1, 1215 &env->fp_status); 1216 } 1217 1218 env->fp_status.floatx80_rounding_precision = save_prec; 1219 } 1220 merge_exception_flags(env, old_flags); 1221 } 1222 1223 void helper_fptan(CPUX86State *env) 1224 { 1225 double fptemp = floatx80_to_double(env, ST0); 1226 1227 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 1228 env->fpus |= 0x400; 1229 } else { 1230 fptemp = tan(fptemp); 1231 ST0 = double_to_floatx80(env, fptemp); 1232 fpush(env); 1233 ST0 = floatx80_one; 1234 env->fpus &= ~0x400; /* C2 <-- 0 */ 1235 /* the above code is for |arg| < 2**52 only */ 1236 } 1237 } 1238 1239 /* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/
/*
 * Each constant is split into a floatx80 biased exponent and a 128-bit
 * significand (high and low 64-bit words).  pi/4, pi/2 and pi share the
 * same significand and differ only in exponent.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};

/*
 * Indexed by n, where helper_fpatan uses entry n for arctan(t) with
 * t = n/8 the nearest multiple of 1/8 to its reduced argument
 * (n runs from 0 to 8).
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: compute the arctangent of ST1/ST0, with the quadrant chosen
 * from the operand signs, store it in ST1 and pop the stack.
 *
 * NaNs and invalid encodings are handled first.  Cases whose result is
 * a zero, a plain quotient or a fixed multiple of pi/4 are handled
 * next.  The general case reduces to x = min/max of the two magnitudes
 * (0 < x <= 1), splits x = t + y with t = n/8, and evaluates
 * arctan(t) + arctan(y / (1 + t*x)) using fpatan_table and the
 * polynomial above, all in 128-bit fixed-point arithmetic.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                 arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            /* sig - 1 with an all-ones low word: just below sig.  */
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            /*
             * Intermediate softfloat calls run in round-to-nearest at
             * full extended precision; the guest's settings are put
             * back before the final rounding.
             */
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct an over-estimated quotient word.  */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                /* Normalize n into a 64-bit significand for t.  */
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                /* y is exactly zero.  */
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        /* x < t: negate to get the magnitude of y.  */
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                /* t or y is zero, so z is simply y.  */
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct an over-estimated quotient word.  */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                /* z is zero, so arctan(z) contributes nothing.  */
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold the low table part into the 128-bit arctan(t).  */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Align both terms on the larger exponent plus one, so
                 * there is a spare top bit for the sum.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
1695 */ 1696 if (adj_exp >= axexp) { 1697 shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, 1698 &axsig0, &axsig1); 1699 rexp = adj_exp + 1; 1700 shift128RightJamming(adj_sig0, adj_sig1, 1, 1701 &adj_sig0, &adj_sig1); 1702 } else { 1703 shift128RightJamming(axsig0, axsig1, 1, 1704 &axsig0, &axsig1); 1705 shift128RightJamming(adj_sig0, adj_sig1, 1706 axexp - adj_exp + 1, 1707 &adj_sig0, &adj_sig1); 1708 rexp = axexp + 1; 1709 } 1710 if (adj_sub) { 1711 sub128(adj_sig0, adj_sig1, axsig0, axsig1, 1712 &rsig0, &rsig1); 1713 } else { 1714 add128(adj_sig0, adj_sig1, axsig0, axsig1, 1715 &rsig0, &rsig1); 1716 } 1717 } 1718 1719 env->fp_status.float_rounding_mode = save_mode; 1720 env->fp_status.floatx80_rounding_precision = save_prec; 1721 } 1722 /* This result is inexact. */ 1723 rsig1 |= 1; 1724 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, 1725 rsig0, rsig1, &env->fp_status); 1726 } 1727 1728 fpop(env); 1729 merge_exception_flags(env, old_flags); 1730 } 1731 1732 void helper_fxtract(CPUX86State *env) 1733 { 1734 uint8_t old_flags = save_exception_flags(env); 1735 CPU_LDoubleU temp; 1736 1737 temp.d = ST0; 1738 1739 if (floatx80_is_zero(ST0)) { 1740 /* Easy way to generate -inf and raising division by 0 exception */ 1741 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, 1742 &env->fp_status); 1743 fpush(env); 1744 ST0 = temp.d; 1745 } else if (floatx80_invalid_encoding(ST0)) { 1746 float_raise(float_flag_invalid, &env->fp_status); 1747 ST0 = floatx80_default_nan(&env->fp_status); 1748 fpush(env); 1749 ST0 = ST1; 1750 } else if (floatx80_is_any_nan(ST0)) { 1751 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1752 float_raise(float_flag_invalid, &env->fp_status); 1753 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 1754 } 1755 fpush(env); 1756 ST0 = ST1; 1757 } else if (floatx80_is_infinity(ST0)) { 1758 fpush(env); 1759 ST0 = ST1; 1760 ST1 = floatx80_infinity; 1761 } else { 1762 int expdif; 1763 1764 if (EXPD(temp) == 
0) { 1765 int shift = clz64(temp.l.lower); 1766 temp.l.lower <<= shift; 1767 expdif = 1 - EXPBIAS - shift; 1768 float_raise(float_flag_input_denormal, &env->fp_status); 1769 } else { 1770 expdif = EXPD(temp) - EXPBIAS; 1771 } 1772 /* DP exponent bias */ 1773 ST0 = int32_to_floatx80(expdif, &env->fp_status); 1774 fpush(env); 1775 BIASEXPONENT(temp); 1776 ST0 = temp.d; 1777 } 1778 merge_exception_flags(env, old_flags); 1779 } 1780 1781 static void helper_fprem_common(CPUX86State *env, bool mod) 1782 { 1783 uint8_t old_flags = save_exception_flags(env); 1784 uint64_t quotient; 1785 CPU_LDoubleU temp0, temp1; 1786 int exp0, exp1, expdiff; 1787 1788 temp0.d = ST0; 1789 temp1.d = ST1; 1790 exp0 = EXPD(temp0); 1791 exp1 = EXPD(temp1); 1792 1793 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 1794 if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 1795 exp0 == 0x7fff || exp1 == 0x7fff || 1796 floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { 1797 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1798 } else { 1799 if (exp0 == 0) { 1800 exp0 = 1 - clz64(temp0.l.lower); 1801 } 1802 if (exp1 == 0) { 1803 exp1 = 1 - clz64(temp1.l.lower); 1804 } 1805 expdiff = exp0 - exp1; 1806 if (expdiff < 64) { 1807 ST0 = floatx80_modrem(ST0, ST1, mod, "ient, &env->fp_status); 1808 env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ 1809 env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ 1810 env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ 1811 } else { 1812 /* 1813 * Partial remainder. This choice of how many bits to 1814 * process at once is specified in AMD instruction set 1815 * manuals, and empirically is followed by Intel 1816 * processors as well; it ensures that the final remainder 1817 * operation in a loop does produce the correct low three 1818 * bits of the quotient. AMD manuals specify that the 1819 * flags other than C2 are cleared, and empirically Intel 1820 * processors clear them as well. 
1821 */ 1822 int n = 32 + (expdiff % 32); 1823 temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); 1824 ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); 1825 env->fpus |= 0x400; /* C2 <-- 1 */ 1826 } 1827 } 1828 merge_exception_flags(env, old_flags); 1829 } 1830 1831 void helper_fprem1(CPUX86State *env) 1832 { 1833 helper_fprem_common(env, false); 1834 } 1835 1836 void helper_fprem(CPUX86State *env) 1837 { 1838 helper_fprem_common(env, true); 1839 } 1840 1841 /* 128-bit significand of log2(e). */ 1842 #define log2_e_sig_high 0xb8aa3b295c17f0bbULL 1843 #define log2_e_sig_low 0xbe87fed0691d3e89ULL 1844 1845 /* 1846 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), 1847 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, 1848 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the 1849 * interval [sqrt(2)/2, sqrt(2)]. 1850 */ 1851 #define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) 1852 #define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) 1853 #define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) 1854 #define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) 1855 #define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) 1856 #define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) 1857 #define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) 1858 #define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) 1859 #define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) 1860 #define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) 1861 #define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) 1862 1863 /* 1864 * Compute an approximation of log2(1+arg), where 1+arg is in the 1865 * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this 1866 * function is called, rounding precision is set to 80 and the 1867 * round-to-nearest mode is in effect. 
arg must not be exactly zero, 1868 * and must not be so close to zero that underflow might occur. 1869 */ 1870 static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, 1871 uint64_t *sig0, uint64_t *sig1) 1872 { 1873 uint64_t arg0_sig = extractFloatx80Frac(arg); 1874 int32_t arg0_exp = extractFloatx80Exp(arg); 1875 bool arg0_sign = extractFloatx80Sign(arg); 1876 bool asign; 1877 int32_t dexp, texp, aexp; 1878 uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; 1879 uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; 1880 uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; 1881 floatx80 t2, accum; 1882 1883 /* 1884 * Compute an approximation of arg/(2+arg), with extra precision, 1885 * as the argument to a polynomial approximation. The extra 1886 * precision is only needed for the first term of the 1887 * approximation, with subsequent terms being significantly 1888 * smaller; the approximation only uses odd exponents, and the 1889 * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... 
1890 */ 1891 if (arg0_sign) { 1892 dexp = 0x3fff; 1893 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1894 sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); 1895 } else { 1896 dexp = 0x4000; 1897 shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); 1898 dsig0 |= 0x8000000000000000ULL; 1899 } 1900 texp = arg0_exp - dexp + 0x3ffe; 1901 rsig0 = arg0_sig; 1902 rsig1 = 0; 1903 rsig2 = 0; 1904 if (dsig0 <= rsig0) { 1905 shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); 1906 ++texp; 1907 } 1908 tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); 1909 mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); 1910 sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, 1911 &rsig0, &rsig1, &rsig2); 1912 while ((int64_t) rsig0 < 0) { 1913 --tsig0; 1914 add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, 1915 &rsig0, &rsig1, &rsig2); 1916 } 1917 tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); 1918 /* 1919 * No need to correct any estimation error in tsig1; even with 1920 * such error, it is accurate enough. Now compute the square of 1921 * that approximation. 1922 */ 1923 mul128To256(tsig0, tsig1, tsig0, tsig1, 1924 &t2sig0, &t2sig1, &t2sig2, &t2sig3); 1925 t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, 1926 texp + texp - 0x3ffe, 1927 t2sig0, t2sig1, &env->fp_status); 1928 1929 /* Compute the lower parts of the polynomial expansion. 
*/ 1930 accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); 1931 accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); 1932 accum = floatx80_mul(accum, t2, &env->fp_status); 1933 accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); 1934 accum = floatx80_mul(accum, t2, &env->fp_status); 1935 accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); 1936 accum = floatx80_mul(accum, t2, &env->fp_status); 1937 accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); 1938 accum = floatx80_mul(accum, t2, &env->fp_status); 1939 accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); 1940 accum = floatx80_mul(accum, t2, &env->fp_status); 1941 accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); 1942 accum = floatx80_mul(accum, t2, &env->fp_status); 1943 accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); 1944 accum = floatx80_mul(accum, t2, &env->fp_status); 1945 accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); 1946 accum = floatx80_mul(accum, t2, &env->fp_status); 1947 accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); 1948 1949 /* 1950 * The full polynomial expansion is fyl2x_coeff_0 + accum (where 1951 * accum has much lower magnitude, and so, in particular, carry 1952 * out of the addition is not possible), multiplied by t. (This 1953 * expansion is only accurate to about 70 bits, not 128 bits.) 1954 */ 1955 aexp = extractFloatx80Exp(fyl2x_coeff_0); 1956 asign = extractFloatx80Sign(fyl2x_coeff_0); 1957 shift128RightJamming(extractFloatx80Frac(accum), 0, 1958 aexp - extractFloatx80Exp(accum), 1959 &asig0, &asig1); 1960 bsig0 = extractFloatx80Frac(fyl2x_coeff_0); 1961 bsig1 = 0; 1962 if (asign == extractFloatx80Sign(accum)) { 1963 add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1964 } else { 1965 sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); 1966 } 1967 /* Multiply by t to compute the required result. 
*/
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}

/*
 * x87 FYL2XP1 helper.
 *
 * Reads ST0 and ST1, writes the result to ST1 and then pops the register
 * stack, so the result ends up in the new ST0.  The whole computation is
 * bracketed by save_exception_flags()/merge_exception_flags().
 *
 * NOTE(review): going by the instruction name and the Intel SDM this is
 * ST1 * log2(ST0 + 1); confirm against the manual.
 *
 * The branch chain handles, in order: signaling NaNs, invalid encodings,
 * quiet NaNs, an out-of-range ST0, exact cases (a zero operand or an
 * infinite ST1), tiny ST0 (product with log2(e)), and finally the general
 * case via helper_fyl2x_common().
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        /*
         * The intermediate steps run with round-to-nearest-even and
         * floatx80_precision_x; the guest's rounding mode is put back
         * just before the final rounding below, and the guest's
         * precision setting right after it.
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact. */
        asig1 |= 1;
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * x87 FYL2X helper.
 *
 * Reads ST0 and ST1, writes the result to ST1 and then pops the register
 * stack, so the result ends up in the new ST0.  The whole computation is
 * bracketed by save_exception_flags()/merge_exception_flags().
 *
 * NOTE(review): going by the instruction name and the Intel SDM this is
 * ST1 * log2(ST0); confirm against the manual.
 *
 * Special operands (NaNs, invalid encodings, negative ST0, infinities,
 * zeros, ST0 == 1) are resolved first; only the final branch performs the
 * actual logarithm computation.
 */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* Negative, non-zero ST0 is treated as an invalid operand. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1. */
            break;
        default:
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* ST0 == 1: the result is a zero carrying the sign of ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        /*
         * Intermediate steps use round-to-nearest-even and
         * floatx80_precision_x; the guest's settings are restored
         * further down.
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split ST0 into 2^int_exp times a scaled value; arg0_m1 is that
         * scaled value minus one, which is what helper_fyl2x_common()
         * receives.  NOTE(review): the significand threshold below looks
         * like sqrt(2) in 1.63 fixed point - confirm.
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /*
                 * Add the integer exponent part to the value returned by
                 * helper_fyl2x_common(): int_exp is turned into a
                 * sign/exponent/significand triple and combined with
                 * the 128-bit significand after aligning exponents.
                 */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
2201 */ 2202 if (arg1_exp == 0) { 2203 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2204 } 2205 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2206 aexp += arg1_exp - 0x3ffe; 2207 /* This result is inexact. */ 2208 asig1 |= 1; 2209 env->fp_status.float_rounding_mode = save_mode; 2210 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2211 asign ^ arg1_sign, aexp, 2212 asig0, asig1, &env->fp_status); 2213 } 2214 2215 env->fp_status.floatx80_rounding_precision = save_prec; 2216 } 2217 fpop(env); 2218 merge_exception_flags(env, old_flags); 2219 } 2220 2221 void helper_fsqrt(CPUX86State *env) 2222 { 2223 uint8_t old_flags = save_exception_flags(env); 2224 if (floatx80_is_neg(ST0)) { 2225 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2226 env->fpus |= 0x400; 2227 } 2228 ST0 = floatx80_sqrt(ST0, &env->fp_status); 2229 merge_exception_flags(env, old_flags); 2230 } 2231 2232 void helper_fsincos(CPUX86State *env) 2233 { 2234 double fptemp = floatx80_to_double(env, ST0); 2235 2236 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2237 env->fpus |= 0x400; 2238 } else { 2239 ST0 = double_to_floatx80(env, sin(fptemp)); 2240 fpush(env); 2241 ST0 = double_to_floatx80(env, cos(fptemp)); 2242 env->fpus &= ~0x400; /* C2 <-- 0 */ 2243 /* the above code is for |arg| < 2**63 only */ 2244 } 2245 } 2246 2247 void helper_frndint(CPUX86State *env) 2248 { 2249 uint8_t old_flags = save_exception_flags(env); 2250 ST0 = floatx80_round_to_int(ST0, &env->fp_status); 2251 merge_exception_flags(env, old_flags); 2252 } 2253 2254 void helper_fscale(CPUX86State *env) 2255 { 2256 uint8_t old_flags = save_exception_flags(env); 2257 if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { 2258 float_raise(float_flag_invalid, &env->fp_status); 2259 ST0 = floatx80_default_nan(&env->fp_status); 2260 } else if (floatx80_is_any_nan(ST1)) { 2261 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2262 float_raise(float_flag_invalid, &env->fp_status); 
2263 } 2264 ST0 = ST1; 2265 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2266 float_raise(float_flag_invalid, &env->fp_status); 2267 ST0 = floatx80_silence_nan(ST0, &env->fp_status); 2268 } 2269 } else if (floatx80_is_infinity(ST1) && 2270 !floatx80_invalid_encoding(ST0) && 2271 !floatx80_is_any_nan(ST0)) { 2272 if (floatx80_is_neg(ST1)) { 2273 if (floatx80_is_infinity(ST0)) { 2274 float_raise(float_flag_invalid, &env->fp_status); 2275 ST0 = floatx80_default_nan(&env->fp_status); 2276 } else { 2277 ST0 = (floatx80_is_neg(ST0) ? 2278 floatx80_chs(floatx80_zero) : 2279 floatx80_zero); 2280 } 2281 } else { 2282 if (floatx80_is_zero(ST0)) { 2283 float_raise(float_flag_invalid, &env->fp_status); 2284 ST0 = floatx80_default_nan(&env->fp_status); 2285 } else { 2286 ST0 = (floatx80_is_neg(ST0) ? 2287 floatx80_chs(floatx80_infinity) : 2288 floatx80_infinity); 2289 } 2290 } 2291 } else { 2292 int n; 2293 FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; 2294 uint8_t save_flags = get_float_exception_flags(&env->fp_status); 2295 set_float_exception_flags(0, &env->fp_status); 2296 n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); 2297 set_float_exception_flags(save_flags, &env->fp_status); 2298 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2299 ST0 = floatx80_scalbn(ST0, n, &env->fp_status); 2300 env->fp_status.floatx80_rounding_precision = save; 2301 } 2302 merge_exception_flags(env, old_flags); 2303 } 2304 2305 void helper_fsin(CPUX86State *env) 2306 { 2307 double fptemp = floatx80_to_double(env, ST0); 2308 2309 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2310 env->fpus |= 0x400; 2311 } else { 2312 ST0 = double_to_floatx80(env, sin(fptemp)); 2313 env->fpus &= ~0x400; /* C2 <-- 0 */ 2314 /* the above code is for |arg| < 2**53 only */ 2315 } 2316 } 2317 2318 void helper_fcos(CPUX86State *env) 2319 { 2320 double fptemp = floatx80_to_double(env, ST0); 2321 2322 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { 2323 
env->fpus |= 0x400; 2324 } else { 2325 ST0 = double_to_floatx80(env, cos(fptemp)); 2326 env->fpus &= ~0x400; /* C2 <-- 0 */ 2327 /* the above code is for |arg| < 2**63 only */ 2328 } 2329 } 2330 2331 void helper_fxam_ST0(CPUX86State *env) 2332 { 2333 CPU_LDoubleU temp; 2334 int expdif; 2335 2336 temp.d = ST0; 2337 2338 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ 2339 if (SIGND(temp)) { 2340 env->fpus |= 0x200; /* C1 <-- 1 */ 2341 } 2342 2343 if (env->fptags[env->fpstt]) { 2344 env->fpus |= 0x4100; /* Empty */ 2345 return; 2346 } 2347 2348 expdif = EXPD(temp); 2349 if (expdif == MAXEXPD) { 2350 if (MANTD(temp) == 0x8000000000000000ULL) { 2351 env->fpus |= 0x500; /* Infinity */ 2352 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2353 env->fpus |= 0x100; /* NaN */ 2354 } 2355 } else if (expdif == 0) { 2356 if (MANTD(temp) == 0) { 2357 env->fpus |= 0x4000; /* Zero */ 2358 } else { 2359 env->fpus |= 0x4400; /* Denormal */ 2360 } 2361 } else if (MANTD(temp) & 0x8000000000000000ULL) { 2362 env->fpus |= 0x400; 2363 } 2364 } 2365 2366 static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, 2367 uintptr_t retaddr) 2368 { 2369 int fpus, fptag, exp, i; 2370 uint64_t mant; 2371 CPU_LDoubleU tmp; 2372 2373 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2374 fptag = 0; 2375 for (i = 7; i >= 0; i--) { 2376 fptag <<= 2; 2377 if (env->fptags[i]) { 2378 fptag |= 3; 2379 } else { 2380 tmp.d = env->fpregs[i].d; 2381 exp = EXPD(tmp); 2382 mant = MANTD(tmp); 2383 if (exp == 0 && mant == 0) { 2384 /* zero */ 2385 fptag |= 1; 2386 } else if (exp == 0 || exp == MAXEXPD 2387 || (mant & (1LL << 63)) == 0) { 2388 /* NaNs, infinity, denormal */ 2389 fptag |= 2; 2390 } 2391 } 2392 } 2393 if (data32) { 2394 /* 32 bit */ 2395 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); 2396 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); 2397 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); 2398 cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ 2399 
cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ 2400 cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ 2401 cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ 2402 } else { 2403 /* 16 bit */ 2404 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); 2405 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); 2406 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); 2407 cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); 2408 cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); 2409 cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); 2410 cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); 2411 } 2412 } 2413 2414 void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) 2415 { 2416 do_fstenv(env, ptr, data32, GETPC()); 2417 } 2418 2419 static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) 2420 { 2421 env->fpstt = (fpus >> 11) & 7; 2422 env->fpus = fpus & ~0x3800 & ~FPUS_B; 2423 env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0; 2424 #if !defined(CONFIG_USER_ONLY) 2425 if (!(env->fpus & FPUS_SE)) { 2426 /* 2427 * Here the processor deasserts FERR#; in response, the chipset deasserts 2428 * IGNNE#. 
2429 */ 2430 cpu_clear_ignne(); 2431 } 2432 #endif 2433 } 2434 2435 static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, 2436 uintptr_t retaddr) 2437 { 2438 int i, fpus, fptag; 2439 2440 if (data32) { 2441 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2442 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2443 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); 2444 } else { 2445 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); 2446 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); 2447 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); 2448 } 2449 cpu_set_fpus(env, fpus); 2450 for (i = 0; i < 8; i++) { 2451 env->fptags[i] = ((fptag & 3) == 3); 2452 fptag >>= 2; 2453 } 2454 } 2455 2456 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2457 { 2458 do_fldenv(env, ptr, data32, GETPC()); 2459 } 2460 2461 static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, 2462 uintptr_t retaddr) 2463 { 2464 floatx80 tmp; 2465 int i; 2466 2467 do_fstenv(env, ptr, data32, retaddr); 2468 2469 ptr += (target_ulong)14 << data32; 2470 for (i = 0; i < 8; i++) { 2471 tmp = ST(i); 2472 do_fstt(env, tmp, ptr, retaddr); 2473 ptr += 10; 2474 } 2475 2476 do_fninit(env); 2477 } 2478 2479 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2480 { 2481 do_fsave(env, ptr, data32, GETPC()); 2482 } 2483 2484 static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, 2485 uintptr_t retaddr) 2486 { 2487 floatx80 tmp; 2488 int i; 2489 2490 do_fldenv(env, ptr, data32, retaddr); 2491 ptr += (target_ulong)14 << data32; 2492 2493 for (i = 0; i < 8; i++) { 2494 tmp = do_fldt(env, ptr, retaddr); 2495 ST(i) = tmp; 2496 ptr += 10; 2497 } 2498 } 2499 2500 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2501 { 2502 do_frstor(env, ptr, data32, GETPC()); 2503 } 2504 2505 #define XO(X) offsetof(X86XSaveArea, X) 2506 2507 static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2508 { 2509 int fpus, 
fptag, i; 2510 target_ulong addr; 2511 2512 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; 2513 fptag = 0; 2514 for (i = 0; i < 8; i++) { 2515 fptag |= (env->fptags[i] << i); 2516 } 2517 2518 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); 2519 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); 2520 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); 2521 2522 /* In 32-bit mode this is eip, sel, dp, sel. 2523 In 64-bit mode this is rip, rdp. 2524 But in either case we don't write actual data, just zeros. */ 2525 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ 2526 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ 2527 2528 addr = ptr + XO(legacy.fpregs); 2529 for (i = 0; i < 8; i++) { 2530 floatx80 tmp = ST(i); 2531 do_fstt(env, tmp, addr, ra); 2532 addr += 16; 2533 } 2534 } 2535 2536 static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2537 { 2538 update_mxcsr_from_sse_status(env); 2539 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); 2540 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); 2541 } 2542 2543 static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2544 { 2545 int i, nb_xmm_regs; 2546 target_ulong addr; 2547 2548 if (env->hflags & HF_CS64_MASK) { 2549 nb_xmm_regs = 16; 2550 } else { 2551 nb_xmm_regs = 8; 2552 } 2553 2554 addr = ptr + XO(legacy.xmm_regs); 2555 for (i = 0; i < nb_xmm_regs; i++) { 2556 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); 2557 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); 2558 addr += 16; 2559 } 2560 } 2561 2562 static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2563 { 2564 int i, nb_xmm_regs; 2565 2566 if (env->hflags & HF_CS64_MASK) { 2567 nb_xmm_regs = 16; 2568 } else { 2569 nb_xmm_regs = 8; 2570 } 2571 2572 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2573 cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); 2574 
cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); 2575 } 2576 } 2577 2578 static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2579 { 2580 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2581 int i; 2582 2583 for (i = 0; i < 4; i++, addr += 16) { 2584 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); 2585 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); 2586 } 2587 } 2588 2589 static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2590 { 2591 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), 2592 env->bndcs_regs.cfgu, ra); 2593 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), 2594 env->bndcs_regs.sts, ra); 2595 } 2596 2597 static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2598 { 2599 cpu_stq_data_ra(env, ptr, env->pkru, ra); 2600 } 2601 2602 static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2603 { 2604 /* The operand must be 16 byte aligned */ 2605 if (ptr & 0xf) { 2606 raise_exception_ra(env, EXCP0D_GPF, ra); 2607 } 2608 2609 do_xsave_fpu(env, ptr, ra); 2610 2611 if (env->cr[4] & CR4_OSFXSR_MASK) { 2612 do_xsave_mxcsr(env, ptr, ra); 2613 /* Fast FXSAVE leaves out the XMM registers */ 2614 if (!(env->efer & MSR_EFER_FFXSR) 2615 || (env->hflags & HF_CPL_MASK) 2616 || !(env->hflags & HF_LMA_MASK)) { 2617 do_xsave_sse(env, ptr, ra); 2618 } 2619 } 2620 } 2621 2622 void helper_fxsave(CPUX86State *env, target_ulong ptr) 2623 { 2624 do_fxsave(env, ptr, GETPC()); 2625 } 2626 2627 static uint64_t get_xinuse(CPUX86State *env) 2628 { 2629 uint64_t inuse = -1; 2630 2631 /* For the most part, we don't track XINUSE. We could calculate it 2632 here for all components, but it's probably less work to simply 2633 indicate in use. That said, the state of BNDREGS is important 2634 enough to track in HFLAGS, so we might as well use that here. 
*/ 2635 if ((env->hflags & HF_MPX_IU_MASK) == 0) { 2636 inuse &= ~XSTATE_BNDREGS_MASK; 2637 } 2638 return inuse; 2639 } 2640 2641 static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, 2642 uint64_t inuse, uint64_t opt, uintptr_t ra) 2643 { 2644 uint64_t old_bv, new_bv; 2645 2646 /* The OS must have enabled XSAVE. */ 2647 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2648 raise_exception_ra(env, EXCP06_ILLOP, ra); 2649 } 2650 2651 /* The operand must be 64 byte aligned. */ 2652 if (ptr & 63) { 2653 raise_exception_ra(env, EXCP0D_GPF, ra); 2654 } 2655 2656 /* Never save anything not enabled by XCR0. */ 2657 rfbm &= env->xcr0; 2658 opt &= rfbm; 2659 2660 if (opt & XSTATE_FP_MASK) { 2661 do_xsave_fpu(env, ptr, ra); 2662 } 2663 if (rfbm & XSTATE_SSE_MASK) { 2664 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ 2665 do_xsave_mxcsr(env, ptr, ra); 2666 } 2667 if (opt & XSTATE_SSE_MASK) { 2668 do_xsave_sse(env, ptr, ra); 2669 } 2670 if (opt & XSTATE_YMM_MASK) { 2671 do_xsave_ymmh(env, ptr + XO(avx_state), ra); 2672 } 2673 if (opt & XSTATE_BNDREGS_MASK) { 2674 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); 2675 } 2676 if (opt & XSTATE_BNDCSR_MASK) { 2677 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); 2678 } 2679 if (opt & XSTATE_PKRU_MASK) { 2680 do_xsave_pkru(env, ptr + XO(pkru_state), ra); 2681 } 2682 2683 /* Update the XSTATE_BV field. 
*/ 2684 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2685 new_bv = (old_bv & ~rfbm) | (inuse & rfbm); 2686 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); 2687 } 2688 2689 void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2690 { 2691 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); 2692 } 2693 2694 void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2695 { 2696 uint64_t inuse = get_xinuse(env); 2697 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); 2698 } 2699 2700 static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2701 { 2702 int i, fpuc, fpus, fptag; 2703 target_ulong addr; 2704 2705 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); 2706 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); 2707 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); 2708 cpu_set_fpuc(env, fpuc); 2709 cpu_set_fpus(env, fpus); 2710 fptag ^= 0xff; 2711 for (i = 0; i < 8; i++) { 2712 env->fptags[i] = ((fptag >> i) & 1); 2713 } 2714 2715 addr = ptr + XO(legacy.fpregs); 2716 for (i = 0; i < 8; i++) { 2717 floatx80 tmp = do_fldt(env, addr, ra); 2718 ST(i) = tmp; 2719 addr += 16; 2720 } 2721 } 2722 2723 static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2724 { 2725 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); 2726 } 2727 2728 static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2729 { 2730 int i, nb_xmm_regs; 2731 target_ulong addr; 2732 2733 if (env->hflags & HF_CS64_MASK) { 2734 nb_xmm_regs = 16; 2735 } else { 2736 nb_xmm_regs = 8; 2737 } 2738 2739 addr = ptr + XO(legacy.xmm_regs); 2740 for (i = 0; i < nb_xmm_regs; i++) { 2741 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); 2742 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); 2743 addr += 16; 2744 } 2745 } 2746 2747 static void do_clear_sse(CPUX86State *env) 2748 { 2749 int i, nb_xmm_regs; 2750 2751 if (env->hflags & 
HF_CS64_MASK) { 2752 nb_xmm_regs = 16; 2753 } else { 2754 nb_xmm_regs = 8; 2755 } 2756 2757 for (i = 0; i < nb_xmm_regs; i++) { 2758 env->xmm_regs[i].ZMM_Q(0) = 0; 2759 env->xmm_regs[i].ZMM_Q(1) = 0; 2760 } 2761 } 2762 2763 static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2764 { 2765 int i, nb_xmm_regs; 2766 2767 if (env->hflags & HF_CS64_MASK) { 2768 nb_xmm_regs = 16; 2769 } else { 2770 nb_xmm_regs = 8; 2771 } 2772 2773 for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { 2774 env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); 2775 env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); 2776 } 2777 } 2778 2779 static void do_clear_ymmh(CPUX86State *env) 2780 { 2781 int i, nb_xmm_regs; 2782 2783 if (env->hflags & HF_CS64_MASK) { 2784 nb_xmm_regs = 16; 2785 } else { 2786 nb_xmm_regs = 8; 2787 } 2788 2789 for (i = 0; i < nb_xmm_regs; i++) { 2790 env->xmm_regs[i].ZMM_Q(2) = 0; 2791 env->xmm_regs[i].ZMM_Q(3) = 0; 2792 } 2793 } 2794 2795 static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2796 { 2797 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); 2798 int i; 2799 2800 for (i = 0; i < 4; i++, addr += 16) { 2801 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); 2802 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); 2803 } 2804 } 2805 2806 static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2807 { 2808 /* FIXME: Extend highest implemented bit of linear address. 
*/ 2809 env->bndcs_regs.cfgu 2810 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); 2811 env->bndcs_regs.sts 2812 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); 2813 } 2814 2815 static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2816 { 2817 env->pkru = cpu_ldq_data_ra(env, ptr, ra); 2818 } 2819 2820 static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) 2821 { 2822 /* The operand must be 16 byte aligned */ 2823 if (ptr & 0xf) { 2824 raise_exception_ra(env, EXCP0D_GPF, ra); 2825 } 2826 2827 do_xrstor_fpu(env, ptr, ra); 2828 2829 if (env->cr[4] & CR4_OSFXSR_MASK) { 2830 do_xrstor_mxcsr(env, ptr, ra); 2831 /* Fast FXRSTOR leaves out the XMM registers */ 2832 if (!(env->efer & MSR_EFER_FFXSR) 2833 || (env->hflags & HF_CPL_MASK) 2834 || !(env->hflags & HF_LMA_MASK)) { 2835 do_xrstor_sse(env, ptr, ra); 2836 } 2837 } 2838 } 2839 2840 void helper_fxrstor(CPUX86State *env, target_ulong ptr) 2841 { 2842 do_fxrstor(env, ptr, GETPC()); 2843 } 2844 2845 static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) 2846 { 2847 uint64_t xstate_bv, xcomp_bv, reserve0; 2848 2849 rfbm &= env->xcr0; 2850 2851 /* The OS must have enabled XSAVE. */ 2852 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2853 raise_exception_ra(env, EXCP06_ILLOP, ra); 2854 } 2855 2856 /* The operand must be 64 byte aligned. */ 2857 if (ptr & 63) { 2858 raise_exception_ra(env, EXCP0D_GPF, ra); 2859 } 2860 2861 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); 2862 2863 if ((int64_t)xstate_bv < 0) { 2864 /* FIXME: Compact form. */ 2865 raise_exception_ra(env, EXCP0D_GPF, ra); 2866 } 2867 2868 /* Standard form. */ 2869 2870 /* The XSTATE_BV field must not set bits not present in XCR0. */ 2871 if (xstate_bv & ~env->xcr0) { 2872 raise_exception_ra(env, EXCP0D_GPF, ra); 2873 } 2874 2875 /* The XCOMP_BV field must be zero. 
Note that, as of the April 2016 2876 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) 2877 describes only XCOMP_BV, but the description of the standard form 2878 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which 2879 includes the next 64-bit field. */ 2880 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); 2881 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); 2882 if (xcomp_bv || reserve0) { 2883 raise_exception_ra(env, EXCP0D_GPF, ra); 2884 } 2885 2886 if (rfbm & XSTATE_FP_MASK) { 2887 if (xstate_bv & XSTATE_FP_MASK) { 2888 do_xrstor_fpu(env, ptr, ra); 2889 } else { 2890 do_fninit(env); 2891 memset(env->fpregs, 0, sizeof(env->fpregs)); 2892 } 2893 } 2894 if (rfbm & XSTATE_SSE_MASK) { 2895 /* Note that the standard form of XRSTOR loads MXCSR from memory 2896 whether or not the XSTATE_BV bit is set. */ 2897 do_xrstor_mxcsr(env, ptr, ra); 2898 if (xstate_bv & XSTATE_SSE_MASK) { 2899 do_xrstor_sse(env, ptr, ra); 2900 } else { 2901 do_clear_sse(env); 2902 } 2903 } 2904 if (rfbm & XSTATE_YMM_MASK) { 2905 if (xstate_bv & XSTATE_YMM_MASK) { 2906 do_xrstor_ymmh(env, ptr + XO(avx_state), ra); 2907 } else { 2908 do_clear_ymmh(env); 2909 } 2910 } 2911 if (rfbm & XSTATE_BNDREGS_MASK) { 2912 if (xstate_bv & XSTATE_BNDREGS_MASK) { 2913 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); 2914 env->hflags |= HF_MPX_IU_MASK; 2915 } else { 2916 memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); 2917 env->hflags &= ~HF_MPX_IU_MASK; 2918 } 2919 } 2920 if (rfbm & XSTATE_BNDCSR_MASK) { 2921 if (xstate_bv & XSTATE_BNDCSR_MASK) { 2922 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); 2923 } else { 2924 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); 2925 } 2926 cpu_sync_bndcs_hflags(env); 2927 } 2928 if (rfbm & XSTATE_PKRU_MASK) { 2929 uint64_t old_pkru = env->pkru; 2930 if (xstate_bv & XSTATE_PKRU_MASK) { 2931 do_xrstor_pkru(env, ptr + XO(pkru_state), ra); 2932 } else { 2933 env->pkru = 0; 2934 } 2935 if (env->pkru != 
old_pkru) { 2936 CPUState *cs = env_cpu(env); 2937 tlb_flush(cs); 2938 } 2939 } 2940 } 2941 2942 #undef XO 2943 2944 void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) 2945 { 2946 do_xrstor(env, ptr, rfbm, GETPC()); 2947 } 2948 2949 #if defined(CONFIG_USER_ONLY) 2950 void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) 2951 { 2952 do_fsave(env, ptr, data32, 0); 2953 } 2954 2955 void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) 2956 { 2957 do_frstor(env, ptr, data32, 0); 2958 } 2959 2960 void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) 2961 { 2962 do_fxsave(env, ptr, 0); 2963 } 2964 2965 void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) 2966 { 2967 do_fxrstor(env, ptr, 0); 2968 } 2969 2970 void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) 2971 { 2972 do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); 2973 } 2974 2975 void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) 2976 { 2977 do_xrstor(env, ptr, -1, 0); 2978 } 2979 #endif 2980 2981 uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) 2982 { 2983 /* The OS must have enabled XSAVE. */ 2984 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 2985 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 2986 } 2987 2988 switch (ecx) { 2989 case 0: 2990 return env->xcr0; 2991 case 1: 2992 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { 2993 return env->xcr0 & get_xinuse(env); 2994 } 2995 break; 2996 } 2997 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 2998 } 2999 3000 void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) 3001 { 3002 uint32_t dummy, ena_lo, ena_hi; 3003 uint64_t ena; 3004 3005 /* The OS must have enabled XSAVE. */ 3006 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { 3007 raise_exception_ra(env, EXCP06_ILLOP, GETPC()); 3008 } 3009 3010 /* Only XCR0 is defined at present; the FPU may not be disabled. */ 3011 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { 3012 goto do_gpf; 3013 } 3014 3015 /* Disallow enabling unimplemented features. 
*/ 3016 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); 3017 ena = ((uint64_t)ena_hi << 32) | ena_lo; 3018 if (mask & ~ena) { 3019 goto do_gpf; 3020 } 3021 3022 /* Disallow enabling only half of MPX. */ 3023 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) 3024 & XSTATE_BNDCSR_MASK) { 3025 goto do_gpf; 3026 } 3027 3028 env->xcr0 = mask; 3029 cpu_sync_bndcs_hflags(env); 3030 cpu_sync_avx_hflag(env); 3031 return; 3032 3033 do_gpf: 3034 raise_exception_ra(env, EXCP0D_GPF, GETPC()); 3035 } 3036 3037 /* MMX/SSE */ 3038 /* XXX: optimize by storing fptt and fptags in the static cpu state */ 3039 3040 #define SSE_DAZ 0x0040 3041 #define SSE_RC_MASK 0x6000 3042 #define SSE_RC_NEAR 0x0000 3043 #define SSE_RC_DOWN 0x2000 3044 #define SSE_RC_UP 0x4000 3045 #define SSE_RC_CHOP 0x6000 3046 #define SSE_FZ 0x8000 3047 3048 void update_mxcsr_status(CPUX86State *env) 3049 { 3050 uint32_t mxcsr = env->mxcsr; 3051 int rnd_type; 3052 3053 /* set rounding mode */ 3054 switch (mxcsr & SSE_RC_MASK) { 3055 default: 3056 case SSE_RC_NEAR: 3057 rnd_type = float_round_nearest_even; 3058 break; 3059 case SSE_RC_DOWN: 3060 rnd_type = float_round_down; 3061 break; 3062 case SSE_RC_UP: 3063 rnd_type = float_round_up; 3064 break; 3065 case SSE_RC_CHOP: 3066 rnd_type = float_round_to_zero; 3067 break; 3068 } 3069 set_float_rounding_mode(rnd_type, &env->sse_status); 3070 3071 /* Set exception flags. */ 3072 set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | 3073 (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | 3074 (mxcsr & FPUS_OE ? float_flag_overflow : 0) | 3075 (mxcsr & FPUS_UE ? float_flag_underflow : 0) | 3076 (mxcsr & FPUS_PE ? float_flag_inexact : 0), 3077 &env->sse_status); 3078 3079 /* set denormals are zero */ 3080 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); 3081 3082 /* set flush to zero */ 3083 set_flush_to_zero((mxcsr & SSE_FZ) ? 
1 : 0, &env->sse_status); 3084 } 3085 3086 void update_mxcsr_from_sse_status(CPUX86State *env) 3087 { 3088 uint8_t flags = get_float_exception_flags(&env->sse_status); 3089 /* 3090 * The MXCSR denormal flag has opposite semantics to 3091 * float_flag_input_denormal (the softfloat code sets that flag 3092 * only when flushing input denormals to zero, but SSE sets it 3093 * only when not flushing them to zero), so is not converted 3094 * here. 3095 */ 3096 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3097 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3098 (flags & float_flag_overflow ? FPUS_OE : 0) | 3099 (flags & float_flag_underflow ? FPUS_UE : 0) | 3100 (flags & float_flag_inexact ? FPUS_PE : 0) | 3101 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3102 0)); 3103 } 3104 3105 void helper_update_mxcsr(CPUX86State *env) 3106 { 3107 update_mxcsr_from_sse_status(env); 3108 } 3109 3110 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3111 { 3112 cpu_set_mxcsr(env, val); 3113 } 3114 3115 void helper_enter_mmx(CPUX86State *env) 3116 { 3117 env->fpstt = 0; 3118 *(uint32_t *)(env->fptags) = 0; 3119 *(uint32_t *)(env->fptags + 4) = 0; 3120 } 3121 3122 void helper_emms(CPUX86State *env) 3123 { 3124 /* set to empty state */ 3125 *(uint32_t *)(env->fptags) = 0x01010101; 3126 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3127 } 3128 3129 #define SHIFT 0 3130 #include "ops_sse.h" 3131 3132 #define SHIFT 1 3133 #include "ops_sse.h" 3134 3135 #define SHIFT 2 3136 #include "ops_sse.h" 3137