1 /* 2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers 3 * 4 * Copyright (c) 2003 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include <math.h> 22 #include "cpu.h" 23 #include "tcg-cpu.h" 24 #include "exec/exec-all.h" 25 #include "exec/cpu_ldst.h" 26 #include "exec/helper-proto.h" 27 #include "fpu/softfloat.h" 28 #include "fpu/softfloat-macros.h" 29 #include "helper-tcg.h" 30 #include "access.h" 31 32 /* float macros */ 33 #define FT0 (env->ft0) 34 #define ST0 (env->fpregs[env->fpstt].d) 35 #define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) 36 #define ST1 ST(1) 37 38 #define FPU_RC_SHIFT 10 39 #define FPU_RC_MASK (3 << FPU_RC_SHIFT) 40 #define FPU_RC_NEAR 0x000 41 #define FPU_RC_DOWN 0x400 42 #define FPU_RC_UP 0x800 43 #define FPU_RC_CHOP 0xc00 44 45 #define MAXTAN 9223372036854775808.0 46 47 /* the following deal with x86 long double-precision numbers */ 48 #define MAXEXPD 0x7fff 49 #define EXPBIAS 16383 50 #define EXPD(fp) (fp.l.upper & 0x7fff) 51 #define SIGND(fp) ((fp.l.upper) & 0x8000) 52 #define MANTD(fp) (fp.l.lower) 53 #define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS 54 55 #define FPUS_IE (1 << 0) 56 #define FPUS_DE (1 << 1) 57 #define FPUS_ZE (1 << 2) 58 #define FPUS_OE (1 << 3) 59 #define FPUS_UE (1 << 4) 60 #define FPUS_PE (1 << 
5) /* precision (inexact) */
#define FPUS_SF (1 << 6)  /* stack fault */
#define FPUS_SE (1 << 7)  /* error summary */
#define FPUS_B (1 << 15)  /* FPU busy */

#define FPUC_EM 0x3f /* exception mask bits in the control word */

/*
 * x87 load-constant values.  The "_d" variants are the value rounded
 * toward -inf and "_u" toward +inf; the helpers below pick one based on
 * the rounding-control field of the FPU control word.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)

/* Grow the x87 stack by one register and mark the new TOS valid. */
static inline void fpush(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fptags[env->fpstt] = 0; /* validate stack entry */
}

/* Pop the x87 stack: invalidate the current TOS, then advance. */
static inline void fpop(CPUX86State *env)
{
    env->fptags[env->fpstt] = 1; /* invalidate stack entry */
    env->fpstt = (env->fpstt + 1) & 7;
}

/* Load an 80-bit value: 8-byte significand, then 2-byte sign/exponent. */
static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
    CPU_LDoubleU temp;

    temp.l.lower = access_ldq(ac, ptr);
    temp.l.upper = access_ldw(ac, ptr + 8);
    return temp.d;
}

/* Store an 80-bit value in the same two-part layout as do_fldt(). */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU temp;

    temp.d = f;
    access_stq(ac, ptr, temp.l.lower);
    access_stw(ac, ptr + 8, temp.l.upper);
}

/* x87 FPU helpers */

/* Convert via float64; type-puns through a union to avoid aliasing UB. */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.f64 = floatx80_to_float64(a, &env->fp_status);
    return u.d;
}

static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
{
    union {
        float64 f64;
        double d;
    } u;

    u.d = a;
    return float64_to_floatx80(u.f64, &env->fp_status);
}

/*
 * OR the given exception bits into the status word; if any set exception
 * is unmasked in the control word, also raise the summary and busy bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    env->fpus |= mask;
    if (env->fpus & (~env->fpuc & FPUC_EM)) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}

/* Clear softfloat's accumulated flags, returning the previous set. */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}

/*
 * Fold the softfloat flags raised since save_exception_flags() into the
 * x87 status word, then restore the previously saved flags.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t new_flags = get_float_exception_flags(&env->fp_status);
    float_raise(old_flags, &env->fp_status);
    fpu_set_exception(env,
                      ((new_flags & float_flag_invalid ? FPUS_IE : 0) |
                       (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                       (new_flags & float_flag_overflow ? FPUS_OE : 0) |
                       (new_flags & float_flag_underflow ? FPUS_UE : 0) |
                       (new_flags & float_flag_inexact ? FPUS_PE : 0) |
                       (new_flags & float_flag_input_denormal ? FPUS_DE : 0)));
}

/* Division with x87 status-word flag accounting. */
static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
{
    uint8_t old_flags = save_exception_flags(env);
    floatx80 ret = floatx80_div(a, b, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return ret;
}

/* Deliver a pending FPU exception: #MF if CR0.NE is set, else FERR# IRQ. */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
    }
#if !defined(CONFIG_USER_ONLY)
    else {
        fpu_check_raise_ferr_irq(env);
    }
#endif
}

void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.i = val;
    FT0 = float32_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.i = val;
    FT0 = float64_to_floatx80(u.f, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* int32 -> floatx80 is always exact, so no flag handling is needed. */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FT0 = int32_to_floatx80(val, &env->fp_status);
}

void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float32 f;
        uint32_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t old_flags = save_exception_flags(env);
    int new_fpstt;
    union {
        float64 f;
        uint64_t i;
    } u;

    new_fpstt = (env->fpstt - 1) & 7;
    u.i = val;
    env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
    merge_exception_flags(env, old_flags);
}

/* Temporarily force full 64-bit precision; returns the old setting. */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}

/* Integer loads use full precision so the conversion is exact. */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    int new_fpstt;
    FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status);

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack
entry */

    set_floatx80_rounding_precision(old, &env->fp_status);
}

/* FST m32: narrow ST0, folding any rounding flags into the status word. */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } u;

    u.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } u;

    u.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
    return u.i;
}

/* FIST m16: values outside int16 range become the indefinite -32768. */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FIST m32: invalid conversions yield the integer indefinite 0x80000000. */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FISTT variants truncate (round toward zero) regardless of FPUC.RC. */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (val != (int16_t)val) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        val = -32768;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int32_t val;

    val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x80000000;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int64_t val;

    val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
        val = 0x8000000000000000ULL;
    }
    merge_exception_flags(env, old_flags);
    return val;
}

/* FLD m80: push a ten-byte extended-precision value from memory. */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    int new_fpstt;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    new_fpstt = (env->fpstt - 1) & 7;
    env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
    env->fpstt = new_fpstt;
    env->fptags[new_fpstt] = 0; /* validate stack entry */
}

void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}

void helper_fpush(CPUX86State *env)
{
    fpush(env);
}

void helper_fpop(CPUX86State *env)
{
    fpop(env);
}

/* FDECSTP/FINCSTP move TOS without touching tags; clear C0-C3 bits. */
void helper_fdecstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt - 1) & 7;
    env->fpus &= ~0x4700;
}

void helper_fincstp(CPUX86State *env)
{
    env->fpstt = (env->fpstt + 1) & 7;
    env->fpus &= ~0x4700;
}

/* FPU move */

/* FFREE: tag ST(i) empty without changing its contents. */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}

void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}

void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}

void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}

void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}

void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 tmp;

    tmp = ST(st_index);
    ST(st_index) = ST0;
    ST0 = tmp;
}

/* FPU operations */

/* C3/C2/C0 status patterns for less-than, equal, greater, unordered. */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};

void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* FUCOM: quiet compare — only signaling NaNs raise invalid. */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
    merge_exception_flags(env, old_flags);
}

/* EFLAGS patterns for less-than, equal, greater, unordered. */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};

/* FCOMI: compare ST0 with FT0 and set ZF/PF/CF instead of C-bits. */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    int eflags;
    FloatRelation ret;

    ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);
    CC_SRC = eflags | fcomi_ccval[ret + 1];
    CC_OP =
CC_OP_EFLAGS;
    merge_exception_flags(env, old_flags);
}

void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_add(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/* FSUBR: reversed operand order, ST0 = FT0 - ST0. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}

void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}

/* fp operations between STN and ST0 */

void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t old_flags = save_exception_flags(env);
    ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
    merge_exception_flags(env, old_flags);
}

void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, *p, ST0);
}

void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 *p;

    p = &ST(st_index);
    *p = helper_fdiv(env, ST0, *p);
}

/* misc FPU operations */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}

void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}

void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}

/*
 * FLD-constant helpers: pick the table variant pre-rounded in the
 * direction selected by the rounding-control field of the control word.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_UP:
        ST0 = floatx80_l2t_u;
        break;
    default:
        ST0 = floatx80_l2t;
        break;
    }
}

void helper_fldl2e_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_l2e_d;
        break;
    default:
        ST0 = floatx80_l2e;
        break;
    }
}

void helper_fldpi_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_pi_d;
        break;
    default:
        ST0 = floatx80_pi;
        break;
    }
}

void helper_fldlg2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_lg2_d;
        break;
    default:
        ST0 = floatx80_lg2;
        break;
    }
}

void helper_fldln2_ST0(CPUX86State *env)
{
    switch (env->fpuc & FPU_RC_MASK) {
    case FPU_RC_DOWN:
    case FPU_RC_CHOP:
        ST0 = floatx80_ln2_d;
        break;
    default:
        ST0 = floatx80_ln2;
        break;
    }
}

void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}

void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}

/* FNSTSW: status word with the current top-of-stack field inserted. */
uint32_t helper_fnstsw(CPUX86State *env)
{
    return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
}

uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}

/* Map the 2-bit x87/MXCSR RC field onto a softfloat rounding mode. */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    static FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}

/* Re-derive softfloat rounding mode and precision from the control word. */
void update_fp_status(CPUX86State *env)
{
    int rnd_mode;
    FloatX80RoundPrec rnd_prec;

    /* set rounding mode */
    rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    set_x86_rounding_mode(rnd_mode, &env->fp_status);

    /* precision-control field: 0 = single, 2 = double, 3 = extended */
    switch ((env->fpuc >> 8) & 3) {
    case 0:
        rnd_prec = floatx80_precision_s;
        break;
    case 2:
        rnd_prec = floatx80_precision_d;
        break;
    case 3:
    default:
        rnd_prec = floatx80_precision_x;
        break;
    }
    set_floatx80_rounding_precision(rnd_prec, &env->fp_status);
}

void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}

/* FNCLEX: clear exception, summary and busy bits; keep TOS and C-bits. */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}

/* FWAIT: deliver any pending unmasked FPU exception. */
void helper_fwait(CPUX86State *env)
{
    if (env->fpus & FPUS_SE) {
        fpu_raise_exception(env, GETPC());
    }
}

/* FNINIT: control word 0x37f (all exceptions masked), all tags empty. */
static void do_fninit(CPUX86State *env)
{
    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);
    env->fptags[0] = 1;
    env->fptags[1] = 1;
    env->fptags[2] = 1;
    env->fptags[3] = 1;
    env->fptags[4] = 1;
    env->fptags[5] = 1;
    env->fptags[6] = 1;
    env->fptags[7] = 1;
}

void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}

/* BCD ops */

/* FBLD: load an 18-digit packed-BCD integer; sign is bit 7 of byte 9. */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Accumulate two BCD digits per byte, most significant byte first. */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }
    tmp = int64_to_floatx80(val, &env->fp_status);
    if (access_ldb(&ac, ptr + 9) & 0x80) {
        /* Apply the sign after conversion of the magnitude. */
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}

/* FBSTP: store ST0 as packed BCD; out-of-range gives the BCD indefinite. */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    int v;
    target_ulong mem_ref, mem_end;
    int64_t val;
    CPU_LDoubleU temp;
    X86Access ac;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    mem_ref = ptr;
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        /* Does not fit in 18 BCD digits: store the indefinite encoding. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        while (mem_ref < ptr + 7) {
            access_stb(&ac, mem_ref++, 0);
        }
        access_stb(&ac, mem_ref++, 0xc0);
        access_stb(&ac, mem_ref++, 0xff);
        access_stb(&ac, mem_ref++, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }
    mem_end = mem_ref + 9;
    if (SIGND(temp)) {
        access_stb(&ac, mem_end, 0x80);
        val = -val;
    } else {
        access_stb(&ac, mem_end, 0x00);
    }
    /* Emit two decimal digits per byte, least significant first. */
    while (mem_ref < mem_end) {
        if (val == 0) {
            break;
        }
        v = val % 100;
        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, mem_ref++, v);
    }
    /* Zero-fill the remaining digit bytes. */
    while (mem_ref < mem_end) {
        access_stb(&ac, mem_ref++, 0);
    }
    merge_exception_flags(env, old_flags);
}

/* 128-bit significand of log(2). */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
853 */ 854 #define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL) 855 #define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL) 856 #define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL) 857 #define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL) 858 #define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL) 859 #define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL) 860 #define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL) 861 #define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL) 862 #define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL) 863 864 struct f2xm1_data { 865 /* 866 * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1 867 * are very close to exact floatx80 values. 868 */ 869 floatx80 t; 870 /* The value of 2^t. */ 871 floatx80 exp2; 872 /* The value of 2^t - 1. */ 873 floatx80 exp2m1; 874 }; 875 876 static const struct f2xm1_data f2xm1_table[65] = { 877 { make_floatx80_init(0xbfff, 0x8000000000000000ULL), 878 make_floatx80_init(0x3ffe, 0x8000000000000000ULL), 879 make_floatx80_init(0xbffe, 0x8000000000000000ULL) }, 880 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL), 881 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL), 882 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) }, 883 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL), 884 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL), 885 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) }, 886 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL), 887 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL), 888 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) }, 889 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL), 890 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL), 891 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) }, 892 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL), 893 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL), 894 make_floatx80_init(0xbffd, 
0xe2b78ce97464fdd4ULL) }, 895 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL), 896 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL), 897 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) }, 898 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL), 899 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL), 900 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) }, 901 { make_floatx80_init(0xbffe, 0xc000000000006530ULL), 902 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL), 903 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) }, 904 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL), 905 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL), 906 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) }, 907 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL), 908 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL), 909 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) }, 910 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL), 911 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL), 912 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) }, 913 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL), 914 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL), 915 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) }, 916 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL), 917 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL), 918 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) }, 919 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL), 920 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL), 921 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) }, 922 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL), 923 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL), 924 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) }, 925 { make_floatx80_init(0xbffe, 0x800000000000227dULL), 926 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL), 927 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) }, 928 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL), 929 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL), 
930 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) }, 931 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL), 932 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL), 933 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) }, 934 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL), 935 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL), 936 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) }, 937 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL), 938 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL), 939 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) }, 940 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL), 941 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL), 942 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) }, 943 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL), 944 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL), 945 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) }, 946 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL), 947 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL), 948 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) }, 949 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL), 950 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL), 951 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) }, 952 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL), 953 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL), 954 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) }, 955 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL), 956 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL), 957 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) }, 958 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL), 959 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL), 960 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) }, 961 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL), 962 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL), 963 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) }, 964 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL), 965 
make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL), 966 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) }, 967 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL), 968 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL), 969 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) }, 970 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL), 971 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL), 972 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) }, 973 { floatx80_zero_init, 974 make_floatx80_init(0x3fff, 0x8000000000000000ULL), 975 floatx80_zero_init }, 976 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL), 977 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL), 978 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) }, 979 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL), 980 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL), 981 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) }, 982 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL), 983 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL), 984 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) }, 985 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL), 986 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL), 987 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) }, 988 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL), 989 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL), 990 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) }, 991 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL), 992 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL), 993 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) }, 994 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL), 995 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL), 996 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) }, 997 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL), 998 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL), 999 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) }, 1000 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL), 1001 make_floatx80_init(0x3fff, 
0x9b8d39b9d54e3a79ULL), 1002 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) }, 1003 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL), 1004 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL), 1005 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) }, 1006 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL), 1007 make_floatx80_init(0x3fff, 0xa27043030c49370aULL), 1008 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) }, 1009 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL), 1010 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL), 1011 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) }, 1012 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL), 1013 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL), 1014 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) }, 1015 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL), 1016 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL), 1017 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) }, 1018 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL), 1019 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL), 1020 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) }, 1021 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL), 1022 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL), 1023 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) }, 1024 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL), 1025 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL), 1026 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) }, 1027 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL), 1028 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL), 1029 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) }, 1030 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL), 1031 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL), 1032 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) }, 1033 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL), 1034 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL), 1035 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) }, 1036 { 
make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL), 1037 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL), 1038 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) }, 1039 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL), 1040 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL), 1041 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) }, 1042 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL), 1043 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL), 1044 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) }, 1045 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL), 1046 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL), 1047 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) }, 1048 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL), 1049 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL), 1050 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) }, 1051 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL), 1052 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL), 1053 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) }, 1054 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL), 1055 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL), 1056 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) }, 1057 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL), 1058 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL), 1059 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) }, 1060 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL), 1061 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL), 1062 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) }, 1063 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL), 1064 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL), 1065 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) }, 1066 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL), 1067 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL), 1068 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) }, 1069 { make_floatx80_init(0x3fff, 0x8000000000000000ULL), 1070 make_floatx80_init(0x4000, 0x8000000000000000ULL), 
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};

/*
 * F2XM1: replace ST0 with 2^ST0 - 1.
 *
 * Arguments outside [-1, +1] (and invalid encodings) raise #IA and
 * produce the default NaN.  Tiny arguments (exp < 0x3fb0) are handled
 * by a single extra-precision multiplication by log(2); the general
 * case evaluates a degree-7 polynomial for (2^y - 1) / y around the
 * nearest multiple t = n/32 of the argument, then combines with the
 * precomputed 2^t and 2^t - 1 values from f2xm1_table above.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaNs pass through; signaling NaNs are silenced with #IA. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5. */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact. */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* Intermediate computations use full precision, round-to-nearest. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument. */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * Use the value of 2^t - 1 from the table, to avoid
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].t;
            /*
             * NOTE(review): this *sets* (rather than ORs in) the flag
             * word, discarding any flags raised by the scalbn /
             * to_int32 / sub calls above — presumably intentional
             * since the result is exactly the table value, but
             * confirm against hardware behavior.
             */
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1. */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two addends to a common exponent. */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result. */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition. */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact. */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * FPTAN: replace ST0 with tan(ST0) and push 1.0.
 *
 * NOTE(review): unlike the softfloat-based helpers above, this uses
 * the host libm tan() on a double conversion of ST0, so the result
 * has at most double precision and host-dependent accuracy — confirm
 * this is acceptable for guest code relying on low result bits.
 */
void helper_fptan(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    /* MAXTAN is 2^63: arguments at least that large are out of range. */
    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        /* C2 <-- 1: operand out of range, ST0 left unchanged. */
        env->fpus |= 0x400;
    } else {
        fptemp = tan(fptemp);
        ST0 = double_to_floatx80(env, fptemp);
        fpush(env);
        ST0 = floatx80_one;
        env->fpus &= ~0x400; /* C2 <-- 0 */
        /* the above code is for |arg| < 2**52 only */
    }
}

/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL

/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)

struct fpatan_data {
    /* High and low parts of atan(x). */
    floatx80 atan_high, atan_low;
};

/* Table of atan(n/8) for n = 0..8, each split into high and low parts. */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};

/*
 * FPATAN: replace ST1 with arctan(ST1 / ST0), using the signs of
 * both operands to select the quadrant (atan2-style: results based
 * on pi, pi/2, 3pi/4 and pi/4 for the zero/infinity/far-apart
 * cases), then pop the stack so the result ends up in the new ST0.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through. */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through. */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact. */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative. */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0). */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi. */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is. */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1). */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2. */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1. */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2. */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion. */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z). */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */
            if (texp == 0) {
                /* z is positive. */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /* Align exponents; shift an extra bit to avoid carry out. */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact. */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}

/*
 * FXTRACT: decompose ST0.  ST0 is replaced by the unbiased exponent
 * (as a floatx80 integer) and the significand, rebiased so its
 * exponent field holds EXPBIAS, is pushed on top of it.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) ==
            0) {
            /* Denormal input: normalize by hand to recover the exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * Common code for FPREM (mod == true) and FPREM1 (mod == false):
 * partial remainder of ST0 / ST1, leaving the remainder in ST0 and
 * reporting the low quotient bits (or "incomplete") in the C flags.
 * NOTE(review): the quotient-rounding difference between the two
 * instructions is implemented inside floatx80_modrem, which receives
 * "mod" unchanged — see that function for the exact semantics.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        /* Special cases (zero, inf/NaN, invalid): delegate entirely. */
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* Effective exponents of denormals, for the distance check below. */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            /* Complete reduction: C2 stays 0, C0/C3/C1 get quotient bits. */
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400; /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}

/* FPREM1: partial remainder, mod == false (see helper_fprem_common). */
void helper_fprem1(CPUX86State *env)
{
    helper_fprem_common(env, false);
}

/* FPREM: partial remainder, mod == true (see helper_fprem_common). */
void helper_fprem(CPUX86State *env)
{
    helper_fprem_common(env, true);
}

/* 128-bit significand of log2(e). */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)

/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.
 * arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    /*
     * The result is returned decomposed: *exp receives the floatx80
     * exponent and *sig0/*sig1 the top 128 bits of the significand
     * (not necessarily normalized).
     */
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    if (arg0_sign) {
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    /* Long division: estimate, multiply back, and correct the remainder. */
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result.
*/ 1980 mul128To256(asig0, asig1, tsig0, tsig1, 1981 &asig0, &asig1, &asig2, &asig3); 1982 aexp += texp - 0x3ffe; 1983 *exp = aexp; 1984 *sig0 = asig0; 1985 *sig1 = asig1; 1986 } 1987 1988 void helper_fyl2xp1(CPUX86State *env) 1989 { 1990 uint8_t old_flags = save_exception_flags(env); 1991 uint64_t arg0_sig = extractFloatx80Frac(ST0); 1992 int32_t arg0_exp = extractFloatx80Exp(ST0); 1993 bool arg0_sign = extractFloatx80Sign(ST0); 1994 uint64_t arg1_sig = extractFloatx80Frac(ST1); 1995 int32_t arg1_exp = extractFloatx80Exp(ST1); 1996 bool arg1_sign = extractFloatx80Sign(ST1); 1997 1998 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 1999 float_raise(float_flag_invalid, &env->fp_status); 2000 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2001 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2002 float_raise(float_flag_invalid, &env->fp_status); 2003 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2004 } else if (floatx80_invalid_encoding(ST0) || 2005 floatx80_invalid_encoding(ST1)) { 2006 float_raise(float_flag_invalid, &env->fp_status); 2007 ST1 = floatx80_default_nan(&env->fp_status); 2008 } else if (floatx80_is_any_nan(ST0)) { 2009 ST1 = ST0; 2010 } else if (floatx80_is_any_nan(ST1)) { 2011 /* Pass this NaN through. */ 2012 } else if (arg0_exp > 0x3ffd || 2013 (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 2014 0x95f619980c4336f7ULL : 2015 0xd413cccfe7799211ULL))) { 2016 /* 2017 * Out of range for the instruction (ST0 must have absolute 2018 * value less than 1 - sqrt(2)/2 = 0.292..., according to 2019 * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 2020 * to sqrt(2) - 1, which we allow here), treat as invalid. 
2021 */ 2022 float_raise(float_flag_invalid, &env->fp_status); 2023 ST1 = floatx80_default_nan(&env->fp_status); 2024 } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || 2025 arg1_exp == 0x7fff) { 2026 /* 2027 * One argument is zero, or multiplying by infinity; correct 2028 * result is exact and can be obtained by multiplying the 2029 * arguments. 2030 */ 2031 ST1 = floatx80_mul(ST0, ST1, &env->fp_status); 2032 } else if (arg0_exp < 0x3fb0) { 2033 /* 2034 * Multiplying both arguments and an extra-precision version 2035 * of log2(e) is sufficiently precise. 2036 */ 2037 uint64_t sig0, sig1, sig2; 2038 int32_t exp; 2039 if (arg0_exp == 0) { 2040 normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); 2041 } 2042 if (arg1_exp == 0) { 2043 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2044 } 2045 mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, 2046 &sig0, &sig1, &sig2); 2047 exp = arg0_exp + 1; 2048 mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); 2049 exp += arg1_exp - 0x3ffe; 2050 /* This result is inexact. */ 2051 sig1 |= 1; 2052 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2053 arg0_sign ^ arg1_sign, exp, 2054 sig0, sig1, &env->fp_status); 2055 } else { 2056 int32_t aexp; 2057 uint64_t asig0, asig1, asig2; 2058 FloatRoundMode save_mode = env->fp_status.float_rounding_mode; 2059 FloatX80RoundPrec save_prec = 2060 env->fp_status.floatx80_rounding_precision; 2061 env->fp_status.float_rounding_mode = float_round_nearest_even; 2062 env->fp_status.floatx80_rounding_precision = floatx80_precision_x; 2063 2064 helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); 2065 /* 2066 * Multiply by the second argument to compute the required 2067 * result. 2068 */ 2069 if (arg1_exp == 0) { 2070 normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); 2071 } 2072 mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); 2073 aexp += arg1_exp - 0x3ffe; 2074 /* This result is inexact. 
*/ 2075 asig1 |= 1; 2076 env->fp_status.float_rounding_mode = save_mode; 2077 ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, 2078 arg0_sign ^ arg1_sign, aexp, 2079 asig0, asig1, &env->fp_status); 2080 env->fp_status.floatx80_rounding_precision = save_prec; 2081 } 2082 fpop(env); 2083 merge_exception_flags(env, old_flags); 2084 } 2085 2086 void helper_fyl2x(CPUX86State *env) 2087 { 2088 uint8_t old_flags = save_exception_flags(env); 2089 uint64_t arg0_sig = extractFloatx80Frac(ST0); 2090 int32_t arg0_exp = extractFloatx80Exp(ST0); 2091 bool arg0_sign = extractFloatx80Sign(ST0); 2092 uint64_t arg1_sig = extractFloatx80Frac(ST1); 2093 int32_t arg1_exp = extractFloatx80Exp(ST1); 2094 bool arg1_sign = extractFloatx80Sign(ST1); 2095 2096 if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { 2097 float_raise(float_flag_invalid, &env->fp_status); 2098 ST1 = floatx80_silence_nan(ST0, &env->fp_status); 2099 } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { 2100 float_raise(float_flag_invalid, &env->fp_status); 2101 ST1 = floatx80_silence_nan(ST1, &env->fp_status); 2102 } else if (floatx80_invalid_encoding(ST0) || 2103 floatx80_invalid_encoding(ST1)) { 2104 float_raise(float_flag_invalid, &env->fp_status); 2105 ST1 = floatx80_default_nan(&env->fp_status); 2106 } else if (floatx80_is_any_nan(ST0)) { 2107 ST1 = ST0; 2108 } else if (floatx80_is_any_nan(ST1)) { 2109 /* Pass this NaN through. */ 2110 } else if (arg0_sign && !floatx80_is_zero(ST0)) { 2111 float_raise(float_flag_invalid, &env->fp_status); 2112 ST1 = floatx80_default_nan(&env->fp_status); 2113 } else if (floatx80_is_infinity(ST1)) { 2114 FloatRelation cmp = floatx80_compare(ST0, floatx80_one, 2115 &env->fp_status); 2116 switch (cmp) { 2117 case float_relation_less: 2118 ST1 = floatx80_chs(ST1); 2119 break; 2120 case float_relation_greater: 2121 /* Result is infinity of the same sign as ST1. 
             */
            break;
        default:
            /* ST0 == 1: inf * 0 is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1. */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result. */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) == 0; preserve the sign ST1 gives the product. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* Intermediate computation uses round-to-nearest, full precision. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /*
         * Split log2(ST0) as int_exp + log2(ST0 * 2^-int_exp), with
         * the scaled argument close to 1 (threshold is sqrt(2) in
         * the fraction field).
         */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Add the integer part int_exp in extended precision. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.
             */
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact. */
            asig1 |= 1;
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}

/* FSQRT: square root of ST0; flags C1 (via 0x400? see note) on negative
   input before letting softfloat produce the NaN/exception.  NOTE(review):
   the 0x400 bit set here is the same bit used as C2 elsewhere in this
   file -- confirm against the SDM's FSQRT flag behavior. */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_is_neg(ST0)) {
        env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
        env->fpus |= 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/*
 * FSINCOS: replace ST0 with sin(ST0) and push cos(ST0).  Implemented
 * via the host libm in double precision, so only an approximation of
 * the hardware's 80-bit behavior.  Sets C2 when the argument is out
 * of the reducible range and leaves the stack unchanged.
 */
void helper_fsincos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        fpush(env);
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/* FRNDINT: round ST0 to an integer using the current rounding mode. */
void helper_frndint(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    ST0 = floatx80_round_to_int(ST0, &env->fp_status);
    merge_exception_flags(env, old_flags);
}

/*
 * FSCALE: scale ST0 by 2^trunc(ST1).  NaN, invalid-encoding and
 * infinity operands are resolved explicitly; the normal path truncates
 * ST1 to int and uses floatx80_scalbn.
 */
void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        /* Propagate ST1's NaN, silencing it if it was signaling. */
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by -inf: finite -> (signed) zero; inf -> invalid. */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by +inf: zero -> invalid; nonzero -> (signed) inf. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        /* Truncate ST1 to int without leaking conversion exceptions. */
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}

/*
 * FSIN: replace ST0 with sin(ST0), via host libm in double precision.
 * Sets C2 and leaves ST0 unchanged when the argument is out of range.
 */
void helper_fsin(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, sin(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**53 only */
    }
}

/*
 * FCOS: replace ST0 with cos(ST0), via host libm in double precision.
 * Sets C2 and leaves ST0 unchanged when the argument is out of range.
 */
void helper_fcos(CPUX86State *env)
{
    double fptemp = floatx80_to_double(env, ST0);

    if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
        env->fpus |= 0x400;
    } else {
        ST0 = double_to_floatx80(env, cos(fptemp));
        env->fpus &= ~0x400;  /* C2 <-- 0 */
        /* the above code is for |arg| < 2**63 only */
    }
}

/*
 * FXAM: classify ST0 into the C3..C0 condition bits of the status
 * word (empty, NaN, infinity, zero, denormal, normal), plus C1 = sign.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU temp;
    int expdif;

    temp.d = ST0;

    env->fpus &= ~0x4700;  /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200;  /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100;  /* Empty */
        return;
    }

    expdif = EXPD(temp);
    if (expdif == MAXEXPD) {
        if (MANTD(temp) == 0x8000000000000000ULL) {
            env->fpus |= 0x500;  /* Infinity */
        } else if (MANTD(temp) & 0x8000000000000000ULL) {
            env->fpus |= 0x100;  /* NaN */
        }
    } else if (expdif == 0) {
        if (MANTD(temp) == 0) {
            env->fpus |= 0x4000;  /* Zero */
        } else {
            env->fpus |= 0x4400;  /* Denormal */
        }
    } else if (MANTD(temp) & 0x8000000000000000ULL) {
        /* Normal finite number: C2 only. */
        env->fpus |= 0x400;
    }
}

/*
 * Store the x87 environment (FSTENV image) at ptr, in 32- or 16-bit
 * protected-mode layout depending on data32.  The tag word is
 * recomputed from the register contents since only empty/non-empty
 * is tracked in fptags[].
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Fold the current top-of-stack pointer into the status word. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1LL << 63)) == 0) {
                /* NaNs, infinity, denormal */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip);  /* fpip */
        access_stl(ac, ptr + 16, env->fpcs);  /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp);  /* fpdp */
        access_stl(ac, ptr + 24, env->fpds);  /* fpds */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}

/* FSTENV: probe the 14/28-byte environment for writing, then store it. */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
    do_fstenv(&ac, ptr, data32);
}

/*
 * Install a new x87 status word: extract the top-of-stack field,
 * recompute the busy bit from the summary-exception bit, and update
 * the FERR#/IGNNE# signalling in system mode.
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;
    env->fpus |= env->fpus & FPUS_SE ? FPUS_B : 0;
#if !defined(CONFIG_USER_ONLY)
    if (!(env->fpus & FPUS_SE)) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset deasserts
         * IGNNE#.
2444 */ 2445 cpu_clear_ignne(); 2446 } 2447 #endif 2448 } 2449 2450 static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) 2451 { 2452 int i, fpus, fptag; 2453 CPUX86State *env = ac->env; 2454 2455 cpu_set_fpuc(env, access_ldw(ac, ptr)); 2456 fpus = access_ldw(ac, ptr + (2 << data32)); 2457 fptag = access_ldw(ac, ptr + (4 << data32)); 2458 2459 cpu_set_fpus(env, fpus); 2460 for (i = 0; i < 8; i++) { 2461 env->fptags[i] = ((fptag & 3) == 3); 2462 fptag >>= 2; 2463 } 2464 } 2465 2466 void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) 2467 { 2468 X86Access ac; 2469 2470 access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); 2471 do_fldenv(&ac, ptr, data32); 2472 } 2473 2474 static void do_fsave(X86Access *ac, target_ulong ptr, int data32) 2475 { 2476 CPUX86State *env = ac->env; 2477 2478 do_fstenv(ac, ptr, data32); 2479 ptr += 14 << data32; 2480 2481 for (int i = 0; i < 8; i++) { 2482 floatx80 tmp = ST(i); 2483 do_fstt(ac, ptr, tmp); 2484 ptr += 10; 2485 } 2486 2487 do_fninit(env); 2488 } 2489 2490 void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) 2491 { 2492 int size = (14 << data32) + 80; 2493 X86Access ac; 2494 2495 access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); 2496 do_fsave(&ac, ptr, data32); 2497 } 2498 2499 static void do_frstor(X86Access *ac, target_ulong ptr, int data32) 2500 { 2501 CPUX86State *env = ac->env; 2502 2503 do_fldenv(ac, ptr, data32); 2504 ptr += 14 << data32; 2505 2506 for (int i = 0; i < 8; i++) { 2507 floatx80 tmp = do_fldt(ac, ptr); 2508 ST(i) = tmp; 2509 ptr += 10; 2510 } 2511 } 2512 2513 void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) 2514 { 2515 int size = (14 << data32) + 80; 2516 X86Access ac; 2517 2518 access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); 2519 do_frstor(&ac, ptr, data32); 2520 } 2521 2522 #define XO(X) offsetof(X86XSaveArea, X) 2523 2524 static void do_xsave_fpu(X86Access *ac, target_ulong ptr) 2525 { 2526 CPUX86State 
*env = ac->env;
    int fpus, fptag, i;
    target_ulong addr;

    /* Fold top-of-stack into the status word, as for FSTENV. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), fpus);
    /* FXSAVE's abridged tag: 1 == valid, hence the inversion. */
    access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    access_stq(ac, ptr + XO(legacy.fpip), 0);  /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0);  /* edp+sel; rdp */

    addr = ptr + XO(legacy.fpregs);

    /* Each 80-bit register occupies a 16-byte slot. */
    for (i = 0; i < 8; i++) {
        floatx80 tmp = ST(i);
        do_fstt(ac, addr, tmp);
        addr += 16;
    }
}

/* Store MXCSR and its mask into the legacy XSAVE area. */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    /* Fold pending softfloat flags back into env->mxcsr first. */
    update_mxcsr_from_sse_status(env);
    access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
    access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
}

/* Store the low 128 bits of the XMM registers (8 or 16 of them). */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    /* Only 64-bit code segments expose xmm8..xmm15. */
    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
        access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
        addr += 16;
    }
}

/* Store the high halves of the YMM registers (AVX state component). */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
        access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
    }
}

/* Store the four MPX bound registers (BNDREGS state component). */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        access_stq(ac, addr, env->bnd_regs[i].lb);
        access_stq(ac, addr + 8, env->bnd_regs[i].ub);
    }
}

/* Store BNDCFGU and BNDSTATUS (BNDCSR state component). */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
               env->bndcs_regs.cfgu);
    access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
               env->bndcs_regs.sts);
}

/* Store the PKRU register (PKRU state component). */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    access_stq(ac, ptr, ac->env->pkru);
}

/*
 * FXSAVE body: FP state always; MXCSR and XMM state only when
 * CR4.OSFXSR is set, with XMM further suppressed by fast-FXSAVE
 * (EFER.FFXSR at CPL0 in long mode).
 */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xsave_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(ac, ptr);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(ac, ptr);
        }
    }
}

/* FXSAVE: check 16-byte alignment, probe the legacy area, store. */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}

/* Return the XINUSE bitmap (which components hold non-initial state). */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here. */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
        inuse &= ~XSTATE_BNDREGS_MASK;
    }
    return inuse;
}

/*
 * Common XSAVE/XSAVEOPT store path.  rfbm is the requested-feature
 * bitmap, opt the subset actually written (equal to rfbm for XSAVE).
 * XSTATE_BV in the header is updated for all rfbm components.
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    uint64_t old_bv, new_bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field.  */
    old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}

/* Common XSAVE/XRSTOR argument checks: CR4.OSXSAVE and 64-byte alignment. */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}

/* XSAVE/XSAVEOPT common entry: validate, size the area, probe, store. */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    unsigned size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;
    size = xsave_area_size(opt, false);

    access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, rfbm, inuse, opt);
}

/* XSAVE: write all requested components. */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
}

/* XSAVEOPT: like XSAVE but only writes components currently in use. */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

/* Load the legacy FP portion of the XSAVE area (FXRSTOR layout). */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, fpuc, fpus, fptag;
    target_ulong addr;

    fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
    fpus = access_ldw(ac, ptr + XO(legacy.fsw));
    fptag = access_ldw(ac, ptr + XO(legacy.ftw));
    cpu_set_fpuc(env, fpuc);
    cpu_set_fpus(env, fpus);

    /* Undo the abridged-tag inversion done by do_xsave_fpu(). */
    fptag ^= 0xff;
    for (i = 0; i < 8; i++) {
        env->fptags[i] = ((fptag >> i) & 1);
    }

    addr = ptr + XO(legacy.fpregs);

    for (i = 0; i < 8; i++) {
        floatx80 tmp = do_fldt(ac, addr);
        ST(i) = tmp;
        addr += 16;
    }
}

/* Load MXCSR from the legacy XSAVE area. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}

/* Load the low 128 bits of the XMM registers. */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + XO(legacy.xmm_regs);
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
        env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
        addr += 16;
    }
}

/* Reset the low 128 bits of the XMM registers to the initial state. */
static void do_clear_sse(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags &
HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = 0;
        env->xmm_regs[i].ZMM_Q(1) = 0;
    }
}

/* Load the high halves of the YMM registers (AVX state component). */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
        env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
        env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
    }
}

/* Reset the YMM high halves to the initial (zero) state. */
static void do_clear_ymmh(CPUX86State *env)
{
    int i, nb_xmm_regs;

    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(2) = 0;
        env->xmm_regs[i].ZMM_Q(3) = 0;
    }
}

/* Load the four MPX bound registers (BNDREGS state component). */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = access_ldq(ac, addr);
        env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
    }
}

/* Load BNDCFGU and BNDSTATUS (BNDCSR state component). */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
    env->bndcs_regs.sts
        = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
}

/* Load the PKRU register (PKRU state component). */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    ac->env->pkru = access_ldq(ac, ptr);
}

/*
 * FXRSTOR body: mirror of do_fxsave(), including the fast-FXRSTOR
 * suppression of the XMM registers.
 */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    do_xrstor_fpu(ac, ptr);
    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(ac, ptr);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xrstor_sse(ac, ptr);
        }
    }
}

/* FXRSTOR: check 16-byte alignment, probe the legacy area, load. */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}

/*
 * Validate the XSAVE header for XRSTOR.  Returns false (caller raises
 * #GP) for the compact format or reserved bits; stores XSTATE_BV for
 * the caller either way.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    uint64_t xstate_bv, xcomp_bv, reserve0;

    xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
    reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
    *pxsbv = xstate_bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP.  That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    if (xcomp_bv || reserve0) {
        return false;
    }

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    return (xstate_bv & ~ac->env->xcr0) == 0;
}

/*
 * XRSTOR body: for each component requested in rfbm, either load it
 * from memory (XSTATE_BV bit set) or reset it to its initial state.
 */
static void do_xrstor(X86Access *ac, target_ulong ptr,
                      uint64_t rfbm, uint64_t xstate_bv)
{
    CPUX86State *env = ac->env;

    if (rfbm & XSTATE_FP_MASK) {
        if (xstate_bv & XSTATE_FP_MASK) {
            do_xrstor_fpu(ac, ptr);
        } else {
            do_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE_MASK) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(ac, ptr);
        if (xstate_bv & XSTATE_SSE_MASK) {
            do_xrstor_sse(ac, ptr);
        } else {
            do_clear_sse(env);
        }
    }
    if (rfbm & XSTATE_YMM_MASK) {
        if (xstate_bv & XSTATE_YMM_MASK) {
            do_xrstor_ymmh(ac, ptr + XO(avx_state));
        } else {
            do_clear_ymmh(env);
        }
    }
    if (rfbm & XSTATE_BNDREGS_MASK) {
        if (xstate_bv & XSTATE_BNDREGS_MASK) {
            do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (xstate_bv & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t old_pkru = env->pkru;
        if (xstate_bv & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(ac, ptr + XO(pkru_state));
        } else {
            env->pkru = 0;
        }
        /* PKRU affects page permissions, so cached translations die. */
        if (env->pkru != old_pkru) {
            CPUState *cs = env_cpu(env);
            tlb_flush(cs);
        }
    }
}

#undef XO

/* XRSTOR: validate the header, size the probe by the components loaded. */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    X86Access ac;
    uint64_t xstate_bv;
    unsigned size, size_ext;

    do_xsave_chk(env, ptr, ra);

    /* Begin with just the minimum size to validate the header.  */
    size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
    access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
    if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    rfbm &= env->xcr0;
    size_ext = xsave_area_size(rfbm & xstate_bv, false);
    if (size < size_ext) {
        /* TODO: See if existing page probe has covered extra size.  */
        access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
    }

    do_xrstor(&ac, ptr, rfbm, xstate_bv);
}

#if defined(CONFIG_USER_ONLY)
/*
 * User-mode entry points for signal-frame save/restore: the target
 * memory is a host buffer, so X86Access is filled in directly with
 * the host address instead of probing guest pages.
 */
void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .haddr1 = host,
        .size = 4 * 7 + 8 * 10,
        .env = env,
    };

    assert(ac.size <= len);
    do_fsave(&ac, 0, true);
}

void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .haddr1 = host,
        .size = 4 * 7 + 8 * 10,
        .env = env,
    };

    assert(ac.size <= len);
    do_frstor(&ac, 0, true);
}

void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
        .env = env,
    };

    assert(ac.size <= len);
    do_fxsave(&ac, 0);
}

void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
        .env = env,
    };

    assert(ac.size <= len);
    do_fxrstor(&ac, 0);
}

void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = {
        .haddr1 = host,
        .env = env,
    };

    /*
     * Since this is only called from user-level signal handling,
     * we should have done the job correctly there.
     */
    assert((rfbm & ~env->xcr0) == 0);
    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);
    do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
}

bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = {
        .haddr1 = host,
        .env = env,
    };
    uint64_t xstate_bv;

    /*
     * Since this is only called from user-level signal handling,
     * we should have done the job correctly there.
     */
    assert((rfbm & ~env->xcr0) == 0);
    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    /* Reject a malformed header rather than raising an exception. */
    if (!valid_xrstor_header(&ac, &xstate_bv, 0)) {
        return false;
    }
    do_xrstor(&ac, 0, rfbm, xstate_bv);
    return true;
}
#endif

/* XGETBV: read XCR0 (ecx == 0) or XCR0 & XINUSE (ecx == 1, XGETBV1). */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    /* Any other ecx is #GP. */
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* XSETBV: write XCR0, enforcing the architectural validity rules. */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
        goto do_gpf;
    }

    /* SSE can be disabled, but only if AVX is disabled too.  */
    if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features.  */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
        & XSTATE_BNDCSR_MASK) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    cpu_sync_avx_hflag(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

/* MMX/SSE */
/* XXX: optimize by storing fptt and fptags in the static cpu state */

#define SSE_DAZ 0x0040
#define SSE_RC_SHIFT 13
#define SSE_RC_MASK (3 << SSE_RC_SHIFT)
#define SSE_FZ 0x8000

/* Propagate env->mxcsr into the softfloat sse_status fields. */
void update_mxcsr_status(CPUX86State *env)
{
    uint32_t mxcsr = env->mxcsr;
    int rnd_type;

    /* set rounding mode */
    rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
    set_x86_rounding_mode(rnd_type, &env->sse_status);

    /* Set exception flags.  MXCSR status bits share positions with
       the FPUS_* exception bits, hence the reuse of those masks. */
    set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) |
                              (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) |
                              (mxcsr & FPUS_OE ? float_flag_overflow : 0) |
                              (mxcsr & FPUS_UE ? float_flag_underflow : 0) |
                              (mxcsr & FPUS_PE ? float_flag_inexact : 0),
                              &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}

/* Fold accumulated softfloat exception flags back into env->mxcsr. */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
3213 */ 3214 env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | 3215 (flags & float_flag_divbyzero ? FPUS_ZE : 0) | 3216 (flags & float_flag_overflow ? FPUS_OE : 0) | 3217 (flags & float_flag_underflow ? FPUS_UE : 0) | 3218 (flags & float_flag_inexact ? FPUS_PE : 0) | 3219 (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE : 3220 0)); 3221 } 3222 3223 void helper_update_mxcsr(CPUX86State *env) 3224 { 3225 update_mxcsr_from_sse_status(env); 3226 } 3227 3228 void helper_ldmxcsr(CPUX86State *env, uint32_t val) 3229 { 3230 cpu_set_mxcsr(env, val); 3231 } 3232 3233 void helper_enter_mmx(CPUX86State *env) 3234 { 3235 env->fpstt = 0; 3236 *(uint32_t *)(env->fptags) = 0; 3237 *(uint32_t *)(env->fptags + 4) = 0; 3238 } 3239 3240 void helper_emms(CPUX86State *env) 3241 { 3242 /* set to empty state */ 3243 *(uint32_t *)(env->fptags) = 0x01010101; 3244 *(uint32_t *)(env->fptags + 4) = 0x01010101; 3245 } 3246 3247 #define SHIFT 0 3248 #include "ops_sse.h" 3249 3250 #define SHIFT 1 3251 #include "ops_sse.h" 3252 3253 #define SHIFT 2 3254 #include "ops_sse.h" 3255