xref: /openbmc/qemu/target/i386/tcg/fpu_helper.c (revision f15f7273ea55472d5904c53566c82369d81214c1)
1  /*
2   *  x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3   *
4   *  Copyright (c) 2003 Fabrice Bellard
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include <math.h>
22  #include "cpu.h"
23  #include "tcg-cpu.h"
24  #include "exec/exec-all.h"
25  #include "exec/cpu_ldst.h"
26  #include "exec/helper-proto.h"
27  #include "fpu/softfloat.h"
28  #include "fpu/softfloat-macros.h"
29  #include "helper-tcg.h"
30  #include "access.h"
31  
/*
 * float macros
 *
 * FT0 is the scratch operand held in env->ft0.  ST0 / ST(n) address the
 * x87 register file relative to the current top-of-stack index
 * env->fpstt; the index wraps modulo 8.  All of these expect a variable
 * named "env" to be in scope at the point of use.
 */
#define FT0    (env->ft0)
#define ST0    (env->fpregs[env->fpstt].d)
#define ST(n)  (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1    ST(1)

/*
 * Rounding-control field of the control word (env->fpuc): two bits
 * starting at bit FPU_RC_SHIFT.  The FPU_RC_* values are already shifted
 * into place, so they compare directly against (fpuc & FPU_RC_MASK).
 */
#define FPU_RC_SHIFT        10
#define FPU_RC_MASK         (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/*
 * 2^63 as a double.
 * NOTE(review): not referenced in the visible part of the file; presumably
 * an operand-range limit for the trigonometric helpers -- confirm.
 */
#define MAXTAN 9223372036854775808.0

/*
 * the following deal with x86 long double-precision numbers
 *
 * They operate on a CPU_LDoubleU-style value "fp": fp.l.upper holds the
 * sign (bit 15) and the 15-bit exponent, fp.l.lower holds the significand.
 * BIASEXPONENT() overwrites the exponent field with EXPBIAS and keeps the
 * sign bit; note that it expands to a bare assignment expression.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp)        (fp.l.upper & 0x7fff)
#define SIGND(fp)       ((fp.l.upper) & 0x8000)
#define MANTD(fp)       (fp.l.lower)
#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS

/*
 * Bits of the status word (env->fpus).  IE/DE/ZE/OE/UE/PE are the
 * exception flags that merge_exception_flags() derives from the softfloat
 * invalid / input-denormal / divide-by-zero / overflow / underflow /
 * inexact flags.  SE and B are set by fpu_set_exception() whenever an
 * exception flag is set whose mask bit in the control word is clear.
 * NOTE(review): FPUS_SF is not used in the visible code; by x87 naming it
 * is the stack-fault flag -- confirm.
 */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/* Exception-mask bits of the control word (env->fpuc), one per flag above. */
#define FPUC_EM 0x3f

/*
 * Constants pushed by the FLDxx helpers.  The "_d" and "_u" variants
 * differ from the plain constant by one unit in the last place of the
 * significand; the helpers pick "_d" when the rounding control is
 * DOWN or CHOP and "_u" when it is UP.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
75  
/*
 * Push on the x87 register stack: step the top-of-stack index down by
 * one (modulo 8) and tag the new top slot as valid.  The register
 * contents themselves are not written here.
 */
static inline void fpush(CPUX86State *env)
{
    int top = (env->fpstt - 1) & 7;

    env->fpstt = top;
    env->fptags[top] = 0; /* tag 0: stack entry is valid */
}
81  
/*
 * Pop the x87 register stack: tag the current top slot as invalid and
 * step the top-of-stack index up by one (modulo 8).
 */
static inline void fpop(CPUX86State *env)
{
    int top = env->fpstt;

    env->fptags[top] = 1; /* tag 1: stack entry is invalid */
    env->fpstt = (top + 1) & 7;
}
87  
do_fldt(X86Access * ac,target_ulong ptr)88  static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
89  {
90      CPU_LDoubleU temp;
91  
92      temp.l.lower = access_ldq(ac, ptr);
93      temp.l.upper = access_ldw(ac, ptr + 8);
94      return temp.d;
95  }
96  
/*
 * Write a 10-byte extended-precision value through an already prepared
 * access descriptor: a 64-bit quantity at ptr followed by a 16-bit
 * quantity at ptr + 8.
 */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU raw = { .d = f };

    access_stq(ac, ptr, raw.l.lower);
    access_stw(ac, ptr + 8, raw.l.upper);
}
105  
106  /* x87 FPU helpers */
107  
/*
 * Convert a floatx80 to a host double.  The value is first converted to
 * a softfloat float64 using the x87 float_status (so rounding mode and
 * exception flags of env->fp_status apply), then its bit pattern is
 * reinterpreted as a host double through a union.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } pun = { .f64 = floatx80_to_float64(a, &env->fp_status) };

    return pun.d;
}
118  
double_to_floatx80(CPUX86State * env,double a)119  static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
120  {
121      union {
122          float64 f64;
123          double d;
124      } u;
125  
126      u.d = a;
127      return float64_to_floatx80(u.f64, &env->fp_status);
128  }
129  
/*
 * Record the FPUS_* bits in @mask in the status word.  If, afterwards,
 * any exception flag is set whose mask bit in the control word is clear,
 * also set the FPUS_SE and FPUS_B bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    int unmasked;

    env->fpus |= mask;

    /* Flags that are pending in fpus and not masked in fpuc. */
    unmasked = env->fpus & ~env->fpuc & FPUC_EM;
    if (unmasked != 0) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}
137  
/*
 * Set up the fields of the x87, MMX and SSE float_status words that do
 * not vary at runtime so that they follow x86 behaviour.
 *
 * The float_status words live in the "zeroed on reset" portion of the
 * CPU state struct, so this must be called at CPU reset.  Fields that
 * vary under guest control are programmed by the code path that sets
 * the corresponding register, e.g. cpu_set_fpuc().
 */
void cpu_init_fp_statuses(CPUX86State *env)
{
    float_status *x87_st = &env->fp_status;
    float_status *mmx_st = &env->mmx_status;
    float_status *sse_st = &env->sse_status;

    /*
     * x87 NaN propagation rules:
     *   SNaN + QNaN        => the QNaN
     *   two SNaNs          => the one with the larger significand, silenced
     *   two QNaNs          => the one with the larger significand
     *   SNaN and a non-NaN => the SNaN, silenced
     *   QNaN and a non-NaN => the QNaN
     *
     * When the significands compared are equal, the NaN with the
     * positive sign bit (if any) is returned.
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, x87_st);

    /*
     * TODO: The two settings below are incorrect.  The x86 Software
     * Developer's Manual vol 1 section 4.8.3.5 "Operating on SNaNs and
     * QNaNs" says the "larger significand" behaviour applies only to x87
     * FPU operations.  SSE is required to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions.  The
     * "3DNow! Technology Manual" documents them as not supporting NaNs
     * or infinities as inputs and the result of passing two NaNs as
     * "undefined", so any choice is acceptable there.  (Strictly there
     * is some behaviour we don't implement correctly for these
     * "unsupported" NaN and Inf values, like "NaN * 0 == 0".)
     */
    set_float_2nan_prop_rule(float_2nan_prop_x87, mmx_st);
    set_float_2nan_prop_rule(float_2nan_prop_x87, sse_st);
}
177  
save_exception_flags(CPUX86State * env)178  static inline uint8_t save_exception_flags(CPUX86State *env)
179  {
180      uint8_t old_flags = get_float_exception_flags(&env->fp_status);
181      set_float_exception_flags(0, &env->fp_status);
182      return old_flags;
183  }
184  
/*
 * Counterpart of save_exception_flags(): take the softfloat flags raised
 * since the save, re-raise @old_flags in the float_status, and reflect
 * the newly raised flags in the status word via fpu_set_exception().
 *
 * fpu_set_exception() is called even when no new flag was raised.
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t raised = get_float_exception_flags(&env->fp_status);
    int fpus_bits = 0;

    float_raise(old_flags, &env->fp_status);

    if (raised & float_flag_invalid) {
        fpus_bits |= FPUS_IE;
    }
    if (raised & float_flag_divbyzero) {
        fpus_bits |= FPUS_ZE;
    }
    if (raised & float_flag_overflow) {
        fpus_bits |= FPUS_OE;
    }
    if (raised & float_flag_underflow) {
        fpus_bits |= FPUS_UE;
    }
    if (raised & float_flag_inexact) {
        fpus_bits |= FPUS_PE;
    }
    if (raised & float_flag_input_denormal) {
        fpus_bits |= FPUS_DE;
    }

    fpu_set_exception(env, fpus_bits);
}
197  
helper_fdiv(CPUX86State * env,floatx80 a,floatx80 b)198  static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
199  {
200      uint8_t old_flags = save_exception_flags(env);
201      floatx80 ret = floatx80_div(a, b, &env->fp_status);
202      merge_exception_flags(env, old_flags);
203      return ret;
204  }
205  
/*
 * Deliver a pending x87 exception.  With CR0.NE set this raises
 * EXCP10_COPR at @retaddr; otherwise, in system-emulation builds only,
 * it calls fpu_check_raise_ferr_irq().
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
        return;
    }
#if !defined(CONFIG_USER_ONLY)
    fpu_check_raise_ferr_irq(env);
#endif
}
217  
/*
 * Load FT0 from the raw bits of a single-precision value, widening it to
 * floatx80 and folding conversion exceptions into the status word.
 */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };

    env->ft0 = float32_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved);
}
230  
/*
 * Load FT0 from the raw bits of a double-precision value, widening it to
 * floatx80 and folding conversion exceptions into the status word.
 */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };

    env->ft0 = float64_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved);
}
243  
/*
 * Load FT0 from a signed 32-bit integer operand.
 *
 * The integer-to-floatx80 conversion must be exact: the integer forms of
 * the x87 arithmetic instructions convert their operand to extended
 * precision first and only the arithmetic result is rounded according to
 * the precision-control field.  The conversion is therefore done with the
 * rounding precision temporarily forced to extended, in the same way as
 * helper_fildl_ST0(); the guest-selected precision is restored afterwards.
 */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(&env->fp_status);

    set_floatx80_rounding_precision(floatx80_precision_x, &env->fp_status);
    FT0 = int32_to_floatx80(val, &env->fp_status);
    set_floatx80_rounding_precision(old, &env->fp_status);
}
248  
/*
 * Push a single-precision value (given as raw bits) onto the x87 stack:
 * the value is widened into the slot below the current top, which then
 * becomes the new, valid top of stack.  Conversion exceptions are folded
 * into the status word.
 */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t saved = save_exception_flags(env);
    int slot = (env->fpstt - 1) & 7;
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };

    env->fpregs[slot].d = float32_to_floatx80(bits.f, &env->fp_status);
    env->fptags[slot] = 0; /* validate stack entry */
    env->fpstt = slot;
    merge_exception_flags(env, saved);
}
265  
/*
 * Push a double-precision value (given as raw bits) onto the x87 stack:
 * the value is widened into the slot below the current top, which then
 * becomes the new, valid top of stack.  Conversion exceptions are folded
 * into the status word.
 */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t saved = save_exception_flags(env);
    int slot = (env->fpstt - 1) & 7;
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };

    env->fpregs[slot].d = float64_to_floatx80(bits.f, &env->fp_status);
    env->fptags[slot] = 0; /* validate stack entry */
    env->fpstt = slot;
    merge_exception_flags(env, saved);
}
282  
tmp_maximise_precision(float_status * st)283  static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
284  {
285      FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
286      set_floatx80_rounding_precision(floatx80_precision_x, st);
287      return old;
288  }
289  
/*
 * Push a signed 32-bit integer onto the x87 stack.  The conversion runs
 * with the rounding precision temporarily raised to extended; the
 * previous precision is restored before returning.
 */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    float_status *st = &env->fp_status;
    FloatX80RoundPrec saved_prec = tmp_maximise_precision(st);
    int slot = (env->fpstt - 1) & 7;

    env->fpregs[slot].d = int32_to_floatx80(val, st);
    env->fptags[slot] = 0; /* validate stack entry */
    env->fpstt = slot;

    set_floatx80_rounding_precision(saved_prec, st);
}
302  
/*
 * Push a signed 64-bit integer onto the x87 stack.  The conversion runs
 * with the rounding precision temporarily raised to extended; the
 * previous precision is restored before returning.
 */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    float_status *st = &env->fp_status;
    FloatX80RoundPrec saved_prec = tmp_maximise_precision(st);
    int slot = (env->fpstt - 1) & 7;

    env->fpregs[slot].d = int64_to_floatx80(val, st);
    env->fptags[slot] = 0; /* validate stack entry */
    env->fpstt = slot;

    set_floatx80_rounding_precision(saved_prec, st);
}
315  
helper_fsts_ST0(CPUX86State * env)316  uint32_t helper_fsts_ST0(CPUX86State *env)
317  {
318      uint8_t old_flags = save_exception_flags(env);
319      union {
320          float32 f;
321          uint32_t i;
322      } u;
323  
324      u.f = floatx80_to_float32(ST0, &env->fp_status);
325      merge_exception_flags(env, old_flags);
326      return u.i;
327  }
328  
/*
 * Convert ST0 to double precision and return its raw 64-bit pattern.
 * Conversion exceptions are folded into the status word.
 */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits;

    bits.f = floatx80_to_float64(env->fpregs[env->fpstt].d, &env->fp_status);
    merge_exception_flags(env, saved);

    return bits.i;
}
341  
/*
 * Convert ST0 to a 16-bit signed integer using the current rounding
 * mode.  The conversion is done at 32-bit width; if the result does not
 * fit in 16 bits the softfloat flags are replaced by "invalid" and
 * -32768 is returned instead.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        /* Out of range for a 16-bit store. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = -32768;
    }
    merge_exception_flags(env, saved);

    return result;
}
355  
/*
 * Convert ST0 to a 32-bit signed integer using the current rounding
 * mode.  If the conversion raised "invalid", the result is forced to
 * the bit pattern 0x80000000.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    float_status *st = &env->fp_status;
    int32_t result = floatx80_to_int32(ST0, st);

    if (get_float_exception_flags(st) & float_flag_invalid) {
        result = INT32_MIN; /* 0x80000000 */
    }
    merge_exception_flags(env, saved);

    return result;
}
368  
/*
 * Convert ST0 to a 64-bit signed integer using the current rounding
 * mode.  If the conversion raised "invalid", the result is forced to
 * the bit pattern 0x8000000000000000.
 */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    float_status *st = &env->fp_status;
    int64_t result = floatx80_to_int64(ST0, st);

    if (get_float_exception_flags(st) & float_flag_invalid) {
        result = INT64_MIN; /* 0x8000000000000000 */
    }
    merge_exception_flags(env, saved);

    return result;
}
381  
/*
 * Convert ST0 to a 16-bit signed integer, rounding toward zero.  The
 * conversion is done at 32-bit width; if the result does not fit in
 * 16 bits the softfloat flags are replaced by "invalid" and -32768 is
 * returned instead.
 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    int32_t result = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        /* Out of range for a 16-bit store. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = -32768;
    }
    merge_exception_flags(env, saved);

    return result;
}
395  
/*
 * Convert ST0 to a 32-bit signed integer, rounding toward zero.  If the
 * conversion raised "invalid", the result is forced to the bit pattern
 * 0x80000000.
 */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    float_status *st = &env->fp_status;
    int32_t result = floatx80_to_int32_round_to_zero(ST0, st);

    if (get_float_exception_flags(st) & float_flag_invalid) {
        result = INT32_MIN; /* 0x80000000 */
    }
    merge_exception_flags(env, saved);

    return result;
}
408  
/*
 * Convert ST0 to a 64-bit signed integer, rounding toward zero.  If the
 * conversion raised "invalid", the result is forced to the bit pattern
 * 0x8000000000000000.
 */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    float_status *st = &env->fp_status;
    int64_t result = floatx80_to_int64_round_to_zero(ST0, st);

    if (get_float_exception_flags(st) & float_flag_invalid) {
        result = INT64_MIN; /* 0x8000000000000000 */
    }
    merge_exception_flags(env, saved);

    return result;
}
421  
/*
 * Push the 10-byte extended-precision value at guest address @ptr onto
 * the x87 stack.  The 10-byte load access is prepared before any FPU
 * state is modified.
 */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    int slot;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    slot = (env->fpstt - 1) & 7;
    env->fpregs[slot].d = do_fldt(&ac, ptr);
    env->fptags[slot] = 0; /* validate stack entry */
    env->fpstt = slot;
}
434  
/*
 * Store ST0 as a 10-byte extended-precision value at guest address
 * @ptr.  The register stack is left unchanged.
 */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 top;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    top = env->fpregs[env->fpstt].d;
    do_fstt(&ac, ptr, top);
}
442  
/*
 * Helper entry point for a stack push: decrement the top-of-stack index
 * (modulo 8) and tag the new top slot as valid.
 */
void helper_fpush(CPUX86State *env)
{
    int top = (env->fpstt - 1) & 7;

    env->fpstt = top;
    env->fptags[top] = 0; /* validate stack entry */
}
447  
/*
 * Helper entry point for a stack pop: tag the current top slot as
 * invalid and increment the top-of-stack index (modulo 8).
 */
void helper_fpop(CPUX86State *env)
{
    int top = env->fpstt;

    env->fptags[top] = 1; /* invalidate stack entry */
    env->fpstt = (top + 1) & 7;
}
452  
/*
 * Decrement the top-of-stack index (modulo 8) without touching tags or
 * register contents, and clear the status-word bits in mask 0x4700.
 */
void helper_fdecstp(CPUX86State *env)
{
    int top = (env->fpstt - 1) & 7;

    env->fpus &= ~0x4700;
    env->fpstt = top;
}
458  
/*
 * Increment the top-of-stack index (modulo 8) without touching tags or
 * register contents, and clear the status-word bits in mask 0x4700.
 */
void helper_fincstp(CPUX86State *env)
{
    int top = (env->fpstt + 1) & 7;

    env->fpus &= ~0x4700;
    env->fpstt = top;
}
464  
465  /* FPU move */
466  
/*
 * Tag stack register ST(st_index) as invalid (empty).  The register
 * contents and the top-of-stack index are not changed.
 */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    unsigned int slot = (env->fpstt + st_index) & 7;

    env->fptags[slot] = 1;
}
471  
/* Copy the scratch operand FT0 into the top-of-stack register. */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    env->fpregs[env->fpstt].d = env->ft0;
}
476  
/* Copy stack register ST(st_index) into the scratch operand FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    env->ft0 = env->fpregs[(env->fpstt + st_index) & 7].d;
}
481  
/* Copy stack register ST(st_index) into the top-of-stack register. */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 src = env->fpregs[(env->fpstt + st_index) & 7].d;

    env->fpregs[env->fpstt].d = src;
}
486  
/* Copy the top-of-stack register into stack register ST(st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 src = env->fpregs[env->fpstt].d;

    env->fpregs[(env->fpstt + st_index) & 7].d = src;
}
491  
/* Exchange the contents of ST0 and ST(st_index). */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 *top = &ST0;
    floatx80 *other = &ST(st_index);
    floatx80 saved = *other;

    *other = *top;
    *top = saved;
}
500  
501  /* FPU operations */
502  
/*
 * Status-word condition bits (all within mask 0x4500) written by the
 * FCOM/FUCOM helpers.  Indexed by the FloatRelation compare result
 * plus one.
 */
static const int fcom_ccval[4] = {
    0x0100,
    0x4000,
    0x0000,
    0x4500,
};
504  
/*
 * Compare ST0 with FT0 using floatx80_compare() and store the outcome
 * in the status-word bits covered by mask 0x4500.  Exceptions raised by
 * the comparison are folded into the status word.
 */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);
    int cc = fcom_ccval[rel + 1];

    env->fpus = (env->fpus & ~0x4500) | cc;
    merge_exception_flags(env, saved);
}
514  
/*
 * Compare ST0 with FT0 using floatx80_compare_quiet() and store the
 * outcome in the status-word bits covered by mask 0x4500.  Exceptions
 * raised by the comparison are folded into the status word.
 */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    int cc = fcom_ccval[rel + 1];

    env->fpus = (env->fpus & ~0x4500) | cc;
    merge_exception_flags(env, saved);
}
524  
525  static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
526  
/*
 * Compare ST0 with FT0 using floatx80_compare() and report the outcome
 * in the CC_Z / CC_P / CC_C bits of EFLAGS; all other flag bits keep
 * their current computed value.  Exceptions raised by the comparison
 * are folded into the x87 status word.
 */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);
    int other_flags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);

    CC_SRC = other_flags | fcomi_ccval[rel + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved);
}
539  
/*
 * Compare ST0 with FT0 using floatx80_compare_quiet() and report the
 * outcome in the CC_Z / CC_P / CC_C bits of EFLAGS; all other flag bits
 * keep their current computed value.  Exceptions raised by the
 * comparison are folded into the x87 status word.
 */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    int other_flags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C);

    CC_SRC = other_flags | fcomi_ccval[rel + 1];
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved);
}
552  
/* ST0 = ST0 + FT0, with exceptions folded into the status word. */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST0;

    *dst = floatx80_add(*dst, FT0, &env->fp_status);
    merge_exception_flags(env, saved);
}
559  
/* ST0 = ST0 * FT0, with exceptions folded into the status word. */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST0;

    *dst = floatx80_mul(*dst, FT0, &env->fp_status);
    merge_exception_flags(env, saved);
}
566  
/* ST0 = ST0 - FT0, with exceptions folded into the status word. */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST0;

    *dst = floatx80_sub(*dst, FT0, &env->fp_status);
    merge_exception_flags(env, saved);
}
573  
/* ST0 = FT0 - ST0 (reversed subtract), exceptions folded into fpus. */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST0;

    *dst = floatx80_sub(FT0, *dst, &env->fp_status);
    merge_exception_flags(env, saved);
}
580  
/* ST0 = ST0 / FT0; exception handling is done inside helper_fdiv(). */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    floatx80 *dst = &ST0;

    *dst = helper_fdiv(env, *dst, FT0);
}
585  
/* ST0 = FT0 / ST0 (reversed divide); exceptions handled in helper_fdiv(). */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    floatx80 *dst = &ST0;

    *dst = helper_fdiv(env, FT0, *dst);
}
590  
591  /* fp operations between STN and ST0 */
592  
/* ST(st_index) = ST(st_index) + ST0, exceptions folded into fpus. */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_add(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved);
}
599  
/* ST(st_index) = ST(st_index) * ST0, exceptions folded into fpus. */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_mul(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved);
}
606  
/* ST(st_index) = ST(st_index) - ST0, exceptions folded into fpus. */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved);
}
613  
/* ST(st_index) = ST0 - ST(st_index) (reversed), exceptions folded into fpus. */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(ST0, *dst, &env->fp_status);
    merge_exception_flags(env, saved);
}
620  
/* ST(st_index) = ST(st_index) / ST0; exceptions handled in helper_fdiv(). */
void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = helper_fdiv(env, ST(st_index), ST0);
}
628  
/* ST(st_index) = ST0 / ST(st_index) (reversed); exceptions in helper_fdiv(). */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = helper_fdiv(env, ST0, ST(st_index));
}
636  
637  /* misc FPU operations */
/* Replace ST0 by floatx80_chs(ST0), i.e. flip its sign. */
void helper_fchs_ST0(CPUX86State *env)
{
    floatx80 *top = &ST0;

    *top = floatx80_chs(*top);
}
642  
/* Replace ST0 by floatx80_abs(ST0), i.e. its absolute value. */
void helper_fabs_ST0(CPUX86State *env)
{
    floatx80 *top = &ST0;

    *top = floatx80_abs(*top);
}
647  
/* Overwrite ST0 with the constant floatx80_one (no stack push here). */
void helper_fld1_ST0(CPUX86State *env)
{
    env->fpregs[env->fpstt].d = floatx80_one;
}
652  
/*
 * Overwrite ST0 with the l2t constant.  The value one ulp higher
 * (floatx80_l2t_u) is used when the rounding control selects round-up;
 * every other rounding mode gets floatx80_l2t.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    if ((env->fpuc & FPU_RC_MASK) == FPU_RC_UP) {
        ST0 = floatx80_l2t_u;
    } else {
        ST0 = floatx80_l2t;
    }
}
664  
/*
 * Overwrite ST0 with the l2e constant.  The value one ulp lower
 * (floatx80_l2e_d) is used when the rounding control selects round-down
 * or chop; the other modes get floatx80_l2e.
 */
void helper_fldl2e_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_l2e_d;
    } else {
        ST0 = floatx80_l2e;
    }
}
677  
/*
 * Overwrite ST0 with the pi constant.  floatx80_pi_d is used when the
 * rounding control selects round-down or chop; the other modes get
 * floatx80_pi.
 */
void helper_fldpi_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_pi_d;
    } else {
        ST0 = floatx80_pi;
    }
}
690  
/*
 * Overwrite ST0 with the lg2 constant.  The value one ulp lower
 * (floatx80_lg2_d) is used when the rounding control selects round-down
 * or chop; the other modes get floatx80_lg2.
 */
void helper_fldlg2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_lg2_d;
    } else {
        ST0 = floatx80_lg2;
    }
}
703  
/*
 * Overwrite ST0 with the ln2 constant.  floatx80_ln2_d is used when the
 * rounding control selects round-down or chop; the other modes get
 * floatx80_ln2.
 */
void helper_fldln2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_ln2_d;
    } else {
        ST0 = floatx80_ln2;
    }
}
716  
/* Overwrite ST0 with the constant floatx80_zero (no stack push here). */
void helper_fldz_ST0(CPUX86State *env)
{
    env->fpregs[env->fpstt].d = floatx80_zero;
}
721  
/* Set the scratch operand FT0 to the constant floatx80_zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    env->ft0 = floatx80_zero;
}
726  
/*
 * Return the status word as seen by the guest: env->fpus with bits
 * 11..13 (mask 0x3800) replaced by the current top-of-stack index.
 */
uint32_t helper_fnstsw(CPUX86State *env)
{
    uint32_t sw = env->fpus & ~0x3800;

    sw |= (env->fpstt & 0x7) << 11;
    return sw;
}
731  
/* Return the current control word (env->fpuc). */
uint32_t helper_fnstcw(CPUX86State *env)
{
    uint32_t cw = env->fpuc;

    return cw;
}
736  
/*
 * Program @status with the softfloat rounding mode that corresponds to a
 * 2-bit x86 rounding-control value:
 *   0 -> nearest-even, 1 -> down, 2 -> up, 3 -> toward zero.
 *
 * @mode must be in the range 0..3; anything else trips the assertion.
 */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    /* Read-only lookup table, so keep it const like the file's other tables. */
    static const FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}
748  
/*
 * Propagate the control word (env->fpuc) into the x87 float_status:
 * the rounding-control field selects the rounding mode and bits 8..9
 * select the floatx80 rounding precision (0: single, 2: double,
 * anything else: extended).
 */
void update_fp_status(CPUX86State *env)
{
    unsigned int rc_field = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    unsigned int pc_field = (env->fpuc >> 8) & 3;
    FloatX80RoundPrec prec;

    /* set rounding mode */
    set_x86_rounding_mode(rc_field, &env->fp_status);

    /* set rounding precision */
    if (pc_field == 0) {
        prec = floatx80_precision_s;
    } else if (pc_field == 2) {
        prec = floatx80_precision_d;
    } else {
        prec = floatx80_precision_x;
    }
    set_floatx80_rounding_precision(prec, &env->fp_status);
}
772  
/*
 * Load a new control word: forwards @val to cpu_set_fpuc(), which is
 * defined outside this part of the file.
 */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
777  
/*
 * Clear the exception state in the status word: bits 0..7 (the exception
 * flags, FPUS_SF and FPUS_SE) and bit 15 (FPUS_B) are cleared, bits
 * 8..14 are kept.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus = env->fpus & 0x7f00;
}
782  
/*
 * If the status word has FPUS_SE set (an unmasked exception is pending),
 * deliver it via fpu_raise_exception(); otherwise do nothing.
 */
void helper_fwait(CPUX86State *env)
{
    if (!(env->fpus & FPUS_SE)) {
        return;
    }
    fpu_raise_exception(env, GETPC());
}
789  
/*
 * Put the x87 state into its initial condition: status word, top of
 * stack and the instruction/data pointer fields are zeroed, the control
 * word is set to 0x37f through cpu_set_fpuc(), and all eight stack
 * slots are tagged as invalid.
 */
static void do_fninit(CPUX86State *env)
{
    int slot;

    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);

    for (slot = 0; slot < 8; slot++) {
        env->fptags[slot] = 1;
    }
}
808  
/* Helper entry point that reinitialises the x87 state via do_fninit(). */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
813  
814  /* BCD ops */
815  
/*
 * Load a 10-byte packed-BCD operand from guest address @ptr and push it
 * onto the x87 stack.
 *
 * Bytes 0..8 each hold two decimal digits (high nibble = tens, low
 * nibble = units), least significant byte first; bit 7 of byte 9 is the
 * sign.  The whole 10-byte access is prepared before any FPU state is
 * modified.
 *
 * The 18-digit magnitude always fits in a 64-bit significand, so the
 * integer-to-floatx80 conversion is done with the rounding precision
 * temporarily forced to extended (as the integer-load helpers do).
 * Otherwise a reduced precision-control setting would round the value
 * that is loaded.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    FloatX80RoundPrec old;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Accumulate the digits, most significant byte first. */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }

    old = tmp_maximise_precision(&env->fp_status);
    tmp = int64_to_floatx80(val, &env->fp_status);
    set_floatx80_rounding_precision(old, &env->fp_status);

    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
838  
/*
 * Store ST0 as a 10-byte packed-BCD value at guest address @ptr.
 *
 * ST0 is converted to a 64-bit integer using the current rounding mode.
 * If the magnitude does not fit in 18 decimal digits, the softfloat
 * flags are replaced by "invalid" and the bytes 00 00 00 00 00 00 00 c0
 * ff ff are stored.  Otherwise bytes 0..8 receive two decimal digits
 * each (least significant byte first) and byte 9 receives 0x80 if the
 * sign bit of ST0 is set, 0x00 if not.  Exceptions are folded into the
 * status word on both paths.
 *
 * All stores are addressed as ptr + offset rather than by comparing
 * running addresses, so the result is the same when ptr + 9 wraps around
 * the target_ulong range.
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;
    X86Access ac;
    int64_t val;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    temp.d = ST0;

    val = floatx80_to_int64(ST0, &env->fp_status);
    if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
        /* Not representable in 18 digits: store the indefinite pattern. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        for (i = 0; i < 7; i++) {
            access_stb(&ac, ptr + i, 0);
        }
        access_stb(&ac, ptr + 7, 0xc0);
        access_stb(&ac, ptr + 8, 0xff);
        access_stb(&ac, ptr + 9, 0xff);
        merge_exception_flags(env, old_flags);
        return;
    }

    /* Sign byte first; the digits are produced from the magnitude. */
    if (SIGND(temp)) {
        access_stb(&ac, ptr + 9, 0x80);
        val = -val;
    } else {
        access_stb(&ac, ptr + 9, 0x00);
    }

    /* Nine digit-pair bytes; once val reaches 0 the rest are zero. */
    for (i = 0; i < 9; i++) {
        int v = val % 100;

        val = val / 100;
        v = ((v / 10) << 4) | (v % 10);
        access_stb(&ac, ptr + i, v);
    }
    merge_exception_flags(env, old_flags);
}
885  
/*
 * 128-bit significand of log(2), split into two 64-bit halves.  The
 * high half is the same bit pattern as the significand of
 * floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * NOTE(review): f2xm1_coeff_0_low has a much smaller exponent than
 * f2xm1_coeff_0 and the opposite sign, so it looks like a low-order
 * correction term paired with f2xm1_coeff_0 -- confirm against the code
 * that evaluates the polynomial (not visible in this part of the file).
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
903  
/*
 * One entry of the precomputed f2xm1_table: a sample point t together
 * with the values 2^t and 2^t - 1 at that point.
 */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};
915  
/*
 * Reduction table for helper_f2xm1, which indexes it with
 * n = 32 + (ST0 * 32 converted to an integer), so entry i has t close
 * to (i - 32) / 32: entry 0 is t = -1, entry 32 is t = 0 and entry 64
 * is t = 1.  Each row is { t, 2^t, 2^t - 1 }.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
1113  
/*
 * F2XM1: replace ST0 with 2^ST0 - 1.
 *
 * Handling by argument class:
 *  - invalid encodings and arguments of magnitude greater than 1 raise
 *    the invalid exception and produce the default NaN;
 *  - signaling NaNs raise invalid and are silenced, quiet NaNs are left
 *    unchanged;
 *  - +1 is left unchanged (2^1 - 1 = 1) and -1 yields exactly -0.5;
 *  - zeros are returned unchanged;
 *  - nonzero arguments with biased exponent below 0x3fb0 are multiplied
 *    by a 128-bit value of log(2);
 *  - everything else is reduced to y = ST0 - t using f2xm1_table and
 *    evaluated with the f2xm1_coeff_* polynomial.
 *
 * Exception flags raised while computing are folded back into the FPU
 * state by merge_exception_flags() on exit.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid.  */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5.  */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        /*
         * Intermediate steps run in round-to-nearest-even at full
         * extended precision; the caller's settings are restored before
         * the final rounding (mode) and on exit (precision).
         */
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument.  */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * The argument equals the table's t exactly.  Use the
             * value of 2^t - 1 from the table, to avoid needing to
             * special-case zero as a result of multiplication below.
             * (Storing t itself here would return the argument
             * unchanged rather than 2^t - 1.)
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            /* Align accum to coeff_0's exponent as a 128-bit value.  */
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1.  */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            /* For n == 32, t is zero and 2^y - 1 is already the result.  */
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Bring both operands to the larger exponent.  */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result.  */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition.  */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            /* Round the final result in the caller's rounding mode.  */
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}
1274  
/*
 * FPTAN: replace ST0 with tan(ST0) and push 1.0 on top of it.
 *
 * The tangent is computed in host double precision.  When the operand
 * lies outside [-MAXTAN, MAXTAN], bit 0x400 (C2) of the status word is
 * set and nothing else is changed; otherwise that bit is cleared.
 */
void helper_fptan(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        /* Out of range: signal via C2 and leave the stack alone.  */
        env->fpus |= 0x400;
        return;
    }

    /* This computation is for |arg| < 2**52 only.  */
    ST0 = double_to_floatx80(env, tan(arg));
    fpush(env);
    ST0 = floatx80_one;
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
1290  
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is given as an exponent (same bias as floatx80, so
 * 0x3fff means 2^0) plus the high and low 64-bit halves of its
 * significand.  pi/4, pi/2 and pi share a significand and differ only
 * in exponent.
 */
#define pi_4_exp 0x3ffe
#define pi_4_sig_high 0xc90fdaa22168c234ULL
#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_2_exp 0x3fff
#define pi_2_sig_high 0xc90fdaa22168c234ULL
#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL
#define pi_34_exp 0x4000
#define pi_34_sig_high 0x96cbe3f9990e91a7ULL
#define pi_34_sig_low 0x9394c9e8a0a5159dULL
#define pi_exp 0x4000
#define pi_sig_high 0xc90fdaa22168c234ULL
#define pi_sig_low 0xc4c6628b80dc1cd1ULL
1304  
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * The values are close to the Taylor series coefficients 1, -1/3,
 * 1/5, -1/7, ... and alternate in sign accordingly.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1320  
/*
 * One entry of fpatan_table: a precomputed arctangent stored as two
 * floatx80 values.  The field order must match the positional
 * initializers in fpatan_table.
 */
struct fpatan_data {
    /* High and low parts of atan(x).  */
    floatx80 atan_high, atan_low;
};
1325  
/*
 * Precomputed arctangent values as { high, low } pairs.  The first
 * entry is zero and the last entry's high part is pi/4 (atan(1))
 * rounded to floatx80.
 *
 * NOTE(review): presumably entry n holds atan(n/8), matching the
 * t = n/8 split performed in helper_fpatan -- confirm against the
 * lookup there.
 */
static const struct fpatan_data fpatan_table[9] = {
    { floatx80_zero_init,
      floatx80_zero_init },
    { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
      make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
    { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
      make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
    { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
      make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
    { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
      make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
    { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
      make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
    { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
      make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
    { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
      make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
    { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
      make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
};
1346  
helper_fpatan(CPUX86State * env)1347  void helper_fpatan(CPUX86State *env)
1348  {
1349      uint8_t old_flags = save_exception_flags(env);
1350      uint64_t arg0_sig = extractFloatx80Frac(ST0);
1351      int32_t arg0_exp = extractFloatx80Exp(ST0);
1352      bool arg0_sign = extractFloatx80Sign(ST0);
1353      uint64_t arg1_sig = extractFloatx80Frac(ST1);
1354      int32_t arg1_exp = extractFloatx80Exp(ST1);
1355      bool arg1_sign = extractFloatx80Sign(ST1);
1356  
1357      if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
1358          float_raise(float_flag_invalid, &env->fp_status);
1359          ST1 = floatx80_silence_nan(ST0, &env->fp_status);
1360      } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
1361          float_raise(float_flag_invalid, &env->fp_status);
1362          ST1 = floatx80_silence_nan(ST1, &env->fp_status);
1363      } else if (floatx80_invalid_encoding(ST0) ||
1364                 floatx80_invalid_encoding(ST1)) {
1365          float_raise(float_flag_invalid, &env->fp_status);
1366          ST1 = floatx80_default_nan(&env->fp_status);
1367      } else if (floatx80_is_any_nan(ST0)) {
1368          ST1 = ST0;
1369      } else if (floatx80_is_any_nan(ST1)) {
1370          /* Pass this NaN through.  */
1371      } else if (floatx80_is_zero(ST1) && !arg0_sign) {
1372          /* Pass this zero through.  */
1373      } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
1374                   arg0_exp - arg1_exp >= 80) &&
1375                 !arg0_sign) {
1376          /*
1377           * Dividing ST1 by ST0 gives the correct result up to
1378           * rounding, and avoids spurious underflow exceptions that
1379           * might result from passing some small values through the
1380           * polynomial approximation, but if a finite nonzero result of
1381           * division is exact, the result of fpatan is still inexact
1382           * (and underflowing where appropriate).
1383           */
1384          FloatX80RoundPrec save_prec =
1385              env->fp_status.floatx80_rounding_precision;
1386          env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1387          ST1 = floatx80_div(ST1, ST0, &env->fp_status);
1388          env->fp_status.floatx80_rounding_precision = save_prec;
1389          if (!floatx80_is_zero(ST1) &&
1390              !(get_float_exception_flags(&env->fp_status) &
1391                float_flag_inexact)) {
1392              /*
1393               * The mathematical result is very slightly closer to zero
1394               * than this exact result.  Round a value with the
1395               * significand adjusted accordingly to get the correct
1396               * exceptions, and possibly an adjusted result depending
1397               * on the rounding mode.
1398               */
1399              uint64_t sig = extractFloatx80Frac(ST1);
1400              int32_t exp = extractFloatx80Exp(ST1);
1401              bool sign = extractFloatx80Sign(ST1);
1402              if (exp == 0) {
1403                  normalizeFloatx80Subnormal(sig, &exp, &sig);
1404              }
1405              ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1406                                                  sign, exp, sig - 1,
1407                                                  -1, &env->fp_status);
1408          }
1409      } else {
1410          /* The result is inexact.  */
1411          bool rsign = arg1_sign;
1412          int32_t rexp;
1413          uint64_t rsig0, rsig1;
1414          if (floatx80_is_zero(ST1)) {
1415              /*
1416               * ST0 is negative.  The result is pi with the sign of
1417               * ST1.
1418               */
1419              rexp = pi_exp;
1420              rsig0 = pi_sig_high;
1421              rsig1 = pi_sig_low;
1422          } else if (floatx80_is_infinity(ST1)) {
1423              if (floatx80_is_infinity(ST0)) {
1424                  if (arg0_sign) {
1425                      rexp = pi_34_exp;
1426                      rsig0 = pi_34_sig_high;
1427                      rsig1 = pi_34_sig_low;
1428                  } else {
1429                      rexp = pi_4_exp;
1430                      rsig0 = pi_4_sig_high;
1431                      rsig1 = pi_4_sig_low;
1432                  }
1433              } else {
1434                  rexp = pi_2_exp;
1435                  rsig0 = pi_2_sig_high;
1436                  rsig1 = pi_2_sig_low;
1437              }
1438          } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
1439              rexp = pi_2_exp;
1440              rsig0 = pi_2_sig_high;
1441              rsig1 = pi_2_sig_low;
1442          } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
1443              /* ST0 is negative.  */
1444              rexp = pi_exp;
1445              rsig0 = pi_sig_high;
1446              rsig1 = pi_sig_low;
1447          } else {
1448              /*
1449               * ST0 and ST1 are finite, nonzero and with exponents not
1450               * too far apart.
1451               */
1452              int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
1453              int32_t azexp, axexp;
1454              bool adj_sub, ysign, zsign;
1455              uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
1456              uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
1457              uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
1458              uint64_t azsig0, azsig1;
1459              uint64_t azsig2, azsig3, axsig0, axsig1;
1460              floatx80 x8;
1461              FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
1462              FloatX80RoundPrec save_prec =
1463                  env->fp_status.floatx80_rounding_precision;
1464              env->fp_status.float_rounding_mode = float_round_nearest_even;
1465              env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
1466  
1467              if (arg0_exp == 0) {
1468                  normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
1469              }
1470              if (arg1_exp == 0) {
1471                  normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
1472              }
1473              if (arg0_exp > arg1_exp ||
1474                  (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
1475                  /* Work with abs(ST1) / abs(ST0).  */
1476                  num_exp = arg1_exp;
1477                  num_sig = arg1_sig;
1478                  den_exp = arg0_exp;
1479                  den_sig = arg0_sig;
1480                  if (arg0_sign) {
1481                      /* The result is subtracted from pi.  */
1482                      adj_exp = pi_exp;
1483                      adj_sig0 = pi_sig_high;
1484                      adj_sig1 = pi_sig_low;
1485                      adj_sub = true;
1486                  } else {
1487                      /* The result is used as-is.  */
1488                      adj_exp = 0;
1489                      adj_sig0 = 0;
1490                      adj_sig1 = 0;
1491                      adj_sub = false;
1492                  }
1493              } else {
1494                  /* Work with abs(ST0) / abs(ST1).  */
1495                  num_exp = arg0_exp;
1496                  num_sig = arg0_sig;
1497                  den_exp = arg1_exp;
1498                  den_sig = arg1_sig;
1499                  /* The result is added to or subtracted from pi/2.  */
1500                  adj_exp = pi_2_exp;
1501                  adj_sig0 = pi_2_sig_high;
1502                  adj_sig1 = pi_2_sig_low;
1503                  adj_sub = !arg0_sign;
1504              }
1505  
1506              /*
1507               * Compute x = num/den, where 0 < x <= 1 and x is not too
1508               * small.
1509               */
1510              xexp = num_exp - den_exp + 0x3ffe;
1511              remsig0 = num_sig;
1512              remsig1 = 0;
1513              if (den_sig <= remsig0) {
1514                  shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1515                  ++xexp;
1516              }
1517              xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
1518              mul64To128(den_sig, xsig0, &msig0, &msig1);
1519              sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
1520              while ((int64_t) remsig0 < 0) {
1521                  --xsig0;
1522                  add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
1523              }
1524              xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
1525              /*
1526               * No need to correct any estimation error in xsig1; even
1527               * with such error, it is accurate enough.
1528               */
1529  
1530              /*
1531               * Split x as x = t + y, where t = n/8 is the nearest
1532               * multiple of 1/8 to x.
1533               */
1534              x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
1535                                                 false, xexp + 3, xsig0,
1536                                                 xsig1, &env->fp_status);
1537              n = floatx80_to_int32(x8, &env->fp_status);
1538              if (n == 0) {
1539                  ysign = false;
1540                  yexp = xexp;
1541                  ysig0 = xsig0;
1542                  ysig1 = xsig1;
1543                  texp = 0;
1544                  tsig = 0;
1545              } else {
1546                  int shift = clz32(n) + 32;
1547                  texp = 0x403b - shift;
1548                  tsig = n;
1549                  tsig <<= shift;
1550                  if (texp == xexp) {
1551                      sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
1552                      if ((int64_t) ysig0 >= 0) {
1553                          ysign = false;
1554                          if (ysig0 == 0) {
1555                              if (ysig1 == 0) {
1556                                  yexp = 0;
1557                              } else {
1558                                  shift = clz64(ysig1) + 64;
1559                                  yexp = xexp - shift;
1560                                  shift128Left(ysig0, ysig1, shift,
1561                                               &ysig0, &ysig1);
1562                              }
1563                          } else {
1564                              shift = clz64(ysig0);
1565                              yexp = xexp - shift;
1566                              shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1567                          }
1568                      } else {
1569                          ysign = true;
1570                          sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
1571                          if (ysig0 == 0) {
1572                              shift = clz64(ysig1) + 64;
1573                          } else {
1574                              shift = clz64(ysig0);
1575                          }
1576                          yexp = xexp - shift;
1577                          shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1578                      }
1579                  } else {
1580                      /*
1581                       * t's exponent must be greater than x's because t
1582                       * is positive and the nearest multiple of 1/8 to
1583                       * x, and if x has a greater exponent, the power
1584                       * of 2 with that exponent is also a multiple of
1585                       * 1/8.
1586                       */
1587                      uint64_t usig0, usig1;
1588                      shift128RightJamming(xsig0, xsig1, texp - xexp,
1589                                           &usig0, &usig1);
1590                      ysign = true;
1591                      sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
1592                      if (ysig0 == 0) {
1593                          shift = clz64(ysig1) + 64;
1594                      } else {
1595                          shift = clz64(ysig0);
1596                      }
1597                      yexp = texp - shift;
1598                      shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
1599                  }
1600              }
1601  
1602              /*
1603               * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
1604               * arctan(z).
1605               */
1606              zsign = ysign;
1607              if (texp == 0 || yexp == 0) {
1608                  zexp = yexp;
1609                  zsig0 = ysig0;
1610                  zsig1 = ysig1;
1611              } else {
1612                  /*
1613                   * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
1614                   */
1615                  int32_t dexp = texp + xexp - 0x3ffe;
1616                  uint64_t dsig0, dsig1, dsig2;
1617                  mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
1618                  /*
1619                   * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
1620                   * bit).  Add 1 to produce the denominator 1+tx.
1621                   */
1622                  shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
1623                                       &dsig0, &dsig1);
1624                  dsig0 |= 0x8000000000000000ULL;
1625                  zexp = yexp - 1;
1626                  remsig0 = ysig0;
1627                  remsig1 = ysig1;
1628                  remsig2 = 0;
1629                  if (dsig0 <= remsig0) {
1630                      shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
1631                      ++zexp;
1632                  }
1633                  zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
1634                  mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
1635                  sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
1636                         &remsig0, &remsig1, &remsig2);
1637                  while ((int64_t) remsig0 < 0) {
1638                      --zsig0;
1639                      add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
1640                             &remsig0, &remsig1, &remsig2);
1641                  }
1642                  zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
1643                  /* No need to correct any estimation error in zsig1.  */
1644              }
1645  
1646              if (zexp == 0) {
1647                  azexp = 0;
1648                  azsig0 = 0;
1649                  azsig1 = 0;
1650              } else {
1651                  floatx80 z2, accum;
1652                  uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
1653                  /* Compute z^2.  */
1654                  mul128To256(zsig0, zsig1, zsig0, zsig1,
1655                              &z2sig0, &z2sig1, &z2sig2, &z2sig3);
1656                  z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1657                                                     zexp + zexp - 0x3ffe,
1658                                                     z2sig0, z2sig1,
1659                                                     &env->fp_status);
1660  
1661                  /* Compute the lower parts of the polynomial expansion.  */
1662                  accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
1663                  accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
1664                  accum = floatx80_mul(accum, z2, &env->fp_status);
1665                  accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
1666                  accum = floatx80_mul(accum, z2, &env->fp_status);
1667                  accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
1668                  accum = floatx80_mul(accum, z2, &env->fp_status);
1669                  accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
1670                  accum = floatx80_mul(accum, z2, &env->fp_status);
1671                  accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
1672                  accum = floatx80_mul(accum, z2, &env->fp_status);
1673  
1674                  /*
1675                   * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
1676                   * fpatan_coeff_0 is 1, and accum is negative and much smaller.
1677                   */
1678                  aexp = extractFloatx80Exp(fpatan_coeff_0);
1679                  shift128RightJamming(extractFloatx80Frac(accum), 0,
1680                                       aexp - extractFloatx80Exp(accum),
1681                                       &asig0, &asig1);
1682                  sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
1683                         &asig0, &asig1);
1684                  /* Multiply by z to compute arctan(z).  */
1685                  azexp = aexp + zexp - 0x3ffe;
1686                  mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
1687                              &azsig2, &azsig3);
1688              }
1689  
1690              /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
1691              if (texp == 0) {
1692                  /* z is positive.  */
1693                  axexp = azexp;
1694                  axsig0 = azsig0;
1695                  axsig1 = azsig1;
1696              } else {
1697                  bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
1698                  int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
1699                  uint64_t low_sig0 =
1700                      extractFloatx80Frac(fpatan_table[n].atan_low);
1701                  uint64_t low_sig1 = 0;
1702                  axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
1703                  axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
1704                  axsig1 = 0;
1705                  shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
1706                                       &low_sig0, &low_sig1);
1707                  if (low_sign) {
1708                      sub128(axsig0, axsig1, low_sig0, low_sig1,
1709                             &axsig0, &axsig1);
1710                  } else {
1711                      add128(axsig0, axsig1, low_sig0, low_sig1,
1712                             &axsig0, &axsig1);
1713                  }
1714                  if (azexp >= axexp) {
1715                      shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
1716                                           &axsig0, &axsig1);
1717                      axexp = azexp + 1;
1718                      shift128RightJamming(azsig0, azsig1, 1,
1719                                           &azsig0, &azsig1);
1720                  } else {
1721                      shift128RightJamming(axsig0, axsig1, 1,
1722                                           &axsig0, &axsig1);
1723                      shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
1724                                           &azsig0, &azsig1);
1725                      ++axexp;
1726                  }
1727                  if (zsign) {
1728                      sub128(axsig0, axsig1, azsig0, azsig1,
1729                             &axsig0, &axsig1);
1730                  } else {
1731                      add128(axsig0, axsig1, azsig0, azsig1,
1732                             &axsig0, &axsig1);
1733                  }
1734              }
1735  
1736              if (adj_exp == 0) {
1737                  rexp = axexp;
1738                  rsig0 = axsig0;
1739                  rsig1 = axsig1;
1740              } else {
1741                  /*
1742                   * Add or subtract arctan(x) (exponent axexp,
1743                   * significand axsig0 and axsig1, positive, not
1744                   * necessarily normalized) to the number given by
1745                   * adj_exp, adj_sig0 and adj_sig1, according to
1746                   * adj_sub.
1747                   */
1748                  if (adj_exp >= axexp) {
1749                      shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
1750                                           &axsig0, &axsig1);
1751                      rexp = adj_exp + 1;
1752                      shift128RightJamming(adj_sig0, adj_sig1, 1,
1753                                           &adj_sig0, &adj_sig1);
1754                  } else {
1755                      shift128RightJamming(axsig0, axsig1, 1,
1756                                           &axsig0, &axsig1);
1757                      shift128RightJamming(adj_sig0, adj_sig1,
1758                                           axexp - adj_exp + 1,
1759                                           &adj_sig0, &adj_sig1);
1760                      rexp = axexp + 1;
1761                  }
1762                  if (adj_sub) {
1763                      sub128(adj_sig0, adj_sig1, axsig0, axsig1,
1764                             &rsig0, &rsig1);
1765                  } else {
1766                      add128(adj_sig0, adj_sig1, axsig0, axsig1,
1767                             &rsig0, &rsig1);
1768                  }
1769              }
1770  
1771              env->fp_status.float_rounding_mode = save_mode;
1772              env->fp_status.floatx80_rounding_precision = save_prec;
1773          }
1774          /* This result is inexact.  */
1775          rsig1 |= 1;
1776          ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
1777                                              rsig0, rsig1, &env->fp_status);
1778      }
1779  
1780      fpop(env);
1781      merge_exception_flags(env, old_flags);
1782  }
1783  
/*
 * FXTRACT helper: split ST0 into an exponent and a significand.  The
 * exponent is written over the old ST0 and the significand is then
 * pushed, so on return ST1 holds the exponent and ST0 the significand.
 * Exception flags raised on the way are merged back into the FPU state.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    CPU_LDoubleU src;

    src.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /*
         * -1 / 0 is a convenient way to obtain -inf for the exponent
         * slot while also raising the division-by-zero exception.  The
         * zero operand itself is pushed back unchanged.
         */
        floatx80 minus_one = floatx80_chs(floatx80_one);

        ST0 = floatx80_div(minus_one, floatx80_zero, &env->fp_status);
        fpush(env);
        ST0 = src.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        /* Unsupported encoding: invalid, both results are the default NaN. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        /* NaN: quieten it if needed, then duplicate it into both slots. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        /*
         * Infinity: the significand keeps the operand (sign included),
         * the exponent becomes floatx80_infinity.
         */
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int unbiased_exp;

        if (EXPD(src) != 0) {
            unbiased_exp = EXPD(src) - EXPBIAS;
        } else {
            /*
             * Exponent field of zero with a nonzero significand:
             * normalize the significand and account for the shift in
             * the reported exponent.
             */
            int lead_zeros = clz64(src.l.lower);

            src.l.lower <<= lead_zeros;
            unbiased_exp = 1 - EXPBIAS - lead_zeros;
            float_raise(float_flag_input_denormal, &env->fp_status);
        }
        ST0 = int32_to_floatx80(unbiased_exp, &env->fp_status);
        fpush(env);
        /*
         * NOTE(review): BIASEXPONENT is defined elsewhere in this file;
         * presumably it resets the exponent field of src to the bias,
         * leaving only the significand's magnitude - confirm.
         */
        BIASEXPONENT(src);
        ST0 = src.d;
    }
    merge_exception_flags(env, saved_flags);
}
1832  
/*
 * Shared body of the FPREM and FPREM1 helpers.  ST0 is replaced by a
 * remainder (possibly only a partial one) of ST0 by ST1, and the
 * condition-code bits C0-C3 in env->fpus are updated.  'mod' is passed
 * through to floatx80_modrem() to select the remainder flavour.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t saved_flags = save_exception_flags(env);
    CPU_LDoubleU dividend, divisor;
    uint64_t quotient;
    int dividend_exp, divisor_exp;
    bool special_operand;

    dividend.d = ST0;
    divisor.d = ST1;
    dividend_exp = EXPD(dividend);
    divisor_exp = EXPD(divisor);

    /* (C3,C2,C1,C0) <-- 0000 */
    env->fpus &= ~0x4700;

    /* Zeros, infinities/NaNs and invalid encodings go straight to softfloat. */
    special_operand = floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
                      dividend_exp == 0x7fff || divisor_exp == 0x7fff ||
                      floatx80_invalid_encoding(ST0) ||
                      floatx80_invalid_encoding(ST1);

    if (special_operand) {
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        int expdiff;

        /* Derive an effective exponent for operands with a zero exponent. */
        if (dividend_exp == 0) {
            dividend_exp = 1 - clz64(dividend.l.lower);
        }
        if (divisor_exp == 0) {
            divisor_exp = 1 - clz64(divisor.l.lower);
        }
        expdiff = dividend_exp - divisor_exp;

        if (expdiff >= 64) {
            /*
             * Partial remainder.  The number of bits handled per step
             * follows the AMD instruction set manuals, which Intel
             * processors empirically match as well; it guarantees that
             * the last remainder operation of a loop yields the right
             * low three quotient bits.  AMD documents that every flag
             * except C2 is cleared, and Intel processors empirically
             * do the same.
             */
            int n = 32 + (expdiff % 32);
            floatx80 scaled_divisor;

            scaled_divisor = floatx80_scalbn(divisor.d, expdiff - n,
                                             &env->fp_status);
            ST0 = floatx80_mod(ST0, scaled_divisor, &env->fp_status);
            env->fpus |= 0x400;  /* C2 <-- 1 */
        } else {
            /* Complete remainder: report the low three quotient bits. */
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= ((quotient & 0x4) << (8 - 2)) |   /* (C0) <-- q2 */
                         ((quotient & 0x2) << (14 - 1)) |  /* (C3) <-- q1 */
                         ((quotient & 0x1) << (9 - 0));    /* (C1) <-- q0 */
        }
    }
    merge_exception_flags(env, saved_flags);
}
1882  
/* FPREM1 helper: runs the shared remainder code with mod == false. */
void helper_fprem1(CPUX86State *env)
{
    const bool mod = false;

    helper_fprem_common(env, mod);
}
1887  
/* FPREM helper: runs the shared remainder code with mod == true. */
void helper_fprem(CPUX86State *env)
{
    const bool mod = true;

    helper_fprem_common(env, mod);
}
1892  
/*
 * 128-bit significand of log2(e), split into its high and low 64-bit
 * halves.  helper_fyl2xp1() multiplies by this value directly when
 * ST0 is small enough that the product alone is sufficiently precise.
 */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * helper_fyl2x_common() evaluates the terms fyl2x_coeff_9 down to
 * fyl2x_coeff_1 by Horner's rule in powers of the squared argument,
 * then adds fyl2x_coeff_0_low to that sum.  fyl2x_coeff_0 is kept
 * apart and combined with the sum using 128-bit significand
 * arithmetic.  NOTE(review): fyl2x_coeff_0_low is presumably the
 * correction for the rounding of the leading coefficient to 64 bits -
 * confirm.
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1914  
1915  /*
1916   * Compute an approximation of log2(1+arg), where 1+arg is in the
1917   * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
1918   * function is called, rounding precision is set to 80 and the
1919   * round-to-nearest mode is in effect.  arg must not be exactly zero,
1920   * and must not be so close to zero that underflow might occur.
1921   */
helper_fyl2x_common(CPUX86State * env,floatx80 arg,int32_t * exp,uint64_t * sig0,uint64_t * sig1)1922  static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
1923                                  uint64_t *sig0, uint64_t *sig1)
1924  {
1925      uint64_t arg0_sig = extractFloatx80Frac(arg);
1926      int32_t arg0_exp = extractFloatx80Exp(arg);
1927      bool arg0_sign = extractFloatx80Sign(arg);
1928      bool asign;
1929      int32_t dexp, texp, aexp;
1930      uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
1931      uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
1932      uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
1933      floatx80 t2, accum;
1934  
1935      /*
1936       * Compute an approximation of arg/(2+arg), with extra precision,
1937       * as the argument to a polynomial approximation.  The extra
1938       * precision is only needed for the first term of the
1939       * approximation, with subsequent terms being significantly
1940       * smaller; the approximation only uses odd exponents, and the
1941       * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
1942       */
1943      if (arg0_sign) {
1944          dexp = 0x3fff;
1945          shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1946          sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
1947      } else {
1948          dexp = 0x4000;
1949          shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
1950          dsig0 |= 0x8000000000000000ULL;
1951      }
1952      texp = arg0_exp - dexp + 0x3ffe;
1953      rsig0 = arg0_sig;
1954      rsig1 = 0;
1955      rsig2 = 0;
1956      if (dsig0 <= rsig0) {
1957          shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
1958          ++texp;
1959      }
1960      tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
1961      mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
1962      sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
1963             &rsig0, &rsig1, &rsig2);
1964      while ((int64_t) rsig0 < 0) {
1965          --tsig0;
1966          add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
1967                 &rsig0, &rsig1, &rsig2);
1968      }
1969      tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
1970      /*
1971       * No need to correct any estimation error in tsig1; even with
1972       * such error, it is accurate enough.  Now compute the square of
1973       * that approximation.
1974       */
1975      mul128To256(tsig0, tsig1, tsig0, tsig1,
1976                  &t2sig0, &t2sig1, &t2sig2, &t2sig3);
1977      t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
1978                                         texp + texp - 0x3ffe,
1979                                         t2sig0, t2sig1, &env->fp_status);
1980  
1981      /* Compute the lower parts of the polynomial expansion.  */
1982      accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
1983      accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
1984      accum = floatx80_mul(accum, t2, &env->fp_status);
1985      accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
1986      accum = floatx80_mul(accum, t2, &env->fp_status);
1987      accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
1988      accum = floatx80_mul(accum, t2, &env->fp_status);
1989      accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
1990      accum = floatx80_mul(accum, t2, &env->fp_status);
1991      accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
1992      accum = floatx80_mul(accum, t2, &env->fp_status);
1993      accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
1994      accum = floatx80_mul(accum, t2, &env->fp_status);
1995      accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
1996      accum = floatx80_mul(accum, t2, &env->fp_status);
1997      accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
1998      accum = floatx80_mul(accum, t2, &env->fp_status);
1999      accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);
2000  
2001      /*
2002       * The full polynomial expansion is fyl2x_coeff_0 + accum (where
2003       * accum has much lower magnitude, and so, in particular, carry
2004       * out of the addition is not possible), multiplied by t.  (This
2005       * expansion is only accurate to about 70 bits, not 128 bits.)
2006       */
2007      aexp = extractFloatx80Exp(fyl2x_coeff_0);
2008      asign = extractFloatx80Sign(fyl2x_coeff_0);
2009      shift128RightJamming(extractFloatx80Frac(accum), 0,
2010                           aexp - extractFloatx80Exp(accum),
2011                           &asig0, &asig1);
2012      bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
2013      bsig1 = 0;
2014      if (asign == extractFloatx80Sign(accum)) {
2015          add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
2016      } else {
2017          sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
2018      }
2019      /* Multiply by t to compute the required result.  */
2020      mul128To256(asig0, asig1, tsig0, tsig1,
2021                  &asig0, &asig1, &asig2, &asig3);
2022      aexp += texp - 0x3ffe;
2023      *exp = aexp;
2024      *sig0 = asig0;
2025      *sig1 = asig1;
2026  }
2027  
/*
 * FYL2XP1 helper: store ST1 * log2(ST0 + 1) into ST1 and pop the
 * stack.  NaNs, invalid encodings, out-of-range ST0, zeros and an
 * infinite ST1 are dealt with first; the remaining cases use either a
 * direct product with log2(e) (tiny ST0) or helper_fyl2x_common().
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    uint64_t x_sig = extractFloatx80Frac(ST0);
    int32_t x_exp = extractFloatx80Exp(ST0);
    bool x_neg = extractFloatx80Sign(ST0);
    uint64_t y_sig = extractFloatx80Frac(ST1);
    int32_t y_exp = extractFloatx80Exp(ST1);
    bool y_neg = extractFloatx80Sign(ST1);
    /* The sign of the product follows the usual sign rule.  */
    bool result_neg = x_neg ^ y_neg;
    /* Largest accepted significand at exponent 0x3ffd, per sign of ST0.  */
    uint64_t limit_sig = x_neg ? 0x95f619980c4336f7ULL
                               : 0xd413cccfe7799211ULL;

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* ST1 already holds the quiet NaN to return.  */
    } else if (x_exp > 0x3ffd || (x_exp == 0x3ffd && x_sig > limit_sig)) {
        /*
         * ST0 is outside the range the instruction supports.  Intel
         * manuals require |ST0| < 1 - sqrt(2)/2 = 0.292...; AMD manuals
         * permit sqrt(2)/2 - 1 to sqrt(2) - 1, which is the range
         * accepted here.  Anything else is treated as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               y_exp == 0x7fff) {
        /*
         * A zero argument, or a multiplication by infinity: the exact
         * result is simply the product of the two arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (x_exp < 0x3fb0) {
        /*
         * ST0 is tiny: ST0 * ST1 * log2(e), with log2(e) taken in
         * extra precision, is sufficiently precise.
         */
        uint64_t prod0, prod1, prod2;
        int32_t prod_exp;

        if (x_exp == 0) {
            normalizeFloatx80Subnormal(x_sig, &x_exp, &x_sig);
        }
        if (y_exp == 0) {
            normalizeFloatx80Subnormal(y_sig, &y_exp, &y_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, x_sig,
                        &prod0, &prod1, &prod2);
        prod_exp = x_exp + 1;
        mul128By64To192(prod0, prod1, y_sig, &prod0, &prod1, &prod2);
        prod_exp += y_exp - 0x3ffe;
        /* Sticky bit: the result is inexact.  */
        prod1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            result_neg, prod_exp,
                                            prod0, prod1, &env->fp_status);
    } else {
        int32_t log_exp;
        uint64_t log0, log1, log2_;
        FloatRoundMode saved_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec saved_prec =
            env->fp_status.floatx80_rounding_precision;

        /* helper_fyl2x_common() needs round-to-nearest, 80-bit precision.  */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &log_exp, &log0, &log1);

        /* Scale the logarithm by the second argument.  */
        if (y_exp == 0) {
            normalizeFloatx80Subnormal(y_sig, &y_exp, &y_sig);
        }
        mul128By64To192(log0, log1, y_sig, &log0, &log1, &log2_);
        log_exp += y_exp - 0x3ffe;
        /* Sticky bit: the result is inexact.  */
        log1 |= 1;

        /*
         * The final rounding uses the guest's rounding mode; the
         * rounding precision is only restored after packing.
         */
        env->fp_status.float_rounding_mode = saved_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            result_neg, log_exp,
                                            log0, log1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = saved_prec;
    }
    fpop(env);
    merge_exception_flags(env, saved_flags);
}
2125  
helper_fyl2x(CPUX86State * env)2126  void helper_fyl2x(CPUX86State *env)
2127  {
2128      uint8_t old_flags = save_exception_flags(env);
2129      uint64_t arg0_sig = extractFloatx80Frac(ST0);
2130      int32_t arg0_exp = extractFloatx80Exp(ST0);
2131      bool arg0_sign = extractFloatx80Sign(ST0);
2132      uint64_t arg1_sig = extractFloatx80Frac(ST1);
2133      int32_t arg1_exp = extractFloatx80Exp(ST1);
2134      bool arg1_sign = extractFloatx80Sign(ST1);
2135  
2136      if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
2137          float_raise(float_flag_invalid, &env->fp_status);
2138          ST1 = floatx80_silence_nan(ST0, &env->fp_status);
2139      } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
2140          float_raise(float_flag_invalid, &env->fp_status);
2141          ST1 = floatx80_silence_nan(ST1, &env->fp_status);
2142      } else if (floatx80_invalid_encoding(ST0) ||
2143                 floatx80_invalid_encoding(ST1)) {
2144          float_raise(float_flag_invalid, &env->fp_status);
2145          ST1 = floatx80_default_nan(&env->fp_status);
2146      } else if (floatx80_is_any_nan(ST0)) {
2147          ST1 = ST0;
2148      } else if (floatx80_is_any_nan(ST1)) {
2149          /* Pass this NaN through.  */
2150      } else if (arg0_sign && !floatx80_is_zero(ST0)) {
2151          float_raise(float_flag_invalid, &env->fp_status);
2152          ST1 = floatx80_default_nan(&env->fp_status);
2153      } else if (floatx80_is_infinity(ST1)) {
2154          FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
2155                                               &env->fp_status);
2156          switch (cmp) {
2157          case float_relation_less:
2158              ST1 = floatx80_chs(ST1);
2159              break;
2160          case float_relation_greater:
2161              /* Result is infinity of the same sign as ST1.  */
2162              break;
2163          default:
2164              float_raise(float_flag_invalid, &env->fp_status);
2165              ST1 = floatx80_default_nan(&env->fp_status);
2166              break;
2167          }
2168      } else if (floatx80_is_infinity(ST0)) {
2169          if (floatx80_is_zero(ST1)) {
2170              float_raise(float_flag_invalid, &env->fp_status);
2171              ST1 = floatx80_default_nan(&env->fp_status);
2172          } else if (arg1_sign) {
2173              ST1 = floatx80_chs(ST0);
2174          } else {
2175              ST1 = ST0;
2176          }
2177      } else if (floatx80_is_zero(ST0)) {
2178          if (floatx80_is_zero(ST1)) {
2179              float_raise(float_flag_invalid, &env->fp_status);
2180              ST1 = floatx80_default_nan(&env->fp_status);
2181          } else {
2182              /* Result is infinity with opposite sign to ST1.  */
2183              float_raise(float_flag_divbyzero, &env->fp_status);
2184              ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
2185                                  0x8000000000000000ULL);
2186          }
2187      } else if (floatx80_is_zero(ST1)) {
2188          if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
2189              ST1 = floatx80_chs(ST1);
2190          }
2191          /* Otherwise, ST1 is already the correct result.  */
2192      } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
2193          if (arg1_sign) {
2194              ST1 = floatx80_chs(floatx80_zero);
2195          } else {
2196              ST1 = floatx80_zero;
2197          }
2198      } else {
2199          int32_t int_exp;
2200          floatx80 arg0_m1;
2201          FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
2202          FloatX80RoundPrec save_prec =
2203              env->fp_status.floatx80_rounding_precision;
2204          env->fp_status.float_rounding_mode = float_round_nearest_even;
2205          env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
2206  
2207          if (arg0_exp == 0) {
2208              normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
2209          }
2210          if (arg1_exp == 0) {
2211              normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2212          }
2213          int_exp = arg0_exp - 0x3fff;
2214          if (arg0_sig > 0xb504f333f9de6484ULL) {
2215              ++int_exp;
2216          }
2217          arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
2218                                                 &env->fp_status),
2219                                 floatx80_one, &env->fp_status);
2220          if (floatx80_is_zero(arg0_m1)) {
2221              /* Exact power of 2; multiply by ST1.  */
2222              env->fp_status.float_rounding_mode = save_mode;
2223              ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
2224                                 ST1, &env->fp_status);
2225          } else {
2226              bool asign = extractFloatx80Sign(arg0_m1);
2227              int32_t aexp;
2228              uint64_t asig0, asig1, asig2;
2229              helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
2230              if (int_exp != 0) {
2231                  bool isign = (int_exp < 0);
2232                  int32_t iexp;
2233                  uint64_t isig;
2234                  int shift;
2235                  int_exp = isign ? -int_exp : int_exp;
2236                  shift = clz32(int_exp) + 32;
2237                  isig = int_exp;
2238                  isig <<= shift;
2239                  iexp = 0x403e - shift;
2240                  shift128RightJamming(asig0, asig1, iexp - aexp,
2241                                       &asig0, &asig1);
2242                  if (asign == isign) {
2243                      add128(isig, 0, asig0, asig1, &asig0, &asig1);
2244                  } else {
2245                      sub128(isig, 0, asig0, asig1, &asig0, &asig1);
2246                  }
2247                  aexp = iexp;
2248                  asign = isign;
2249              }
2250              /*
2251               * Multiply by the second argument to compute the required
2252               * result.
2253               */
2254              if (arg1_exp == 0) {
2255                  normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
2256              }
2257              mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
2258              aexp += arg1_exp - 0x3ffe;
2259              /* This result is inexact.  */
2260              asig1 |= 1;
2261              env->fp_status.float_rounding_mode = save_mode;
2262              ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
2263                                                  asign ^ arg1_sign, aexp,
2264                                                  asig0, asig1, &env->fp_status);
2265          }
2266  
2267          env->fp_status.floatx80_rounding_precision = save_prec;
2268      }
2269      fpop(env);
2270      merge_exception_flags(env, old_flags);
2271  }
2272  
/*
 * FSQRT: replace ST0 with its square root.
 *
 * When ST0 has its sign bit set, the condition codes C3..C0 are cleared
 * and C2 (0x400) is set before the root is taken.  The exception state
 * captured on entry is handed back to merge_exception_flags() on exit.
 */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    if (floatx80_is_neg(ST0)) {
        /* (C3,C2,C1,C0) <-- 0100 */
        env->fpus = (env->fpus & ~0x4700) | 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);

    merge_exception_flags(env, saved_flags);
}
2283  
/*
 * FSINCOS: compute both sine and cosine of ST0.
 *
 * The operand is converted to a host double.  If it lies outside
 * [-MAXTAN, MAXTAN] (MAXTAN is 2**63) only C2 is set and the stack is
 * left untouched.  Otherwise the sine overwrites ST0, fpush() is called,
 * the cosine is written to the new ST0 and C2 is cleared.
 */
void helper_fsincos(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        /* Operand out of range: C2 <-- 1 */
        env->fpus |= 0x400;
        return;
    }

    /* Valid only for |arg| <= 2**63, evaluated in host double precision */
    ST0 = double_to_floatx80(env, sin(angle));
    fpush(env);
    ST0 = double_to_floatx80(env, cos(angle));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
}
2298  
/*
 * FRNDINT: round ST0 to an integral value with floatx80_round_to_int(),
 * using the rounding state held in env->fp_status.
 */
void helper_frndint(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    ST0 = floatx80_round_to_int(ST0, &env->fp_status);

    merge_exception_flags(env, saved_flags);
}
2305  
/*
 * FSCALE: scale ST0 by a power of two taken from ST1.
 *
 * Cases, in the order they are tested:
 *  - either operand has an invalid encoding: raise invalid, ST0 becomes
 *    the default NaN;
 *  - ST1 is a NaN: ST0 receives ST1 (silenced if signaling); invalid is
 *    raised if either operand was a signaling NaN;
 *  - ST1 is an infinity and ST0 is not a NaN: the result is a signed zero
 *    (ST1 negative) or a signed infinity (ST1 positive) carrying the sign
 *    of ST0, except that -inf with an infinite ST0 and +inf with a zero
 *    ST0 are invalid;
 *  - otherwise: ST0 = ST0 * 2**n, where n is ST1 truncated toward zero to
 *    a 32-bit integer.
 */
void helper_fscale(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        /* A signaling NaN in the value being replaced still signals */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        bool scale_down = floatx80_is_neg(ST1);
        /* inf * 2**-inf and 0 * 2**+inf have no defined result */
        bool undefined = scale_down ? floatx80_is_infinity(ST0)
                                    : floatx80_is_zero(ST0);

        if (undefined) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_default_nan(&env->fp_status);
        } else {
            floatx80 limit = scale_down ? floatx80_zero : floatx80_infinity;

            /* The result keeps the sign of the original ST0 */
            if (floatx80_is_neg(ST0)) {
                limit = floatx80_chs(limit);
            }
            ST0 = limit;
        }
    } else {
        FloatX80RoundPrec prev_prec =
            env->fp_status.floatx80_rounding_precision;
        uint8_t prev_flags = get_float_exception_flags(&env->fp_status);
        int exponent;

        /* Flags raised while converting ST1 to an integer are discarded */
        set_float_exception_flags(0, &env->fp_status);
        exponent = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(prev_flags, &env->fp_status);

        /* The scaling itself is always done at full extended precision */
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, exponent, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = prev_prec;
    }

    merge_exception_flags(env, saved_flags);
}
2356  
/*
 * FSIN: replace ST0 with its sine.
 *
 * The operand is converted to a host double and passed to sin().  If it
 * lies outside [-MAXTAN, MAXTAN] (MAXTAN is 2**63) C2 is set and ST0 is
 * left unchanged; otherwise the result is stored and C2 is cleared.
 */
void helper_fsin(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        /* Operand out of range: C2 <-- 1 */
        env->fpus |= 0x400;
        return;
    }

    /* Valid only for |arg| <= 2**63, evaluated in host double precision */
    ST0 = double_to_floatx80(env, sin(angle));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
}
2369  
/*
 * FCOS: replace ST0 with its cosine.
 *
 * The operand is converted to a host double and passed to cos().  If it
 * lies outside [-MAXTAN, MAXTAN] (MAXTAN is 2**63) C2 is set and ST0 is
 * left unchanged; otherwise the result is stored and C2 is cleared.
 */
void helper_fcos(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        /* Operand out of range: C2 <-- 1 */
        env->fpus |= 0x400;
        return;
    }

    /* Valid only for |arg| <= 2**63, evaluated in host double precision */
    ST0 = double_to_floatx80(env, cos(angle));
    env->fpus &= ~0x400;  /* C2 <-- 0 */
}
2382  
/*
 * FXAM: classify ST0 into the condition codes of the status word.
 *
 * C3..C0 are cleared first and C1 then receives the sign bit of the
 * register contents (even when the register is tagged empty).  The
 * remaining codes are derived from the tag, the exponent and the
 * significand.  Encodings matched by none of the tests below leave
 * C3, C2 and C0 clear.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    const uint64_t integer_bit = 0x8000000000000000ULL;
    CPU_LDoubleU temp;
    uint64_t mant;
    int exp;

    temp.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(temp)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    exp = EXPD(temp);
    mant = MANTD(temp);

    switch (exp) {
    case MAXEXPD:
        if (mant == integer_bit) {
            env->fpus |= 0x500; /* Infinity */
        } else if (mant & integer_bit) {
            env->fpus |= 0x100; /* NaN */
        }
        break;
    case 0:
        /* Zero when the significand is clear, Denormal otherwise */
        env->fpus |= (mant == 0) ? 0x4000 : 0x4400;
        break;
    default:
        if (mant & integer_bit) {
            /* C2 alone: ordinary finite value with the integer bit set */
            env->fpus |= 0x400;
        }
        break;
    }
}
2417  
/*
 * Store the x87 environment at @ptr through the prepared access @ac.
 *
 * @data32 selects the layout: non-zero writes seven 32-bit slots
 * (offsets 0..24), zero writes seven 16-bit slots (offsets 0..12).  The
 * slots hold, in order: control word, status word, tag word, fpip, fpcs,
 * fpdp and fpds.
 *
 * The full 2-bit-per-register tag word is rebuilt here from the internal
 * one-flag-per-register fptags[] and the register contents:
 *   3 = empty, 1 = zero, 2 = special (NaN, infinity, denormal, or integer
 *   bit clear), 0 = valid.
 * Physical register i occupies bits 2i+1:2i.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* Fold the top-of-stack index into bits 13:11 of the status word */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & (1ULL << 63)) == 0) {
                /*
                 * NaNs, infinity, denormal.  The mask is built with an
                 * unsigned constant: shifting 1LL into the sign bit is
                 * undefined behaviour in ISO C.
                 */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
        access_stl(ac, ptr + 24, env->fpds); /* fpos */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}
2465  
/*
 * FSTENV/FNSTENV: store the x87 environment at @ptr.
 *
 * The image is 14 bytes in the 16-bit layout and 28 bytes in the 32-bit
 * layout (@data32 != 0); that range is prepared for a store before any
 * byte is written.
 */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int env_size = 14 << data32;
    X86Access ac;

    access_prepare(&ac, env, ptr, env_size, MMU_DATA_STORE, ra);
    do_fstenv(&ac, ptr, data32);
}
2473  
/*
 * Install a new x87 status word.
 *
 * Bits 13:11 of @fpus are split off into env->fpstt; env->fpus keeps the
 * rest.  FPUS_B is never taken from @fpus: it is recomputed so that it
 * mirrors FPUS_SE.  In system emulation, a status word without FPUS_SE
 * also triggers cpu_clear_ignne().
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    bool summary;

    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800 & ~FPUS_B;

    summary = (env->fpus & FPUS_SE) != 0;
    if (summary) {
        env->fpus |= FPUS_B;
    }
#ifndef CONFIG_USER_ONLY
    if (!summary) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}
2489  
/*
 * Load the control, status and tag words of an x87 environment image at
 * @ptr.  The three words sit at offsets 0, 2 and 4 in the 16-bit layout
 * and at 0, 4 and 8 in the 32-bit layout (@data32 != 0).
 *
 * Only the "empty" state of the 2-bit tags is kept: fptags[i] becomes 1
 * when tag i is 3 and 0 otherwise.  The pointer fields of the image are
 * not read.
 */
static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int status, tags;

    cpu_set_fpuc(env, access_ldw(ac, ptr));
    status = access_ldw(ac, ptr + (2 << data32));
    tags = access_ldw(ac, ptr + (4 << data32));

    cpu_set_fpus(env, status);
    for (int reg = 0; reg < 8; reg++) {
        env->fptags[reg] = (((tags >> (2 * reg)) & 3) == 3);
    }
}
2505  
/*
 * FLDENV: load the x87 environment from @ptr.
 *
 * The image is 14 bytes in the 16-bit layout and 28 bytes in the 32-bit
 * layout (@data32 != 0).  The instruction only reads its memory operand
 * (do_fldenv() performs loads exclusively), so the range is prepared for
 * a load; preparing it for a store would wrongly fault on read-only
 * memory.
 */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC());
    do_fldenv(&ac, ptr, data32);
}
2513  
/*
 * Store the complete x87 state at @ptr: the environment image (14 or 28
 * bytes depending on @data32) followed by ST(0)..ST(7) in stack order at
 * a 10-byte stride.  do_fninit() is called on the FPU afterwards.
 */
static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong slot;
    int idx = 0;

    do_fstenv(ac, ptr, data32);
    slot = ptr + (14 << data32);

    while (idx < 8) {
        floatx80 value = ST(idx);

        do_fstt(ac, slot, value);
        slot += 10;
        idx++;
    }

    do_fninit(env);
}
2529  
/*
 * FSAVE/FNSAVE: store the x87 state at @ptr.  The image is the
 * environment (14 << @data32 bytes) plus eight 10-byte registers, and
 * the whole range is prepared for a store before anything is written.
 */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int image_size = (14 << data32) + 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, image_size, MMU_DATA_STORE, ra);
    do_fsave(&ac, ptr, data32);
}
2538  
/*
 * Load the complete x87 state from @ptr: the environment image (14 or 28
 * bytes depending on @data32) first, then ST(0)..ST(7) in stack order at
 * a 10-byte stride.  The registers are indexed relative to the
 * top-of-stack that the environment load has just installed.
 */
static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong slot;
    int idx = 0;

    do_fldenv(ac, ptr, data32);
    slot = ptr + (14 << data32);

    while (idx < 8) {
        floatx80 value = do_fldt(ac, slot);

        ST(idx) = value;
        slot += 10;
        idx++;
    }
}
2552  
/*
 * FRSTOR: load the x87 state from @ptr.  The image is the environment
 * (14 << @data32 bytes) plus eight 10-byte registers, and the whole
 * range is prepared for a load before anything is read.
 */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t ra = GETPC();
    int image_size = (14 << data32) + 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, image_size, MMU_DATA_LOAD, ra);
    do_frstor(&ac, ptr, data32);
}
2561  
/* Byte offset of a (possibly nested) member within X86XSaveArea */
#define XO(member)  offsetof(X86XSaveArea, member)
2563  
/*
 * Write the x87 portion of the legacy XSAVE/FXSAVE area at @ptr.
 *
 * Stored: the control word, the status word with the top-of-stack index
 * folded into bits 13:11, the abridged tag byte (one bit per physical
 * register, set when the register is NOT flagged empty), zeroed
 * instruction/data pointers, and ST(0)..ST(7) at a 16-byte stride.
 */
static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong slot = ptr + XO(legacy.fpregs);
    int status = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    int empty_mask = 0;
    int reg;

    for (reg = 0; reg < 8; reg++) {
        empty_mask |= (env->fptags[reg] << reg);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), status);
    /* The stored byte has the opposite polarity to fptags[] */
    access_stw(ac, ptr + XO(legacy.ftw), empty_mask ^ 0xff);

    /*
     * In 32-bit mode this is eip, sel, dp, sel.
     * In 64-bit mode this is rip, rdp.
     * But in either case we don't write actual data, just zeros.
     */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    for (reg = 0; reg < 8; reg++, slot += 16) {
        floatx80 value = ST(reg);

        do_fstt(ac, slot, value);
    }
}
2594  
/*
 * Write MXCSR and MXCSR_MASK into the legacy area at @ptr.
 * env->mxcsr is refreshed via update_mxcsr_from_sse_status() first; the
 * mask stored is the constant 0x0000ffff.
 */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);
    target_ulong mask_addr = ptr + XO(legacy.mxcsr_mask);

    update_mxcsr_from_sse_status(env);
    access_stl(ac, mxcsr_addr, env->mxcsr);
    access_stl(ac, mask_addr, 0x0000ffff);
}
2603  
/*
 * Write the low 128 bits of each XMM register into the legacy area at
 * @ptr, 16 bytes per register.  Sixteen registers are stored when
 * HF_CS64_MASK is set in hflags, eight otherwise.
 */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr + XO(legacy.xmm_regs);

    for (int reg = 0; reg < count; reg++, slot += 16) {
        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(0));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(1));
    }
}
2623  
/*
 * Write bits 255:128 of each vector register (quadwords 2 and 3) to the
 * component area starting at @ptr, 16 bytes per register.  Sixteen
 * registers are stored when HF_CS64_MASK is set in hflags, eight
 * otherwise.
 */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr;

    for (int reg = 0; reg < count; reg++) {
        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(2));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(3));
        slot += 16;
    }
}
2640  
/*
 * Write the four bound registers to the BNDREGS component at @ptr; each
 * register takes 16 bytes, lower bound first and upper bound second.
 */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong slot = ptr + offsetof(XSaveBNDREG, bnd_regs);

    for (int reg = 0; reg < 4; reg++) {
        access_stq(ac, slot, env->bnd_regs[reg].lb);
        access_stq(ac, slot + 8, env->bnd_regs[reg].ub);
        slot += 16;
    }
}
2652  
/*
 * Write the bounds configuration (cfgu) and status (sts) registers to
 * the BNDCSR component at @ptr.
 */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    access_stq(ac, cfgu_addr, env->bndcs_regs.cfgu);
    access_stq(ac, sts_addr, env->bndcs_regs.sts);
}
2662  
/* Write env->pkru as one 64-bit store at @ptr (the PKRU component). */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr, env->pkru);
}
2667  
/*
 * Body of FXSAVE: the x87 state is always written; MXCSR is written only
 * when CR4.OSFXSR is set.  The XMM registers are additionally written
 * unless the "fast FXSAVE" conditions all hold (EFER.FFXSR set, CPL 0
 * and HF_LMA_MASK set).
 */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast;

    do_xsave_fpu(ac, ptr);
    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }

    do_xsave_mxcsr(ac, ptr);

    /* Fast FXSAVE leaves out the XMM registers */
    fast = (env->efer & MSR_EFER_FFXSR)
           && !(env->hflags & HF_CPL_MASK)
           && (env->hflags & HF_LMA_MASK);
    if (!fast) {
        do_xsave_sse(ac, ptr);
    }
}
2683  
/*
 * FXSAVE: store the legacy FPU/SSE state at @ptr.
 * Raises #GP when @ptr is not 16-byte aligned; otherwise the whole
 * legacy area is prepared for a store before anything is written.
 */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if ((ptr & 0xf) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, ra);
    do_fxsave(&ac, ptr);
}
2698  
/*
 * Return the XINUSE bitmap reported for XSAVE/XSAVEOPT.
 *
 * For the most part XINUSE is not tracked, so every component is simply
 * reported as in use.  BNDREGS is the exception: its state is important
 * enough to be tracked in HFLAGS (HF_MPX_IU_MASK), so it is reported
 * accurately here.
 */
static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t all_in_use = -1;

    if (env->hflags & HF_MPX_IU_MASK) {
        return all_in_use;
    }
    return all_in_use & ~XSTATE_BNDREGS_MASK;
}
2712  
/*
 * Write the state components selected by @opt into the XSAVE area at
 * @ptr, then update the header's XSTATE_BV.
 *
 * @rfbm:  requested-feature bitmap (already masked by the caller)
 * @inuse: components reported as in use
 * @opt:   components whose data is actually written
 *
 * MXCSR is keyed on @rfbm rather than @opt.  In XSTATE_BV, bits outside
 * @rfbm keep their previous value and bits inside @rfbm are replaced by
 * the corresponding bits of @inuse.
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    target_ulong bv_addr = ptr + XO(header.xstate_bv);
    uint64_t bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
    if (rfbm & XSTATE_SSE_MASK) {
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /* Update the XSTATE_BV field.  */
    bv = access_ldq(ac, bv_addr);
    bv = (bv & ~rfbm) | (inuse & rfbm);
    access_stq(ac, bv_addr, bv);
}
2746  
/*
 * Preconditions shared by XSAVE and XRSTOR: raises #UD when CR4.OSXSAVE
 * is clear, and #GP when @ptr is not 64-byte aligned.  @ra is the host
 * return address passed on to raise_exception_ra().
 */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    /* The OS must have enabled XSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if ((ptr & 63) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}
2759  
/*
 * Common implementation of XSAVE and XSAVEOPT.
 *
 * After the shared checks, @rfbm is restricted to the components enabled
 * in XCR0 and @opt to the restricted @rfbm.  The memory range given by
 * xsave_area_size() for @opt is prepared for a store and the components
 * are then written by do_xsave_access().
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    uint64_t enabled_rfbm, enabled_opt;
    unsigned area_size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0.  */
    enabled_rfbm = rfbm & env->xcr0;
    enabled_opt = opt & enabled_rfbm;
    area_size = xsave_area_size(enabled_opt, false);

    access_prepare(&ac, env, ptr, area_size, MMU_DATA_STORE, ra);
    do_xsave_access(&ac, ptr, enabled_rfbm, inuse, enabled_opt);
}
2776  
/*
 * XSAVE: every component requested in @rfbm is written, whether or not
 * it is reported as in use.
 */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t inuse = get_xinuse(env);

    do_xsave(env, ptr, rfbm, inuse, rfbm, ra);
}
2781  
/*
 * XSAVEOPT: like XSAVE, but only components reported as in use by
 * get_xinuse() are candidates for being written.
 */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t inuse = get_xinuse(env);

    do_xsave(env, ptr, rfbm, inuse, inuse, ra);
}
2787  
/*
 * Load the x87 portion of the legacy XSAVE/FXSAVE area at @ptr.
 *
 * The control, status and abridged tag words are read first and the
 * control and status words installed.  The abridged tag has one bit per
 * register with the opposite polarity to fptags[], so it is inverted.
 * ST(0)..ST(7) are then read at a 16-byte stride, indexed relative to
 * the top-of-stack that was just installed.
 */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong slot = ptr + XO(legacy.fpregs);
    int control = access_ldw(ac, ptr + XO(legacy.fcw));
    int status = access_ldw(ac, ptr + XO(legacy.fsw));
    int empty_mask = access_ldw(ac, ptr + XO(legacy.ftw)) ^ 0xff;
    int reg;

    cpu_set_fpuc(env, control);
    cpu_set_fpus(env, status);

    for (reg = 0; reg < 8; reg++) {
        env->fptags[reg] = ((empty_mask >> reg) & 1);
    }

    for (reg = 0; reg < 8; reg++, slot += 16) {
        floatx80 value = do_fldt(ac, slot);

        ST(reg) = value;
    }
}
2813  
/* Load MXCSR from the legacy area at @ptr and install it. */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);

    cpu_set_mxcsr(env, access_ldl(ac, mxcsr_addr));
}
2819  
/*
 * Load the low 128 bits of each XMM register from the legacy area at
 * @ptr, 16 bytes per register.  Sixteen registers are loaded when
 * HF_CS64_MASK is set in hflags, eight otherwise.
 */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr + XO(legacy.xmm_regs);

    for (int reg = 0; reg < count; reg++, slot += 16) {
        env->xmm_regs[reg].ZMM_Q(0) = access_ldq(ac, slot);
        env->xmm_regs[reg].ZMM_Q(1) = access_ldq(ac, slot + 8);
    }
}
2839  
/*
 * Zero the low 128 bits of each XMM register: sixteen registers when
 * HF_CS64_MASK is set in hflags, eight otherwise.
 */
static void do_clear_sse(CPUX86State *env)
{
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;

    for (int reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(0) = 0;
        env->xmm_regs[reg].ZMM_Q(1) = 0;
    }
}
2855  
/*
 * Load bits 255:128 of each vector register (quadwords 2 and 3) from the
 * component area starting at @ptr, 16 bytes per register.  Sixteen
 * registers are loaded when HF_CS64_MASK is set in hflags, eight
 * otherwise.
 */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr;

    for (int reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(2) = access_ldq(ac, slot);
        env->xmm_regs[reg].ZMM_Q(3) = access_ldq(ac, slot + 8);
        slot += 16;
    }
}
2872  
/*
 * Zero bits 255:128 of each vector register (quadwords 2 and 3): sixteen
 * registers when HF_CS64_MASK is set in hflags, eight otherwise.
 */
static void do_clear_ymmh(CPUX86State *env)
{
    int count = (env->hflags & HF_CS64_MASK) ? 16 : 8;

    for (int reg = 0; reg < count; reg++) {
        env->xmm_regs[reg].ZMM_Q(2) = 0;
        env->xmm_regs[reg].ZMM_Q(3) = 0;
    }
}
2888  
/*
 * Load the four bound registers from the BNDREGS component at @ptr; each
 * register takes 16 bytes, lower bound first and upper bound second.
 */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong slot = ptr + offsetof(XSaveBNDREG, bnd_regs);

    for (int reg = 0; reg < 4; reg++) {
        env->bnd_regs[reg].lb = access_ldq(ac, slot);
        env->bnd_regs[reg].ub = access_ldq(ac, slot + 8);
        slot += 16;
    }
}
2900  
/*
 * Load the bounds configuration (cfgu) and status (sts) registers from
 * the BNDCSR component at @ptr.
 */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu = access_ldq(ac, cfgu_addr);
    env->bndcs_regs.sts = access_ldq(ac, sts_addr);
}
2911  
/* Load env->pkru from one 64-bit read at @ptr (the PKRU component). */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    env->pkru = access_ldq(ac, ptr);
}
2916  
/*
 * Body of FXRSTOR: the x87 state is always loaded; MXCSR is loaded only
 * when CR4.OSFXSR is set.  The XMM registers are additionally loaded
 * unless the "fast FXRSTOR" conditions all hold (EFER.FFXSR set, CPL 0
 * and HF_LMA_MASK set).
 */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast;

    do_xrstor_fpu(ac, ptr);
    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }

    do_xrstor_mxcsr(ac, ptr);

    /* Fast FXRSTOR leaves out the XMM registers */
    fast = (env->efer & MSR_EFER_FFXSR)
           && !(env->hflags & HF_CPL_MASK)
           && (env->hflags & HF_LMA_MASK);
    if (!fast) {
        do_xrstor_sse(ac, ptr);
    }
}
2932  
/*
 * FXRSTOR: load the legacy FPU/SSE state from @ptr.
 * Raises #GP when @ptr is not 16-byte aligned; otherwise the whole
 * legacy area is prepared for a load before anything is read.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if ((ptr & 0xf) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}
2947  
/*
 * Read and validate the XSAVE header of the area at @ptr.
 *
 * *@pxsbv always receives the XSTATE_BV field, even when validation
 * fails.  Returns true when XCOMP_BV and the following reserved quadword
 * are both zero and XSTATE_BV names no component that is absent from
 * XCR0; returns false otherwise.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    uint64_t bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    uint64_t comp = access_ldq(ac, ptr + XO(header.xcomp_bv));
    uint64_t rsvd = access_ldq(ac, ptr + XO(header.reserve0));

    *pxsbv = bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP.  That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    if (comp != 0 || rsvd != 0) {
        return false;
    }

    /* The XSTATE_BV field must not set bits not present in XCR0.  */
    return !(bv & ~ac->env->xcr0);
}
2971  
/*
 * Restore the state components selected by @rfbm from the XSAVE area at
 * @ptr.
 *
 * For each component requested in @rfbm: if its bit is set in
 * @xstate_bv the data is loaded from memory, otherwise the component is
 * reset to its initial configuration.  MXCSR is loaded from memory
 * whenever SSE is requested, regardless of @xstate_bv.  A change of PKRU
 * flushes the TLB.
 */
static void do_xrstor(X86Access *ac, target_ulong ptr,
                      uint64_t rfbm, uint64_t xstate_bv)
{
    CPUX86State *env = ac->env;

    if (rfbm & XSTATE_FP_MASK) {
        if (!(xstate_bv & XSTATE_FP_MASK)) {
            do_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        } else {
            do_xrstor_fpu(ac, ptr);
        }
    }

    if (rfbm & XSTATE_SSE_MASK) {
        /*
         * Note that the standard form of XRSTOR loads MXCSR from memory
         * whether or not the XSTATE_BV bit is set.
         */
        do_xrstor_mxcsr(ac, ptr);
        if (!(xstate_bv & XSTATE_SSE_MASK)) {
            do_clear_sse(env);
        } else {
            do_xrstor_sse(ac, ptr);
        }
    }

    if (rfbm & XSTATE_YMM_MASK) {
        if (!(xstate_bv & XSTATE_YMM_MASK)) {
            do_clear_ymmh(env);
        } else {
            do_xrstor_ymmh(ac, ptr + XO(avx_state));
        }
    }

    if (rfbm & XSTATE_BNDREGS_MASK) {
        /* HF_MPX_IU_MASK tracks whether BNDREGS holds loaded state */
        if (!(xstate_bv & XSTATE_BNDREGS_MASK)) {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        } else {
            do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
            env->hflags |= HF_MPX_IU_MASK;
        }
    }

    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (!(xstate_bv & XSTATE_BNDCSR_MASK)) {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        } else {
            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
        }
        cpu_sync_bndcs_hflags(env);
    }

    if (rfbm & XSTATE_PKRU_MASK) {
        uint64_t prev_pkru = env->pkru;

        if (!(xstate_bv & XSTATE_PKRU_MASK)) {
            env->pkru = 0;
        } else {
            do_xrstor_pkru(ac, ptr + XO(pkru_state));
        }
        if (env->pkru != prev_pkru) {
            tlb_flush(env_cpu(env));
        }
    }
}
3032  
/*
 * XO() is last used by do_xrstor() above; drop it here so the macro does
 * not leak into the rest of the file.
 */
#undef XO
3034  
/*
 * TCG helper for XRSTOR.
 *
 * @env:  CPU state
 * @ptr:  guest address of the XSAVE image
 * @rfbm: requested-feature bitmap supplied by the instruction; it is
 *        masked with XCR0 before use
 *
 * Runs do_xsave_chk() on @ptr first.  Raises #GP when the image header
 * does not pass valid_xrstor_header().
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t ra = GETPC();
    /* Legacy region plus header: the least we need to read the header. */
    unsigned hdr_size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
    unsigned full_size;
    uint64_t xstate_bv;
    X86Access ac;

    do_xsave_chk(env, ptr, ra);

    access_prepare(&ac, env, ptr, hdr_size, MMU_DATA_LOAD, ra);
    if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Only components enabled in XCR0 can be restored. */
    rfbm &= env->xcr0;

    /*
     * Widen the access when the components that will really be loaded
     * reach beyond the legacy area and header.
     */
    full_size = xsave_area_size(rfbm & xstate_bv, false);
    if (full_size > hdr_size) {
        /* TODO: See if existing page probe has covered extra size. */
        access_prepare(&ac, env, ptr, full_size, MMU_DATA_LOAD, ra);
    }

    do_xrstor(&ac, ptr, rfbm, xstate_bv);
}
3060  
3061  #if defined(CONFIG_USER_ONLY)
/*
 * Write an FSAVE image of the x87 state into the host buffer @host.
 *
 * The image occupies 4 * 7 + 8 * 10 bytes and @len must be at least that
 * large (asserted).  The access descriptor points straight at host
 * memory, so the address passed to do_fsave() is 0.
 * NOTE(review): the 'true' argument presumably selects the 32-bit
 * layout (seven dwords of environment, eight 10-byte registers) --
 * confirm against do_fsave().
 */
void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host };

    ac.size = 4 * 7 + 8 * 10;
    assert(len >= ac.size);

    do_fsave(&ac, 0, true);
}
3073  
/*
 * Load the x87 state from an FSAVE image held in the host buffer @host.
 *
 * The image occupies 4 * 7 + 8 * 10 bytes and @len must be at least that
 * large (asserted).  The access descriptor points straight at host
 * memory, so the address passed to do_frstor() is 0.
 * NOTE(review): the 'true' argument presumably selects the 32-bit
 * layout -- confirm against do_frstor().
 */
void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host };

    ac.size = 4 * 7 + 8 * 10;
    assert(len >= ac.size);

    do_frstor(&ac, 0, true);
}
3085  
/*
 * Write an FXSAVE image into the host buffer @host.
 *
 * The image is sizeof(X86LegacyXSaveArea) bytes and @len must be at
 * least that large (asserted).  The access descriptor points straight at
 * host memory, so the address passed to do_fxsave() is 0.
 */
void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host };

    ac.size = sizeof(X86LegacyXSaveArea);
    assert(len >= ac.size);

    do_fxsave(&ac, 0);
}
3097  
/*
 * Load state from an FXSAVE image held in the host buffer @host.
 *
 * The image is sizeof(X86LegacyXSaveArea) bytes and @len must be at
 * least that large (asserted).  The access descriptor points straight at
 * host memory, so the address passed to do_fxrstor() is 0.
 */
void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = { .env = env, .haddr1 = host };

    ac.size = sizeof(X86LegacyXSaveArea);
    assert(len >= ac.size);

    do_fxrstor(&ac, 0);
}
3109  
/*
 * Write an XSAVE image covering the components in @rfbm into the host
 * buffer @host.
 *
 * @rfbm must be a subset of XCR0, and @len must be at least
 * xsave_area_size(@rfbm, false).  Both are asserted rather than handled:
 * the only caller is user-level signal handling, which is expected to
 * have set things up correctly.
 */
void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = { .env = env, .haddr1 = host };

    assert(!(rfbm & ~env->xcr0));

    ac.size = xsave_area_size(rfbm, false);
    assert(len >= ac.size);

    /* The buffer is host memory, hence the address of 0. */
    do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
}
3126  
/*
 * Load the components in @rfbm from an XSAVE image held in the host
 * buffer @host.
 *
 * @rfbm must be a subset of XCR0, and @len must be at least
 * xsave_area_size(@rfbm, false).  Both are asserted rather than handled:
 * the only caller is user-level signal handling, which is expected to
 * have set things up correctly.
 *
 * Returns false, restoring nothing, when the image header does not pass
 * valid_xrstor_header(); returns true once the state has been restored.
 */
bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = { .env = env, .haddr1 = host };
    uint64_t xstate_bv;
    bool header_ok;

    assert(!(rfbm & ~env->xcr0));

    ac.size = xsave_area_size(rfbm, false);
    assert(len >= ac.size);

    /* The buffer is host memory, hence the address of 0. */
    header_ok = valid_xrstor_header(&ac, &xstate_bv, 0);
    if (header_ok) {
        do_xrstor(&ac, 0, rfbm, xstate_bv);
    }
    return header_ok;
}
3149  #endif
3150  
/*
 * TCG helper for XGETBV.
 *
 * @env: CPU state
 * @ecx: index of the extended control register to read
 *
 * Raises #UD unless CR4.OSXSAVE is set.  Index 0 returns XCR0.  Index 1
 * returns XCR0 masked with get_xinuse(), but only when the CPU
 * advertises CPUID_XSAVE_XGETBV1.  Every other case raises #GP.
 */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    /* The OS must have enabled XSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    if (ecx == 0) {
        return env->xcr0;
    }
    if (ecx == 1 && (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1)) {
        return env->xcr0 & get_xinuse(env);
    }

    /* Unknown index, or index 1 without XGETBV1 support. */
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
3169  
/*
 * TCG helper for XSETBV.
 *
 * @env:  CPU state
 * @ecx:  index of the extended control register to write
 * @mask: new value for the register
 *
 * Raises #UD unless CR4.OSXSAVE is set.  Raises #GP when @ecx is not 0,
 * or when @mask clears the x87 bit, enables AVX without SSE, enables a
 * component CPUID leaf 0xD does not report, or enables only one of the
 * two MPX components.  Otherwise stores @mask in XCR0 and resyncs the
 * MPX and AVX hflags.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    const uintptr_t ra = GETPC();
    uint32_t unused, supported_lo, supported_hi;
    uint64_t supported, bndregs_as_bndcsr;

    /* The OS must have enabled XSAVE.  */
    if ((env->cr[4] & CR4_OSXSAVE_MASK) == 0) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* Only XCR0 is defined at present.  */
    if (ecx != 0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The FPU may not be disabled.  */
    if (!(mask & XSTATE_FP_MASK)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* SSE can be disabled, but only if AVX is disabled too.  */
    if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Disallow enabling unimplemented features (EDX:EAX of leaf 0xD).  */
    cpu_x86_cpuid(env, 0x0d, 0, &supported_lo, &unused, &unused,
                  &supported_hi);
    supported = ((uint64_t)supported_hi << 32) | supported_lo;
    if (mask & ~supported) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /*
     * Disallow enabling only half of MPX.  The multiplication moves the
     * BNDREGS bit into the BNDCSR position, so the XOR leaves the BNDCSR
     * bit set exactly when the two enables differ.
     */
    bndregs_as_bndcsr = mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK);
    if ((mask ^ bndregs_as_bndcsr) & XSTATE_BNDCSR_MASK) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    cpu_sync_avx_hflag(env);
}
3211  
3212  /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
3214  
/* MXCSR control fields consumed by update_mxcsr_status() below. */
#define SSE_DAZ             0x0040  /* denormals-are-zero enable */
#define SSE_RC_SHIFT        13      /* position of the rounding-control field */
#define SSE_RC_MASK         (3 << SSE_RC_SHIFT)  /* two-bit rounding control */
#define SSE_FZ              0x8000  /* flush-to-zero enable */
3219  
/*
 * Bring env->sse_status in line with the current env->mxcsr.
 *
 * Copies, in this order, the rounding mode, the five sticky exception
 * flags (IE, ZE, OE, UE, PE), the denormals-are-zero setting and the
 * flush-to-zero setting from MXCSR into the softfloat status.
 */
void update_mxcsr_status(CPUX86State *env)
{
    const uint32_t mxcsr = env->mxcsr;
    int rounding = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
    int exc_flags = 0;

    /* Rounding control. */
    set_x86_rounding_mode(rounding, &env->sse_status);

    /* Translate each MXCSR status bit to its softfloat counterpart. */
    if (mxcsr & FPUS_IE) {
        exc_flags |= float_flag_invalid;
    }
    if (mxcsr & FPUS_ZE) {
        exc_flags |= float_flag_divbyzero;
    }
    if (mxcsr & FPUS_OE) {
        exc_flags |= float_flag_overflow;
    }
    if (mxcsr & FPUS_UE) {
        exc_flags |= float_flag_underflow;
    }
    if (mxcsr & FPUS_PE) {
        exc_flags |= float_flag_inexact;
    }
    set_float_exception_flags(exc_flags, &env->sse_status);

    /* Denormals are zero. */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) != 0, &env->sse_status);

    /* Flush to zero. */
    set_flush_to_zero((mxcsr & SSE_FZ) != 0, &env->sse_status);
}
3243  
/*
 * Merge the exception flags accumulated in env->sse_status into the
 * status bits of env->mxcsr.  Bits are only ever set here, never
 * cleared.
 */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    uint8_t flags = get_float_exception_flags(&env->sse_status);
    uint32_t raised = 0;

    /*
     * float_flag_input_denormal is deliberately left out: softfloat
     * raises it only when an input denormal was flushed to zero, while
     * the MXCSR denormal flag is raised only when the input was NOT
     * flushed, so the two have opposite meanings.
     */
    if (flags & float_flag_invalid) {
        raised |= FPUS_IE;
    }
    if (flags & float_flag_divbyzero) {
        raised |= FPUS_ZE;
    }
    if (flags & float_flag_overflow) {
        raised |= FPUS_OE;
    }
    if (flags & float_flag_underflow) {
        raised |= FPUS_UE;
    }
    if (flags & float_flag_inexact) {
        raised |= FPUS_PE;
    }
    /* An output flushed to zero is reported as underflow plus inexact. */
    if (flags & float_flag_output_denormal) {
        raised |= FPUS_UE | FPUS_PE;
    }

    env->mxcsr |= raised;
}
3262  
/*
 * TCG helper: fold the exception flags pending in env->sse_status into
 * MXCSR by calling update_mxcsr_from_sse_status().
 */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
3267  
/*
 * TCG helper for loading MXCSR: hands @val to cpu_set_mxcsr().
 * NOTE(review): cpu_set_mxcsr() presumably also refreshes the derived
 * softfloat status -- confirm at its definition.
 */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
3272  
/*
 * Prepare the x87 state for an MMX instruction: reset the top-of-stack
 * index to 0 and set every tag byte to 0.
 *
 * memset() replaces the previous pair of 32-bit stores made through a
 * cast of the tag array.  It writes exactly the same eight bytes without
 * relying on type punning or on the array's alignment.
 * NOTE(review): assumes env->fptags is the 8-byte array those two stores
 * covered.
 */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;
    memset(env->fptags, 0, sizeof(env->fptags));
}
3279  
/*
 * EMMS: mark every x87 register as empty by setting each tag byte to 1.
 *
 * memset() replaces the previous pair of 32-bit 0x01010101 stores made
 * through a cast of the tag array.  It writes exactly the same eight
 * bytes without relying on type punning or on the array's alignment.
 * NOTE(review): assumes env->fptags is the 8-byte array those two stores
 * covered.
 */
void helper_emms(CPUX86State *env)
{
    /* set to empty state */
    memset(env->fptags, 1, sizeof(env->fptags));
}
3286  
/*
 * Instantiate ops_sse.h three times, once per SHIFT value.  SHIFT is
 * redefined here without an intervening #undef, so ops_sse.h is
 * presumably responsible for undefining it before it ends.
 * NOTE(review): SHIFT 0/1/2 presumably select the MMX, XMM and YMM
 * operand widths -- confirm in ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"

#define SHIFT 2
#include "ops_sse.h"
3295