/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

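/*
 * Worked example of the macros above: CASE_MODRM_MEM_OP(7) expands to
 * the case ranges 0x38...0x3f, 0x78...0x7f and 0xb8...0xbf, i.e. every
 * modrm byte with mod = 0..2 (a memory operand) and reg/opcode field 7,
 * for any rm.  CASE_MODRM_OP(7) additionally covers 0xf8...0xff
 * (mod = 3, register operands).
 */
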
//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    MemOp aflag;
    MemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non-zero if any of DS/ES/SS has a non-zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

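/*
 * Example of how the table above drives set_cc_op() below: moving from
 * CC_OP_SUBB (live set DST|SRC|SRCT) to CC_OP_LOGICB (live set DST)
 * leaves SRC and SRCT dead, so cpu_cc_src and cc_srcT are discarded,
 * telling TCG their current values need not be kept alive.
 */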
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4". Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}
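
/*
 * Encoding background for the check above: without a REX prefix, reg
 * values 4..7 in a byte operation select AH/CH/DH/BH; once any REX
 * prefix has been seen (x86_64_hregs), the same values select
 * SPL/BPL/SIL/DIL instead, so the xH mapping must be suppressed.
 */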

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
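
/*
 * Example of the MO_16 path above: a 16-bit effective address wraps
 * within 64K before the segment base is added, so a caller-computed
 * 0xffff + 1 = 0x10000 becomes offset 0x0000 within the segment; the
 * ext16u before the add is what implements that wrap.
 */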

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}
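
/*
 * Note on gen_ext_tl(): for the full target width (the default case)
 * it returns SRC unchanged and never writes DST, so callers must use
 * the returned TCGv rather than assuming DST was populated.
 */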

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;
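
/*
 * How a CCPrepare is consumed (see gen_setcc1/gen_jcc1 below): e.g.
 * { .cond = TCG_COND_NE, .reg = cpu_cc_src, .mask = CC_C } reads as
 * "the condition holds iff (cc_src & CC_C) != 0"; .use_reg2 switches
 * the comparison from the immediate .imm to the register .reg2.
 */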

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

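    /* Fast path for a single-bit mask: shift the tested flag down to
       bit 0 and mask it, e.g. mask == CC_Z (0x40) becomes a shift
       right by 6 plus an AND with 1, avoiding a setcond.  */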
    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}
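
/*
 * Note the two labels above: l1 skips the TB exit while ECX != 0 so
 * the string operation runs once more, while the returned l2 marks
 * the "done" exit that the REP expansions below jump back to.
 */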

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* Same method as Valgrind: we generate jumps to the current or next
   instruction. */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
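
/*
 * Each instantiation above defines a helper such as gen_repz_movs() or
 * gen_repz_cmps(); the GEN_REPZ2 variants take the extra NZ argument
 * so that REPZ/REPNZ on scas and cmps can test ZF between iterations.
 */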

static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering: for the STN,ST0 forms the
   fsub/fsubr and fdiv/fdivr helpers trade places. */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
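
/*
 * Note the locked OP_SUBL path above: subtraction is implemented by
 * negating T1 and doing an atomic fetch-add, which also captures the
 * old memory value in cc_srcT so the flags can be computed exactly as
 * in the non-locked path.
 */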

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
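
/*
 * INC and DEC preserve CF, which is why gen_inc() first materializes
 * the old carry into cpu_cc_src (via gen_compute_eflags_c) before
 * switching to a CC_OP_INC/CC_OP_DEC cc_op that reads it back from
 * there.
 */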

static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
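
/*
 * Worked example for gen_shift_flags(): for SHL the caller passes
 * SHM1 = value << (count - 1), whose top bit is the last bit shifted
 * out; gen_prepare_eflags_c() later recovers CF from exactly that bit
 * of CC_SRC.  The movcond pairs keep the old flag state when the
 * masked count turns out to be zero.
 */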

static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if the shift count is non-zero */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}

/* XXX: add faster immediate = 1 case */
static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
                           int is_right)
{
    gen_compute_eflags(s);
    assert(s->cc_op == CC_OP_EFLAGS);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    if (is_right) {
        switch (ot) {
        case MO_8:
            gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_16:
            gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_32:
            gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
            break;
#endif
        default:
            tcg_abort();
        }
    } else {
        switch (ot) {
        case MO_8:
            gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_16:
            gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
            break;
        case MO_32:
            gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
            break;
#ifdef TARGET_X86_64
        case MO_64:
            gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
            break;
#endif
        default:
            tcg_abort();
        }
    }
    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);
}

/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
            tcg_gen_mov_tl(s->T1, s->T0);
            tcg_gen_mov_tl(s->T0, s->tmp0);
        } else {
1783             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1784         }
1785         /*
1786          * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1787          * otherwise fall through to the default case.
1788          */
1789     case MO_32:
1790 #ifdef TARGET_X86_64
1791         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1792         tcg_gen_subi_tl(s->tmp0, count, 1);
1793         if (is_right) {
1794             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1795             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1796             tcg_gen_shr_i64(s->T0, s->T0, count);
1797         } else {
1798             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1799             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1800             tcg_gen_shl_i64(s->T0, s->T0, count);
1801             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1802             tcg_gen_shri_i64(s->T0, s->T0, 32);
1803         }
1804         break;
1805 #endif
1806     default:
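             /* Generic case.  E.g. for SHRD the result is
                (T0 >> count) | (T1 << (width - count)); tmp0 receives T0
                shifted by count - 1, from which gen_shift_flags derives CF.  */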
1807         tcg_gen_subi_tl(s->tmp0, count, 1);
1808         if (is_right) {
1809             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1810 
1811             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1812             tcg_gen_shr_tl(s->T0, s->T0, count);
1813             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1814         } else {
1815             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1816             if (ot == MO_16) {
1817                 /* Only needed if count > 16, for Intel behaviour.  */
1818                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1819                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1820                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1821             }
1822 
1823             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1824             tcg_gen_shl_tl(s->T0, s->T0, count);
1825             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1826         }
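             /* A count of zero must leave the destination unmodified, so
                squash T1 to zero before OR-ing it into the result.  */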
1827         tcg_gen_movi_tl(s->tmp4, 0);
1828         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1829                            s->tmp4, s->T1);
1830         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1831         break;
1832     }
1833 
1834     /* store */
1835     gen_op_st_rm_T0_A0(s, ot, op1);
1836 
1837     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1838     tcg_temp_free(count);
1839 }
1840 
1841 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1842 {
1843     if (s != OR_TMP1)
1844         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1845     switch(op) {
1846     case OP_ROL:
1847         gen_rot_rm_T1(s1, ot, d, 0);
1848         break;
1849     case OP_ROR:
1850         gen_rot_rm_T1(s1, ot, d, 1);
1851         break;
1852     case OP_SHL:
1853     case OP_SHL1:
1854         gen_shift_rm_T1(s1, ot, d, 0, 0);
1855         break;
1856     case OP_SHR:
1857         gen_shift_rm_T1(s1, ot, d, 1, 0);
1858         break;
1859     case OP_SAR:
1860         gen_shift_rm_T1(s1, ot, d, 1, 1);
1861         break;
1862     case OP_RCL:
1863         gen_rotc_rm_T1(s1, ot, d, 0);
1864         break;
1865     case OP_RCR:
1866         gen_rotc_rm_T1(s1, ot, d, 1);
1867         break;
1868     }
1869 }
1870 
1871 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1872 {
1873     switch(op) {
1874     case OP_ROL:
1875         gen_rot_rm_im(s1, ot, d, c, 0);
1876         break;
1877     case OP_ROR:
1878         gen_rot_rm_im(s1, ot, d, c, 1);
1879         break;
1880     case OP_SHL:
1881     case OP_SHL1:
1882         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1883         break;
1884     case OP_SHR:
1885         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1886         break;
1887     case OP_SAR:
1888         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1889         break;
1890     default:
1891         /* currently not optimized */
1892         tcg_gen_movi_tl(s1->T1, c);
1893         gen_shift(s1, op, ot, d, OR_TMP1);
1894         break;
1895     }
1896 }
1897 
1898 #define X86_MAX_INSN_LENGTH 15
1899 
1900 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1901 {
1902     uint64_t pc = s->pc;
1903 
1904     s->pc += num_bytes;
1905     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1906         /* If the instruction's 16th byte is on a different page than the 1st, a
1907          * page fault on the second page wins over the general protection fault
1908          * caused by the instruction being too long.
1909          * This can happen even if the operand is only one byte long!
1910          */
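             /* Probe the page containing the final byte so that any #PF on
                it is raised now, before the over-length fault reported after
                the siglongjmp below.  */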
1911         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1912             volatile uint8_t unused =
1913                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1914             (void) unused;
1915         }
1916         siglongjmp(s->jmpbuf, 1);
1917     }
1918 
1919     return pc;
1920 }
1921 
1922 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1923 {
1924     return translator_ldub(env, advance_pc(env, s, 1));
1925 }
1926 
1927 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1928 {
1929     return translator_ldsw(env, advance_pc(env, s, 2));
1930 }
1931 
1932 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1933 {
1934     return translator_lduw(env, advance_pc(env, s, 2));
1935 }
1936 
1937 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1938 {
1939     return translator_ldl(env, advance_pc(env, s, 4));
1940 }
1941 
1942 #ifdef TARGET_X86_64
1943 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1944 {
1945     return translator_ldq(env, advance_pc(env, s, 8));
1946 }
1947 #endif
1948 
1949 /* Decompose an address.  */
1950 
1951 typedef struct AddressParts {
1952     int def_seg;
1953     int base;
1954     int index;
1955     int scale;
1956     target_long disp;
1957 } AddressParts;
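     /* E.g. "lea 0x10(%rax,%rbx,4)" decomposes into base = R_EAX,
        index = R_EBX, scale = 2 (the log2 of the factor), disp = 0x10,
        def_seg = R_DS.  */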
1958 
1959 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1960                                     int modrm)
1961 {
1962     int def_seg, base, index, scale, mod, rm;
1963     target_long disp;
1964     bool havesib;
1965 
1966     def_seg = R_DS;
1967     index = -1;
1968     scale = 0;
1969     disp = 0;
1970 
1971     mod = (modrm >> 6) & 3;
1972     rm = modrm & 7;
1973     base = rm | REX_B(s);
1974 
1975     if (mod == 3) {
1976         /* Normally filtered out earlier, but including this path
1977            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1978         goto done;
1979     }
1980 
1981     switch (s->aflag) {
1982     case MO_64:
1983     case MO_32:
1984         havesib = 0;
1985         if (rm == 4) {
1986             int code = x86_ldub_code(env, s);
1987             scale = (code >> 6) & 3;
1988             index = ((code >> 3) & 7) | REX_X(s);
1989             if (index == 4) {
1990                 index = -1;  /* no index */
1991             }
1992             base = (code & 7) | REX_B(s);
1993             havesib = 1;
1994         }
1995 
1996         switch (mod) {
1997         case 0:
1998             if ((base & 7) == 5) {
1999                 base = -1;
2000                 disp = (int32_t)x86_ldl_code(env, s);
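                     /* In 64-bit mode, mod == 0 with rm == 5 and no SIB byte
                        is RIP-relative; base == -2 marks it, and the address
                        of the next instruction is folded into disp.  */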
2001                 if (CODE64(s) && !havesib) {
2002                     base = -2;
2003                     disp += s->pc + s->rip_offset;
2004                 }
2005             }
2006             break;
2007         case 1:
2008             disp = (int8_t)x86_ldub_code(env, s);
2009             break;
2010         default:
2011         case 2:
2012             disp = (int32_t)x86_ldl_code(env, s);
2013             break;
2014         }
2015 
2016         /* For correct popl handling with esp.  */
2017         if (base == R_ESP && s->popl_esp_hack) {
2018             disp += s->popl_esp_hack;
2019         }
2020         if (base == R_EBP || base == R_ESP) {
2021             def_seg = R_SS;
2022         }
2023         break;
2024 
2025     case MO_16:
2026         if (mod == 0) {
2027             if (rm == 6) {
2028                 base = -1;
2029                 disp = x86_lduw_code(env, s);
2030                 break;
2031             }
2032         } else if (mod == 1) {
2033             disp = (int8_t)x86_ldub_code(env, s);
2034         } else {
2035             disp = (int16_t)x86_lduw_code(env, s);
2036         }
2037 
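             /* 16-bit addressing selects among the fixed base/index pairs:
                BX+SI, BX+DI, BP+SI, BP+DI, SI, DI, BP, BX.  */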
2038         switch (rm) {
2039         case 0:
2040             base = R_EBX;
2041             index = R_ESI;
2042             break;
2043         case 1:
2044             base = R_EBX;
2045             index = R_EDI;
2046             break;
2047         case 2:
2048             base = R_EBP;
2049             index = R_ESI;
2050             def_seg = R_SS;
2051             break;
2052         case 3:
2053             base = R_EBP;
2054             index = R_EDI;
2055             def_seg = R_SS;
2056             break;
2057         case 4:
2058             base = R_ESI;
2059             break;
2060         case 5:
2061             base = R_EDI;
2062             break;
2063         case 6:
2064             base = R_EBP;
2065             def_seg = R_SS;
2066             break;
2067         default:
2068         case 7:
2069             base = R_EBX;
2070             break;
2071         }
2072         break;
2073 
2074     default:
2075         tcg_abort();
2076     }
2077 
2078  done:
2079     return (AddressParts){ def_seg, base, index, scale, disp };
2080 }
2081 
2082 /* Compute the address, with a minimum number of TCG ops.  */
2083 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2084 {
2085     TCGv ea = NULL;
2086 
2087     if (a.index >= 0) {
2088         if (a.scale == 0) {
2089             ea = cpu_regs[a.index];
2090         } else {
2091             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2092             ea = s->A0;
2093         }
2094         if (a.base >= 0) {
2095             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2096             ea = s->A0;
2097         }
2098     } else if (a.base >= 0) {
2099         ea = cpu_regs[a.base];
2100     }
2101     if (!ea) {
2102         tcg_gen_movi_tl(s->A0, a.disp);
2103         ea = s->A0;
2104     } else if (a.disp != 0) {
2105         tcg_gen_addi_tl(s->A0, ea, a.disp);
2106         ea = s->A0;
2107     }
2108 
2109     return ea;
2110 }
2111 
2112 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2113 {
2114     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2115     TCGv ea = gen_lea_modrm_1(s, a);
2116     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2117 }
2118 
2119 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2120 {
2121     (void)gen_lea_modrm_0(env, s, modrm);
2122 }
2123 
2124 /* Used for BNDCL, BNDCU, BNDCN.  */
2125 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2126                       TCGCond cond, TCGv_i64 bndv)
2127 {
2128     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2129 
2130     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2131     if (!CODE64(s)) {
2132         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2133     }
2134     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2135     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2136     gen_helper_bndck(cpu_env, s->tmp2_i32);
2137 }
2138 
2139 /* used for LEA and MOV AX, mem */
2140 static void gen_add_A0_ds_seg(DisasContext *s)
2141 {
2142     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2143 }
2144 
2145 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2146    OR_TMP0 */
2147 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2148                            MemOp ot, int reg, int is_store)
2149 {
2150     int mod, rm;
2151 
2152     mod = (modrm >> 6) & 3;
2153     rm = (modrm & 7) | REX_B(s);
2154     if (mod == 3) {
2155         if (is_store) {
2156             if (reg != OR_TMP0)
2157                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2158             gen_op_mov_reg_v(s, ot, rm, s->T0);
2159         } else {
2160             gen_op_mov_v_reg(s, ot, s->T0, rm);
2161             if (reg != OR_TMP0)
2162                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2163         }
2164     } else {
2165         gen_lea_modrm(env, s, modrm);
2166         if (is_store) {
2167             if (reg != OR_TMP0)
2168                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2169             gen_op_st_v(s, ot, s->T0, s->A0);
2170         } else {
2171             gen_op_ld_v(s, ot, s->T0, s->A0);
2172             if (reg != OR_TMP0)
2173                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2174         }
2175     }
2176 }
2177 
2178 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2179 {
2180     uint32_t ret;
2181 
2182     switch (ot) {
2183     case MO_8:
2184         ret = x86_ldub_code(env, s);
2185         break;
2186     case MO_16:
2187         ret = x86_lduw_code(env, s);
2188         break;
2189     case MO_32:
2190 #ifdef TARGET_X86_64
2191     case MO_64:
2192 #endif
2193         ret = x86_ldl_code(env, s);
2194         break;
2195     default:
2196         tcg_abort();
2197     }
2198     return ret;
2199 }
2200 
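     /* Immediate operands are at most 4 bytes; 64-bit operations take a
        sign-extended 32-bit immediate.  */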
2201 static inline int insn_const_size(MemOp ot)
2202 {
2203     if (ot <= MO_32) {
2204         return 1 << ot;
2205     } else {
2206         return 4;
2207     }
2208 }
2209 
2210 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2211 {
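         /* Direct block chaining is only used while the target stays on a
            page this TB already spans, so page-level TB invalidation remains
            correct.  */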
2212 #ifndef CONFIG_USER_ONLY
2213     return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2214            (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2215 #else
2216     return true;
2217 #endif
2218 }
2219 
2220 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2221 {
2222     target_ulong pc = s->cs_base + eip;
2223 
2224     if (use_goto_tb(s, pc))  {
2225         /* jump to same page: we can use a direct jump */
2226         tcg_gen_goto_tb(tb_num);
2227         gen_jmp_im(s, eip);
2228         tcg_gen_exit_tb(s->base.tb, tb_num);
2229         s->base.is_jmp = DISAS_NORETURN;
2230     } else {
2231         /* jump to another page */
2232         gen_jmp_im(s, eip);
2233         gen_jr(s, s->tmp0);
2234     }
2235 }
2236 
2237 static inline void gen_jcc(DisasContext *s, int b,
2238                            target_ulong val, target_ulong next_eip)
2239 {
2240     TCGLabel *l1, *l2;
2241 
2242     if (s->jmp_opt) {
2243         l1 = gen_new_label();
2244         gen_jcc1(s, b, l1);
2245 
2246         gen_goto_tb(s, 0, next_eip);
2247 
2248         gen_set_label(l1);
2249         gen_goto_tb(s, 1, val);
2250     } else {
2251         l1 = gen_new_label();
2252         l2 = gen_new_label();
2253         gen_jcc1(s, b, l1);
2254 
2255         gen_jmp_im(s, next_eip);
2256         tcg_gen_br(l2);
2257 
2258         gen_set_label(l1);
2259         gen_jmp_im(s, val);
2260         gen_set_label(l2);
2261         gen_eob(s);
2262     }
2263 }
2264 
2265 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2266                         int modrm, int reg)
2267 {
2268     CCPrepare cc;
2269 
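         /* The source operand is always loaded, even when the condition is
            false, matching hardware CMOV behaviour.  */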
2270     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2271 
2272     cc = gen_prepare_cc(s, b, s->T1);
2273     if (cc.mask != -1) {
2274         TCGv t0 = tcg_temp_new();
2275         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2276         cc.reg = t0;
2277     }
2278     if (!cc.use_reg2) {
2279         cc.reg2 = tcg_const_tl(cc.imm);
2280     }
2281 
2282     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2283                        s->T0, cpu_regs[reg]);
2284     gen_op_mov_reg_v(s, ot, reg, s->T0);
2285 
2286     if (cc.mask != -1) {
2287         tcg_temp_free(cc.reg);
2288     }
2289     if (!cc.use_reg2) {
2290         tcg_temp_free(cc.reg2);
2291     }
2292 }
2293 
2294 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2295 {
2296     tcg_gen_ld32u_tl(s->T0, cpu_env,
2297                      offsetof(CPUX86State,segs[seg_reg].selector));
2298 }
2299 
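     /* Real-mode/VM86 segment load: the segment base is simply the
        selector shifted left by 4.  */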
2300 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2301 {
2302     tcg_gen_ext16u_tl(s->T0, s->T0);
2303     tcg_gen_st32_tl(s->T0, cpu_env,
2304                     offsetof(CPUX86State,segs[seg_reg].selector));
2305     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2306 }
2307 
2308 /* Move T0 to seg_reg, ending the TB if the CPU state may change.  Never
2309    call this function with seg_reg == R_CS.  */
2310 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2311 {
2312     if (s->pe && !s->vm86) {
2313         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2314         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2315         /* abort translation because the addseg value may change or
2316            because ss32 may change. For R_SS, translation must always
2317            stop as a special handling must be done to disable hardware
2318            interrupts for the next instruction */
2319         if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2320             s->base.is_jmp = DISAS_TOO_MANY;
2321         }
2322     } else {
2323         gen_op_movl_seg_T0_vm(s, seg_reg);
2324         if (seg_reg == R_SS) {
2325             s->base.is_jmp = DISAS_TOO_MANY;
2326         }
2327     }
2328 }
2329 
2330 static inline int svm_is_rep(int prefixes)
2331 {
2332     return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2333 }
2334 
2335 static inline void
2336 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2337                               uint32_t type, uint64_t param)
2338 {
2339     /* no SVM activated; fast case */
2340     if (likely(!(s->flags & HF_GUEST_MASK)))
2341         return;
2342     gen_update_cc_op(s);
2343     gen_jmp_im(s, pc_start - s->cs_base);
2344     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2345                                          tcg_const_i64(param));
2346 }
2347 
2348 static inline void
2349 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2350 {
2351     gen_svm_check_intercept_param(s, pc_start, type, 0);
2352 }
2353 
2354 static inline void gen_stack_update(DisasContext *s, int addend)
2355 {
2356     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2357 }
2358 
2359 /* Generate a push. It depends on ss32, addseg and dflag.  */
2360 static void gen_push_v(DisasContext *s, TCGv val)
2361 {
2362     MemOp d_ot = mo_pushpop(s, s->dflag);
2363     MemOp a_ot = mo_stacksize(s);
2364     int size = 1 << d_ot;
2365     TCGv new_esp = s->A0;
2366 
2367     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2368 
2369     if (!CODE64(s)) {
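             /* gen_lea_v_seg below may rewrite A0 to include the SS base,
                so with addseg keep the raw new ESP value in tmp4.  */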
2370         if (s->addseg) {
2371             new_esp = s->tmp4;
2372             tcg_gen_mov_tl(new_esp, s->A0);
2373         }
2374         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2375     }
2376 
2377     gen_op_st_v(s, d_ot, val, s->A0);
2378     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2379 }
2380 
2381 /* a two-step pop is necessary for precise exceptions */
2382 static MemOp gen_pop_T0(DisasContext *s)
2383 {
2384     MemOp d_ot = mo_pushpop(s, s->dflag);
2385 
2386     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2387     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2388 
2389     return d_ot;
2390 }
2391 
2392 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2393 {
2394     gen_stack_update(s, 1 << ot);
2395 }
2396 
2397 static inline void gen_stack_A0(DisasContext *s)
2398 {
2399     gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2400 }
2401 
2402 static void gen_pusha(DisasContext *s)
2403 {
2404     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2405     MemOp d_ot = s->dflag;
2406     int size = 1 << d_ot;
2407     int i;
2408 
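         /* PUSHA pushes EAX first, so EAX ends up at the highest address
            (ESP - size) and EDI at the lowest (ESP - 8 * size).  */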
2409     for (i = 0; i < 8; i++) {
2410         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2411         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2412         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2413     }
2414 
2415     gen_stack_update(s, -8 * size);
2416 }
2417 
2418 static void gen_popa(DisasContext *s)
2419 {
2420     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2421     MemOp d_ot = s->dflag;
2422     int size = 1 << d_ot;
2423     int i;
2424 
2425     for (i = 0; i < 8; i++) {
2426         /* ESP is not reloaded */
2427         if (7 - i == R_ESP) {
2428             continue;
2429         }
2430         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2431         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2432         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2433         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2434     }
2435 
2436     gen_stack_update(s, 8 * size);
2437 }
2438 
2439 static void gen_enter(DisasContext *s, int esp_addend, int level)
2440 {
2441     MemOp d_ot = mo_pushpop(s, s->dflag);
2442     MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2443     int size = 1 << d_ot;
2444 
2445     /* Push BP; compute FrameTemp into T1.  */
2446     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2447     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2448     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2449 
2450     level &= 31;
2451     if (level != 0) {
2452         int i;
2453 
2454         /* Copy level-1 pointers from the previous frame.  */
2455         for (i = 1; i < level; ++i) {
2456             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2457             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2458             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2459 
2460             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2461             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2462             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2463         }
2464 
2465         /* Push the current FrameTemp as the last level.  */
2466         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2467         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2468         gen_op_st_v(s, d_ot, s->T1, s->A0);
2469     }
2470 
2471     /* Copy the FrameTemp value to EBP.  */
2472     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2473 
2474     /* Compute the final value of ESP.  */
2475     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2476     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2477 }
2478 
2479 static void gen_leave(DisasContext *s)
2480 {
2481     MemOp d_ot = mo_pushpop(s, s->dflag);
2482     MemOp a_ot = mo_stacksize(s);
2483 
2484     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2485     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2486 
2487     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2488 
2489     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2490     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2491 }
2492 
2493 /* Similarly, except that the assumption here is that we don't decode
2494    the instruction at all -- either a missing opcode, an unimplemented
2495    feature, or just a bogus instruction stream.  */
2496 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2497 {
2498     gen_illegal_opcode(s);
2499 
2500     if (qemu_loglevel_mask(LOG_UNIMP)) {
2501         FILE *logfile = qemu_log_lock();
2502         target_ulong pc = s->pc_start, end = s->pc;
2503 
2504         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2505         for (; pc < end; ++pc) {
2506             qemu_log(" %02x", cpu_ldub_code(env, pc));
2507         }
2508         qemu_log("\n");
2509         qemu_log_unlock(logfile);
2510     }
2511 }
2512 
2513 /* an interrupt is different from an exception because of the
2514    privilege checks */
2515 static void gen_interrupt(DisasContext *s, int intno,
2516                           target_ulong cur_eip, target_ulong next_eip)
2517 {
2518     gen_update_cc_op(s);
2519     gen_jmp_im(s, cur_eip);
2520     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2521                                tcg_const_i32(next_eip - cur_eip));
2522     s->base.is_jmp = DISAS_NORETURN;
2523 }
2524 
2525 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2526 {
2527     gen_update_cc_op(s);
2528     gen_jmp_im(s, cur_eip);
2529     gen_helper_debug(cpu_env);
2530     s->base.is_jmp = DISAS_NORETURN;
2531 }
2532 
2533 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2534 {
2535     if ((s->flags & mask) == 0) {
2536         TCGv_i32 t = tcg_temp_new_i32();
2537         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2538         tcg_gen_ori_i32(t, t, mask);
2539         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2540         tcg_temp_free_i32(t);
2541         s->flags |= mask;
2542     }
2543 }
2544 
2545 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2546 {
2547     if (s->flags & mask) {
2548         TCGv_i32 t = tcg_temp_new_i32();
2549         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2550         tcg_gen_andi_i32(t, t, ~mask);
2551         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2552         tcg_temp_free_i32(t);
2553         s->flags &= ~mask;
2554     }
2555 }
2556 
2557 /* Clear BND registers during legacy branches.  */
2558 static void gen_bnd_jmp(DisasContext *s)
2559 {
2560     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2561        and if the BNDREGs are known to be in use (non-zero) already.
2562        The helper itself will check BNDPRESERVE at runtime.  */
2563     if ((s->prefix & PREFIX_REPNZ) == 0
2564         && (s->flags & HF_MPX_EN_MASK) != 0
2565         && (s->flags & HF_MPX_IU_MASK) != 0) {
2566         gen_helper_bnd_jmp(cpu_env);
2567     }
2568 }
2569 
2570 /* Generate an end of block.  A trace exception is also generated if needed.
2571    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2572    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2573    S->TF.  This is used by the syscall/sysret insns.  */
2574 static void
2575 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2576 {
2577     gen_update_cc_op(s);
2578 
2579     /* If several instructions disable interrupts, only the first does it.  */
2580     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2581         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2582     } else {
2583         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2584     }
2585 
2586     if (s->base.tb->flags & HF_RF_MASK) {
2587         gen_helper_reset_rf(cpu_env);
2588     }
2589     if (s->base.singlestep_enabled) {
2590         gen_helper_debug(cpu_env);
2591     } else if (recheck_tf) {
2592         gen_helper_rechecking_single_step(cpu_env);
2593         tcg_gen_exit_tb(NULL, 0);
2594     } else if (s->tf) {
2595         gen_helper_single_step(cpu_env);
2596     } else if (jr) {
2597         tcg_gen_lookup_and_goto_ptr();
2598     } else {
2599         tcg_gen_exit_tb(NULL, 0);
2600     }
2601     s->base.is_jmp = DISAS_NORETURN;
2602 }
2603 
2604 static inline void
2605 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2606 {
2607     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2608 }
2609 
2610 /* End of block.
2611    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2612 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2613 {
2614     gen_eob_worker(s, inhibit, false);
2615 }
2616 
2617 /* End of block, resetting the inhibit irq flag.  */
2618 static void gen_eob(DisasContext *s)
2619 {
2620     gen_eob_worker(s, false, false);
2621 }
2622 
2623 /* Jump to register */
2624 static void gen_jr(DisasContext *s, TCGv dest)
2625 {
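         /* DEST is unused here: the lookup-and-goto-ptr path reads the
            target from the CPU state, which callers update beforehand via
            gen_jmp_im.  */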
2626     do_gen_eob_worker(s, false, false, true);
2627 }
2628 
2629 /* generate a jump to eip.  No segment change may happen beforehand, as a
2630    direct jump to the next block may occur */
2631 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2632 {
2633     gen_update_cc_op(s);
2634     set_cc_op(s, CC_OP_DYNAMIC);
2635     if (s->jmp_opt) {
2636         gen_goto_tb(s, tb_num, eip);
2637     } else {
2638         gen_jmp_im(s, eip);
2639         gen_eob(s);
2640     }
2641 }
2642 
2643 static void gen_jmp(DisasContext *s, target_ulong eip)
2644 {
2645     gen_jmp_tb(s, eip, 0);
2646 }
2647 
2648 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2649 {
2650     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2651     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2652 }
2653 
2654 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2655 {
2656     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2657     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2658 }
2659 
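     /* 128-bit ("o" for octo) accesses are performed as two little-endian
        64-bit memory operations.  */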
2660 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2661 {
2662     int mem_index = s->mem_index;
2663     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2664     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2665     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2666     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2667     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2668 }
2669 
2670 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2671 {
2672     int mem_index = s->mem_index;
2673     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2674     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2675     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2676     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2677     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2678 }
2679 
2680 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2681 {
2682     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2683     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2684     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2685     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2686 }
2687 
2688 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2689 {
2690     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2691     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2692 }
2693 
2694 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2695 {
2696     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2697     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2698 }
2699 
2700 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2701 {
2702     tcg_gen_movi_i64(s->tmp1_i64, 0);
2703     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2704 }
2705 
2706 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2707 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2708 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2709 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2710 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2711 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2712                                TCGv_i32 val);
2713 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2714 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2715                                TCGv val);
2716 
2717 #define SSE_SPECIAL ((void *)1)
2718 #define SSE_DUMMY ((void *)2)
2719 
2720 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2721 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2722                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2723 
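     /* The second index (b1) selects the mandatory prefix: 0 = none,
        1 = 0x66, 2 = 0xF3, 3 = 0xF2; see its computation in gen_sse.  */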
2724 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2725     /* 3DNow! extensions */
2726     [0x0e] = { SSE_DUMMY }, /* femms */
2727     [0x0f] = { SSE_DUMMY }, /* pf... */
2728     /* pure SSE operations */
2729     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2730     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2731     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2732     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2733     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2734     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2735     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2736     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2737 
2738     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2739     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2740     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2741     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2742     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2743     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2744     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2745     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2746     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2747     [0x51] = SSE_FOP(sqrt),
2748     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2749     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2750     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2751     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2752     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2753     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2754     [0x58] = SSE_FOP(add),
2755     [0x59] = SSE_FOP(mul),
2756     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2757                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2758     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2759     [0x5c] = SSE_FOP(sub),
2760     [0x5d] = SSE_FOP(min),
2761     [0x5e] = SSE_FOP(div),
2762     [0x5f] = SSE_FOP(max),
2763 
2764     [0xc2] = SSE_FOP(cmpeq),
2765     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2766                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2767 
2768     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2769     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2770     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2771 
2772     /* MMX ops and their SSE extensions */
2773     [0x60] = MMX_OP2(punpcklbw),
2774     [0x61] = MMX_OP2(punpcklwd),
2775     [0x62] = MMX_OP2(punpckldq),
2776     [0x63] = MMX_OP2(packsswb),
2777     [0x64] = MMX_OP2(pcmpgtb),
2778     [0x65] = MMX_OP2(pcmpgtw),
2779     [0x66] = MMX_OP2(pcmpgtl),
2780     [0x67] = MMX_OP2(packuswb),
2781     [0x68] = MMX_OP2(punpckhbw),
2782     [0x69] = MMX_OP2(punpckhwd),
2783     [0x6a] = MMX_OP2(punpckhdq),
2784     [0x6b] = MMX_OP2(packssdw),
2785     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2786     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2787     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2788     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2789     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2790                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2791                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2792                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2793     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2794     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2795     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2796     [0x74] = MMX_OP2(pcmpeqb),
2797     [0x75] = MMX_OP2(pcmpeqw),
2798     [0x76] = MMX_OP2(pcmpeql),
2799     [0x77] = { SSE_DUMMY }, /* emms */
2800     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2801     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2802     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2803     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2804     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2805     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2806     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2807     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2808     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2809     [0xd1] = MMX_OP2(psrlw),
2810     [0xd2] = MMX_OP2(psrld),
2811     [0xd3] = MMX_OP2(psrlq),
2812     [0xd4] = MMX_OP2(paddq),
2813     [0xd5] = MMX_OP2(pmullw),
2814     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2815     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2816     [0xd8] = MMX_OP2(psubusb),
2817     [0xd9] = MMX_OP2(psubusw),
2818     [0xda] = MMX_OP2(pminub),
2819     [0xdb] = MMX_OP2(pand),
2820     [0xdc] = MMX_OP2(paddusb),
2821     [0xdd] = MMX_OP2(paddusw),
2822     [0xde] = MMX_OP2(pmaxub),
2823     [0xdf] = MMX_OP2(pandn),
2824     [0xe0] = MMX_OP2(pavgb),
2825     [0xe1] = MMX_OP2(psraw),
2826     [0xe2] = MMX_OP2(psrad),
2827     [0xe3] = MMX_OP2(pavgw),
2828     [0xe4] = MMX_OP2(pmulhuw),
2829     [0xe5] = MMX_OP2(pmulhw),
2830     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2831     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2832     [0xe8] = MMX_OP2(psubsb),
2833     [0xe9] = MMX_OP2(psubsw),
2834     [0xea] = MMX_OP2(pminsw),
2835     [0xeb] = MMX_OP2(por),
2836     [0xec] = MMX_OP2(paddsb),
2837     [0xed] = MMX_OP2(paddsw),
2838     [0xee] = MMX_OP2(pmaxsw),
2839     [0xef] = MMX_OP2(pxor),
2840     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2841     [0xf1] = MMX_OP2(psllw),
2842     [0xf2] = MMX_OP2(pslld),
2843     [0xf3] = MMX_OP2(psllq),
2844     [0xf4] = MMX_OP2(pmuludq),
2845     [0xf5] = MMX_OP2(pmaddwd),
2846     [0xf6] = MMX_OP2(psadbw),
2847     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2848                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2849     [0xf8] = MMX_OP2(psubb),
2850     [0xf9] = MMX_OP2(psubw),
2851     [0xfa] = MMX_OP2(psubl),
2852     [0xfb] = MMX_OP2(psubq),
2853     [0xfc] = MMX_OP2(paddb),
2854     [0xfd] = MMX_OP2(paddw),
2855     [0xfe] = MMX_OP2(paddl),
2856 };
2857 
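     /* Rows 0/8/16 correspond to the 0F 71/72/73 shift-group opcodes; the
        column within a row is the ModRM reg field (/2 = srl, /4 = sra,
        /6 = sll; in the 0F 73 row, /3 and /7 are the xmm-only
        psrldq/pslldq).  */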
2858 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2859     [0 + 2] = MMX_OP2(psrlw),
2860     [0 + 4] = MMX_OP2(psraw),
2861     [0 + 6] = MMX_OP2(psllw),
2862     [8 + 2] = MMX_OP2(psrld),
2863     [8 + 4] = MMX_OP2(psrad),
2864     [8 + 6] = MMX_OP2(pslld),
2865     [16 + 2] = MMX_OP2(psrlq),
2866     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2867     [16 + 6] = MMX_OP2(psllq),
2868     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2869 };
2870 
2871 static const SSEFunc_0_epi sse_op_table3ai[] = {
2872     gen_helper_cvtsi2ss,
2873     gen_helper_cvtsi2sd
2874 };
2875 
2876 #ifdef TARGET_X86_64
2877 static const SSEFunc_0_epl sse_op_table3aq[] = {
2878     gen_helper_cvtsq2ss,
2879     gen_helper_cvtsq2sd
2880 };
2881 #endif
2882 
2883 static const SSEFunc_i_ep sse_op_table3bi[] = {
2884     gen_helper_cvttss2si,
2885     gen_helper_cvtss2si,
2886     gen_helper_cvttsd2si,
2887     gen_helper_cvtsd2si
2888 };
2889 
2890 #ifdef TARGET_X86_64
2891 static const SSEFunc_l_ep sse_op_table3bq[] = {
2892     gen_helper_cvttss2sq,
2893     gen_helper_cvtss2sq,
2894     gen_helper_cvttsd2sq,
2895     gen_helper_cvtsd2sq
2896 };
2897 #endif
2898 
2899 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2900     SSE_FOP(cmpeq),
2901     SSE_FOP(cmplt),
2902     SSE_FOP(cmple),
2903     SSE_FOP(cmpunord),
2904     SSE_FOP(cmpneq),
2905     SSE_FOP(cmpnlt),
2906     SSE_FOP(cmpnle),
2907     SSE_FOP(cmpord),
2908 };
2909 
2910 static const SSEFunc_0_epp sse_op_table5[256] = {
2911     [0x0c] = gen_helper_pi2fw,
2912     [0x0d] = gen_helper_pi2fd,
2913     [0x1c] = gen_helper_pf2iw,
2914     [0x1d] = gen_helper_pf2id,
2915     [0x8a] = gen_helper_pfnacc,
2916     [0x8e] = gen_helper_pfpnacc,
2917     [0x90] = gen_helper_pfcmpge,
2918     [0x94] = gen_helper_pfmin,
2919     [0x96] = gen_helper_pfrcp,
2920     [0x97] = gen_helper_pfrsqrt,
2921     [0x9a] = gen_helper_pfsub,
2922     [0x9e] = gen_helper_pfadd,
2923     [0xa0] = gen_helper_pfcmpgt,
2924     [0xa4] = gen_helper_pfmax,
2925     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2926     [0xa7] = gen_helper_movq, /* pfrsqit1 */
2927     [0xaa] = gen_helper_pfsubr,
2928     [0xae] = gen_helper_pfacc,
2929     [0xb0] = gen_helper_pfcmpeq,
2930     [0xb4] = gen_helper_pfmul,
2931     [0xb6] = gen_helper_movq, /* pfrcpit2 */
2932     [0xb7] = gen_helper_pmulhrw_mmx,
2933     [0xbb] = gen_helper_pswapd,
2934     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2935 };
2936 
2937 struct SSEOpHelper_epp {
2938     SSEFunc_0_epp op[2];
2939     uint32_t ext_mask;
2940 };
2941 
2942 struct SSEOpHelper_eppi {
2943     SSEFunc_0_eppi op[2];
2944     uint32_t ext_mask;
2945 };
2946 
2947 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2948 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2949 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2950 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2951 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2952         CPUID_EXT_PCLMULQDQ }
2953 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2954 
2955 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2956     [0x00] = SSSE3_OP(pshufb),
2957     [0x01] = SSSE3_OP(phaddw),
2958     [0x02] = SSSE3_OP(phaddd),
2959     [0x03] = SSSE3_OP(phaddsw),
2960     [0x04] = SSSE3_OP(pmaddubsw),
2961     [0x05] = SSSE3_OP(phsubw),
2962     [0x06] = SSSE3_OP(phsubd),
2963     [0x07] = SSSE3_OP(phsubsw),
2964     [0x08] = SSSE3_OP(psignb),
2965     [0x09] = SSSE3_OP(psignw),
2966     [0x0a] = SSSE3_OP(psignd),
2967     [0x0b] = SSSE3_OP(pmulhrsw),
2968     [0x10] = SSE41_OP(pblendvb),
2969     [0x14] = SSE41_OP(blendvps),
2970     [0x15] = SSE41_OP(blendvpd),
2971     [0x17] = SSE41_OP(ptest),
2972     [0x1c] = SSSE3_OP(pabsb),
2973     [0x1d] = SSSE3_OP(pabsw),
2974     [0x1e] = SSSE3_OP(pabsd),
2975     [0x20] = SSE41_OP(pmovsxbw),
2976     [0x21] = SSE41_OP(pmovsxbd),
2977     [0x22] = SSE41_OP(pmovsxbq),
2978     [0x23] = SSE41_OP(pmovsxwd),
2979     [0x24] = SSE41_OP(pmovsxwq),
2980     [0x25] = SSE41_OP(pmovsxdq),
2981     [0x28] = SSE41_OP(pmuldq),
2982     [0x29] = SSE41_OP(pcmpeqq),
2983     [0x2a] = SSE41_SPECIAL, /* movntqda */
2984     [0x2b] = SSE41_OP(packusdw),
2985     [0x30] = SSE41_OP(pmovzxbw),
2986     [0x31] = SSE41_OP(pmovzxbd),
2987     [0x32] = SSE41_OP(pmovzxbq),
2988     [0x33] = SSE41_OP(pmovzxwd),
2989     [0x34] = SSE41_OP(pmovzxwq),
2990     [0x35] = SSE41_OP(pmovzxdq),
2991     [0x37] = SSE42_OP(pcmpgtq),
2992     [0x38] = SSE41_OP(pminsb),
2993     [0x39] = SSE41_OP(pminsd),
2994     [0x3a] = SSE41_OP(pminuw),
2995     [0x3b] = SSE41_OP(pminud),
2996     [0x3c] = SSE41_OP(pmaxsb),
2997     [0x3d] = SSE41_OP(pmaxsd),
2998     [0x3e] = SSE41_OP(pmaxuw),
2999     [0x3f] = SSE41_OP(pmaxud),
3000     [0x40] = SSE41_OP(pmulld),
3001     [0x41] = SSE41_OP(phminposuw),
3002     [0xdb] = AESNI_OP(aesimc),
3003     [0xdc] = AESNI_OP(aesenc),
3004     [0xdd] = AESNI_OP(aesenclast),
3005     [0xde] = AESNI_OP(aesdec),
3006     [0xdf] = AESNI_OP(aesdeclast),
3007 };
3008 
3009 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3010     [0x08] = SSE41_OP(roundps),
3011     [0x09] = SSE41_OP(roundpd),
3012     [0x0a] = SSE41_OP(roundss),
3013     [0x0b] = SSE41_OP(roundsd),
3014     [0x0c] = SSE41_OP(blendps),
3015     [0x0d] = SSE41_OP(blendpd),
3016     [0x0e] = SSE41_OP(pblendw),
3017     [0x0f] = SSSE3_OP(palignr),
3018     [0x14] = SSE41_SPECIAL, /* pextrb */
3019     [0x15] = SSE41_SPECIAL, /* pextrw */
3020     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3021     [0x17] = SSE41_SPECIAL, /* extractps */
3022     [0x20] = SSE41_SPECIAL, /* pinsrb */
3023     [0x21] = SSE41_SPECIAL, /* insertps */
3024     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3025     [0x40] = SSE41_OP(dpps),
3026     [0x41] = SSE41_OP(dppd),
3027     [0x42] = SSE41_OP(mpsadbw),
3028     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3029     [0x60] = SSE42_OP(pcmpestrm),
3030     [0x61] = SSE42_OP(pcmpestri),
3031     [0x62] = SSE42_OP(pcmpistrm),
3032     [0x63] = SSE42_OP(pcmpistri),
3033     [0xdf] = AESNI_OP(aeskeygenassist),
3034 };
3035 
3036 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3037                     target_ulong pc_start, int rex_r)
3038 {
3039     int b1, op1_offset, op2_offset, is_xmm, val;
3040     int modrm, mod, rm, reg;
3041     SSEFunc_0_epp sse_fn_epp;
3042     SSEFunc_0_eppi sse_fn_eppi;
3043     SSEFunc_0_ppi sse_fn_ppi;
3044     SSEFunc_0_eppt sse_fn_eppt;
3045     MemOp ot;
3046 
3047     b &= 0xff;
3048     if (s->prefix & PREFIX_DATA)
3049         b1 = 1;
3050     else if (s->prefix & PREFIX_REPZ)
3051         b1 = 2;
3052     else if (s->prefix & PREFIX_REPNZ)
3053         b1 = 3;
3054     else
3055         b1 = 0;
3056     sse_fn_epp = sse_op_table1[b][b1];
3057     if (!sse_fn_epp) {
3058         goto unknown_op;
3059     }
3060     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3061         is_xmm = 1;
3062     } else {
3063         if (b1 == 0) {
3064             /* MMX case */
3065             is_xmm = 0;
3066         } else {
3067             is_xmm = 1;
3068         }
3069     }
3070     /* simple MMX/SSE operation */
3071     if (s->flags & HF_TS_MASK) {
3072         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3073         return;
3074     }
3075     if (s->flags & HF_EM_MASK) {
3076     illegal_op:
3077         gen_illegal_opcode(s);
3078         return;
3079     }
3080     if (is_xmm
3081         && !(s->flags & HF_OSFXSR_MASK)
3082         && (b != 0x38 && b != 0x3a)) {
3083         goto unknown_op;
3084     }
3085     if (b == 0x0e) {
3086         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3087             /* If we were fully decoding this we might use illegal_op.  */
3088             goto unknown_op;
3089         }
3090         /* femms */
3091         gen_helper_emms(cpu_env);
3092         return;
3093     }
3094     if (b == 0x77) {
3095         /* emms */
3096         gen_helper_emms(cpu_env);
3097         return;
3098     }
3099     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3100        the static cpu state) */
3101     if (!is_xmm) {
3102         gen_helper_enter_mmx(cpu_env);
3103     }
3104 
3105     modrm = x86_ldub_code(env, s);
3106     reg = ((modrm >> 3) & 7);
3107     if (is_xmm)
3108         reg |= rex_r;
3109     mod = (modrm >> 6) & 3;
3110     if (sse_fn_epp == SSE_SPECIAL) {
3111         b |= (b1 << 8);
3112         switch(b) {
3113         case 0x0e7: /* movntq */
3114             if (mod == 3) {
3115                 goto illegal_op;
3116             }
3117             gen_lea_modrm(env, s, modrm);
3118             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3119             break;
3120         case 0x1e7: /* movntdq */
3121         case 0x02b: /* movntps */
3122         case 0x12b: /* movntpd */
3123             if (mod == 3)
3124                 goto illegal_op;
3125             gen_lea_modrm(env, s, modrm);
3126             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3127             break;
3128         case 0x3f0: /* lddqu */
3129             if (mod == 3)
3130                 goto illegal_op;
3131             gen_lea_modrm(env, s, modrm);
3132             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3133             break;
3134         case 0x22b: /* movntss */
3135         case 0x32b: /* movntsd */
3136             if (mod == 3)
3137                 goto illegal_op;
3138             gen_lea_modrm(env, s, modrm);
3139             if (b1 & 1) {
3140                 gen_stq_env_A0(s, offsetof(CPUX86State,
3141                                            xmm_regs[reg].ZMM_Q(0)));
3142             } else {
3143                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3144                     xmm_regs[reg].ZMM_L(0)));
3145                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3146             }
3147             break;
3148         case 0x6e: /* movd mm, ea */
3149 #ifdef TARGET_X86_64
3150             if (s->dflag == MO_64) {
3151                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3152                 tcg_gen_st_tl(s->T0, cpu_env,
3153                               offsetof(CPUX86State, fpregs[reg].mmx));
3154             } else
3155 #endif
3156             {
3157                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3158                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3159                                  offsetof(CPUX86State,fpregs[reg].mmx));
3160                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3161                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3162             }
3163             break;
3164         case 0x16e: /* movd xmm, ea */
3165 #ifdef TARGET_X86_64
3166             if (s->dflag == MO_64) {
3167                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3168                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3169                                  offsetof(CPUX86State,xmm_regs[reg]));
3170                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3171             } else
3172 #endif
3173             {
3174                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3175                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3176                                  offsetof(CPUX86State,xmm_regs[reg]));
3177                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3178                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3179             }
3180             break;
3181         case 0x6f: /* movq mm, ea */
3182             if (mod != 3) {
3183                 gen_lea_modrm(env, s, modrm);
3184                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3185             } else {
3186                 rm = (modrm & 7);
3187                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3188                                offsetof(CPUX86State,fpregs[rm].mmx));
3189                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3190                                offsetof(CPUX86State,fpregs[reg].mmx));
3191             }
3192             break;
3193         case 0x010: /* movups */
3194         case 0x110: /* movupd */
3195         case 0x028: /* movaps */
3196         case 0x128: /* movapd */
3197         case 0x16f: /* movdqa xmm, ea */
3198         case 0x26f: /* movdqu xmm, ea */
3199             if (mod != 3) {
3200                 gen_lea_modrm(env, s, modrm);
3201                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3202             } else {
3203                 rm = (modrm & 7) | REX_B(s);
3204                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3205                             offsetof(CPUX86State,xmm_regs[rm]));
3206             }
3207             break;
3208         case 0x210: /* movss xmm, ea */
3209             if (mod != 3) {
3210                 gen_lea_modrm(env, s, modrm);
3211                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3212                 tcg_gen_st32_tl(s->T0, cpu_env,
3213                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3214                 tcg_gen_movi_tl(s->T0, 0);
3215                 tcg_gen_st32_tl(s->T0, cpu_env,
3216                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3217                 tcg_gen_st32_tl(s->T0, cpu_env,
3218                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3219                 tcg_gen_st32_tl(s->T0, cpu_env,
3220                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3221             } else {
3222                 rm = (modrm & 7) | REX_B(s);
3223                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3224                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3225             }
3226             break;
3227         case 0x310: /* movsd xmm, ea */
3228             if (mod != 3) {
3229                 gen_lea_modrm(env, s, modrm);
3230                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3231                                            xmm_regs[reg].ZMM_Q(0)));
3232                 tcg_gen_movi_tl(s->T0, 0);
3233                 tcg_gen_st32_tl(s->T0, cpu_env,
3234                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3235                 tcg_gen_st32_tl(s->T0, cpu_env,
3236                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3237             } else {
3238                 rm = (modrm & 7) | REX_B(s);
3239                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3240                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3241             }
3242             break;
3243         case 0x012: /* movlps */
3244         case 0x112: /* movlpd */
3245             if (mod != 3) {
3246                 gen_lea_modrm(env, s, modrm);
3247                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3248                                            xmm_regs[reg].ZMM_Q(0)));
3249             } else {
3250                 /* movhlps */
3251                 rm = (modrm & 7) | REX_B(s);
3252                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3253                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3254             }
3255             break;
3256         case 0x212: /* movsldup */
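                 /* movsldup: dst = { src[0], src[0], src[2], src[2] }.
                    The even lanes are copied first; the duplication
                    into the odd lanes below is shared with the
                    register case.  */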
3257             if (mod != 3) {
3258                 gen_lea_modrm(env, s, modrm);
3259                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3260             } else {
3261                 rm = (modrm & 7) | REX_B(s);
3262                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3263                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3264                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3265                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3266             }
3267             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3268                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3269             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3270                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3271             break;
3272         case 0x312: /* movddup */
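                 /* movddup: dst.Q(0) = dst.Q(1) = src.Q(0).  */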
3273             if (mod != 3) {
3274                 gen_lea_modrm(env, s, modrm);
3275                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3276                                            xmm_regs[reg].ZMM_Q(0)));
3277             } else {
3278                 rm = (modrm & 7) | REX_B(s);
3279                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3280                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3281             }
3282             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3283                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3284             break;
3285         case 0x016: /* movhps */
3286         case 0x116: /* movhpd */
3287             if (mod != 3) {
3288                 gen_lea_modrm(env, s, modrm);
3289                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3290                                            xmm_regs[reg].ZMM_Q(1)));
3291             } else {
3292                 /* movlhps */
3293                 rm = (modrm & 7) | REX_B(s);
3294                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3295                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3296             }
3297             break;
3298         case 0x216: /* movshdup */
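                 /* movshdup: dst = { src[1], src[1], src[3], src[3] }.  */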
3299             if (mod != 3) {
3300                 gen_lea_modrm(env, s, modrm);
3301                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3302             } else {
3303                 rm = (modrm & 7) | REX_B(s);
3304                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3305                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3306                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3307                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3308             }
3309             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3310                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3311             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3312                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3313             break;
3314         case 0x178:
3315         case 0x378:
3316             {
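                     /* SSE4a EXTRQ (66 0f 78 /0, b1 == 1) and INSERTQ
                        (f2 0f 78) immediate forms.  Two imm8 bytes
                        follow: first the field length, then the bit
                        index.  */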
3317                 int bit_index, field_length;
3318 
3319                 if (b1 == 1 && reg != 0)
3320                     goto illegal_op;
3321                 field_length = x86_ldub_code(env, s) & 0x3F;
3322                 bit_index = x86_ldub_code(env, s) & 0x3F;
3323                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3324                     offsetof(CPUX86State,xmm_regs[reg]));
3325                 if (b1 == 1)
3326                     gen_helper_extrq_i(cpu_env, s->ptr0,
3327                                        tcg_const_i32(bit_index),
3328                                        tcg_const_i32(field_length));
3329                 else
3330                     gen_helper_insertq_i(cpu_env, s->ptr0,
3331                                          tcg_const_i32(bit_index),
3332                                          tcg_const_i32(field_length));
3333             }
3334             break;
3335         case 0x7e: /* movd ea, mm */
3336 #ifdef TARGET_X86_64
3337             if (s->dflag == MO_64) {
3338                 tcg_gen_ld_i64(s->T0, cpu_env,
3339                                offsetof(CPUX86State,fpregs[reg].mmx));
3340                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3341             } else
3342 #endif
3343             {
3344                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3345                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3346                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3347             }
3348             break;
3349         case 0x17e: /* movd ea, xmm */
3350 #ifdef TARGET_X86_64
3351             if (s->dflag == MO_64) {
3352                 tcg_gen_ld_i64(s->T0, cpu_env,
3353                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3354                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3355             } else
3356 #endif
3357             {
3358                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3359                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3360                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3361             }
3362             break;
3363         case 0x27e: /* movq xmm, ea */
3364             if (mod != 3) {
3365                 gen_lea_modrm(env, s, modrm);
3366                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3367                                            xmm_regs[reg].ZMM_Q(0)));
3368             } else {
3369                 rm = (modrm & 7) | REX_B(s);
3370                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3371                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3372             }
3373             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3374             break;
3375         case 0x7f: /* movq ea, mm */
3376             if (mod != 3) {
3377                 gen_lea_modrm(env, s, modrm);
3378                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3379             } else {
3380                 rm = (modrm & 7);
3381                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3382                             offsetof(CPUX86State,fpregs[reg].mmx));
3383             }
3384             break;
3385         case 0x011: /* movups */
3386         case 0x111: /* movupd */
3387         case 0x029: /* movaps */
3388         case 0x129: /* movapd */
3389         case 0x17f: /* movdqa ea, xmm */
3390         case 0x27f: /* movdqu ea, xmm */
3391             if (mod != 3) {
3392                 gen_lea_modrm(env, s, modrm);
3393                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3394             } else {
3395                 rm = (modrm & 7) | REX_B(s);
3396                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3397                             offsetof(CPUX86State,xmm_regs[reg]));
3398             }
3399             break;
3400         case 0x211: /* movss ea, xmm */
3401             if (mod != 3) {
3402                 gen_lea_modrm(env, s, modrm);
3403                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3404                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3405                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3406             } else {
3407                 rm = (modrm & 7) | REX_B(s);
3408                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3409                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3410             }
3411             break;
3412         case 0x311: /* movsd ea, xmm */
3413             if (mod != 3) {
3414                 gen_lea_modrm(env, s, modrm);
3415                 gen_stq_env_A0(s, offsetof(CPUX86State,
3416                                            xmm_regs[reg].ZMM_Q(0)));
3417             } else {
3418                 rm = (modrm & 7) | REX_B(s);
3419                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3420                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3421             }
3422             break;
3423         case 0x013: /* movlps */
3424         case 0x113: /* movlpd */
3425             if (mod != 3) {
3426                 gen_lea_modrm(env, s, modrm);
3427                 gen_stq_env_A0(s, offsetof(CPUX86State,
3428                                            xmm_regs[reg].ZMM_Q(0)));
3429             } else {
3430                 goto illegal_op;
3431             }
3432             break;
3433         case 0x017: /* movhps */
3434         case 0x117: /* movhpd */
3435             if (mod != 3) {
3436                 gen_lea_modrm(env, s, modrm);
3437                 gen_stq_env_A0(s, offsetof(CPUX86State,
3438                                            xmm_regs[reg].ZMM_Q(1)));
3439             } else {
3440                 goto illegal_op;
3441             }
3442             break;
3443         case 0x71: /* shift mm, im */
3444         case 0x72:
3445         case 0x73:
3446         case 0x171: /* shift xmm, im */
3447         case 0x172:
3448         case 0x173:
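                 /* 0f 71/72/73 are the shift-by-immediate groups.  The
                    immediate is staged in {xmm,mmx}_t0 and the actual
                    operation is looked up in sse_op_table2 by opcode
                    and the modrm reg field.  */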
3449             if (b1 >= 2) {
3450                 goto unknown_op;
3451             }
3452             val = x86_ldub_code(env, s);
3453             if (is_xmm) {
3454                 tcg_gen_movi_tl(s->T0, val);
3455                 tcg_gen_st32_tl(s->T0, cpu_env,
3456                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3457                 tcg_gen_movi_tl(s->T0, 0);
3458                 tcg_gen_st32_tl(s->T0, cpu_env,
3459                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3460                 op1_offset = offsetof(CPUX86State,xmm_t0);
3461             } else {
3462                 tcg_gen_movi_tl(s->T0, val);
3463                 tcg_gen_st32_tl(s->T0, cpu_env,
3464                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3465                 tcg_gen_movi_tl(s->T0, 0);
3466                 tcg_gen_st32_tl(s->T0, cpu_env,
3467                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3468                 op1_offset = offsetof(CPUX86State,mmx_t0);
3469             }
3470             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3471                                        (((modrm >> 3)) & 7)][b1];
3472             if (!sse_fn_epp) {
3473                 goto unknown_op;
3474             }
3475             if (is_xmm) {
3476                 rm = (modrm & 7) | REX_B(s);
3477                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3478             } else {
3479                 rm = (modrm & 7);
3480                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3481             }
3482             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3483             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3484             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3485             break;
3486         case 0x050: /* movmskps */
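                 /* movmskps/movmskpd collect the sign bit of each
                    packed float lane into the low bits of a GP
                    register, zero-extended.  */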
3487             rm = (modrm & 7) | REX_B(s);
3488             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3489                              offsetof(CPUX86State,xmm_regs[rm]));
3490             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3491             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3492             break;
3493         case 0x150: /* movmskpd */
3494             rm = (modrm & 7) | REX_B(s);
3495             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3496                              offsetof(CPUX86State,xmm_regs[rm]));
3497             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3498             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3499             break;
3500         case 0x02a: /* cvtpi2ps */
3501         case 0x12a: /* cvtpi2pd */
3502             gen_helper_enter_mmx(cpu_env);
3503             if (mod != 3) {
3504                 gen_lea_modrm(env, s, modrm);
3505                 op2_offset = offsetof(CPUX86State,mmx_t0);
3506                 gen_ldq_env_A0(s, op2_offset);
3507             } else {
3508                 rm = (modrm & 7);
3509                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3510             }
3511             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3512             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3513             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3514             switch(b >> 8) {
3515             case 0x0:
3516                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3517                 break;
3518             default:
3519             case 0x1:
3520                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3521                 break;
3522             }
3523             break;
3524         case 0x22a: /* cvtsi2ss */
3525         case 0x32a: /* cvtsi2sd */
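                 /* With REX.W (dflag == MO_64) the integer source is a
                    64-bit GP register; that form only exists in
                    TARGET_X86_64 builds.  */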
3526             ot = mo_64_32(s->dflag);
3527             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3528             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3529             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3530             if (ot == MO_32) {
3531                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3532                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3533                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3534             } else {
3535 #ifdef TARGET_X86_64
3536                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3537                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3538 #else
3539                 goto illegal_op;
3540 #endif
3541             }
3542             break;
3543         case 0x02c: /* cvttps2pi */
3544         case 0x12c: /* cvttpd2pi */
3545         case 0x02d: /* cvtps2pi */
3546         case 0x12d: /* cvtpd2pi */
3547             gen_helper_enter_mmx(cpu_env);
3548             if (mod != 3) {
3549                 gen_lea_modrm(env, s, modrm);
3550                 op2_offset = offsetof(CPUX86State,xmm_t0);
3551                 gen_ldo_env_A0(s, op2_offset);
3552             } else {
3553                 rm = (modrm & 7) | REX_B(s);
3554                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3555             }
3556             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3557             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3558             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3559             switch(b) {
3560             case 0x02c:
3561                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3562                 break;
3563             case 0x12c:
3564                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3565                 break;
3566             case 0x02d:
3567                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3568                 break;
3569             case 0x12d:
3570                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3571                 break;
3572             }
3573             break;
3574         case 0x22c: /* cvttss2si */
3575         case 0x32c: /* cvttsd2si */
3576         case 0x22d: /* cvtss2si */
3577         case 0x32d: /* cvtsd2si */
3578             ot = mo_64_32(s->dflag);
3579             if (mod != 3) {
3580                 gen_lea_modrm(env, s, modrm);
3581                 if ((b >> 8) & 1) {
3582                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3583                 } else {
3584                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3585                     tcg_gen_st32_tl(s->T0, cpu_env,
3586                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3587                 }
3588                 op2_offset = offsetof(CPUX86State,xmm_t0);
3589             } else {
3590                 rm = (modrm & 7) | REX_B(s);
3591                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3592             }
3593             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3594             if (ot == MO_32) {
3595                 SSEFunc_i_ep sse_fn_i_ep =
3596                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3597                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3598                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3599             } else {
3600 #ifdef TARGET_X86_64
3601                 SSEFunc_l_ep sse_fn_l_ep =
3602                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3603                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3604 #else
3605                 goto illegal_op;
3606 #endif
3607             }
3608             gen_op_mov_reg_v(s, ot, reg, s->T0);
3609             break;
3610         case 0xc4: /* pinsrw */
3611         case 0x1c4:
3612             s->rip_offset = 1;
3613             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3614             val = x86_ldub_code(env, s);
3615             if (b1) {
3616                 val &= 7;
3617                 tcg_gen_st16_tl(s->T0, cpu_env,
3618                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3619             } else {
3620                 val &= 3;
3621                 tcg_gen_st16_tl(s->T0, cpu_env,
3622                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3623             }
3624             break;
3625         case 0xc5: /* pextrw */
3626         case 0x1c5:
3627             if (mod != 3)
3628                 goto illegal_op;
3629             ot = mo_64_32(s->dflag);
3630             val = x86_ldub_code(env, s);
3631             if (b1) {
3632                 val &= 7;
3633                 rm = (modrm & 7) | REX_B(s);
3634                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3635                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3636             } else {
3637                 val &= 3;
3638                 rm = (modrm & 7);
3639                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3640                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3641             }
3642             reg = ((modrm >> 3) & 7) | rex_r;
3643             gen_op_mov_reg_v(s, ot, reg, s->T0);
3644             break;
3645         case 0x1d6: /* movq ea, xmm */
3646             if (mod != 3) {
3647                 gen_lea_modrm(env, s, modrm);
3648                 gen_stq_env_A0(s, offsetof(CPUX86State,
3649                                            xmm_regs[reg].ZMM_Q(0)));
3650             } else {
3651                 rm = (modrm & 7) | REX_B(s);
3652                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3653                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3654                 gen_op_movq_env_0(s,
3655                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3656             }
3657             break;
3658         case 0x2d6: /* movq2dq */
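                 /* movq2dq copies an MMX register into the low quadword
                    of an XMM register and zeroes the high quadword;
                    movdq2q (below) goes the other way.  Both enter MMX
                    mode first.  */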
3659             gen_helper_enter_mmx(cpu_env);
3660             rm = (modrm & 7);
3661             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3662                         offsetof(CPUX86State,fpregs[rm].mmx));
3663             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3664             break;
3665         case 0x3d6: /* movdq2q */
3666             gen_helper_enter_mmx(cpu_env);
3667             rm = (modrm & 7) | REX_B(s);
3668             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3669                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3670             break;
3671         case 0xd7: /* pmovmskb */
3672         case 0x1d7:
3673             if (mod != 3)
3674                 goto illegal_op;
3675             if (b1) {
3676                 rm = (modrm & 7) | REX_B(s);
3677                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3678                                  offsetof(CPUX86State, xmm_regs[rm]));
3679                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3680             } else {
3681                 rm = (modrm & 7);
3682                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3683                                  offsetof(CPUX86State, fpregs[rm].mmx));
3684                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3685             }
3686             reg = ((modrm >> 3) & 7) | rex_r;
3687             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3688             break;
3689 
3690         case 0x138:
3691         case 0x038:
3692             b = modrm;
3693             if ((b & 0xf0) == 0xf0) {
3694                 goto do_0f_38_fx;
3695             }
3696             modrm = x86_ldub_code(env, s);
3697             rm = modrm & 7;
3698             reg = ((modrm >> 3) & 7) | rex_r;
3699             mod = (modrm >> 6) & 3;
3700             if (b1 >= 2) {
3701                 goto unknown_op;
3702             }
3703 
3704             sse_fn_epp = sse_op_table6[b].op[b1];
3705             if (!sse_fn_epp) {
3706                 goto unknown_op;
3707             }
3708             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3709                 goto illegal_op;
3710 
3711             if (b1) {
3712                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3713                 if (mod == 3) {
3714                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3715                 } else {
3716                     op2_offset = offsetof(CPUX86State,xmm_t0);
3717                     gen_lea_modrm(env, s, modrm);
3718                     switch (b) {
3719                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3720                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3721                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3722                         gen_ldq_env_A0(s, op2_offset +
3723                                         offsetof(ZMMReg, ZMM_Q(0)));
3724                         break;
3725                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3726                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3727                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3728                                             s->mem_index, MO_LEUL);
3729                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3730                                         offsetof(ZMMReg, ZMM_L(0)));
3731                         break;
3732                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3733                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3734                                            s->mem_index, MO_LEUW);
3735                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3736                                         offsetof(ZMMReg, ZMM_W(0)));
3737                         break;
3738                     case 0x2a:            /* movntdqa */
3739                         gen_ldo_env_A0(s, op1_offset);
3740                         return;
3741                     default:
3742                         gen_ldo_env_A0(s, op2_offset);
3743                     }
3744                 }
3745             } else {
3746                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3747                 if (mod == 3) {
3748                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3749                 } else {
3750                     op2_offset = offsetof(CPUX86State,mmx_t0);
3751                     gen_lea_modrm(env, s, modrm);
3752                     gen_ldq_env_A0(s, op2_offset);
3753                 }
3754             }
3755             if (sse_fn_epp == SSE_SPECIAL) {
3756                 goto unknown_op;
3757             }
3758 
3759             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3760             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3761             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3762 
3763             if (b == 0x17) {
3764                 set_cc_op(s, CC_OP_EFLAGS);
3765             }
3766             break;
3767 
3768         case 0x238:
3769         case 0x338:
3770         do_0f_38_fx:
3771             /* Various integer extensions at 0f 38 f[0-f].  */
3772             b = modrm | (b1 << 8);
3773             modrm = x86_ldub_code(env, s);
3774             reg = ((modrm >> 3) & 7) | rex_r;
3775 
3776             switch (b) {
3777             case 0x3f0: /* crc32 Gd,Eb */
3778             case 0x3f1: /* crc32 Gd,Ey */
3779             do_crc32:
3780                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3781                     goto illegal_op;
3782                 }
3783                 if ((b & 0xff) == 0xf0) {
3784                     ot = MO_8;
3785                 } else if (s->dflag != MO_64) {
3786                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3787                 } else {
3788                     ot = MO_64;
3789                 }
3790 
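                     /* The CRC32 insn uses the CRC-32C (Castagnoli)
                        polynomial; the helper is passed the source
                        operand width in bits (8 << ot).  */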
3791                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3792                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3793                 gen_helper_crc32(s->T0, s->tmp2_i32,
3794                                  s->T0, tcg_const_i32(8 << ot));
3795 
3796                 ot = mo_64_32(s->dflag);
3797                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3798                 break;
3799 
3800             case 0x1f0: /* crc32 or movbe */
3801             case 0x1f1:
3802                 /* For these insns, the f2 prefix is supposed to have
3803                    priority over the 66 prefix, but the computation of b1
3804                    above gives 66 priority, so check for f2 explicitly.  */
3805                 if (s->prefix & PREFIX_REPNZ) {
3806                     goto do_crc32;
3807                 }
3808                 /* FALLTHRU */
3809             case 0x0f0: /* movbe Gy,My */
3810             case 0x0f1: /* movbe My,Gy */
3811                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3812                     goto illegal_op;
3813                 }
3814                 if (s->dflag != MO_64) {
3815                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3816                 } else {
3817                     ot = MO_64;
3818                 }
3819 
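                     /* movbe is a byte-swapping load/store: adding
                        MO_BE to the memop makes the TCG memory
                        operation itself do the swap.  */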
3820                 gen_lea_modrm(env, s, modrm);
3821                 if ((b & 1) == 0) {
3822                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3823                                        s->mem_index, ot | MO_BE);
3824                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3825                 } else {
3826                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3827                                        s->mem_index, ot | MO_BE);
3828                 }
3829                 break;
3830 
3831             case 0x0f2: /* andn Gy, By, Ey */
3832                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3833                     || !(s->prefix & PREFIX_VEX)
3834                     || s->vex_l != 0) {
3835                     goto illegal_op;
3836                 }
3837                 ot = mo_64_32(s->dflag);
3838                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3839                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3840                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3841                 gen_op_update1_cc(s);
3842                 set_cc_op(s, CC_OP_LOGICB + ot);
3843                 break;
3844 
3845             case 0x0f7: /* bextr Gy, Ey, By */
3846                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3847                     || !(s->prefix & PREFIX_VEX)
3848                     || s->vex_l != 0) {
3849                     goto illegal_op;
3850                 }
3851                 ot = mo_64_32(s->dflag);
3852                 {
3853                     TCGv bound, zero;
3854 
3855                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3856                     /* Extract START, and shift the operand.
3857                        Shifts larger than operand size get zeros.  */
3858                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3859                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3860 
3861                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3862                     zero = tcg_const_tl(0);
3863                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3864                                        s->T0, zero);
3865                     tcg_temp_free(zero);
3866 
3867                     /* Extract the LEN into a mask.  Lengths larger than
3868                        operand size get all ones.  */
3869                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3870                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3871                                        s->A0, bound);
3872                     tcg_temp_free(bound);
3873                     tcg_gen_movi_tl(s->T1, 1);
3874                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3875                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3876                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
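                         /* Overall: dst = (Ey >> START) & ((1 << LEN) - 1);
                            e.g. START = 4, LEN = 8 applied to 0xabcd1234
                            yields (0xabcd1234 >> 4) & 0xff = 0x23.  */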
3877 
3878                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3879                     gen_op_update1_cc(s);
3880                     set_cc_op(s, CC_OP_LOGICB + ot);
3881                 }
3882                 break;
3883 
3884             case 0x0f5: /* bzhi Gy, Ey, By */
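                     /* bzhi clears the bits of Ey from bit INDEX (the
                        low byte of By) upward: dst = src & ~(-1 << INDEX).  */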
3885                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3886                     || !(s->prefix & PREFIX_VEX)
3887                     || s->vex_l != 0) {
3888                     goto illegal_op;
3889                 }
3890                 ot = mo_64_32(s->dflag);
3891                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3892                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3893                 {
3894                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3895                     /* Note that since we're using BMILG (in order to get O
3896                        cleared) we need to store the inverse into C.  */
3897                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3898                                        s->T1, bound);
3899                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3900                                        bound, bound, s->T1);
3901                     tcg_temp_free(bound);
3902                 }
3903                 tcg_gen_movi_tl(s->A0, -1);
3904                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3905                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3906                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3907                 gen_op_update1_cc(s);
3908                 set_cc_op(s, CC_OP_BMILGB + ot);
3909                 break;
3910 
3911             case 0x3f6: /* mulx By, Gy, rdx, Ey */
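                     /* mulx multiplies rDX by Ey without touching any
                        flags; the low half of the product goes to By
                        (vex_v) and the high half to Gy (reg).  */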
3912                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3913                     || !(s->prefix & PREFIX_VEX)
3914                     || s->vex_l != 0) {
3915                     goto illegal_op;
3916                 }
3917                 ot = mo_64_32(s->dflag);
3918                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3919                 switch (ot) {
3920                 default:
3921                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3922                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3923                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3924                                       s->tmp2_i32, s->tmp3_i32);
3925                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3926                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3927                     break;
3928 #ifdef TARGET_X86_64
3929                 case MO_64:
3930                     tcg_gen_mulu2_i64(s->T0, s->T1,
3931                                       s->T0, cpu_regs[R_EDX]);
3932                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3933                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3934                     break;
3935 #endif
3936                 }
3937                 break;
3938 
3939             case 0x3f5: /* pdep Gy, By, Ey */
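                     /* pdep deposits the low-order bits of By into the
                        bit positions that are set in the mask Ey.  */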
3940                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3941                     || !(s->prefix & PREFIX_VEX)
3942                     || s->vex_l != 0) {
3943                     goto illegal_op;
3944                 }
3945                 ot = mo_64_32(s->dflag);
3946                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3947                 /* Note that by zero-extending the source operand, we
3948                    automatically handle zero-extending the result.  */
3949                 if (ot == MO_64) {
3950                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3951                 } else {
3952                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3953                 }
3954                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
3955                 break;
3956 
3957             case 0x2f5: /* pext Gy, By, Ey */
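                     /* pext gathers the bits of By selected by the mask
                        Ey and packs them into the low-order bits of Gy.  */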
3958                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3959                     || !(s->prefix & PREFIX_VEX)
3960                     || s->vex_l != 0) {
3961                     goto illegal_op;
3962                 }
3963                 ot = mo_64_32(s->dflag);
3964                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3965                 /* Note that by zero-extending the source operand, we
3966                    automatically handle zero-extending the result.  */
3967                 if (ot == MO_64) {
3968                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3969                 } else {
3970                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3971                 }
3972                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
3973                 break;
3974 
3975             case 0x1f6: /* adcx Gy, Ey */
3976             case 0x2f6: /* adox Gy, Ey */
3977                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3978                     goto illegal_op;
3979                 } else {
3980                     TCGv carry_in, carry_out, zero;
3981                     int end_op;
3982 
3983                     ot = mo_64_32(s->dflag);
3984                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3985 
3986                     /* Re-use the carry-out from a previous round.  */
3987                     carry_in = NULL;
3988                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
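                         /* adcx updates only CF and adox only OF, so two
                            independent carry chains can be interleaved;
                            the adcx carry lives in cpu_cc_dst and the
                            adox carry in cpu_cc_src2.  */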
3989                     switch (s->cc_op) {
3990                     case CC_OP_ADCX:
3991                         if (b == 0x1f6) {
3992                             carry_in = cpu_cc_dst;
3993                             end_op = CC_OP_ADCX;
3994                         } else {
3995                             end_op = CC_OP_ADCOX;
3996                         }
3997                         break;
3998                     case CC_OP_ADOX:
3999                         if (b == 0x1f6) {
4000                             end_op = CC_OP_ADCOX;
4001                         } else {
4002                             carry_in = cpu_cc_src2;
4003                             end_op = CC_OP_ADOX;
4004                         }
4005                         break;
4006                     case CC_OP_ADCOX:
4007                         end_op = CC_OP_ADCOX;
4008                         carry_in = carry_out;
4009                         break;
4010                     default:
4011                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4012                         break;
4013                     }
4014                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4015                     if (!carry_in) {
4016                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4017                             gen_compute_eflags(s);
4018                         }
4019                         carry_in = s->tmp0;
4020                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4021                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4022                     }
4023 
4024                     switch (ot) {
4025 #ifdef TARGET_X86_64
4026                     case MO_32:
4027                         /* If we know TL is 64-bit, and we want a 32-bit
4028                            result, just do everything in 64-bit arithmetic.  */
4029                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4030                         tcg_gen_ext32u_i64(s->T0, s->T0);
4031                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4032                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4033                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4034                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4035                         break;
4036 #endif
4037                     default:
4038                         /* Otherwise compute the carry-out in two steps.  */
4039                         zero = tcg_const_tl(0);
4040                         tcg_gen_add2_tl(s->T0, carry_out,
4041                                         s->T0, zero,
4042                                         carry_in, zero);
4043                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4044                                         cpu_regs[reg], carry_out,
4045                                         s->T0, zero);
4046                         tcg_temp_free(zero);
4047                         break;
4048                     }
4049                     set_cc_op(s, end_op);
4050                 }
4051                 break;
4052 
4053             case 0x1f7: /* shlx Gy, Ey, By */
4054             case 0x2f7: /* sarx Gy, Ey, By */
4055             case 0x3f7: /* shrx Gy, Ey, By */
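                     /* shlx/sarx/shrx shift by the count in By, masked
                        to the operand width, and leave the flags
                        untouched.  The 32-bit sarx/shrx forms need an
                        explicit sign/zero extension because TL may be
                        64 bits wide.  */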
4056                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4057                     || !(s->prefix & PREFIX_VEX)
4058                     || s->vex_l != 0) {
4059                     goto illegal_op;
4060                 }
4061                 ot = mo_64_32(s->dflag);
4062                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4063                 if (ot == MO_64) {
4064                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4065                 } else {
4066                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4067                 }
4068                 if (b == 0x1f7) {
4069                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4070                 } else if (b == 0x2f7) {
4071                     if (ot != MO_64) {
4072                         tcg_gen_ext32s_tl(s->T0, s->T0);
4073                     }
4074                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4075                 } else {
4076                     if (ot != MO_64) {
4077                         tcg_gen_ext32u_tl(s->T0, s->T0);
4078                     }
4079                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4080                 }
4081                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4082                 break;
4083 
4084             case 0x0f3:
4085             case 0x1f3:
4086             case 0x2f3:
4087             case 0x3f3: /* Group 17 */
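                     /* Group 17 (BMI1), dispatched on the modrm reg
                        field:
                        blsr = x & (x - 1) clears the lowest set bit,
                        blsmsk = x ^ (x - 1) masks up to the lowest set
                        bit, blsi = x & -x isolates the lowest set bit.  */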
4088                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4089                     || !(s->prefix & PREFIX_VEX)
4090                     || s->vex_l != 0) {
4091                     goto illegal_op;
4092                 }
4093                 ot = mo_64_32(s->dflag);
4094                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4095 
4096                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4097                 switch (reg & 7) {
4098                 case 1: /* blsr By,Ey */
4099                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4100                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4101                     break;
4102                 case 2: /* blsmsk By,Ey */
4103                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4104                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4105                     break;
4106                 case 3: /* blsi By, Ey */
4107                     tcg_gen_neg_tl(s->T1, s->T0);
4108                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4109                     break;
4110                 default:
4111                     goto unknown_op;
4112                 }
4113                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4114                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4115                 set_cc_op(s, CC_OP_BMILGB + ot);
4116                 break;
4117 
4118             default:
4119                 goto unknown_op;
4120             }
4121             break;
4122 
4123         case 0x03a:
4124         case 0x13a:
4125             b = modrm;
4126             modrm = x86_ldub_code(env, s);
4127             rm = modrm & 7;
4128             reg = ((modrm >> 3) & 7) | rex_r;
4129             mod = (modrm >> 6) & 3;
4130             if (b1 >= 2) {
4131                 goto unknown_op;
4132             }
4133 
4134             sse_fn_eppi = sse_op_table7[b].op[b1];
4135             if (!sse_fn_eppi) {
4136                 goto unknown_op;
4137             }
4138             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4139                 goto illegal_op;
4140 
4141             s->rip_offset = 1;
4142 
4143             if (sse_fn_eppi == SSE_SPECIAL) {
4144                 ot = mo_64_32(s->dflag);
4145                 rm = (modrm & 7) | REX_B(s);
4146                 if (mod != 3)
4147                     gen_lea_modrm(env, s, modrm);
4148                 reg = ((modrm >> 3) & 7) | rex_r;
4149                 val = x86_ldub_code(env, s);
4150                 switch (b) {
4151                 case 0x14: /* pextrb */
4152                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4153                                             xmm_regs[reg].ZMM_B(val & 15)));
4154                     if (mod == 3) {
4155                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4156                     } else {
4157                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4158                                            s->mem_index, MO_UB);
4159                     }
4160                     break;
4161                 case 0x15: /* pextrw */
4162                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4163                                             xmm_regs[reg].ZMM_W(val & 7)));
4164                     if (mod == 3) {
4165                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4166                     } else {
4167                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4168                                            s->mem_index, MO_LEUW);
4169                     }
4170                     break;
4171                 case 0x16:
4172                     if (ot == MO_32) { /* pextrd */
4173                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4174                                         offsetof(CPUX86State,
4175                                                 xmm_regs[reg].ZMM_L(val & 3)));
4176                         if (mod == 3) {
4177                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4178                         } else {
4179                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4180                                                 s->mem_index, MO_LEUL);
4181                         }
4182                     } else { /* pextrq */
4183 #ifdef TARGET_X86_64
4184                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4185                                         offsetof(CPUX86State,
4186                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4187                         if (mod == 3) {
4188                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4189                         } else {
4190                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4191                                                 s->mem_index, MO_LEQ);
4192                         }
4193 #else
4194                         goto illegal_op;
4195 #endif
4196                     }
4197                     break;
4198                 case 0x17: /* extractps */
4199                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4200                                             xmm_regs[reg].ZMM_L(val & 3)));
4201                     if (mod == 3) {
4202                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4203                     } else {
4204                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4205                                            s->mem_index, MO_LEUL);
4206                     }
4207                     break;
4208                 case 0x20: /* pinsrb */
4209                     if (mod == 3) {
4210                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4211                     } else {
4212                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4213                                            s->mem_index, MO_UB);
4214                     }
4215                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4216                                             xmm_regs[reg].ZMM_B(val & 15)));
4217                     break;
4218                 case 0x21: /* insertps */
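                         /* insertps imm8: bits 7:6 select the source
                            lane, bits 5:4 the destination lane, and
                            bits 3:0 a mask of destination lanes to
                            clear.  */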
4219                     if (mod == 3) {
4220                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4221                                         offsetof(CPUX86State,xmm_regs[rm]
4222                                                 .ZMM_L((val >> 6) & 3)));
4223                     } else {
4224                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4225                                             s->mem_index, MO_LEUL);
4226                     }
4227                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4228                                     offsetof(CPUX86State,xmm_regs[reg]
4229                                             .ZMM_L((val >> 4) & 3)));
4230                     if ((val >> 0) & 1)
4231                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4232                                         cpu_env, offsetof(CPUX86State,
4233                                                 xmm_regs[reg].ZMM_L(0)));
4234                     if ((val >> 1) & 1)
4235                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4236                                         cpu_env, offsetof(CPUX86State,
4237                                                 xmm_regs[reg].ZMM_L(1)));
4238                     if ((val >> 2) & 1)
4239                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4240                                         cpu_env, offsetof(CPUX86State,
4241                                                 xmm_regs[reg].ZMM_L(2)));
4242                     if ((val >> 3) & 1)
4243                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4244                                         cpu_env, offsetof(CPUX86State,
4245                                                 xmm_regs[reg].ZMM_L(3)));
4246                     break;
4247                 case 0x22:
4248                     if (ot == MO_32) { /* pinsrd */
4249                         if (mod == 3) {
4250                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4251                         } else {
4252                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4253                                                 s->mem_index, MO_LEUL);
4254                         }
4255                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4256                                         offsetof(CPUX86State,
4257                                                 xmm_regs[reg].ZMM_L(val & 3)));
4258                     } else { /* pinsrq */
4259 #ifdef TARGET_X86_64
4260                         if (mod == 3) {
4261                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4262                         } else {
4263                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4264                                                 s->mem_index, MO_LEQ);
4265                         }
4266                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4267                                         offsetof(CPUX86State,
4268                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4269 #else
4270                         goto illegal_op;
4271 #endif
4272                     }
4273                     break;
4274                 }
4275                 return;
4276             }
4277 
4278             if (b1) {
4279                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4280                 if (mod == 3) {
4281                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4282                 } else {
4283                     op2_offset = offsetof(CPUX86State,xmm_t0);
4284                     gen_lea_modrm(env, s, modrm);
4285                     gen_ldo_env_A0(s, op2_offset);
4286                 }
4287             } else {
4288                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4289                 if (mod == 3) {
4290                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4291                 } else {
4292                     op2_offset = offsetof(CPUX86State,mmx_t0);
4293                     gen_lea_modrm(env, s, modrm);
4294                     gen_ldq_env_A0(s, op2_offset);
4295                 }
4296             }
4297             val = x86_ldub_code(env, s);
4298 
4299             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4300                 set_cc_op(s, CC_OP_EFLAGS);
4301 
4302                 if (s->dflag == MO_64) {
4303                     /* The helper must use the entire 64-bit gp registers */
4304                     val |= 1 << 8;
4305                 }
4306             }
4307 
4308             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4309             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4310             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4311             break;
4312 
4313         case 0x33a:
4314             /* Various integer extensions at 0f 3a f[0-f].  */
4315             b = modrm | (b1 << 8);
4316             modrm = x86_ldub_code(env, s);
4317             reg = ((modrm >> 3) & 7) | rex_r;
4318 
4319             switch (b) {
4320             case 0x3f0: /* rorx Gy,Ey, Ib */
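                     /* rorx rotates right by an immediate count without
                        reading or writing any flags.  */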
4321                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4322                     || !(s->prefix & PREFIX_VEX)
4323                     || s->vex_l != 0) {
4324                     goto illegal_op;
4325                 }
4326                 ot = mo_64_32(s->dflag);
4327                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4328                 b = x86_ldub_code(env, s);
4329                 if (ot == MO_64) {
4330                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4331                 } else {
4332                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4333                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4334                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4335                 }
4336                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4337                 break;
4338 
4339             default:
4340                 goto unknown_op;
4341             }
4342             break;
4343 
4344         default:
4345         unknown_op:
4346             gen_unknown_opcode(env, s);
4347             return;
4348         }
4349     } else {
4350         /* generic MMX or SSE operation */
4351         switch(b) {
4352         case 0x70: /* pshufx insn */
4353         case 0xc6: /* shufps, shufpd */
4354         case 0xc2: /* compare insns */
4355             s->rip_offset = 1;
4356             break;
4357         default:
4358             break;
4359         }
4360         if (is_xmm) {
4361             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4362             if (mod != 3) {
4363                 int sz = 4;
4364 
4365                 gen_lea_modrm(env, s, modrm);
4366                 op2_offset = offsetof(CPUX86State,xmm_t0);
4367 
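                     /* Scalar operations only touch 32 or 64 bits of
                        the memory operand, so load just that much into
                        xmm_t0 rather than a full 16 bytes; this avoids
                        touching memory the insn would never access.  */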
4368                 switch (b) {
4369                 case 0x50 ... 0x5a:
4370                 case 0x5c ... 0x5f:
4371                 case 0xc2:
4372                     /* Most sse scalar operations.  */
4373                     if (b1 == 2) {
4374                         sz = 2;
4375                     } else if (b1 == 3) {
4376                         sz = 3;
4377                     }
4378                     break;
4379 
4380                 case 0x2e:  /* ucomis[sd] */
4381                 case 0x2f:  /* comis[sd] */
4382                     if (b1 == 0) {
4383                         sz = 2;
4384                     } else {
4385                         sz = 3;
4386                     }
4387                     break;
4388                 }
4389 
4390                 switch (sz) {
4391                 case 2:
4392                     /* 32 bit access */
4393                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4394                     tcg_gen_st32_tl(s->T0, cpu_env,
4395                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4396                     break;
4397                 case 3:
4398                     /* 64 bit access */
4399                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4400                     break;
4401                 default:
4402                     /* 128 bit access */
4403                     gen_ldo_env_A0(s, op2_offset);
4404                     break;
4405                 }
4406             } else {
4407                 rm = (modrm & 7) | REX_B(s);
4408                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4409             }
4410         } else {
4411             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4412             if (mod != 3) {
4413                 gen_lea_modrm(env, s, modrm);
4414                 op2_offset = offsetof(CPUX86State,mmx_t0);
4415                 gen_ldq_env_A0(s, op2_offset);
4416             } else {
4417                 rm = (modrm & 7);
4418                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4419             }
4420         }
4421         switch(b) {
4422         case 0x0f: /* 3DNow! data insns */
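                 /* 3DNow! encodes the actual operation in an imm8
                    suffix byte, which indexes sse_op_table5.  */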
4423             val = x86_ldub_code(env, s);
4424             sse_fn_epp = sse_op_table5[val];
4425             if (!sse_fn_epp) {
4426                 goto unknown_op;
4427             }
4428             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4429                 goto illegal_op;
4430             }
4431             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4432             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4433             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4434             break;
4435         case 0x70: /* pshufx insn */
4436         case 0xc6: /* shufps, shufpd */
4437             val = x86_ldub_code(env, s);
4438             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4439             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4440             /* XXX: introduce a new table? */
4441             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4442             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4443             break;
4444         case 0xc2:
4445             /* compare insns */
4446             val = x86_ldub_code(env, s);
4447             if (val >= 8)
4448                 goto unknown_op;
4449             sse_fn_epp = sse_op_table4[val][b1];
4450 
4451             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4452             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4453             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4454             break;
4455         case 0xf7:
4456             /* maskmov: we must prepare A0 */
4457             if (mod != 3)
4458                 goto illegal_op;
4459             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4460             gen_extu(s->aflag, s->A0);
4461             gen_add_A0_ds_seg(s);
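                 /* The implicit store target is rDI in the DS segment
                    (subject to segment override); the register operand
                    supplies the byte mask selecting which bytes are
                    written.  */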
4462 
4463             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4464             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4465             /* XXX: introduce a new table? */
4466             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4467             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4468             break;
4469         default:
4470             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4471             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4472             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4473             break;
4474         }
4475         if (b == 0x2e || b == 0x2f) {
4476             set_cc_op(s, CC_OP_EFLAGS);
4477         }
4478     }
4479 }
4480 
4481 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4482    be stopped.  Returns the next pc value.  */
4483 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4484 {
4485     CPUX86State *env = cpu->env_ptr;
4486     int b, prefixes;
4487     int shift;
4488     MemOp ot, aflag, dflag;
4489     int modrm, reg, rm, mod, op, opreg, val;
4490     target_ulong next_eip, tval;
4491     int rex_w, rex_r;
4492     target_ulong pc_start = s->base.pc_next;
4493 
4494     s->pc_start = s->pc = pc_start;
4495     s->override = -1;
4496 #ifdef TARGET_X86_64
4497     s->rex_x = 0;
4498     s->rex_b = 0;
4499     s->x86_64_hregs = false;
4500 #endif
4501     s->rip_offset = 0; /* for RIP-relative addressing */
4502     s->vex_l = 0;
4503     s->vex_v = 0;
4504     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4505         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4506         return s->pc;
4507     }
4508 
4509     prefixes = 0;
4510     rex_w = -1;
4511     rex_r = 0;
4512 
4513  next_byte:
4514     b = x86_ldub_code(env, s);
4515     /* Collect prefixes.  */
4516     switch (b) {
4517     case 0xf3:
4518         prefixes |= PREFIX_REPZ;
4519         goto next_byte;
4520     case 0xf2:
4521         prefixes |= PREFIX_REPNZ;
4522         goto next_byte;
4523     case 0xf0:
4524         prefixes |= PREFIX_LOCK;
4525         goto next_byte;
4526     case 0x2e:
4527         s->override = R_CS;
4528         goto next_byte;
4529     case 0x36:
4530         s->override = R_SS;
4531         goto next_byte;
4532     case 0x3e:
4533         s->override = R_DS;
4534         goto next_byte;
4535     case 0x26:
4536         s->override = R_ES;
4537         goto next_byte;
4538     case 0x64:
4539         s->override = R_FS;
4540         goto next_byte;
4541     case 0x65:
4542         s->override = R_GS;
4543         goto next_byte;
4544     case 0x66:
4545         prefixes |= PREFIX_DATA;
4546         goto next_byte;
4547     case 0x67:
4548         prefixes |= PREFIX_ADR;
4549         goto next_byte;
4550 #ifdef TARGET_X86_64
4551     case 0x40 ... 0x4f:
4552         if (CODE64(s)) {
4553             /* REX prefix */
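                 /*
                  * REX.R, REX.X and REX.B are pre-shifted to bit 3
                  * (value 8) so they can simply be OR'ed into the 3-bit
                  * reg/index/base fields decoded from ModRM and SIB.
                  */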
4554             rex_w = (b >> 3) & 1;
4555             rex_r = (b & 0x4) << 1;
4556             s->rex_x = (b & 0x2) << 2;
4557             REX_B(s) = (b & 0x1) << 3;
4558             /* select uniform byte register addressing */
4559             s->x86_64_hregs = true;
4560             goto next_byte;
4561         }
4562         break;
4563 #endif
4564     case 0xc5: /* 2-byte VEX */
4565     case 0xc4: /* 3-byte VEX */
4566         /* VEX prefixes are only valid in 32-bit and 64-bit mode;
4567            in 16-bit and vm86 modes, C4/C5 decode as LES/LDS.  */
4568         if (s->code32 && !s->vm86) {
4569             static const int pp_prefix[4] = {
4570                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4571             };
4572             int vex3, vex2 = x86_ldub_code(env, s);
4573 
4574             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4575                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4576                    otherwise the instruction is LES or LDS.  */
4577                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4578                 break;
4579             }
4580 
4581             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4582             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4583                             | PREFIX_LOCK | PREFIX_DATA)) {
4584                 goto illegal_op;
4585             }
4586 #ifdef TARGET_X86_64
4587             if (s->x86_64_hregs) {
4588                 goto illegal_op;
4589             }
4590 #endif
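             /*
              * VEX stores R, X, B and vvvv in one's complement; undo
              * the inversion here, pre-shifting R/X/B to bit 3 just as
              * in the REX case above.
              */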
4591             rex_r = (~vex2 >> 4) & 8;
4592             if (b == 0xc5) {
4593                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4594                 vex3 = vex2;
4595                 b = x86_ldub_code(env, s) | 0x100;
4596             } else {
4597                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4598 #ifdef TARGET_X86_64
4599                 s->rex_x = (~vex2 >> 3) & 8;
4600                 s->rex_b = (~vex2 >> 2) & 8;
4601 #endif
4602                 vex3 = x86_ldub_code(env, s);
4603                 rex_w = (vex3 >> 7) & 1;
4604                 switch (vex2 & 0x1f) {
4605                 case 0x01: /* Implied 0f leading opcode bytes.  */
4606                     b = x86_ldub_code(env, s) | 0x100;
4607                     break;
4608                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4609                     b = 0x138;
4610                     break;
4611                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4612                     b = 0x13a;
4613                     break;
4614                 default:   /* Reserved for future use.  */
4615                     goto unknown_op;
4616                 }
4617             }
4618             s->vex_v = (~vex3 >> 3) & 0xf;
4619             s->vex_l = (vex3 >> 2) & 1;
4620             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4621         }
4622         break;
4623     }
4624 
4625     /* Post-process prefixes.  */
4626     if (CODE64(s)) {
4627         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4628            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4629            over 0x66 if both are present.  */
4630         dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4631         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4632         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4633     } else {
4634         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4635         if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4636             dflag = MO_32;
4637         } else {
4638             dflag = MO_16;
4639         }
4640         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4641         if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4642             aflag = MO_32;
4643         }  else {
4644             aflag = MO_16;
4645         }
4646     }
4647 
4648     s->prefix = prefixes;
4649     s->aflag = aflag;
4650     s->dflag = dflag;
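     /*
      * Worked example (64-bit mode): 66 41 89 c8 decodes as
      * "mov r8w, cx": REX.W is clear, so the 0x66 prefix wins and
      * dflag = MO_16, while REX.B promotes rm 0 to r8.
      */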
4651 
4652     /* now check op code */
4653  reswitch:
4654     switch(b) {
4655     case 0x0f:
4656         /**************************/
4657         /* extended op code */
4658         b = x86_ldub_code(env, s) | 0x100;
4659         goto reswitch;
4660 
4661         /**************************/
4662         /* arith & logic */
4663     case 0x00 ... 0x05:
4664     case 0x08 ... 0x0d:
4665     case 0x10 ... 0x15:
4666     case 0x18 ... 0x1d:
4667     case 0x20 ... 0x25:
4668     case 0x28 ... 0x2d:
4669     case 0x30 ... 0x35:
4670     case 0x38 ... 0x3d:
4671         {
4672             int op, f, val;
4673             op = (b >> 3) & 7;
4674             f = (b >> 1) & 3;
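                 /*
                  * These opcodes encode the operation in bits 5:3
                  * (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP) and the operand
                  * form in bits 2:1 (0 = Ev,Gv; 1 = Gv,Ev; 2 = A,Iv);
                  * bit 0 selects byte versus full-size operands.
                  */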
4675 
4676             ot = mo_b_d(b, dflag);
4677 
4678             switch(f) {
4679             case 0: /* OP Ev, Gv */
4680                 modrm = x86_ldub_code(env, s);
4681                 reg = ((modrm >> 3) & 7) | rex_r;
4682                 mod = (modrm >> 6) & 3;
4683                 rm = (modrm & 7) | REX_B(s);
4684                 if (mod != 3) {
4685                     gen_lea_modrm(env, s, modrm);
4686                     opreg = OR_TMP0;
4687                 } else if (op == OP_XORL && rm == reg) {
4688                 xor_zero:
4689                     /* xor reg, reg optimisation */
4690                     set_cc_op(s, CC_OP_CLR);
4691                     tcg_gen_movi_tl(s->T0, 0);
4692                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4693                     break;
4694                 } else {
4695                     opreg = rm;
4696                 }
4697                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4698                 gen_op(s, op, ot, opreg);
4699                 break;
4700             case 1: /* OP Gv, Ev */
4701                 modrm = x86_ldub_code(env, s);
4702                 mod = (modrm >> 6) & 3;
4703                 reg = ((modrm >> 3) & 7) | rex_r;
4704                 rm = (modrm & 7) | REX_B(s);
4705                 if (mod != 3) {
4706                     gen_lea_modrm(env, s, modrm);
4707                     gen_op_ld_v(s, ot, s->T1, s->A0);
4708                 } else if (op == OP_XORL && rm == reg) {
4709                     goto xor_zero;
4710                 } else {
4711                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4712                 }
4713                 gen_op(s, op, ot, reg);
4714                 break;
4715             case 2: /* OP A, Iv */
4716                 val = insn_get(env, s, ot);
4717                 tcg_gen_movi_tl(s->T1, val);
4718                 gen_op(s, op, ot, OR_EAX);
4719                 break;
4720             }
4721         }
4722         break;
4723 
4724     case 0x82:
4725         if (CODE64(s))
4726             goto illegal_op;
4727         /* fall through */
4728     case 0x80: /* GRP1 */
4729     case 0x81:
4730     case 0x83:
4731         {
4732             int val;
4733 
4734             ot = mo_b_d(b, dflag);
4735 
4736             modrm = x86_ldub_code(env, s);
4737             mod = (modrm >> 6) & 3;
4738             rm = (modrm & 7) | REX_B(s);
4739             op = (modrm >> 3) & 7;
4740 
4741             if (mod != 3) {
4742                 if (b == 0x83)
4743                     s->rip_offset = 1;
4744                 else
4745                     s->rip_offset = insn_const_size(ot);
4746                 gen_lea_modrm(env, s, modrm);
4747                 opreg = OR_TMP0;
4748             } else {
4749                 opreg = rm;
4750             }
4751 
4752             switch(b) {
4753             default:
4754             case 0x80:
4755             case 0x81:
4756             case 0x82:
4757                 val = insn_get(env, s, ot);
4758                 break;
4759             case 0x83:
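                     /* Unlike 0x80/0x81/0x82, 0x83 takes an imm8 that
                        is sign-extended to the operand size. */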
4760                 val = (int8_t)insn_get(env, s, MO_8);
4761                 break;
4762             }
4763             tcg_gen_movi_tl(s->T1, val);
4764             gen_op(s, op, ot, opreg);
4765         }
4766         break;
4767 
4768         /**************************/
4769         /* inc, dec, and other misc arith */
4770     case 0x40 ... 0x47: /* inc Gv */
4771         ot = dflag;
4772         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4773         break;
4774     case 0x48 ... 0x4f: /* dec Gv */
4775         ot = dflag;
4776         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4777         break;
4778     case 0xf6: /* GRP3 */
4779     case 0xf7:
4780         ot = mo_b_d(b, dflag);
4781 
4782         modrm = x86_ldub_code(env, s);
4783         mod = (modrm >> 6) & 3;
4784         rm = (modrm & 7) | REX_B(s);
4785         op = (modrm >> 3) & 7;
4786         if (mod != 3) {
4787             if (op == 0) {
4788                 s->rip_offset = insn_const_size(ot);
4789             }
4790             gen_lea_modrm(env, s, modrm);
4791             /* For those below that handle locked memory, don't load here.  */
4792             if (!(s->prefix & PREFIX_LOCK)
4793                 || op != 2) {
4794                 gen_op_ld_v(s, ot, s->T0, s->A0);
4795             }
4796         } else {
4797             gen_op_mov_v_reg(s, ot, s->T0, rm);
4798         }
4799 
4800         switch(op) {
4801         case 0: /* test */
4802             val = insn_get(env, s, ot);
4803             tcg_gen_movi_tl(s->T1, val);
4804             gen_op_testl_T0_T1_cc(s);
4805             set_cc_op(s, CC_OP_LOGICB + ot);
4806             break;
4807         case 2: /* not */
4808             if (s->prefix & PREFIX_LOCK) {
4809                 if (mod == 3) {
4810                     goto illegal_op;
4811                 }
4812                 tcg_gen_movi_tl(s->T0, ~0);
4813                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4814                                             s->mem_index, ot | MO_LE);
4815             } else {
4816                 tcg_gen_not_tl(s->T0, s->T0);
4817                 if (mod != 3) {
4818                     gen_op_st_v(s, ot, s->T0, s->A0);
4819                 } else {
4820                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4821                 }
4822             }
4823             break;
4824         case 3: /* neg */
4825             if (s->prefix & PREFIX_LOCK) {
4826                 TCGLabel *label1;
4827                 TCGv a0, t0, t1, t2;
4828 
4829                 if (mod == 3) {
4830                     goto illegal_op;
4831                 }
4832                 a0 = tcg_temp_local_new();
4833                 t0 = tcg_temp_local_new();
4834                 label1 = gen_new_label();
4835 
4836                 tcg_gen_mov_tl(a0, s->A0);
4837                 tcg_gen_mov_tl(t0, s->T0);
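                     /*
                      * Locked NEG is emulated as a compare-and-swap
                      * loop: compute -old and try to cmpxchg it in;
                      * if the memory changed underneath us, reload
                      * the new value and retry.
                      */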
4838 
4839                 gen_set_label(label1);
4840                 t1 = tcg_temp_new();
4841                 t2 = tcg_temp_new();
4842                 tcg_gen_mov_tl(t2, t0);
4843                 tcg_gen_neg_tl(t1, t0);
4844                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4845                                           s->mem_index, ot | MO_LE);
4846                 tcg_temp_free(t1);
4847                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4848 
4849                 tcg_temp_free(t2);
4850                 tcg_temp_free(a0);
4851                 tcg_gen_mov_tl(s->T0, t0);
4852                 tcg_temp_free(t0);
4853             } else {
4854                 tcg_gen_neg_tl(s->T0, s->T0);
4855                 if (mod != 3) {
4856                     gen_op_st_v(s, ot, s->T0, s->A0);
4857                 } else {
4858                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4859                 }
4860             }
4861             gen_op_update_neg_cc(s);
4862             set_cc_op(s, CC_OP_SUBB + ot);
4863             break;
4864         case 4: /* mul */
4865             switch(ot) {
4866             case MO_8:
4867                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4868                 tcg_gen_ext8u_tl(s->T0, s->T0);
4869                 tcg_gen_ext8u_tl(s->T1, s->T1);
4870                 /* XXX: use 32 bit mul which could be faster */
4871                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4872                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4873                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4874                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4875                 set_cc_op(s, CC_OP_MULB);
4876                 break;
4877             case MO_16:
4878                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4879                 tcg_gen_ext16u_tl(s->T0, s->T0);
4880                 tcg_gen_ext16u_tl(s->T1, s->T1);
4881                 /* XXX: use 32 bit mul which could be faster */
4882                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4883                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4884                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4885                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4886                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4887                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4888                 set_cc_op(s, CC_OP_MULW);
4889                 break;
4890             default:
4891             case MO_32:
4892                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4893                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4894                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4895                                   s->tmp2_i32, s->tmp3_i32);
4896                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4897                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4898                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4899                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4900                 set_cc_op(s, CC_OP_MULL);
4901                 break;
4902 #ifdef TARGET_X86_64
4903             case MO_64:
4904                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4905                                   s->T0, cpu_regs[R_EAX]);
4906                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4907                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4908                 set_cc_op(s, CC_OP_MULQ);
4909                 break;
4910 #endif
4911             }
4912             break;
4913         case 5: /* imul */
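             /*
              * For IMUL, cc_src = result - sext(result) is nonzero
              * exactly when the signed product does not fit in the
              * destination, which is what sets CF/OF.
              */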
4914             switch(ot) {
4915             case MO_8:
4916                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4917                 tcg_gen_ext8s_tl(s->T0, s->T0);
4918                 tcg_gen_ext8s_tl(s->T1, s->T1);
4919                 /* XXX: use 32 bit mul which could be faster */
4920                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4921                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4922                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4923                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4924                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4925                 set_cc_op(s, CC_OP_MULB);
4926                 break;
4927             case MO_16:
4928                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4929                 tcg_gen_ext16s_tl(s->T0, s->T0);
4930                 tcg_gen_ext16s_tl(s->T1, s->T1);
4931                 /* XXX: use 32 bit mul which could be faster */
4932                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4933                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4934                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4935                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
4936                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4937                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4938                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4939                 set_cc_op(s, CC_OP_MULW);
4940                 break;
4941             default:
4942             case MO_32:
4943                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4944                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4945                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4946                                   s->tmp2_i32, s->tmp3_i32);
4947                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4948                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4949                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4950                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4951                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4952                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4953                 set_cc_op(s, CC_OP_MULL);
4954                 break;
4955 #ifdef TARGET_X86_64
4956             case MO_64:
4957                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4958                                   s->T0, cpu_regs[R_EAX]);
4959                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4960                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4961                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4962                 set_cc_op(s, CC_OP_MULQ);
4963                 break;
4964 #endif
4965             }
4966             break;
4967         case 6: /* div */
4968             switch(ot) {
4969             case MO_8:
4970                 gen_helper_divb_AL(cpu_env, s->T0);
4971                 break;
4972             case MO_16:
4973                 gen_helper_divw_AX(cpu_env, s->T0);
4974                 break;
4975             default:
4976             case MO_32:
4977                 gen_helper_divl_EAX(cpu_env, s->T0);
4978                 break;
4979 #ifdef TARGET_X86_64
4980             case MO_64:
4981                 gen_helper_divq_EAX(cpu_env, s->T0);
4982                 break;
4983 #endif
4984             }
4985             break;
4986         case 7: /* idiv */
4987             switch(ot) {
4988             case MO_8:
4989                 gen_helper_idivb_AL(cpu_env, s->T0);
4990                 break;
4991             case MO_16:
4992                 gen_helper_idivw_AX(cpu_env, s->T0);
4993                 break;
4994             default:
4995             case MO_32:
4996                 gen_helper_idivl_EAX(cpu_env, s->T0);
4997                 break;
4998 #ifdef TARGET_X86_64
4999             case MO_64:
5000                 gen_helper_idivq_EAX(cpu_env, s->T0);
5001                 break;
5002 #endif
5003             }
5004             break;
5005         default:
5006             goto unknown_op;
5007         }
5008         break;
5009 
5010     case 0xfe: /* GRP4 */
5011     case 0xff: /* GRP5 */
5012         ot = mo_b_d(b, dflag);
5013 
5014         modrm = x86_ldub_code(env, s);
5015         mod = (modrm >> 6) & 3;
5016         rm = (modrm & 7) | REX_B(s);
5017         op = (modrm >> 3) & 7;
5018         if (op >= 2 && b == 0xfe) {
5019             goto unknown_op;
5020         }
5021         if (CODE64(s)) {
5022             if (op == 2 || op == 4) {
5023                 /* operand size for jumps is 64 bit */
5024                 ot = MO_64;
5025             } else if (op == 3 || op == 5) {
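                     /* lcall/ljmp Ev: the offset half of the far
                        pointer is 16, 32 or (with REX.W) 64 bits. */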
5026                 ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5027             } else if (op == 6) {
5028                 /* default push size is 64 bit */
5029                 ot = mo_pushpop(s, dflag);
5030             }
5031         }
5032         if (mod != 3) {
5033             gen_lea_modrm(env, s, modrm);
5034             if (op >= 2 && op != 3 && op != 5)
5035                 gen_op_ld_v(s, ot, s->T0, s->A0);
5036         } else {
5037             gen_op_mov_v_reg(s, ot, s->T0, rm);
5038         }
5039 
5040         switch(op) {
5041         case 0: /* inc Ev */
5042             if (mod != 3)
5043                 opreg = OR_TMP0;
5044             else
5045                 opreg = rm;
5046             gen_inc(s, ot, opreg, 1);
5047             break;
5048         case 1: /* dec Ev */
5049             if (mod != 3)
5050                 opreg = OR_TMP0;
5051             else
5052                 opreg = rm;
5053             gen_inc(s, ot, opreg, -1);
5054             break;
5055         case 2: /* call Ev */
5056             /* XXX: optimize: the zero-extension is redundant when the operand was loaded from memory */
5057             if (dflag == MO_16) {
5058                 tcg_gen_ext16u_tl(s->T0, s->T0);
5059             }
5060             next_eip = s->pc - s->cs_base;
5061             tcg_gen_movi_tl(s->T1, next_eip);
5062             gen_push_v(s, s->T1);
5063             gen_op_jmp_v(s->T0);
5064             gen_bnd_jmp(s);
5065             gen_jr(s, s->T0);
5066             break;
5067         case 3: /* lcall Ev */
5068             if (mod == 3) {
5069                 goto illegal_op;
5070             }
5071             gen_op_ld_v(s, ot, s->T1, s->A0);
5072             gen_add_A0_im(s, 1 << ot);
5073             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5074         do_lcall:
5075             if (s->pe && !s->vm86) {
5076                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5077                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5078                                            tcg_const_i32(dflag - 1),
5079                                            tcg_const_tl(s->pc - s->cs_base));
5080             } else {
5081                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5082                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5083                                       tcg_const_i32(dflag - 1),
5084                                       tcg_const_i32(s->pc - s->cs_base));
5085             }
5086             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5087             gen_jr(s, s->tmp4);
5088             break;
5089         case 4: /* jmp Ev */
5090             if (dflag == MO_16) {
5091                 tcg_gen_ext16u_tl(s->T0, s->T0);
5092             }
5093             gen_op_jmp_v(s->T0);
5094             gen_bnd_jmp(s);
5095             gen_jr(s, s->T0);
5096             break;
5097         case 5: /* ljmp Ev */
5098             if (mod == 3) {
5099                 goto illegal_op;
5100             }
5101             gen_op_ld_v(s, ot, s->T1, s->A0);
5102             gen_add_A0_im(s, 1 << ot);
5103             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5104         do_ljmp:
5105             if (s->pe && !s->vm86) {
5106                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5107                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5108                                           tcg_const_tl(s->pc - s->cs_base));
5109             } else {
5110                 gen_op_movl_seg_T0_vm(s, R_CS);
5111                 gen_op_jmp_v(s->T1);
5112             }
5113             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5114             gen_jr(s, s->tmp4);
5115             break;
5116         case 6: /* push Ev */
5117             gen_push_v(s, s->T0);
5118             break;
5119         default:
5120             goto unknown_op;
5121         }
5122         break;
5123 
5124     case 0x84: /* test Ev, Gv */
5125     case 0x85:
5126         ot = mo_b_d(b, dflag);
5127 
5128         modrm = x86_ldub_code(env, s);
5129         reg = ((modrm >> 3) & 7) | rex_r;
5130 
5131         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5132         gen_op_mov_v_reg(s, ot, s->T1, reg);
5133         gen_op_testl_T0_T1_cc(s);
5134         set_cc_op(s, CC_OP_LOGICB + ot);
5135         break;
5136 
5137     case 0xa8: /* test eAX, Iv */
5138     case 0xa9:
5139         ot = mo_b_d(b, dflag);
5140         val = insn_get(env, s, ot);
5141 
5142         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5143         tcg_gen_movi_tl(s->T1, val);
5144         gen_op_testl_T0_T1_cc(s);
5145         set_cc_op(s, CC_OP_LOGICB + ot);
5146         break;
5147 
5148     case 0x98: /* CWDE/CBW */
5149         switch (dflag) {
5150 #ifdef TARGET_X86_64
5151         case MO_64:
5152             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5153             tcg_gen_ext32s_tl(s->T0, s->T0);
5154             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5155             break;
5156 #endif
5157         case MO_32:
5158             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5159             tcg_gen_ext16s_tl(s->T0, s->T0);
5160             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5161             break;
5162         case MO_16:
5163             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5164             tcg_gen_ext8s_tl(s->T0, s->T0);
5165             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5166             break;
5167         default:
5168             tcg_abort();
5169         }
5170         break;
5171     case 0x99: /* CDQ/CWD */
5172         switch (dflag) {
5173 #ifdef TARGET_X86_64
5174         case MO_64:
5175             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5176             tcg_gen_sari_tl(s->T0, s->T0, 63);
5177             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5178             break;
5179 #endif
5180         case MO_32:
5181             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5182             tcg_gen_ext32s_tl(s->T0, s->T0);
5183             tcg_gen_sari_tl(s->T0, s->T0, 31);
5184             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5185             break;
5186         case MO_16:
5187             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5188             tcg_gen_ext16s_tl(s->T0, s->T0);
5189             tcg_gen_sari_tl(s->T0, s->T0, 15);
5190             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5191             break;
5192         default:
5193             tcg_abort();
5194         }
5195         break;
5196     case 0x1af: /* imul Gv, Ev */
5197     case 0x69: /* imul Gv, Ev, I */
5198     case 0x6b:
5199         ot = dflag;
5200         modrm = x86_ldub_code(env, s);
5201         reg = ((modrm >> 3) & 7) | rex_r;
5202         if (b == 0x69)
5203             s->rip_offset = insn_const_size(ot);
5204         else if (b == 0x6b)
5205             s->rip_offset = 1;
5206         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5207         if (b == 0x69) {
5208             val = insn_get(env, s, ot);
5209             tcg_gen_movi_tl(s->T1, val);
5210         } else if (b == 0x6b) {
5211             val = (int8_t)insn_get(env, s, MO_8);
5212             tcg_gen_movi_tl(s->T1, val);
5213         } else {
5214             gen_op_mov_v_reg(s, ot, s->T1, reg);
5215         }
5216         switch (ot) {
5217 #ifdef TARGET_X86_64
5218         case MO_64:
5219             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5220             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5221             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5222             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5223             break;
5224 #endif
5225         case MO_32:
5226             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5227             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5228             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5229                               s->tmp2_i32, s->tmp3_i32);
5230             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5231             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5232             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5233             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5234             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5235             break;
5236         default:
5237             tcg_gen_ext16s_tl(s->T0, s->T0);
5238             tcg_gen_ext16s_tl(s->T1, s->T1);
5239             /* XXX: use 32 bit mul which could be faster */
5240             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5241             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5242             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5243             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5244             gen_op_mov_reg_v(s, ot, reg, s->T0);
5245             break;
5246         }
5247         set_cc_op(s, CC_OP_MULB + ot);
5248         break;
5249     case 0x1c0:
5250     case 0x1c1: /* xadd Ev, Gv */
5251         ot = mo_b_d(b, dflag);
5252         modrm = x86_ldub_code(env, s);
5253         reg = ((modrm >> 3) & 7) | rex_r;
5254         mod = (modrm >> 6) & 3;
5255         gen_op_mov_v_reg(s, ot, s->T0, reg);
5256         if (mod == 3) {
5257             rm = (modrm & 7) | REX_B(s);
5258             gen_op_mov_v_reg(s, ot, s->T1, rm);
5259             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5260             gen_op_mov_reg_v(s, ot, reg, s->T1);
5261             gen_op_mov_reg_v(s, ot, rm, s->T0);
5262         } else {
5263             gen_lea_modrm(env, s, modrm);
5264             if (s->prefix & PREFIX_LOCK) {
5265                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5266                                             s->mem_index, ot | MO_LE);
5267                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5268             } else {
5269                 gen_op_ld_v(s, ot, s->T1, s->A0);
5270                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5271                 gen_op_st_v(s, ot, s->T0, s->A0);
5272             }
5273             gen_op_mov_reg_v(s, ot, reg, s->T1);
5274         }
5275         gen_op_update2_cc(s);
5276         set_cc_op(s, CC_OP_ADDB + ot);
5277         break;
5278     case 0x1b0:
5279     case 0x1b1: /* cmpxchg Ev, Gv */
5280         {
5281             TCGv oldv, newv, cmpv;
5282 
5283             ot = mo_b_d(b, dflag);
5284             modrm = x86_ldub_code(env, s);
5285             reg = ((modrm >> 3) & 7) | rex_r;
5286             mod = (modrm >> 6) & 3;
5287             oldv = tcg_temp_new();
5288             newv = tcg_temp_new();
5289             cmpv = tcg_temp_new();
5290             gen_op_mov_v_reg(s, ot, newv, reg);
5291             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5292 
5293             if (s->prefix & PREFIX_LOCK) {
5294                 if (mod == 3) {
5295                     goto illegal_op;
5296                 }
5297                 gen_lea_modrm(env, s, modrm);
5298                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5299                                           s->mem_index, ot | MO_LE);
5300                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5301             } else {
5302                 if (mod == 3) {
5303                     rm = (modrm & 7) | REX_B(s);
5304                     gen_op_mov_v_reg(s, ot, oldv, rm);
5305                 } else {
5306                     gen_lea_modrm(env, s, modrm);
5307                     gen_op_ld_v(s, ot, oldv, s->A0);
5308                     rm = 0; /* avoid warning */
5309                 }
5310                 gen_extu(ot, oldv);
5311                 gen_extu(ot, cmpv);
5312                 /* store value = (old == cmp ? new : old);  */
5313                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5314                 if (mod == 3) {
5315                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5316                     gen_op_mov_reg_v(s, ot, rm, newv);
5317                 } else {
5318                     /* Perform an unconditional store cycle, as a physical
5319                        CPU does; it must happen before the accumulator is
5320                        changed, so the instruction stays idempotent if the
5321                        store faults and is restarted */
5322                     gen_op_st_v(s, ot, newv, s->A0);
5323                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5324                 }
5325             }
5326             tcg_gen_mov_tl(cpu_cc_src, oldv);
5327             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5328             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5329             set_cc_op(s, CC_OP_SUBB + ot);
5330             tcg_temp_free(oldv);
5331             tcg_temp_free(newv);
5332             tcg_temp_free(cmpv);
5333         }
5334         break;
5335     case 0x1c7: /* cmpxchg8b */
5336         modrm = x86_ldub_code(env, s);
5337         mod = (modrm >> 6) & 3;
5338         switch ((modrm >> 3) & 7) {
5339         case 1: /* CMPXCHG8B, CMPXCHG16B */
5340             if (mod == 3) {
5341                 goto illegal_op;
5342             }
5343 #ifdef TARGET_X86_64
5344             if (dflag == MO_64) {
5345                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5346                     goto illegal_op;
5347                 }
5348                 gen_lea_modrm(env, s, modrm);
5349                 if ((s->prefix & PREFIX_LOCK) &&
5350                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5351                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5352                 } else {
5353                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5354                 }
5355                 set_cc_op(s, CC_OP_EFLAGS);
5356                 break;
5357             }
5358 #endif
5359             if (!(s->cpuid_features & CPUID_CX8)) {
5360                 goto illegal_op;
5361             }
5362             gen_lea_modrm(env, s, modrm);
5363             if ((s->prefix & PREFIX_LOCK) &&
5364                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5365                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5366             } else {
5367                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5368             }
5369             set_cc_op(s, CC_OP_EFLAGS);
5370             break;
5371 
5372         case 7: /* RDSEED */
5373         case 6: /* RDRAND */
5374             if (mod != 3 ||
5375                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5376                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5377                 goto illegal_op;
5378             }
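             /* Under icount, RDRAND is handled like an I/O insn:
                account for it with gen_io_start() and end the TB
                right after it. */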
5379             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5380                 gen_io_start();
5381             }
5382             gen_helper_rdrand(s->T0, cpu_env);
5383             rm = (modrm & 7) | REX_B(s);
5384             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5385             set_cc_op(s, CC_OP_EFLAGS);
5386             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5387                 gen_jmp(s, s->pc - s->cs_base);
5388             }
5389             break;
5390 
5391         default:
5392             goto illegal_op;
5393         }
5394         break;
5395 
5396         /**************************/
5397         /* push/pop */
5398     case 0x50 ... 0x57: /* push */
5399         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5400         gen_push_v(s, s->T0);
5401         break;
5402     case 0x58 ... 0x5f: /* pop */
5403         ot = gen_pop_T0(s);
5404         /* NOTE: order is important for pop %sp */
5405         gen_pop_update(s, ot);
5406         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5407         break;
5408     case 0x60: /* pusha */
5409         if (CODE64(s))
5410             goto illegal_op;
5411         gen_pusha(s);
5412         break;
5413     case 0x61: /* popa */
5414         if (CODE64(s))
5415             goto illegal_op;
5416         gen_popa(s);
5417         break;
5418     case 0x68: /* push Iv */
5419     case 0x6a:
5420         ot = mo_pushpop(s, dflag);
5421         if (b == 0x68)
5422             val = insn_get(env, s, ot);
5423         else
5424             val = (int8_t)insn_get(env, s, MO_8);
5425         tcg_gen_movi_tl(s->T0, val);
5426         gen_push_v(s, s->T0);
5427         break;
5428     case 0x8f: /* pop Ev */
5429         modrm = x86_ldub_code(env, s);
5430         mod = (modrm >> 6) & 3;
5431         ot = gen_pop_T0(s);
5432         if (mod == 3) {
5433             /* NOTE: order is important for pop %sp */
5434             gen_pop_update(s, ot);
5435             rm = (modrm & 7) | REX_B(s);
5436             gen_op_mov_reg_v(s, ot, rm, s->T0);
5437         } else {
5438             /* NOTE: order is important too for MMU exceptions */
5439             s->popl_esp_hack = 1 << ot;
5440             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5441             s->popl_esp_hack = 0;
5442             gen_pop_update(s, ot);
5443         }
5444         break;
5445     case 0xc8: /* enter */
5446         {
5447             int level;
5448             val = x86_lduw_code(env, s);
5449             level = x86_ldub_code(env, s);
5450             gen_enter(s, val, level);
5451         }
5452         break;
5453     case 0xc9: /* leave */
5454         gen_leave(s);
5455         break;
5456     case 0x06: /* push es */
5457     case 0x0e: /* push cs */
5458     case 0x16: /* push ss */
5459     case 0x1e: /* push ds */
5460         if (CODE64(s))
5461             goto illegal_op;
5462         gen_op_movl_T0_seg(s, b >> 3);
5463         gen_push_v(s, s->T0);
5464         break;
5465     case 0x1a0: /* push fs */
5466     case 0x1a8: /* push gs */
5467         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5468         gen_push_v(s, s->T0);
5469         break;
5470     case 0x07: /* pop es */
5471     case 0x17: /* pop ss */
5472     case 0x1f: /* pop ds */
5473         if (CODE64(s))
5474             goto illegal_op;
5475         reg = b >> 3;
5476         ot = gen_pop_T0(s);
5477         gen_movl_seg_T0(s, reg);
5478         gen_pop_update(s, ot);
5479         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5480         if (s->base.is_jmp) {
5481             gen_jmp_im(s, s->pc - s->cs_base);
5482             if (reg == R_SS) {
5483                 s->tf = 0;
5484                 gen_eob_inhibit_irq(s, true);
5485             } else {
5486                 gen_eob(s);
5487             }
5488         }
5489         break;
5490     case 0x1a1: /* pop fs */
5491     case 0x1a9: /* pop gs */
5492         ot = gen_pop_T0(s);
5493         gen_movl_seg_T0(s, (b >> 3) & 7);
5494         gen_pop_update(s, ot);
5495         if (s->base.is_jmp) {
5496             gen_jmp_im(s, s->pc - s->cs_base);
5497             gen_eob(s);
5498         }
5499         break;
5500 
5501         /**************************/
5502         /* mov */
5503     case 0x88:
5504     case 0x89: /* mov Gv, Ev */
5505         ot = mo_b_d(b, dflag);
5506         modrm = x86_ldub_code(env, s);
5507         reg = ((modrm >> 3) & 7) | rex_r;
5508 
5509         /* generate a generic store */
5510         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5511         break;
5512     case 0xc6:
5513     case 0xc7: /* mov Ev, Iv */
5514         ot = mo_b_d(b, dflag);
5515         modrm = x86_ldub_code(env, s);
5516         mod = (modrm >> 6) & 3;
5517         if (mod != 3) {
5518             s->rip_offset = insn_const_size(ot);
5519             gen_lea_modrm(env, s, modrm);
5520         }
5521         val = insn_get(env, s, ot);
5522         tcg_gen_movi_tl(s->T0, val);
5523         if (mod != 3) {
5524             gen_op_st_v(s, ot, s->T0, s->A0);
5525         } else {
5526             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5527         }
5528         break;
5529     case 0x8a:
5530     case 0x8b: /* mov Ev, Gv */
5531         ot = mo_b_d(b, dflag);
5532         modrm = x86_ldub_code(env, s);
5533         reg = ((modrm >> 3) & 7) | rex_r;
5534 
5535         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5536         gen_op_mov_reg_v(s, ot, reg, s->T0);
5537         break;
5538     case 0x8e: /* mov seg, Gv */
5539         modrm = x86_ldub_code(env, s);
5540         reg = (modrm >> 3) & 7;
5541         if (reg >= 6 || reg == R_CS)
5542             goto illegal_op;
5543         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5544         gen_movl_seg_T0(s, reg);
5545         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5546         if (s->base.is_jmp) {
5547             gen_jmp_im(s, s->pc - s->cs_base);
5548             if (reg == R_SS) {
5549                 s->tf = 0;
5550                 gen_eob_inhibit_irq(s, true);
5551             } else {
5552                 gen_eob(s);
5553             }
5554         }
5555         break;
5556     case 0x8c: /* mov Gv, seg */
5557         modrm = x86_ldub_code(env, s);
5558         reg = (modrm >> 3) & 7;
5559         mod = (modrm >> 6) & 3;
5560         if (reg >= 6)
5561             goto illegal_op;
5562         gen_op_movl_T0_seg(s, reg);
5563         ot = mod == 3 ? dflag : MO_16;
5564         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5565         break;
5566 
5567     case 0x1b6: /* movzbS Gv, Eb */
5568     case 0x1b7: /* movzwS Gv, Ew */
5569     case 0x1be: /* movsbS Gv, Eb */
5570     case 0x1bf: /* movswS Gv, Ew */
5571         {
5572             MemOp d_ot;
5573             MemOp s_ot;
5574 
5575             /* d_ot is the size of the destination */
5576             d_ot = dflag;
5577             /* ot is the size of the source */
5578             ot = (b & 1) + MO_8;
5579             /* s_ot is the sign+size of the source */
5580             s_ot = b & 8 ? MO_SIGN | ot : ot;
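             /* Bit 3 of the opcode separates movsx (0x1be/0x1bf)
                from movzx (0x1b6/0x1b7). */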
5581 
5582             modrm = x86_ldub_code(env, s);
5583             reg = ((modrm >> 3) & 7) | rex_r;
5584             mod = (modrm >> 6) & 3;
5585             rm = (modrm & 7) | REX_B(s);
5586 
5587             if (mod == 3) {
5588                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
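                         /* AH/CH/DH/BH live in bits 15:8 of the
                            corresponding low register, hence the
                            sextract from cpu_regs[rm - 4]. */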
5589                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5590                 } else {
5591                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5592                     switch (s_ot) {
5593                     case MO_UB:
5594                         tcg_gen_ext8u_tl(s->T0, s->T0);
5595                         break;
5596                     case MO_SB:
5597                         tcg_gen_ext8s_tl(s->T0, s->T0);
5598                         break;
5599                     case MO_UW:
5600                         tcg_gen_ext16u_tl(s->T0, s->T0);
5601                         break;
5602                     default:
5603                     case MO_SW:
5604                         tcg_gen_ext16s_tl(s->T0, s->T0);
5605                         break;
5606                     }
5607                 }
5608                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5609             } else {
5610                 gen_lea_modrm(env, s, modrm);
5611                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5612                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5613             }
5614         }
5615         break;
5616 
5617     case 0x8d: /* lea */
5618         modrm = x86_ldub_code(env, s);
5619         mod = (modrm >> 6) & 3;
5620         if (mod == 3)
5621             goto illegal_op;
5622         reg = ((modrm >> 3) & 7) | rex_r;
5623         {
5624             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5625             TCGv ea = gen_lea_modrm_1(s, a);
5626             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5627             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5628         }
5629         break;
5630 
5631     case 0xa0: /* mov EAX, Ov */
5632     case 0xa1:
5633     case 0xa2: /* mov Ov, EAX */
5634     case 0xa3:
5635         {
5636             target_ulong offset_addr;
5637 
5638             ot = mo_b_d(b, dflag);
5639             switch (s->aflag) {
5640 #ifdef TARGET_X86_64
5641             case MO_64:
5642                 offset_addr = x86_ldq_code(env, s);
5643                 break;
5644 #endif
5645             default:
5646                 offset_addr = insn_get(env, s, s->aflag);
5647                 break;
5648             }
5649             tcg_gen_movi_tl(s->A0, offset_addr);
5650             gen_add_A0_ds_seg(s);
5651             if ((b & 2) == 0) {
5652                 gen_op_ld_v(s, ot, s->T0, s->A0);
5653                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5654             } else {
5655                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5656                 gen_op_st_v(s, ot, s->T0, s->A0);
5657             }
5658         }
5659         break;
5660     case 0xd7: /* xlat */
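         /* XLAT: AL = [seg:(E/R)BX + unsigned AL] */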
5661         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5662         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5663         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5664         gen_extu(s->aflag, s->A0);
5665         gen_add_A0_ds_seg(s);
5666         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5667         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5668         break;
5669     case 0xb0 ... 0xb7: /* mov R, Ib */
5670         val = insn_get(env, s, MO_8);
5671         tcg_gen_movi_tl(s->T0, val);
5672         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5673         break;
5674     case 0xb8 ... 0xbf: /* mov R, Iv */
5675 #ifdef TARGET_X86_64
5676         if (dflag == MO_64) {
5677             uint64_t tmp;
5678             /* 64 bit case */
5679             tmp = x86_ldq_code(env, s);
5680             reg = (b & 7) | REX_B(s);
5681             tcg_gen_movi_tl(s->T0, tmp);
5682             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5683         } else
5684 #endif
5685         {
5686             ot = dflag;
5687             val = insn_get(env, s, ot);
5688             reg = (b & 7) | REX_B(s);
5689             tcg_gen_movi_tl(s->T0, val);
5690             gen_op_mov_reg_v(s, ot, reg, s->T0);
5691         }
5692         break;
5693 
5694     case 0x91 ... 0x97: /* xchg R, EAX */
5695     do_xchg_reg_eax:
5696         ot = dflag;
5697         reg = (b & 7) | REX_B(s);
5698         rm = R_EAX;
5699         goto do_xchg_reg;
5700     case 0x86:
5701     case 0x87: /* xchg Ev, Gv */
5702         ot = mo_b_d(b, dflag);
5703         modrm = x86_ldub_code(env, s);
5704         reg = ((modrm >> 3) & 7) | rex_r;
5705         mod = (modrm >> 6) & 3;
5706         if (mod == 3) {
5707             rm = (modrm & 7) | REX_B(s);
5708         do_xchg_reg:
5709             gen_op_mov_v_reg(s, ot, s->T0, reg);
5710             gen_op_mov_v_reg(s, ot, s->T1, rm);
5711             gen_op_mov_reg_v(s, ot, rm, s->T0);
5712             gen_op_mov_reg_v(s, ot, reg, s->T1);
5713         } else {
5714             gen_lea_modrm(env, s, modrm);
5715             gen_op_mov_v_reg(s, ot, s->T0, reg);
5716             /* for xchg, lock is implicit */
5717             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5718                                    s->mem_index, ot | MO_LE);
5719             gen_op_mov_reg_v(s, ot, reg, s->T1);
5720         }
5721         break;
5722     case 0xc4: /* les Gv */
5723         /* In CODE64 this is VEX3; see above.  */
5724         op = R_ES;
5725         goto do_lxx;
5726     case 0xc5: /* lds Gv */
5727         /* In CODE64 this is VEX2; see above.  */
5728         op = R_DS;
5729         goto do_lxx;
5730     case 0x1b2: /* lss Gv */
5731         op = R_SS;
5732         goto do_lxx;
5733     case 0x1b4: /* lfs Gv */
5734         op = R_FS;
5735         goto do_lxx;
5736     case 0x1b5: /* lgs Gv */
5737         op = R_GS;
5738     do_lxx:
5739         ot = dflag != MO_16 ? MO_32 : MO_16;
5740         modrm = x86_ldub_code(env, s);
5741         reg = ((modrm >> 3) & 7) | rex_r;
5742         mod = (modrm >> 6) & 3;
5743         if (mod == 3)
5744             goto illegal_op;
5745         gen_lea_modrm(env, s, modrm);
5746         gen_op_ld_v(s, ot, s->T1, s->A0);
5747         gen_add_A0_im(s, 1 << ot);
5748         /* load the segment first to handle exceptions properly */
5749         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5750         gen_movl_seg_T0(s, op);
5751         /* then put the data */
5752         gen_op_mov_reg_v(s, ot, reg, s->T1);
5753         if (s->base.is_jmp) {
5754             gen_jmp_im(s, s->pc - s->cs_base);
5755             gen_eob(s);
5756         }
5757         break;
5758 
5759         /************************/
5760         /* shifts */
5761     case 0xc0:
5762     case 0xc1:
5763         /* shift Ev,Ib */
5764         shift = 2;
5765     grp2:
5766         {
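                 /* Shared GRP2 decode: shift == 0 takes the count
                    from CL, shift == 1 uses a count of 1, and
                    shift == 2 reads an imm8 count below. */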
5767             ot = mo_b_d(b, dflag);
5768             modrm = x86_ldub_code(env, s);
5769             mod = (modrm >> 6) & 3;
5770             op = (modrm >> 3) & 7;
5771 
5772             if (mod != 3) {
5773                 if (shift == 2) {
5774                     s->rip_offset = 1;
5775                 }
5776                 gen_lea_modrm(env, s, modrm);
5777                 opreg = OR_TMP0;
5778             } else {
5779                 opreg = (modrm & 7) | REX_B(s);
5780             }
5781 
5782             /* simpler op */
5783             if (shift == 0) {
5784                 gen_shift(s, op, ot, opreg, OR_ECX);
5785             } else {
5786                 if (shift == 2) {
5787                     shift = x86_ldub_code(env, s);
5788                 }
5789                 gen_shifti(s, op, ot, opreg, shift);
5790             }
5791         }
5792         break;
5793     case 0xd0:
5794     case 0xd1:
5795         /* shift Ev,1 */
5796         shift = 1;
5797         goto grp2;
5798     case 0xd2:
5799     case 0xd3:
5800         /* shift Ev,cl */
5801         shift = 0;
5802         goto grp2;
5803 
5804     case 0x1a4: /* shld imm */
5805         op = 0;
5806         shift = 1;
5807         goto do_shiftd;
5808     case 0x1a5: /* shld cl */
5809         op = 0;
5810         shift = 0;
5811         goto do_shiftd;
5812     case 0x1ac: /* shrd imm */
5813         op = 1;
5814         shift = 1;
5815         goto do_shiftd;
5816     case 0x1ad: /* shrd cl */
5817         op = 1;
5818         shift = 0;
5819     do_shiftd:
5820         ot = dflag;
5821         modrm = x86_ldub_code(env, s);
5822         mod = (modrm >> 6) & 3;
5823         rm = (modrm & 7) | REX_B(s);
5824         reg = ((modrm >> 3) & 7) | rex_r;
5825         if (mod != 3) {
5826             gen_lea_modrm(env, s, modrm);
5827             opreg = OR_TMP0;
5828         } else {
5829             opreg = rm;
5830         }
5831         gen_op_mov_v_reg(s, ot, s->T1, reg);
5832 
5833         if (shift) {
5834             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5835             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5836             tcg_temp_free(imm);
5837         } else {
5838             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5839         }
5840         break;
5841 
5842         /************************/
5843         /* floats */
5844     case 0xd8 ... 0xdf:
5845         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5846             /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5847             /* XXX: what should happen on an illegal op? */
5848             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5849             break;
5850         }
5851         modrm = x86_ldub_code(env, s);
5852         mod = (modrm >> 6) & 3;
5853         rm = modrm & 7;
5854         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
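         /* op is a 6-bit FPU opcode index: bits 5:3 come from the
            D8..DF escape byte, bits 2:0 from the ModRM reg field. */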
5855         if (mod != 3) {
5856             /* memory op */
5857             gen_lea_modrm(env, s, modrm);
5858             switch(op) {
5859             case 0x00 ... 0x07: /* fxxxs */
5860             case 0x10 ... 0x17: /* fixxxl */
5861             case 0x20 ... 0x27: /* fxxxl */
5862             case 0x30 ... 0x37: /* fixxx */
5863                 {
5864                     int op1;
5865                     op1 = op & 7;
5866 
5867                     switch(op >> 4) {
5868                     case 0:
5869                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5870                                             s->mem_index, MO_LEUL);
5871                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5872                         break;
5873                     case 1:
5874                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5875                                             s->mem_index, MO_LEUL);
5876                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5877                         break;
5878                     case 2:
5879                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5880                                             s->mem_index, MO_LEQ);
5881                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5882                         break;
5883                     case 3:
5884                     default:
5885                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5886                                             s->mem_index, MO_LESW);
5887                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5888                         break;
5889                     }
5890 
5891                     gen_helper_fp_arith_ST0_FT0(op1);
5892                     if (op1 == 3) {
5893                         /* fcomp needs pop */
5894                         gen_helper_fpop(cpu_env);
5895                     }
5896                 }
5897                 break;
5898             case 0x08: /* flds */
5899             case 0x0a: /* fsts */
5900             case 0x0b: /* fstps */
5901             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5902             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5903             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5904                 switch(op & 7) {
5905                 case 0:
5906                     switch(op >> 4) {
5907                     case 0:
5908                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5909                                             s->mem_index, MO_LEUL);
5910                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5911                         break;
5912                     case 1:
5913                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5914                                             s->mem_index, MO_LEUL);
5915                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5916                         break;
5917                     case 2:
5918                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5919                                             s->mem_index, MO_LEQ);
5920                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5921                         break;
5922                     case 3:
5923                     default:
5924                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5925                                             s->mem_index, MO_LESW);
5926                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5927                         break;
5928                     }
5929                     break;
5930                 case 1:
5931                     /* XXX: fisttp is SSE3; the corresponding CPUID bit must be tested! */
5932                     switch(op >> 4) {
5933                     case 1:
5934                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5935                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5936                                             s->mem_index, MO_LEUL);
5937                         break;
5938                     case 2:
5939                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5940                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5941                                             s->mem_index, MO_LEQ);
5942                         break;
5943                     case 3:
5944                     default:
5945                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5946                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5947                                             s->mem_index, MO_LEUW);
5948                         break;
5949                     }
5950                     gen_helper_fpop(cpu_env);
5951                     break;
5952                 default:
5953                     switch(op >> 4) {
5954                     case 0:
5955                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5956                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5957                                             s->mem_index, MO_LEUL);
5958                         break;
5959                     case 1:
5960                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5961                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5962                                             s->mem_index, MO_LEUL);
5963                         break;
5964                     case 2:
5965                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5966                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5967                                             s->mem_index, MO_LEQ);
5968                         break;
5969                     case 3:
5970                     default:
5971                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5972                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5973                                             s->mem_index, MO_LEUW);
5974                         break;
5975                     }
5976                     if ((op & 7) == 3)
5977                         gen_helper_fpop(cpu_env);
5978                     break;
5979                 }
5980                 break;
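                /*
                 * A worked example of the decode above: for "fistps m16"
                 * (op == 0x3b) the low bits give op & 7 == 3 (store and
                 * pop) and op >> 4 == 3 (16-bit integer), so the value is
                 * stored with MO_LEUW and followed by an fpop.
                 */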
5981             case 0x0c: /* fldenv mem */
5982                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5983                 break;
5984             case 0x0d: /* fldcw mem */
5985                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5986                                     s->mem_index, MO_LEUW);
5987                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
5988                 break;
5989             case 0x0e: /* fnstenv mem */
5990                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5991                 break;
5992             case 0x0f: /* fnstcw mem */
5993                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5994                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5995                                     s->mem_index, MO_LEUW);
5996                 break;
5997             case 0x1d: /* fldt mem */
5998                 gen_helper_fldt_ST0(cpu_env, s->A0);
5999                 break;
6000             case 0x1f: /* fstpt mem */
6001                 gen_helper_fstt_ST0(cpu_env, s->A0);
6002                 gen_helper_fpop(cpu_env);
6003                 break;
6004             case 0x2c: /* frstor mem */
6005                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6006                 break;
6007             case 0x2e: /* fnsave mem */
6008                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6009                 break;
6010             case 0x2f: /* fnstsw mem */
6011                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6012                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6013                                     s->mem_index, MO_LEUW);
6014                 break;
6015             case 0x3c: /* fbld */
6016                 gen_helper_fbld_ST0(cpu_env, s->A0);
6017                 break;
6018             case 0x3e: /* fbstp */
6019                 gen_helper_fbst_ST0(cpu_env, s->A0);
6020                 gen_helper_fpop(cpu_env);
6021                 break;
6022             case 0x3d: /* fildll */
6023                 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6024                 gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6025                 break;
6026             case 0x3f: /* fistpll */
6027                 gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6028                 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6029                 gen_helper_fpop(cpu_env);
6030                 break;
6031             default:
6032                 goto unknown_op;
6033             }
6034         } else {
6035             /* register float ops */
6036             opreg = rm;
6037 
6038             switch(op) {
6039             case 0x08: /* fld sti */
6040                 gen_helper_fpush(cpu_env);
6041                 gen_helper_fmov_ST0_STN(cpu_env,
6042                                         tcg_const_i32((opreg + 1) & 7));
6043                 break;
6044             case 0x09: /* fxchg sti */
6045             case 0x29: /* fxchg4 sti, undocumented op */
6046             case 0x39: /* fxchg7 sti, undocumented op */
6047                 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6048                 break;
6049             case 0x0a: /* grp d9/2 */
6050                 switch(rm) {
6051                 case 0: /* fnop */
6052                     /* check exceptions (FreeBSD FPU probe) */
6053                     gen_helper_fwait(cpu_env);
6054                     break;
6055                 default:
6056                     goto unknown_op;
6057                 }
6058                 break;
6059             case 0x0c: /* grp d9/4 */
6060                 switch(rm) {
6061                 case 0: /* fchs */
6062                     gen_helper_fchs_ST0(cpu_env);
6063                     break;
6064                 case 1: /* fabs */
6065                     gen_helper_fabs_ST0(cpu_env);
6066                     break;
6067                 case 4: /* ftst */
6068                     gen_helper_fldz_FT0(cpu_env);
6069                     gen_helper_fcom_ST0_FT0(cpu_env);
6070                     break;
6071                 case 5: /* fxam */
6072                     gen_helper_fxam_ST0(cpu_env);
6073                     break;
6074                 default:
6075                     goto unknown_op;
6076                 }
6077                 break;
6078             case 0x0d: /* grp d9/5 */
6079                 {
6080                     switch(rm) {
6081                     case 0:
6082                         gen_helper_fpush(cpu_env);
6083                         gen_helper_fld1_ST0(cpu_env);
6084                         break;
6085                     case 1:
6086                         gen_helper_fpush(cpu_env);
6087                         gen_helper_fldl2t_ST0(cpu_env);
6088                         break;
6089                     case 2:
6090                         gen_helper_fpush(cpu_env);
6091                         gen_helper_fldl2e_ST0(cpu_env);
6092                         break;
6093                     case 3:
6094                         gen_helper_fpush(cpu_env);
6095                         gen_helper_fldpi_ST0(cpu_env);
6096                         break;
6097                     case 4:
6098                         gen_helper_fpush(cpu_env);
6099                         gen_helper_fldlg2_ST0(cpu_env);
6100                         break;
6101                     case 5:
6102                         gen_helper_fpush(cpu_env);
6103                         gen_helper_fldln2_ST0(cpu_env);
6104                         break;
6105                     case 6:
6106                         gen_helper_fpush(cpu_env);
6107                         gen_helper_fldz_ST0(cpu_env);
6108                         break;
6109                     default:
6110                         goto unknown_op;
6111                     }
6112                 }
6113                 break;
6114             case 0x0e: /* grp d9/6 */
6115                 switch(rm) {
6116                 case 0: /* f2xm1 */
6117                     gen_helper_f2xm1(cpu_env);
6118                     break;
6119                 case 1: /* fyl2x */
6120                     gen_helper_fyl2x(cpu_env);
6121                     break;
6122                 case 2: /* fptan */
6123                     gen_helper_fptan(cpu_env);
6124                     break;
6125                 case 3: /* fpatan */
6126                     gen_helper_fpatan(cpu_env);
6127                     break;
6128                 case 4: /* fxtract */
6129                     gen_helper_fxtract(cpu_env);
6130                     break;
6131                 case 5: /* fprem1 */
6132                     gen_helper_fprem1(cpu_env);
6133                     break;
6134                 case 6: /* fdecstp */
6135                     gen_helper_fdecstp(cpu_env);
6136                     break;
6137                 default:
6138                 case 7: /* fincstp */
6139                     gen_helper_fincstp(cpu_env);
6140                     break;
6141                 }
6142                 break;
6143             case 0x0f: /* grp d9/7 */
6144                 switch(rm) {
6145                 case 0: /* fprem */
6146                     gen_helper_fprem(cpu_env);
6147                     break;
6148                 case 1: /* fyl2xp1 */
6149                     gen_helper_fyl2xp1(cpu_env);
6150                     break;
6151                 case 2: /* fsqrt */
6152                     gen_helper_fsqrt(cpu_env);
6153                     break;
6154                 case 3: /* fsincos */
6155                     gen_helper_fsincos(cpu_env);
6156                     break;
6157                 case 5: /* fscale */
6158                     gen_helper_fscale(cpu_env);
6159                     break;
6160                 case 4: /* frndint */
6161                     gen_helper_frndint(cpu_env);
6162                     break;
6163                 case 6: /* fsin */
6164                     gen_helper_fsin(cpu_env);
6165                     break;
6166                 default:
6167                 case 7: /* fcos */
6168                     gen_helper_fcos(cpu_env);
6169                     break;
6170                 }
6171                 break;
6172             case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6173             case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6174             case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6175                 {
6176                     int op1;
6177 
6178                     op1 = op & 7;
6179                     if (op >= 0x20) {
6180                         gen_helper_fp_arith_STN_ST0(op1, opreg);
6181                         if (op >= 0x30)
6182                             gen_helper_fpop(cpu_env);
6183                     } else {
6184                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6185                         gen_helper_fp_arith_ST0_FT0(op1);
6186                     }
6187                 }
6188                 break;
6189             case 0x02: /* fcom */
6190             case 0x22: /* fcom2, undocumented op */
6191                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6192                 gen_helper_fcom_ST0_FT0(cpu_env);
6193                 break;
6194             case 0x03: /* fcomp */
6195             case 0x23: /* fcomp3, undocumented op */
6196             case 0x32: /* fcomp5, undocumented op */
6197                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6198                 gen_helper_fcom_ST0_FT0(cpu_env);
6199                 gen_helper_fpop(cpu_env);
6200                 break;
6201             case 0x15: /* da/5 */
6202                 switch(rm) {
6203                 case 1: /* fucompp */
6204                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6205                     gen_helper_fucom_ST0_FT0(cpu_env);
6206                     gen_helper_fpop(cpu_env);
6207                     gen_helper_fpop(cpu_env);
6208                     break;
6209                 default:
6210                     goto unknown_op;
6211                 }
6212                 break;
6213             case 0x1c:
6214                 switch(rm) {
6215                 case 0: /* feni (287 only, just do nop here) */
6216                     break;
6217                 case 1: /* fdisi (287 only, just do nop here) */
6218                     break;
6219                 case 2: /* fclex */
6220                     gen_helper_fclex(cpu_env);
6221                     break;
6222                 case 3: /* fninit */
6223                     gen_helper_fninit(cpu_env);
6224                     break;
6225                 case 4: /* fsetpm (287 only, just do nop here) */
6226                     break;
6227                 default:
6228                     goto unknown_op;
6229                 }
6230                 break;
6231             case 0x1d: /* fucomi */
6232                 if (!(s->cpuid_features & CPUID_CMOV)) {
6233                     goto illegal_op;
6234                 }
6235                 gen_update_cc_op(s);
6236                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6237                 gen_helper_fucomi_ST0_FT0(cpu_env);
6238                 set_cc_op(s, CC_OP_EFLAGS);
6239                 break;
6240             case 0x1e: /* fcomi */
6241                 if (!(s->cpuid_features & CPUID_CMOV)) {
6242                     goto illegal_op;
6243                 }
6244                 gen_update_cc_op(s);
6245                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6246                 gen_helper_fcomi_ST0_FT0(cpu_env);
6247                 set_cc_op(s, CC_OP_EFLAGS);
6248                 break;
6249             case 0x28: /* ffree sti */
6250                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6251                 break;
6252             case 0x2a: /* fst sti */
6253                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6254                 break;
6255             case 0x2b: /* fstp sti */
6256             case 0x0b: /* fstp1 sti, undocumented op */
6257             case 0x3a: /* fstp8 sti, undocumented op */
6258             case 0x3b: /* fstp9 sti, undocumented op */
6259                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6260                 gen_helper_fpop(cpu_env);
6261                 break;
6262             case 0x2c: /* fucom st(i) */
6263                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6264                 gen_helper_fucom_ST0_FT0(cpu_env);
6265                 break;
6266             case 0x2d: /* fucomp st(i) */
6267                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6268                 gen_helper_fucom_ST0_FT0(cpu_env);
6269                 gen_helper_fpop(cpu_env);
6270                 break;
6271             case 0x33: /* de/3 */
6272                 switch(rm) {
6273                 case 1: /* fcompp */
6274                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6275                     gen_helper_fcom_ST0_FT0(cpu_env);
6276                     gen_helper_fpop(cpu_env);
6277                     gen_helper_fpop(cpu_env);
6278                     break;
6279                 default:
6280                     goto unknown_op;
6281                 }
6282                 break;
6283             case 0x38: /* ffreep sti, undocumented op */
6284                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6285                 gen_helper_fpop(cpu_env);
6286                 break;
6287             case 0x3c: /* df/4 */
6288                 switch(rm) {
6289                 case 0:
6290                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6291                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6292                     gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6293                     break;
6294                 default:
6295                     goto unknown_op;
6296                 }
6297                 break;
6298             case 0x3d: /* fucomip */
6299                 if (!(s->cpuid_features & CPUID_CMOV)) {
6300                     goto illegal_op;
6301                 }
6302                 gen_update_cc_op(s);
6303                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6304                 gen_helper_fucomi_ST0_FT0(cpu_env);
6305                 gen_helper_fpop(cpu_env);
6306                 set_cc_op(s, CC_OP_EFLAGS);
6307                 break;
6308             case 0x3e: /* fcomip */
6309                 if (!(s->cpuid_features & CPUID_CMOV)) {
6310                     goto illegal_op;
6311                 }
6312                 gen_update_cc_op(s);
6313                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6314                 gen_helper_fcomi_ST0_FT0(cpu_env);
6315                 gen_helper_fpop(cpu_env);
6316                 set_cc_op(s, CC_OP_EFLAGS);
6317                 break;
6318             case 0x10 ... 0x13: /* fcmovxx */
6319             case 0x18 ... 0x1b:
6320                 {
6321                     int op1;
6322                     TCGLabel *l1;
6323                     static const uint8_t fcmov_cc[8] = {
6324                         (JCC_B << 1),
6325                         (JCC_Z << 1),
6326                         (JCC_BE << 1),
6327                         (JCC_P << 1),
6328                     };
6329 
6330                     if (!(s->cpuid_features & CPUID_CMOV)) {
6331                         goto illegal_op;
6332                     }
6333                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
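                    /*
                     * A sketch of the decode just above: the low bit of a
                     * jcc code is its negation bit, so fcmovb (op == 0x10)
                     * yields op1 == (JCC_B << 1) | 1, i.e. "not below";
                     * the branch below then skips the fmov exactly when
                     * the move must not happen.
                     */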
6334                     l1 = gen_new_label();
6335                     gen_jcc1_noeob(s, op1, l1);
6336                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6337                     gen_set_label(l1);
6338                 }
6339                 break;
6340             default:
6341                 goto unknown_op;
6342             }
6343         }
6344         break;
6345         /************************/
6346         /* string ops */
6347 
6348     case 0xa4: /* movsS */
6349     case 0xa5:
6350         ot = mo_b_d(b, dflag);
6351         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6352             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6353         } else {
6354             gen_movs(s, ot);
6355         }
6356         break;
6357 
6358     case 0xaa: /* stosS */
6359     case 0xab:
6360         ot = mo_b_d(b, dflag);
6361         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6362             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6363         } else {
6364             gen_stos(s, ot);
6365         }
6366         break;
6367     case 0xac: /* lodsS */
6368     case 0xad:
6369         ot = mo_b_d(b, dflag);
6370         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6371             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6372         } else {
6373             gen_lods(s, ot);
6374         }
6375         break;
6376     case 0xae: /* scasS */
6377     case 0xaf:
6378         ot = mo_b_d(b, dflag);
6379         if (prefixes & PREFIX_REPNZ) {
6380             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6381         } else if (prefixes & PREFIX_REPZ) {
6382             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6383         } else {
6384             gen_scas(s, ot);
6385         }
6386         break;
6387 
6388     case 0xa6: /* cmpsS */
6389     case 0xa7:
6390         ot = mo_b_d(b, dflag);
6391         if (prefixes & PREFIX_REPNZ) {
6392             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6393         } else if (prefixes & PREFIX_REPZ) {
6394             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6395         } else {
6396             gen_cmps(s, ot);
6397         }
6398         break;
6399     case 0x6c: /* insS */
6400     case 0x6d:
6401         ot = mo_b_d32(b, dflag);
6402         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6403         gen_check_io(s, ot, pc_start - s->cs_base,
6404                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6405         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6406             gen_io_start();
6407         }
6408         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6409             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6410             /* jump generated by gen_repz_ins */
6411         } else {
6412             gen_ins(s, ot);
6413             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6414                 gen_jmp(s, s->pc - s->cs_base);
6415             }
6416         }
6417         break;
6418     case 0x6e: /* outsS */
6419     case 0x6f:
6420         ot = mo_b_d32(b, dflag);
6421         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6422         gen_check_io(s, ot, pc_start - s->cs_base,
6423                      svm_is_rep(prefixes) | 4);
6424         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6425             gen_io_start();
6426         }
6427         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6428             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6429             /* jump generated by gen_repz_outs */
6430         } else {
6431             gen_outs(s, ot);
6432             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6433                 gen_jmp(s, s->pc - s->cs_base);
6434             }
6435         }
6436         break;
6437 
6438         /************************/
6439         /* port I/O */
6440 
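        /*
         * Note on the icount pattern shared by the port I/O cases below:
         * gen_io_start() flags the access for deterministic instruction
         * counting, and the trailing gen_jmp() ends the TB so the I/O
         * completes at an exact icount boundary.
         */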
6441     case 0xe4:
6442     case 0xe5:
6443         ot = mo_b_d32(b, dflag);
6444         val = x86_ldub_code(env, s);
6445         tcg_gen_movi_tl(s->T0, val);
6446         gen_check_io(s, ot, pc_start - s->cs_base,
6447                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6448         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6449             gen_io_start();
6450         }
6451         tcg_gen_movi_i32(s->tmp2_i32, val);
6452         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6453         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6454         gen_bpt_io(s, s->tmp2_i32, ot);
6455         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6456             gen_jmp(s, s->pc - s->cs_base);
6457         }
6458         break;
6459     case 0xe6:
6460     case 0xe7:
6461         ot = mo_b_d32(b, dflag);
6462         val = x86_ldub_code(env, s);
6463         tcg_gen_movi_tl(s->T0, val);
6464         gen_check_io(s, ot, pc_start - s->cs_base,
6465                      svm_is_rep(prefixes));
6466         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6467 
6468         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6469             gen_io_start();
6470         }
6471         tcg_gen_movi_i32(s->tmp2_i32, val);
6472         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6473         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6474         gen_bpt_io(s, s->tmp2_i32, ot);
6475         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6476             gen_jmp(s, s->pc - s->cs_base);
6477         }
6478         break;
6479     case 0xec:
6480     case 0xed:
6481         ot = mo_b_d32(b, dflag);
6482         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6483         gen_check_io(s, ot, pc_start - s->cs_base,
6484                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6485         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6486             gen_io_start();
6487         }
6488         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6489         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6490         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6491         gen_bpt_io(s, s->tmp2_i32, ot);
6492         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6493             gen_jmp(s, s->pc - s->cs_base);
6494         }
6495         break;
6496     case 0xee:
6497     case 0xef:
6498         ot = mo_b_d32(b, dflag);
6499         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6500         gen_check_io(s, ot, pc_start - s->cs_base,
6501                      svm_is_rep(prefixes));
6502         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6503 
6504         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6505             gen_io_start();
6506         }
6507         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6508         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6509         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6510         gen_bpt_io(s, s->tmp2_i32, ot);
6511         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6512             gen_jmp(s, s->pc - s->cs_base);
6513         }
6514         break;
6515 
6516         /************************/
6517         /* control */
6518     case 0xc2: /* ret im */
6519         val = x86_ldsw_code(env, s);
6520         ot = gen_pop_T0(s);
6521         gen_stack_update(s, val + (1 << ot));
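        /* For example, "ret 8" with a 32-bit stack pops the 4-byte
           return address (1 << ot) plus the 8 immediate bytes in this
           single adjustment. */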
6522         /* Note that gen_pop_T0 uses a zero-extending load.  */
6523         gen_op_jmp_v(s->T0);
6524         gen_bnd_jmp(s);
6525         gen_jr(s, s->T0);
6526         break;
6527     case 0xc3: /* ret */
6528         ot = gen_pop_T0(s);
6529         gen_pop_update(s, ot);
6530         /* Note that gen_pop_T0 uses a zero-extending load.  */
6531         gen_op_jmp_v(s->T0);
6532         gen_bnd_jmp(s);
6533         gen_jr(s, s->T0);
6534         break;
6535     case 0xca: /* lret im */
6536         val = x86_ldsw_code(env, s);
6537     do_lret:
6538         if (s->pe && !s->vm86) {
6539             gen_update_cc_op(s);
6540             gen_jmp_im(s, pc_start - s->cs_base);
6541             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6542                                       tcg_const_i32(val));
6543         } else {
6544             gen_stack_A0(s);
6545             /* pop offset */
6546             gen_op_ld_v(s, dflag, s->T0, s->A0);
6547             /* NOTE: keeping EIP updated is not a problem in case of
6548                exception */
6549             gen_op_jmp_v(s->T0);
6550             /* pop selector */
6551             gen_add_A0_im(s, 1 << dflag);
6552             gen_op_ld_v(s, dflag, s->T0, s->A0);
6553             gen_op_movl_seg_T0_vm(s, R_CS);
6554             /* add stack offset */
6555             gen_stack_update(s, val + (2 << dflag));
6556         }
6557         gen_eob(s);
6558         break;
6559     case 0xcb: /* lret */
6560         val = 0;
6561         goto do_lret;
6562     case 0xcf: /* iret */
6563         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
6564         if (!s->pe) {
6565             /* real mode */
6566             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6567             set_cc_op(s, CC_OP_EFLAGS);
6568         } else if (s->vm86) {
6569             if (s->iopl != 3) {
6570                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6571             } else {
6572                 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6573                 set_cc_op(s, CC_OP_EFLAGS);
6574             }
6575         } else {
6576             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6577                                       tcg_const_i32(s->pc - s->cs_base));
6578             set_cc_op(s, CC_OP_EFLAGS);
6579         }
6580         gen_eob(s);
6581         break;
6582     case 0xe8: /* call im */
6583         {
6584             if (dflag != MO_16) {
6585                 tval = (int32_t)insn_get(env, s, MO_32);
6586             } else {
6587                 tval = (int16_t)insn_get(env, s, MO_16);
6588             }
6589             next_eip = s->pc - s->cs_base;
6590             tval += next_eip;
6591             if (dflag == MO_16) {
6592                 tval &= 0xffff;
6593             } else if (!CODE64(s)) {
6594                 tval &= 0xffffffff;
6595             }
6596             tcg_gen_movi_tl(s->T0, next_eip);
6597             gen_push_v(s, s->T0);
6598             gen_bnd_jmp(s);
6599             gen_jmp(s, tval);
6600         }
6601         break;
6602     case 0x9a: /* lcall im */
6603         {
6604             unsigned int selector, offset;
6605 
6606             if (CODE64(s))
6607                 goto illegal_op;
6608             ot = dflag;
6609             offset = insn_get(env, s, ot);
6610             selector = insn_get(env, s, MO_16);
6611 
6612             tcg_gen_movi_tl(s->T0, selector);
6613             tcg_gen_movi_tl(s->T1, offset);
6614         }
6615         goto do_lcall;
6616     case 0xe9: /* jmp im */
6617         if (dflag != MO_16) {
6618             tval = (int32_t)insn_get(env, s, MO_32);
6619         } else {
6620             tval = (int16_t)insn_get(env, s, MO_16);
6621         }
6622         tval += s->pc - s->cs_base;
6623         if (dflag == MO_16) {
6624             tval &= 0xffff;
6625         } else if (!CODE64(s)) {
6626             tval &= 0xffffffff;
6627         }
6628         gen_bnd_jmp(s);
6629         gen_jmp(s, tval);
6630         break;
6631     case 0xea: /* ljmp im */
6632         {
6633             unsigned int selector, offset;
6634 
6635             if (CODE64(s))
6636                 goto illegal_op;
6637             ot = dflag;
6638             offset = insn_get(env, s, ot);
6639             selector = insn_get(env, s, MO_16);
6640 
6641             tcg_gen_movi_tl(s->T0, selector);
6642             tcg_gen_movi_tl(s->T1, offset);
6643         }
6644         goto do_ljmp;
6645     case 0xeb: /* jmp Jb */
6646         tval = (int8_t)insn_get(env, s, MO_8);
6647         tval += s->pc - s->cs_base;
6648         if (dflag == MO_16) {
6649             tval &= 0xffff;
6650         }
6651         gen_jmp(s, tval);
6652         break;
6653     case 0x70 ... 0x7f: /* jcc Jb */
6654         tval = (int8_t)insn_get(env, s, MO_8);
6655         goto do_jcc;
6656     case 0x180 ... 0x18f: /* jcc Jv */
6657         if (dflag != MO_16) {
6658             tval = (int32_t)insn_get(env, s, MO_32);
6659         } else {
6660             tval = (int16_t)insn_get(env, s, MO_16);
6661         }
6662     do_jcc:
6663         next_eip = s->pc - s->cs_base;
6664         tval += next_eip;
6665         if (dflag == MO_16) {
6666             tval &= 0xffff;
6667         }
6668         gen_bnd_jmp(s);
6669         gen_jcc(s, b, tval, next_eip);
6670         break;
6671 
6672     case 0x190 ... 0x19f: /* setcc Gv */
6673         modrm = x86_ldub_code(env, s);
6674         gen_setcc1(s, b, s->T0);
6675         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6676         break;
6677     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6678         if (!(s->cpuid_features & CPUID_CMOV)) {
6679             goto illegal_op;
6680         }
6681         ot = dflag;
6682         modrm = x86_ldub_code(env, s);
6683         reg = ((modrm >> 3) & 7) | rex_r;
6684         gen_cmovcc1(env, s, ot, b, modrm, reg);
6685         break;
6686 
6687         /************************/
6688         /* flags */
6689     case 0x9c: /* pushf */
6690         gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6691         if (s->vm86 && s->iopl != 3) {
6692             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6693         } else {
6694             gen_update_cc_op(s);
6695             gen_helper_read_eflags(s->T0, cpu_env);
6696             gen_push_v(s, s->T0);
6697         }
6698         break;
6699     case 0x9d: /* popf */
6700         gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6701         if (s->vm86 && s->iopl != 3) {
6702             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6703         } else {
6704             ot = gen_pop_T0(s);
6705             if (s->cpl == 0) {
6706                 if (dflag != MO_16) {
6707                     gen_helper_write_eflags(cpu_env, s->T0,
6708                                             tcg_const_i32((TF_MASK | AC_MASK |
6709                                                            ID_MASK | NT_MASK |
6710                                                            IF_MASK |
6711                                                            IOPL_MASK)));
6712                 } else {
6713                     gen_helper_write_eflags(cpu_env, s->T0,
6714                                             tcg_const_i32((TF_MASK | AC_MASK |
6715                                                            ID_MASK | NT_MASK |
6716                                                            IF_MASK | IOPL_MASK)
6717                                                           & 0xffff));
6718                 }
6719             } else {
6720                 if (s->cpl <= s->iopl) {
6721                     if (dflag != MO_16) {
6722                         gen_helper_write_eflags(cpu_env, s->T0,
6723                                                 tcg_const_i32((TF_MASK |
6724                                                                AC_MASK |
6725                                                                ID_MASK |
6726                                                                NT_MASK |
6727                                                                IF_MASK)));
6728                     } else {
6729                         gen_helper_write_eflags(cpu_env, s->T0,
6730                                                 tcg_const_i32((TF_MASK |
6731                                                                AC_MASK |
6732                                                                ID_MASK |
6733                                                                NT_MASK |
6734                                                                IF_MASK)
6735                                                               & 0xffff));
6736                     }
6737                 } else {
6738                     if (dflag != MO_16) {
6739                         gen_helper_write_eflags(cpu_env, s->T0,
6740                                            tcg_const_i32((TF_MASK | AC_MASK |
6741                                                           ID_MASK | NT_MASK)));
6742                     } else {
6743                         gen_helper_write_eflags(cpu_env, s->T0,
6744                                            tcg_const_i32((TF_MASK | AC_MASK |
6745                                                           ID_MASK | NT_MASK)
6746                                                          & 0xffff));
6747                     }
6748                 }
6749             }
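            /*
             * Summarizing the nesting above: TF, AC, ID and NT are always
             * writable; IF additionally requires CPL <= IOPL; IOPL itself
             * can change only at CPL 0; and MO_16 writes keep just the
             * low 16 bits of the mask.
             */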
6750             gen_pop_update(s, ot);
6751             set_cc_op(s, CC_OP_EFLAGS);
6752             /* abort translation because TF/AC flag may change */
6753             gen_jmp_im(s, s->pc - s->cs_base);
6754             gen_eob(s);
6755         }
6756         break;
6757     case 0x9e: /* sahf */
6758         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6759             goto illegal_op;
6760         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6761         gen_compute_eflags(s);
6762         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6763         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6764         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6765         break;
6766     case 0x9f: /* lahf */
6767         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6768             goto illegal_op;
6769         gen_compute_eflags(s);
6770         /* Note: gen_compute_eflags() only gives the condition codes */
6771         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6772         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6773         break;
6774     case 0xf5: /* cmc */
6775         gen_compute_eflags(s);
6776         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6777         break;
6778     case 0xf8: /* clc */
6779         gen_compute_eflags(s);
6780         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6781         break;
6782     case 0xf9: /* stc */
6783         gen_compute_eflags(s);
6784         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6785         break;
6786     case 0xfc: /* cld */
6787         tcg_gen_movi_i32(s->tmp2_i32, 1);
6788         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6789         break;
6790     case 0xfd: /* std */
6791         tcg_gen_movi_i32(s->tmp2_i32, -1);
6792         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6793         break;
6794 
6795         /************************/
6796         /* bit operations */
6797     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6798         ot = dflag;
6799         modrm = x86_ldub_code(env, s);
6800         op = (modrm >> 3) & 7;
6801         mod = (modrm >> 6) & 3;
6802         rm = (modrm & 7) | REX_B(s);
6803         if (mod != 3) {
6804             s->rip_offset = 1;
6805             gen_lea_modrm(env, s, modrm);
6806             if (!(s->prefix & PREFIX_LOCK)) {
6807                 gen_op_ld_v(s, ot, s->T0, s->A0);
6808             }
6809         } else {
6810             gen_op_mov_v_reg(s, ot, s->T0, rm);
6811         }
6812         /* load shift */
6813         val = x86_ldub_code(env, s);
6814         tcg_gen_movi_tl(s->T1, val);
6815         if (op < 4)
6816             goto unknown_op;
6817         op -= 4;
6818         goto bt_op;
6819     case 0x1a3: /* bt Gv, Ev */
6820         op = 0;
6821         goto do_btx;
6822     case 0x1ab: /* bts */
6823         op = 1;
6824         goto do_btx;
6825     case 0x1b3: /* btr */
6826         op = 2;
6827         goto do_btx;
6828     case 0x1bb: /* btc */
6829         op = 3;
6830     do_btx:
6831         ot = dflag;
6832         modrm = x86_ldub_code(env, s);
6833         reg = ((modrm >> 3) & 7) | rex_r;
6834         mod = (modrm >> 6) & 3;
6835         rm = (modrm & 7) | REX_B(s);
6836         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6837         if (mod != 3) {
6838             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6839             /* Specific case: fold the bit offset's word displacement into the address. */
6840             gen_exts(ot, s->T1);
6841             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6842             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6843             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6844             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6845             if (!(s->prefix & PREFIX_LOCK)) {
6846                 gen_op_ld_v(s, ot, s->T0, s->A0);
6847             }
6848         } else {
6849             gen_op_mov_v_reg(s, ot, s->T0, rm);
6850         }
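        /* A worked example of the displacement above, assuming a memory
           operand and ot == MO_16: a bit offset of 35 in T1 selects
           (35 >> 4) == 2 sixteen-bit words, i.e. a byte displacement of
           2 << 1 == 4, and bt_op below masks T1 down to bit
           35 & 15 == 3 within that word. */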
6851     bt_op:
6852         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6853         tcg_gen_movi_tl(s->tmp0, 1);
6854         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6855         if (s->prefix & PREFIX_LOCK) {
6856             switch (op) {
6857             case 0: /* bt */
6858                 /* Needs no atomic ops; we suppressed the normal
6859                    memory load for LOCK above so do it now.  */
6860                 gen_op_ld_v(s, ot, s->T0, s->A0);
6861                 break;
6862             case 1: /* bts */
6863                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6864                                            s->mem_index, ot | MO_LE);
6865                 break;
6866             case 2: /* btr */
6867                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6868                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6869                                             s->mem_index, ot | MO_LE);
6870                 break;
6871             default:
6872             case 3: /* btc */
6873                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6874                                             s->mem_index, ot | MO_LE);
6875                 break;
6876             }
6877             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6878         } else {
6879             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6880             switch (op) {
6881             case 0: /* bt */
6882                 /* Data already loaded; nothing to do.  */
6883                 break;
6884             case 1: /* bts */
6885                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6886                 break;
6887             case 2: /* btr */
6888                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6889                 break;
6890             default:
6891             case 3: /* btc */
6892                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6893                 break;
6894             }
6895             if (op != 0) {
6896                 if (mod != 3) {
6897                     gen_op_st_v(s, ot, s->T0, s->A0);
6898                 } else {
6899                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6900                 }
6901             }
6902         }
6903 
6904         /* Delay all CC updates until after the store above.  Note that
6905            C is the result of the test, Z is unchanged, and the others
6906            are all undefined.  */
6907         switch (s->cc_op) {
6908         case CC_OP_MULB ... CC_OP_MULQ:
6909         case CC_OP_ADDB ... CC_OP_ADDQ:
6910         case CC_OP_ADCB ... CC_OP_ADCQ:
6911         case CC_OP_SUBB ... CC_OP_SUBQ:
6912         case CC_OP_SBBB ... CC_OP_SBBQ:
6913         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6914         case CC_OP_INCB ... CC_OP_INCQ:
6915         case CC_OP_DECB ... CC_OP_DECQ:
6916         case CC_OP_SHLB ... CC_OP_SHLQ:
6917         case CC_OP_SARB ... CC_OP_SARQ:
6918         case CC_OP_BMILGB ... CC_OP_BMILGQ:
6919             /* Z was going to be computed from the non-zero status of CC_DST.
6920                We can get that same Z value (and the new C value) by leaving
6921                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6922                same width.  */
6923             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6924             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6925             break;
6926         default:
6927             /* Otherwise, generate EFLAGS and replace the C bit.  */
6928             gen_compute_eflags(s);
6929             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6930                                ctz32(CC_C), 1);
6931             break;
6932         }
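        /*
         * A concrete instance of the trick above: if the previous op was
         * CC_OP_ADDL, switching to CC_OP_SARL keeps Z computed from the
         * untouched CC_DST while C is taken from bit 0 of CC_SRC, which
         * now holds the shifted-out test bit from tmp4.
         */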
6933         break;
6934     case 0x1bc: /* bsf / tzcnt */
6935     case 0x1bd: /* bsr / lzcnt */
6936         ot = dflag;
6937         modrm = x86_ldub_code(env, s);
6938         reg = ((modrm >> 3) & 7) | rex_r;
6939         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6940         gen_extu(ot, s->T0);
6941 
6942         /* Note that lzcnt and tzcnt are in different extensions.  */
6943         if ((prefixes & PREFIX_REPZ)
6944             && (b & 1
6945                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6946                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6947             int size = 8 << ot;
6948             /* For lzcnt/tzcnt, the C bit is defined by the input. */
6949             tcg_gen_mov_tl(cpu_cc_src, s->T0);
6950             if (b & 1) {
6951                 /* For lzcnt, reduce the target_ulong result by the
6952                    number of zeros that we expect to find at the top.  */
6953                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6954                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
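                /* E.g. a 32-bit lzcnt on a 64-bit target: the operand was
                   zero-extended, so clz over target_ulong over-counts by
                   TARGET_LONG_BITS - 32 leading zeros, which the subi
                   removes; a zero input then correctly yields 32. */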
6955             } else {
6956                 /* For tzcnt, a zero input must return the operand size.  */
6957                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
6958             }
6959             /* For lzcnt/tzcnt, the Z bit is defined by the result.  */
6960             gen_op_update1_cc(s);
6961             set_cc_op(s, CC_OP_BMILGB + ot);
6962         } else {
6963             /* For bsr/bsf, only the Z bit is defined, and it reflects
6964                the input rather than the result.  */
6965             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6966             set_cc_op(s, CC_OP_LOGICB + ot);
6967 
6968             /* ??? The manual says that the output is undefined when the
6969                input is zero, but real hardware leaves it unchanged, and
6970                real programs appear to depend on that.  Accomplish this
6971                by passing the output as the value to return upon zero.  */
6972             if (b & 1) {
6973                 /* For bsr, return the bit index of the first 1 bit,
6974                    not the count of leading zeros.  */
6975                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6976                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6977                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6978             } else {
6979                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6980             }
6981         }
6982         gen_op_mov_reg_v(s, ot, reg, s->T0);
6983         break;
6984         /************************/
6985         /* bcd */
6986     case 0x27: /* daa */
6987         if (CODE64(s))
6988             goto illegal_op;
6989         gen_update_cc_op(s);
6990         gen_helper_daa(cpu_env);
6991         set_cc_op(s, CC_OP_EFLAGS);
6992         break;
6993     case 0x2f: /* das */
6994         if (CODE64(s))
6995             goto illegal_op;
6996         gen_update_cc_op(s);
6997         gen_helper_das(cpu_env);
6998         set_cc_op(s, CC_OP_EFLAGS);
6999         break;
7000     case 0x37: /* aaa */
7001         if (CODE64(s))
7002             goto illegal_op;
7003         gen_update_cc_op(s);
7004         gen_helper_aaa(cpu_env);
7005         set_cc_op(s, CC_OP_EFLAGS);
7006         break;
7007     case 0x3f: /* aas */
7008         if (CODE64(s))
7009             goto illegal_op;
7010         gen_update_cc_op(s);
7011         gen_helper_aas(cpu_env);
7012         set_cc_op(s, CC_OP_EFLAGS);
7013         break;
7014     case 0xd4: /* aam */
7015         if (CODE64(s))
7016             goto illegal_op;
7017         val = x86_ldub_code(env, s);
7018         if (val == 0) {
7019             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7020         } else {
7021             gen_helper_aam(cpu_env, tcg_const_i32(val));
7022             set_cc_op(s, CC_OP_LOGICB);
7023         }
7024         break;
7025     case 0xd5: /* aad */
7026         if (CODE64(s))
7027             goto illegal_op;
7028         val = x86_ldub_code(env, s);
7029         gen_helper_aad(cpu_env, tcg_const_i32(val));
7030         set_cc_op(s, CC_OP_LOGICB);
7031         break;
7032         /************************/
7033         /* misc */
7034     case 0x90: /* nop */
7035         /* XXX: correct lock test for all insns */
7036         if (prefixes & PREFIX_LOCK) {
7037             goto illegal_op;
7038         }
7039         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7040         if (REX_B(s)) {
7041             goto do_xchg_reg_eax;
7042         }
7043         if (prefixes & PREFIX_REPZ) {
7044             gen_update_cc_op(s);
7045             gen_jmp_im(s, pc_start - s->cs_base);
7046             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7047             s->base.is_jmp = DISAS_NORETURN;
7048         }
7049         break;
7050     case 0x9b: /* fwait */
7051         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7052             (HF_MP_MASK | HF_TS_MASK)) {
7053             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7054         } else {
7055             gen_helper_fwait(cpu_env);
7056         }
7057         break;
7058     case 0xcc: /* int3 */
7059         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7060         break;
7061     case 0xcd: /* int N */
7062         val = x86_ldub_code(env, s);
7063         if (s->vm86 && s->iopl != 3) {
7064             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7065         } else {
7066             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7067         }
7068         break;
7069     case 0xce: /* into */
7070         if (CODE64(s))
7071             goto illegal_op;
7072         gen_update_cc_op(s);
7073         gen_jmp_im(s, pc_start - s->cs_base);
7074         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7075         break;
7076 #ifdef WANT_ICEBP
7077     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7078         gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7079         gen_debug(s, pc_start - s->cs_base);
7080         break;
7081 #endif
7082     case 0xfa: /* cli */
7083         if (!s->vm86) {
7084             if (s->cpl <= s->iopl) {
7085                 gen_helper_cli(cpu_env);
7086             } else {
7087                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7088             }
7089         } else {
7090             if (s->iopl == 3) {
7091                 gen_helper_cli(cpu_env);
7092             } else {
7093                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7094             }
7095         }
7096         break;
7097     case 0xfb: /* sti */
7098         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7099             gen_helper_sti(cpu_env);
7100             /* interrupts are not taken until after the insn following sti */
7101             gen_jmp_im(s, s->pc - s->cs_base);
7102             gen_eob_inhibit_irq(s, true);
7103         } else {
7104             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7105         }
7106         break;
7107     case 0x62: /* bound */
7108         if (CODE64(s))
7109             goto illegal_op;
7110         ot = dflag;
7111         modrm = x86_ldub_code(env, s);
7112         reg = (modrm >> 3) & 7;
7113         mod = (modrm >> 6) & 3;
7114         if (mod == 3)
7115             goto illegal_op;
7116         gen_op_mov_v_reg(s, ot, s->T0, reg);
7117         gen_lea_modrm(env, s, modrm);
7118         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7119         if (ot == MO_16) {
7120             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7121         } else {
7122             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7123         }
7124         break;
7125     case 0x1c8 ... 0x1cf: /* bswap reg */
7126         reg = (b & 7) | REX_B(s);
7127 #ifdef TARGET_X86_64
7128         if (dflag == MO_64) {
7129             gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7130             tcg_gen_bswap64_i64(s->T0, s->T0);
7131             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7132         } else
7133 #endif
7134         {
7135             gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7136             tcg_gen_ext32u_tl(s->T0, s->T0);
7137             tcg_gen_bswap32_tl(s->T0, s->T0);
7138             gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7139         }
7140         break;
7141     case 0xd6: /* salc */
7142         if (CODE64(s))
7143             goto illegal_op;
7144         gen_compute_eflags_c(s, s->T0);
7145         tcg_gen_neg_tl(s->T0, s->T0);
7146         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7147         break;
7148     case 0xe0: /* loopnz */
7149     case 0xe1: /* loopz */
7150     case 0xe2: /* loop */
7151     case 0xe3: /* jecxz */
7152         {
7153             TCGLabel *l1, *l2, *l3;
7154 
7155             tval = (int8_t)insn_get(env, s, MO_8);
7156             next_eip = s->pc - s->cs_base;
7157             tval += next_eip;
7158             if (dflag == MO_16) {
7159                 tval &= 0xffff;
7160             }
7161 
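            /*
             * Control-flow sketch of the labels below: l1 is the taken
             * target (EIP := tval), l3 collects the not-taken paths
             * (EIP := next_eip), and both join at l2 before the single
             * gen_eob().
             */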
7162             l1 = gen_new_label();
7163             l2 = gen_new_label();
7164             l3 = gen_new_label();
7165             gen_update_cc_op(s);
7166             b &= 3;
7167             switch(b) {
7168             case 0: /* loopnz */
7169             case 1: /* loopz */
7170                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7171                 gen_op_jz_ecx(s, s->aflag, l3);
7172                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7173                 break;
7174             case 2: /* loop */
7175                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7176                 gen_op_jnz_ecx(s, s->aflag, l1);
7177                 break;
7178             default:
7179             case 3: /* jcxz */
7180                 gen_op_jz_ecx(s, s->aflag, l1);
7181                 break;
7182             }
7183 
7184             gen_set_label(l3);
7185             gen_jmp_im(s, next_eip);
7186             tcg_gen_br(l2);
7187 
7188             gen_set_label(l1);
7189             gen_jmp_im(s, tval);
7190             gen_set_label(l2);
7191             gen_eob(s);
7192         }
7193         break;
7194     case 0x130: /* wrmsr */
7195     case 0x132: /* rdmsr */
7196         if (s->cpl != 0) {
7197             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7198         } else {
7199             gen_update_cc_op(s);
7200             gen_jmp_im(s, pc_start - s->cs_base);
7201             if (b & 2) {
7202                 gen_helper_rdmsr(cpu_env);
7203             } else {
7204                 gen_helper_wrmsr(cpu_env);
7205             }
7206         }
7207         break;
7208     case 0x131: /* rdtsc */
7209         gen_update_cc_op(s);
7210         gen_jmp_im(s, pc_start - s->cs_base);
7211         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7212             gen_io_start();
7213         }
7214         gen_helper_rdtsc(cpu_env);
7215         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7216             gen_jmp(s, s->pc - s->cs_base);
7217         }
7218         break;
7219     case 0x133: /* rdpmc */
7220         gen_update_cc_op(s);
7221         gen_jmp_im(s, pc_start - s->cs_base);
7222         gen_helper_rdpmc(cpu_env);
7223         break;
7224     case 0x134: /* sysenter */
7225         /* On Intel CPUs, SYSENTER is valid in 64-bit mode */
7226         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7227             goto illegal_op;
7228         if (!s->pe) {
7229             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7230         } else {
7231             gen_helper_sysenter(cpu_env);
7232             gen_eob(s);
7233         }
7234         break;
7235     case 0x135: /* sysexit */
7236         /* On Intel CPUs, SYSEXIT is valid in 64-bit mode */
7237         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7238             goto illegal_op;
7239         if (!s->pe) {
7240             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7241         } else {
7242             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7243             gen_eob(s);
7244         }
7245         break;
7246 #ifdef TARGET_X86_64
7247     case 0x105: /* syscall */
7248         /* XXX: is it usable in real mode? */
7249         gen_update_cc_op(s);
7250         gen_jmp_im(s, pc_start - s->cs_base);
7251         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7252         /* TF handling for the syscall insn is different. The TF bit is checked
7253            after the syscall insn completes. This allows #DB to not be
7254            generated after one has entered CPL0 if TF is set in FMASK.  */
7255         gen_eob_worker(s, false, true);
7256         break;
7257     case 0x107: /* sysret */
7258         if (!s->pe) {
7259             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7260         } else {
7261             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7262             /* condition codes are modified only in long mode */
7263             if (s->lma) {
7264                 set_cc_op(s, CC_OP_EFLAGS);
7265             }
7266             /* TF handling for the sysret insn is different. The TF bit is
7267                checked after the sysret insn completes. This allows #DB to be
7268                generated "as if" the syscall insn in userspace has just
7269                completed.  */
7270             gen_eob_worker(s, false, true);
7271         }
7272         break;
7273 #endif
7274     case 0x1a2: /* cpuid */
7275         gen_update_cc_op(s);
7276         gen_jmp_im(s, pc_start - s->cs_base);
7277         gen_helper_cpuid(cpu_env);
7278         break;
7279     case 0xf4: /* hlt */
7280         if (s->cpl != 0) {
7281             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7282         } else {
7283             gen_update_cc_op(s);
7284             gen_jmp_im(s, pc_start - s->cs_base);
7285             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7286             s->base.is_jmp = DISAS_NORETURN;
7287         }
7288         break;
7289     case 0x100:
7290         modrm = x86_ldub_code(env, s);
7291         mod = (modrm >> 6) & 3;
7292         op = (modrm >> 3) & 7;
7293         switch(op) {
7294         case 0: /* sldt */
7295             if (!s->pe || s->vm86)
7296                 goto illegal_op;
7297             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7298             tcg_gen_ld32u_tl(s->T0, cpu_env,
7299                              offsetof(CPUX86State, ldt.selector));
7300             ot = mod == 3 ? dflag : MO_16;
7301             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7302             break;
7303         case 2: /* lldt */
7304             if (!s->pe || s->vm86)
7305                 goto illegal_op;
7306             if (s->cpl != 0) {
7307                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7308             } else {
7309                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7310                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7311                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7312                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7313             }
7314             break;
7315         case 1: /* str */
7316             if (!s->pe || s->vm86)
7317                 goto illegal_op;
7318             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7319             tcg_gen_ld32u_tl(s->T0, cpu_env,
7320                              offsetof(CPUX86State, tr.selector));
7321             ot = mod == 3 ? dflag : MO_16;
7322             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7323             break;
7324         case 3: /* ltr */
7325             if (!s->pe || s->vm86)
7326                 goto illegal_op;
7327             if (s->cpl != 0) {
7328                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7329             } else {
7330                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7331                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7332                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7333                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7334             }
7335             break;
7336         case 4: /* verr */
7337         case 5: /* verw */
7338             if (!s->pe || s->vm86)
7339                 goto illegal_op;
7340             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7341             gen_update_cc_op(s);
7342             if (op == 4) {
7343                 gen_helper_verr(cpu_env, s->T0);
7344             } else {
7345                 gen_helper_verw(cpu_env, s->T0);
7346             }
7347             set_cc_op(s, CC_OP_EFLAGS);
7348             break;
7349         default:
7350             goto unknown_op;
7351         }
7352         break;
7353 
7354     case 0x101:
7355         modrm = x86_ldub_code(env, s);
7356         switch (modrm) {
7357         CASE_MODRM_MEM_OP(0): /* sgdt */
7358             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7359             gen_lea_modrm(env, s, modrm);
7360             tcg_gen_ld32u_tl(s->T0,
7361                              cpu_env, offsetof(CPUX86State, gdt.limit));
7362             gen_op_st_v(s, MO_16, s->T0, s->A0);
7363             gen_add_A0_im(s, 2);
7364             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
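                 /* With a 16-bit operand size, only 24 bits of the
                    base are stored; the top byte is written as zero.  */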
7365             if (dflag == MO_16) {
7366                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7367             }
7368             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7369             break;
7370 
7371         case 0xc8: /* monitor */
7372             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7373                 goto illegal_op;
7374             }
7375             gen_update_cc_op(s);
7376             gen_jmp_im(s, pc_start - s->cs_base);
7377             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7378             gen_extu(s->aflag, s->A0);
7379             gen_add_A0_ds_seg(s);
7380             gen_helper_monitor(cpu_env, s->A0);
7381             break;
7382 
7383         case 0xc9: /* mwait */
7384             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7385                 goto illegal_op;
7386             }
7387             gen_update_cc_op(s);
7388             gen_jmp_im(s, pc_start - s->cs_base);
7389             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7390             gen_eob(s);
7391             break;
7392 
7393         case 0xca: /* clac */
7394             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7395                 || s->cpl != 0) {
7396                 goto illegal_op;
7397             }
7398             gen_helper_clac(cpu_env);
7399             gen_jmp_im(s, s->pc - s->cs_base);
7400             gen_eob(s);
7401             break;
7402 
7403         case 0xcb: /* stac */
7404             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7405                 || s->cpl != 0) {
7406                 goto illegal_op;
7407             }
7408             gen_helper_stac(cpu_env);
7409             gen_jmp_im(s, s->pc - s->cs_base);
7410             gen_eob(s);
7411             break;
7412 
7413         CASE_MODRM_MEM_OP(1): /* sidt */
7414             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7415             gen_lea_modrm(env, s, modrm);
7416             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7417             gen_op_st_v(s, MO_16, s->T0, s->A0);
7418             gen_add_A0_im(s, 2);
7419             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7420             if (dflag == MO_16) {
7421                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7422             }
7423             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7424             break;
7425 
7426         case 0xd0: /* xgetbv */
7427             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7428                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7429                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7430                 goto illegal_op;
7431             }
7432             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7433             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
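             /* Split the 64-bit XCR value into EDX:EAX.  */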
7434             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7435             break;
7436 
7437         case 0xd1: /* xsetbv */
7438             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7439                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7440                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7441                 goto illegal_op;
7442             }
7443             if (s->cpl != 0) {
7444                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7445                 break;
7446             }
7447             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7448                                   cpu_regs[R_EDX]);
7449             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7450             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7451             /* End TB because translation flags may change.  */
7452             gen_jmp_im(s, s->pc - s->cs_base);
7453             gen_eob(s);
7454             break;
7455 
7456         case 0xd8: /* VMRUN */
7457             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7458                 goto illegal_op;
7459             }
7460             if (s->cpl != 0) {
7461                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7462                 break;
7463             }
7464             gen_update_cc_op(s);
7465             gen_jmp_im(s, pc_start - s->cs_base);
7466             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7467                              tcg_const_i32(s->pc - pc_start));
7468             tcg_gen_exit_tb(NULL, 0);
7469             s->base.is_jmp = DISAS_NORETURN;
7470             break;
7471 
7472         case 0xd9: /* VMMCALL */
7473             if (!(s->flags & HF_SVME_MASK)) {
7474                 goto illegal_op;
7475             }
7476             gen_update_cc_op(s);
7477             gen_jmp_im(s, pc_start - s->cs_base);
7478             gen_helper_vmmcall(cpu_env);
7479             break;
7480 
7481         case 0xda: /* VMLOAD */
7482             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7483                 goto illegal_op;
7484             }
7485             if (s->cpl != 0) {
7486                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7487                 break;
7488             }
7489             gen_update_cc_op(s);
7490             gen_jmp_im(s, pc_start - s->cs_base);
7491             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7492             break;
7493 
7494         case 0xdb: /* VMSAVE */
7495             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7496                 goto illegal_op;
7497             }
7498             if (s->cpl != 0) {
7499                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7500                 break;
7501             }
7502             gen_update_cc_op(s);
7503             gen_jmp_im(s, pc_start - s->cs_base);
7504             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7505             break;
7506 
7507         case 0xdc: /* STGI */
7508             if ((!(s->flags & HF_SVME_MASK)
7509                    && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7510                 || !s->pe) {
7511                 goto illegal_op;
7512             }
7513             if (s->cpl != 0) {
7514                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7515                 break;
7516             }
7517             gen_update_cc_op(s);
7518             gen_helper_stgi(cpu_env);
7519             gen_jmp_im(s, s->pc - s->cs_base);
7520             gen_eob(s);
7521             break;
7522 
7523         case 0xdd: /* CLGI */
7524             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7525                 goto illegal_op;
7526             }
7527             if (s->cpl != 0) {
7528                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7529                 break;
7530             }
7531             gen_update_cc_op(s);
7532             gen_jmp_im(s, pc_start - s->cs_base);
7533             gen_helper_clgi(cpu_env);
7534             break;
7535 
7536         case 0xde: /* SKINIT */
7537             if ((!(s->flags & HF_SVME_MASK)
7538                  && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7539                 || !s->pe) {
7540                 goto illegal_op;
7541             }
7542             gen_update_cc_op(s);
7543             gen_jmp_im(s, pc_start - s->cs_base);
7544             gen_helper_skinit(cpu_env);
7545             break;
7546 
7547         case 0xdf: /* INVLPGA */
7548             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7549                 goto illegal_op;
7550             }
7551             if (s->cpl != 0) {
7552                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7553                 break;
7554             }
7555             gen_update_cc_op(s);
7556             gen_jmp_im(s, pc_start - s->cs_base);
7557             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7558             break;
7559 
7560         CASE_MODRM_MEM_OP(2): /* lgdt */
7561             if (s->cpl != 0) {
7562                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7563                 break;
7564             }
7565             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7566             gen_lea_modrm(env, s, modrm);
7567             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7568             gen_add_A0_im(s, 2);
7569             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7570             if (dflag == MO_16) {
7571                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7572             }
7573             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7574             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7575             break;
7576 
7577         CASE_MODRM_MEM_OP(3): /* lidt */
7578             if (s->cpl != 0) {
7579                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7580                 break;
7581             }
7582             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7583             gen_lea_modrm(env, s, modrm);
7584             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7585             gen_add_A0_im(s, 2);
7586             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7587             if (dflag == MO_16) {
7588                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7589             }
7590             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7591             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7592             break;
7593 
7594         CASE_MODRM_OP(4): /* smsw */
7595             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7596             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7597             /*
7598              * In 32-bit mode, the higher 16 bits of the destination
7599              * register are undefined.  In practice CR0[31:0] is stored
7600              * just like in 64-bit mode.
7601              */
7602             mod = (modrm >> 6) & 3;
7603             ot = (mod != 3 ? MO_16 : s->dflag);
7604             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7605             break;
7606         case 0xee: /* rdpkru */
7607             if (prefixes & PREFIX_LOCK) {
7608                 goto illegal_op;
7609             }
7610             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7611             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7612             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7613             break;
7614         case 0xef: /* wrpkru */
7615             if (prefixes & PREFIX_LOCK) {
7616                 goto illegal_op;
7617             }
7618             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7619                                   cpu_regs[R_EDX]);
7620             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7621             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7622             break;
7623         CASE_MODRM_OP(6): /* lmsw */
7624             if (s->cpl != 0) {
7625                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7626                 break;
7627             }
7628             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7629             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7630             gen_helper_lmsw(cpu_env, s->T0);
7631             gen_jmp_im(s, s->pc - s->cs_base);
7632             gen_eob(s);
7633             break;
7634 
7635         CASE_MODRM_MEM_OP(7): /* invlpg */
7636             if (s->cpl != 0) {
7637                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7638                 break;
7639             }
7640             gen_update_cc_op(s);
7641             gen_jmp_im(s, pc_start - s->cs_base);
7642             gen_lea_modrm(env, s, modrm);
7643             gen_helper_invlpg(cpu_env, s->A0);
7644             gen_jmp_im(s, s->pc - s->cs_base);
7645             gen_eob(s);
7646             break;
7647 
7648         case 0xf8: /* swapgs */
7649 #ifdef TARGET_X86_64
7650             if (CODE64(s)) {
7651                 if (s->cpl != 0) {
7652                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7653                 } else {
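                         /* Exchange the GS segment base with the
                            KernelGSbase MSR value.  */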
7654                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7655                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7656                                   offsetof(CPUX86State, kernelgsbase));
7657                     tcg_gen_st_tl(s->T0, cpu_env,
7658                                   offsetof(CPUX86State, kernelgsbase));
7659                 }
7660                 break;
7661             }
7662 #endif
7663             goto illegal_op;
7664 
7665         case 0xf9: /* rdtscp */
7666             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7667                 goto illegal_op;
7668             }
7669             gen_update_cc_op(s);
7670             gen_jmp_im(s, pc_start - s->cs_base);
7671             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7672                 gen_io_start();
7673             }
7674             gen_helper_rdtscp(cpu_env);
7675             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7676                 gen_jmp(s, s->pc - s->cs_base);
7677             }
7678             break;
7679 
7680         default:
7681             goto unknown_op;
7682         }
7683         break;
7684 
7685     case 0x108: /* invd */
7686     case 0x109: /* wbinvd */
7687         if (s->cpl != 0) {
7688             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7689         } else {
7690             gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7691             /* nothing to do */
7692         }
7693         break;
7694     case 0x63: /* arpl, or movsxd on x86_64 */
7695 #ifdef TARGET_X86_64
7696         if (CODE64(s)) {
7697             int d_ot;
7698             /* d_ot is the size of the destination */
7699             d_ot = dflag;
7700 
7701             modrm = x86_ldub_code(env, s);
7702             reg = ((modrm >> 3) & 7) | rex_r;
7703             mod = (modrm >> 6) & 3;
7704             rm = (modrm & 7) | REX_B(s);
7705 
7706             if (mod == 3) {
7707                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7708                 /* sign extend */
7709                 if (d_ot == MO_64) {
7710                     tcg_gen_ext32s_tl(s->T0, s->T0);
7711                 }
7712                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7713             } else {
7714                 gen_lea_modrm(env, s, modrm);
7715                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7716                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7717             }
7718         } else
7719 #endif
7720         {
7721             TCGLabel *label1;
7722             TCGv t0, t1, t2, a0;
7723 
7724             if (!s->pe || s->vm86)
7725                 goto illegal_op;
7726             t0 = tcg_temp_local_new();
7727             t1 = tcg_temp_local_new();
7728             t2 = tcg_temp_local_new();
7729             ot = MO_16;
7730             modrm = x86_ldub_code(env, s);
7731             reg = (modrm >> 3) & 7;
7732             mod = (modrm >> 6) & 3;
7733             rm = modrm & 7;
7734             if (mod != 3) {
7735                 gen_lea_modrm(env, s, modrm);
7736                 gen_op_ld_v(s, ot, t0, s->A0);
7737                 a0 = tcg_temp_local_new();
7738                 tcg_gen_mov_tl(a0, s->A0);
7739             } else {
7740                 gen_op_mov_v_reg(s, ot, t0, rm);
7741                 a0 = NULL;
7742             }
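                 /* ARPL: if the RPL field of the destination selector
                    (t0) is below that of the source (t1), raise it to
                    match; t2 holds the new ZF value.  */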
7743             gen_op_mov_v_reg(s, ot, t1, reg);
7744             tcg_gen_andi_tl(s->tmp0, t0, 3);
7745             tcg_gen_andi_tl(t1, t1, 3);
7746             tcg_gen_movi_tl(t2, 0);
7747             label1 = gen_new_label();
7748             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7749             tcg_gen_andi_tl(t0, t0, ~3);
7750             tcg_gen_or_tl(t0, t0, t1);
7751             tcg_gen_movi_tl(t2, CC_Z);
7752             gen_set_label(label1);
7753             if (mod != 3) {
7754                 gen_op_st_v(s, ot, t0, a0);
7755                 tcg_temp_free(a0);
7756             } else {
7757                 gen_op_mov_reg_v(s, ot, rm, t0);
7758             }
7759             gen_compute_eflags(s);
7760             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7761             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7762             tcg_temp_free(t0);
7763             tcg_temp_free(t1);
7764             tcg_temp_free(t2);
7765         }
7766         break;
7767     case 0x102: /* lar */
7768     case 0x103: /* lsl */
7769         {
7770             TCGLabel *label1;
7771             TCGv t0;
7772             if (!s->pe || s->vm86)
7773                 goto illegal_op;
7774             ot = dflag != MO_16 ? MO_32 : MO_16;
7775             modrm = x86_ldub_code(env, s);
7776             reg = ((modrm >> 3) & 7) | rex_r;
7777             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7778             t0 = tcg_temp_local_new();
7779             gen_update_cc_op(s);
7780             if (b == 0x102) {
7781                 gen_helper_lar(t0, cpu_env, s->T0);
7782             } else {
7783                 gen_helper_lsl(t0, cpu_env, s->T0);
7784             }
7785             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7786             label1 = gen_new_label();
7787             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7788             gen_op_mov_reg_v(s, ot, reg, t0);
7789             gen_set_label(label1);
7790             set_cc_op(s, CC_OP_EFLAGS);
7791             tcg_temp_free(t0);
7792         }
7793         break;
7794     case 0x118:
7795         modrm = x86_ldub_code(env, s);
7796         mod = (modrm >> 6) & 3;
7797         op = (modrm >> 3) & 7;
7798         switch (op) {
7799         case 0: /* prefetchnta */
7800         case 1: /* prefetcht0 */
7801         case 2: /* prefetcht1 */
7802         case 3: /* prefetcht2 */
7803             if (mod == 3)
7804                 goto illegal_op;
7805             gen_nop_modrm(env, s, modrm);
7806             /* nothing more to do */
7807             break;
7808         default: /* nop (multi byte) */
7809             gen_nop_modrm(env, s, modrm);
7810             break;
7811         }
7812         break;
7813     case 0x11a:
7814         modrm = x86_ldub_code(env, s);
7815         if (s->flags & HF_MPX_EN_MASK) {
7816             mod = (modrm >> 6) & 3;
7817             reg = ((modrm >> 3) & 7) | rex_r;
7818             if (prefixes & PREFIX_REPZ) {
7819                 /* bndcl */
7820                 if (reg >= 4
7821                     || (prefixes & PREFIX_LOCK)
7822                     || s->aflag == MO_16) {
7823                     goto illegal_op;
7824                 }
7825                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7826             } else if (prefixes & PREFIX_REPNZ) {
7827                 /* bndcu */
7828                 if (reg >= 4
7829                     || (prefixes & PREFIX_LOCK)
7830                     || s->aflag == MO_16) {
7831                     goto illegal_op;
7832                 }
7833                 TCGv_i64 notu = tcg_temp_new_i64();
7834                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7835                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7836                 tcg_temp_free_i64(notu);
7837             } else if (prefixes & PREFIX_DATA) {
7838                 /* bndmov -- from reg/mem */
7839                 if (reg >= 4 || s->aflag == MO_16) {
7840                     goto illegal_op;
7841                 }
7842                 if (mod == 3) {
7843                     int reg2 = (modrm & 7) | REX_B(s);
7844                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7845                         goto illegal_op;
7846                     }
7847                     if (s->flags & HF_MPX_IU_MASK) {
7848                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7849                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7850                     }
7851                 } else {
7852                     gen_lea_modrm(env, s, modrm);
7853                     if (CODE64(s)) {
7854                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7855                                             s->mem_index, MO_LEQ);
7856                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7857                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7858                                             s->mem_index, MO_LEQ);
7859                     } else {
7860                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7861                                             s->mem_index, MO_LEUL);
7862                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7863                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7864                                             s->mem_index, MO_LEUL);
7865                     }
7866                     /* bnd registers are now in-use */
7867                     gen_set_hflag(s, HF_MPX_IU_MASK);
7868                 }
7869             } else if (mod != 3) {
7870                 /* bndldx */
7871                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7872                 if (reg >= 4
7873                     || (prefixes & PREFIX_LOCK)
7874                     || s->aflag == MO_16
7875                     || a.base < -1) {
7876                     goto illegal_op;
7877                 }
7878                 if (a.base >= 0) {
7879                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7880                 } else {
7881                     tcg_gen_movi_tl(s->A0, 0);
7882                 }
7883                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7884                 if (a.index >= 0) {
7885                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7886                 } else {
7887                     tcg_gen_movi_tl(s->T0, 0);
7888                 }
7889                 if (CODE64(s)) {
7890                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7891                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7892                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7893                 } else {
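                         /* The 32-bit helper packs both bounds into one
                            64-bit value: lb in the low half, ub in the
                            high half.  */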
7894                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7895                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7896                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7897                 }
7898                 gen_set_hflag(s, HF_MPX_IU_MASK);
7899             }
7900         }
7901         gen_nop_modrm(env, s, modrm);
7902         break;
7903     case 0x11b:
7904         modrm = x86_ldub_code(env, s);
7905         if (s->flags & HF_MPX_EN_MASK) {
7906             mod = (modrm >> 6) & 3;
7907             reg = ((modrm >> 3) & 7) | rex_r;
7908             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7909                 /* bndmk */
7910                 if (reg >= 4
7911                     || (prefixes & PREFIX_LOCK)
7912                     || s->aflag == MO_16) {
7913                     goto illegal_op;
7914                 }
7915                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7916                 if (a.base >= 0) {
7917                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7918                     if (!CODE64(s)) {
7919                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7920                     }
7921                 } else if (a.base == -1) {
7922                     /* with no base register, the lower bound is 0 */
7923                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7924                 } else {
7925                     /* rip-relative generates #ud */
7926                     goto illegal_op;
7927                 }
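                 /* cpu_bndu holds the one's complement of the upper
                    bound; compare the bndcu handling above.  */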
7928                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7929                 if (!CODE64(s)) {
7930                     tcg_gen_ext32u_tl(s->A0, s->A0);
7931                 }
7932                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7933                 /* bnd registers are now in-use */
7934                 gen_set_hflag(s, HF_MPX_IU_MASK);
7935                 break;
7936             } else if (prefixes & PREFIX_REPNZ) {
7937                 /* bndcn */
7938                 if (reg >= 4
7939                     || (prefixes & PREFIX_LOCK)
7940                     || s->aflag == MO_16) {
7941                     goto illegal_op;
7942                 }
7943                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7944             } else if (prefixes & PREFIX_DATA) {
7945                 /* bndmov -- to reg/mem */
7946                 if (reg >= 4 || s->aflag == MO_16) {
7947                     goto illegal_op;
7948                 }
7949                 if (mod == 3) {
7950                     int reg2 = (modrm & 7) | REX_B(s);
7951                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7952                         goto illegal_op;
7953                     }
7954                     if (s->flags & HF_MPX_IU_MASK) {
7955                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7956                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7957                     }
7958                 } else {
7959                     gen_lea_modrm(env, s, modrm);
7960                     if (CODE64(s)) {
7961                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7962                                             s->mem_index, MO_LEQ);
7963                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7964                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7965                                             s->mem_index, MO_LEQ);
7966                     } else {
7967                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7968                                             s->mem_index, MO_LEUL);
7969                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7970                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7971                                             s->mem_index, MO_LEUL);
7972                     }
7973                 }
7974             } else if (mod != 3) {
7975                 /* bndstx */
7976                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7977                 if (reg >= 4
7978                     || (prefixes & PREFIX_LOCK)
7979                     || s->aflag == MO_16
7980                     || a.base < -1) {
7981                     goto illegal_op;
7982                 }
7983                 if (a.base >= 0) {
7984                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7985                 } else {
7986                     tcg_gen_movi_tl(s->A0, 0);
7987                 }
7988                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7989                 if (a.index >= 0) {
7990                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7991                 } else {
7992                     tcg_gen_movi_tl(s->T0, 0);
7993                 }
7994                 if (CODE64(s)) {
7995                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7996                                         cpu_bndl[reg], cpu_bndu[reg]);
7997                 } else {
7998                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7999                                         cpu_bndl[reg], cpu_bndu[reg]);
8000                 }
8001             }
8002         }
8003         gen_nop_modrm(env, s, modrm);
8004         break;
8005     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8006         modrm = x86_ldub_code(env, s);
8007         gen_nop_modrm(env, s, modrm);
8008         break;
8009     case 0x120: /* mov reg, crN */
8010     case 0x122: /* mov crN, reg */
8011         if (s->cpl != 0) {
8012             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8013         } else {
8014             modrm = x86_ldub_code(env, s);
8015             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8016              * AMD documentation (24594.pdf) and testing of
8017              * Intel 386 and 486 processors all show that the mod bits
8018              * are assumed to be 1's, regardless of actual values.
8019              */
8020             rm = (modrm & 7) | REX_B(s);
8021             reg = ((modrm >> 3) & 7) | rex_r;
8022             if (CODE64(s))
8023                 ot = MO_64;
8024             else
8025                 ot = MO_32;
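                 /* With AMD's CR8 legacy feature (AltMovCr8), a LOCK
                    prefix turns an access to CR0 into one to CR8.  */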
8026             if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8027                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8028                 reg = 8;
8029             }
8030             switch (reg) {
8031             case 0:
8032             case 2:
8033             case 3:
8034             case 4:
8035             case 8:
8036                 gen_update_cc_op(s);
8037                 gen_jmp_im(s, pc_start - s->cs_base);
8038                 if (b & 2) {
8039                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8040                         gen_io_start();
8041                     }
8042                     gen_op_mov_v_reg(s, ot, s->T0, rm);
8043                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8044                                          s->T0);
8045                     gen_jmp_im(s, s->pc - s->cs_base);
8046                     gen_eob(s);
8047                 } else {
8048                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8049                         gen_io_start();
8050                     }
8051                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8052                     gen_op_mov_reg_v(s, ot, rm, s->T0);
8053                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8054                         gen_jmp(s, s->pc - s->cs_base);
8055                     }
8056                 }
8057                 break;
8058             default:
8059                 goto unknown_op;
8060             }
8061         }
8062         break;
8063     case 0x121: /* mov reg, drN */
8064     case 0x123: /* mov drN, reg */
8065         if (s->cpl != 0) {
8066             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8067         } else {
8068 #ifndef CONFIG_USER_ONLY
8069             modrm = x86_ldub_code(env, s);
8070             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8071              * AMD documentation (24594.pdf) and testing of
8072              * Intel 386 and 486 processors all show that the mod bits
8073              * are assumed to be 1's, regardless of actual values.
8074              */
8075             rm = (modrm & 7) | REX_B(s);
8076             reg = ((modrm >> 3) & 7) | rex_r;
8077             if (CODE64(s))
8078                 ot = MO_64;
8079             else
8080                 ot = MO_32;
8081             if (reg >= 8) {
8082                 goto illegal_op;
8083             }
8084             if (b & 2) {
8085                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8086                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8087                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8088                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8089                 gen_jmp_im(s, s->pc - s->cs_base);
8090                 gen_eob(s);
8091             } else {
8092                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8093                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8094                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8095                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8096             }
8097 #endif /* !CONFIG_USER_ONLY */
8098         }
8099         break;
8100     case 0x106: /* clts */
8101         if (s->cpl != 0) {
8102             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8103         } else {
8104             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8105             gen_helper_clts(cpu_env);
8106             /* abort block because static cpu state changed */
8107             gen_jmp_im(s, s->pc - s->cs_base);
8108             gen_eob(s);
8109         }
8110         break;
8111     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8112     case 0x1c3: /* MOVNTI reg, mem */
8113         if (!(s->cpuid_features & CPUID_SSE2))
8114             goto illegal_op;
8115         ot = mo_64_32(dflag);
8116         modrm = x86_ldub_code(env, s);
8117         mod = (modrm >> 6) & 3;
8118         if (mod == 3)
8119             goto illegal_op;
8120         reg = ((modrm >> 3) & 7) | rex_r;
8121         /* generate a generic store */
8122         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8123         break;
8124     case 0x1ae:
8125         modrm = x86_ldub_code(env, s);
8126         switch (modrm) {
8127         CASE_MODRM_MEM_OP(0): /* fxsave */
8128             if (!(s->cpuid_features & CPUID_FXSR)
8129                 || (prefixes & PREFIX_LOCK)) {
8130                 goto illegal_op;
8131             }
8132             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8133                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8134                 break;
8135             }
8136             gen_lea_modrm(env, s, modrm);
8137             gen_helper_fxsave(cpu_env, s->A0);
8138             break;
8139 
8140         CASE_MODRM_MEM_OP(1): /* fxrstor */
8141             if (!(s->cpuid_features & CPUID_FXSR)
8142                 || (prefixes & PREFIX_LOCK)) {
8143                 goto illegal_op;
8144             }
8145             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8146                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8147                 break;
8148             }
8149             gen_lea_modrm(env, s, modrm);
8150             gen_helper_fxrstor(cpu_env, s->A0);
8151             break;
8152 
8153         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8154             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8155                 goto illegal_op;
8156             }
8157             if (s->flags & HF_TS_MASK) {
8158                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8159                 break;
8160             }
8161             gen_lea_modrm(env, s, modrm);
8162             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8163             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8164             break;
8165 
8166         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8167             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8168                 goto illegal_op;
8169             }
8170             if (s->flags & HF_TS_MASK) {
8171                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8172                 break;
8173             }
8174             gen_helper_update_mxcsr(cpu_env);
8175             gen_lea_modrm(env, s, modrm);
8176             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8177             gen_op_st_v(s, MO_32, s->T0, s->A0);
8178             break;
8179 
8180         CASE_MODRM_MEM_OP(4): /* xsave */
8181             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8182                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8183                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8184                 goto illegal_op;
8185             }
8186             gen_lea_modrm(env, s, modrm);
8187             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8188                                   cpu_regs[R_EDX]);
8189             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8190             break;
8191 
8192         CASE_MODRM_MEM_OP(5): /* xrstor */
8193             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8194                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8195                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8196                 goto illegal_op;
8197             }
8198             gen_lea_modrm(env, s, modrm);
8199             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8200                                   cpu_regs[R_EDX]);
8201             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8202             /* XRSTOR is how MPX is enabled, which changes how
8203                we translate.  Thus we need to end the TB.  */
8204             gen_update_cc_op(s);
8205             gen_jmp_im(s, s->pc - s->cs_base);
8206             gen_eob(s);
8207             break;
8208 
8209         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8210             if (prefixes & PREFIX_LOCK) {
8211                 goto illegal_op;
8212             }
8213             if (prefixes & PREFIX_DATA) {
8214                 /* clwb */
8215                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8216                     goto illegal_op;
8217                 }
8218                 gen_nop_modrm(env, s, modrm);
8219             } else {
8220                 /* xsaveopt */
8221                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8222                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8223                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8224                     goto illegal_op;
8225                 }
8226                 gen_lea_modrm(env, s, modrm);
8227                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8228                                       cpu_regs[R_EDX]);
8229                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8230             }
8231             break;
8232 
8233         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8234             if (prefixes & PREFIX_LOCK) {
8235                 goto illegal_op;
8236             }
8237             if (prefixes & PREFIX_DATA) {
8238                 /* clflushopt */
8239                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8240                     goto illegal_op;
8241                 }
8242             } else {
8243                 /* clflush */
8244                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8245                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8246                     goto illegal_op;
8247                 }
8248             }
8249             gen_nop_modrm(env, s, modrm);
8250             break;
8251 
8252         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8253         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8254         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8255         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8256             if (CODE64(s)
8257                 && (prefixes & PREFIX_REPZ)
8258                 && !(prefixes & PREFIX_LOCK)
8259                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8260                 TCGv base, treg, src, dst;
8261 
8262                 /* Preserve hflags bits by testing CR4 at runtime.  */
8263                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8264                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8265 
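                     /* Modrm bit 3 selects FS vs GS; bit 4 distinguishes
                        wr*base from rd*base.  */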
8266                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8267                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8268 
8269                 if (modrm & 0x10) {
8270                     /* wr*base */
8271                     dst = base, src = treg;
8272                 } else {
8273                     /* rd*base */
8274                     dst = treg, src = base;
8275                 }
8276 
8277                 if (s->dflag == MO_32) {
8278                     tcg_gen_ext32u_tl(dst, src);
8279                 } else {
8280                     tcg_gen_mov_tl(dst, src);
8281                 }
8282                 break;
8283             }
8284             goto unknown_op;
8285 
8286         case 0xf8: /* sfence / pcommit */
8287             if (prefixes & PREFIX_DATA) {
8288                 /* pcommit */
8289                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8290                     || (prefixes & PREFIX_LOCK)) {
8291                     goto illegal_op;
8292                 }
8293                 break;
8294             }
8295             /* fallthru */
8296         case 0xf9 ... 0xff: /* sfence */
8297             if (!(s->cpuid_features & CPUID_SSE)
8298                 || (prefixes & PREFIX_LOCK)) {
8299                 goto illegal_op;
8300             }
8301             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8302             break;
8303         case 0xe8 ... 0xef: /* lfence */
8304             if (!(s->cpuid_features & CPUID_SSE)
8305                 || (prefixes & PREFIX_LOCK)) {
8306                 goto illegal_op;
8307             }
8308             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8309             break;
8310         case 0xf0 ... 0xf7: /* mfence */
8311             if (!(s->cpuid_features & CPUID_SSE2)
8312                 || (prefixes & PREFIX_LOCK)) {
8313                 goto illegal_op;
8314             }
8315             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8316             break;
8317 
8318         default:
8319             goto unknown_op;
8320         }
8321         break;
8322 
8323     case 0x10d: /* 3DNow! prefetch(w) */
8324         modrm = x86_ldub_code(env, s);
8325         mod = (modrm >> 6) & 3;
8326         if (mod == 3)
8327             goto illegal_op;
8328         gen_nop_modrm(env, s, modrm);
8329         break;
8330     case 0x1aa: /* rsm */
8331         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8332         if (!(s->flags & HF_SMM_MASK))
8333             goto illegal_op;
8334 #ifdef CONFIG_USER_ONLY
8335         /* user-mode emulation should never be in SMM */
8336         g_assert_not_reached();
8337 #else
8338         gen_update_cc_op(s);
8339         gen_jmp_im(s, s->pc - s->cs_base);
8340         gen_helper_rsm(cpu_env);
8341 #endif /* CONFIG_USER_ONLY */
8342         gen_eob(s);
8343         break;
8344     case 0x1b8: /* SSE4.2 popcnt */
8345         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8346              PREFIX_REPZ)
8347             goto illegal_op;
8348         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8349             goto illegal_op;
8350 
8351         modrm = x86_ldub_code(env, s);
8352         reg = ((modrm >> 3) & 7) | rex_r;
8353 
8354         if (s->prefix & PREFIX_DATA) {
8355             ot = MO_16;
8356         } else {
8357             ot = mo_64_32(dflag);
8358         }
8359 
8360         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8361         gen_extu(ot, s->T0);
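             /* CC_OP_POPCNT later derives ZF from cc_src, the
                zero-extended source operand.  */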
8362         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8363         tcg_gen_ctpop_tl(s->T0, s->T0);
8364         gen_op_mov_reg_v(s, ot, reg, s->T0);
8365 
8366         set_cc_op(s, CC_OP_POPCNT);
8367         break;
8368     case 0x10e ... 0x10f:
8369         /* 3DNow! instructions, ignore prefixes */
8370         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8371         /* fall through */
8372     case 0x110 ... 0x117:
8373     case 0x128 ... 0x12f:
8374     case 0x138 ... 0x13a:
8375     case 0x150 ... 0x179:
8376     case 0x17c ... 0x17f:
8377     case 0x1c2:
8378     case 0x1c4 ... 0x1c6:
8379     case 0x1d0 ... 0x1fe:
8380         gen_sse(env, s, b, pc_start, rex_r);
8381         break;
8382     default:
8383         goto unknown_op;
8384     }
8385     return s->pc;
8386  illegal_op:
8387     gen_illegal_opcode(s);
8388     return s->pc;
8389  unknown_op:
8390     gen_unknown_opcode(env, s);
8391     return s->pc;
8392 }
8393 
8394 void tcg_x86_init(void)
8395 {
8396     static const char reg_names[CPU_NB_REGS][4] = {
8397 #ifdef TARGET_X86_64
8398         [R_EAX] = "rax",
8399         [R_EBX] = "rbx",
8400         [R_ECX] = "rcx",
8401         [R_EDX] = "rdx",
8402         [R_ESI] = "rsi",
8403         [R_EDI] = "rdi",
8404         [R_EBP] = "rbp",
8405         [R_ESP] = "rsp",
8406         [8]  = "r8",
8407         [9]  = "r9",
8408         [10] = "r10",
8409         [11] = "r11",
8410         [12] = "r12",
8411         [13] = "r13",
8412         [14] = "r14",
8413         [15] = "r15",
8414 #else
8415         [R_EAX] = "eax",
8416         [R_EBX] = "ebx",
8417         [R_ECX] = "ecx",
8418         [R_EDX] = "edx",
8419         [R_ESI] = "esi",
8420         [R_EDI] = "edi",
8421         [R_EBP] = "ebp",
8422         [R_ESP] = "esp",
8423 #endif
8424     };
8425     static const char seg_base_names[6][8] = {
8426         [R_CS] = "cs_base",
8427         [R_DS] = "ds_base",
8428         [R_ES] = "es_base",
8429         [R_FS] = "fs_base",
8430         [R_GS] = "gs_base",
8431         [R_SS] = "ss_base",
8432     };
8433     static const char bnd_regl_names[4][8] = {
8434         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8435     };
8436     static const char bnd_regu_names[4][8] = {
8437         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8438     };
8439     int i;
8440 
8441     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8442                                        offsetof(CPUX86State, cc_op), "cc_op");
8443     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8444                                     "cc_dst");
8445     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8446                                     "cc_src");
8447     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8448                                      "cc_src2");
8449 
8450     for (i = 0; i < CPU_NB_REGS; ++i) {
8451         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8452                                          offsetof(CPUX86State, regs[i]),
8453                                          reg_names[i]);
8454     }
8455 
8456     for (i = 0; i < 6; ++i) {
8457         cpu_seg_base[i]
8458             = tcg_global_mem_new(cpu_env,
8459                                  offsetof(CPUX86State, segs[i].base),
8460                                  seg_base_names[i]);
8461     }
8462 
8463     for (i = 0; i < 4; ++i) {
8464         cpu_bndl[i]
8465             = tcg_global_mem_new_i64(cpu_env,
8466                                      offsetof(CPUX86State, bnd_regs[i].lb),
8467                                      bnd_regl_names[i]);
8468         cpu_bndu[i]
8469             = tcg_global_mem_new_i64(cpu_env,
8470                                      offsetof(CPUX86State, bnd_regs[i].ub),
8471                                      bnd_regu_names[i]);
8472     }
8473 }
8474 
8475 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8476 {
8477     DisasContext *dc = container_of(dcbase, DisasContext, base);
8478     CPUX86State *env = cpu->env_ptr;
8479     uint32_t flags = dc->base.tb->flags;
8480     target_ulong cs_base = dc->base.tb->cs_base;
8481 
8482     dc->pe = (flags >> HF_PE_SHIFT) & 1;
8483     dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8484     dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8485     dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8486     dc->f_st = 0;
8487     dc->vm86 = (flags >> VM_SHIFT) & 1;
8488     dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8489     dc->iopl = (flags >> IOPL_SHIFT) & 3;
8490     dc->tf = (flags >> TF_SHIFT) & 1;
8491     dc->cc_op = CC_OP_DYNAMIC;
8492     dc->cc_op_dirty = false;
8493     dc->cs_base = cs_base;
8494     dc->popl_esp_hack = 0;
8495     /* select memory access functions */
8496     dc->mem_index = 0;
8497 #ifdef CONFIG_SOFTMMU
8498     dc->mem_index = cpu_mmu_index(env, false);
8499 #endif
8500     dc->cpuid_features = env->features[FEAT_1_EDX];
8501     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8502     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8503     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8504     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8505     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8506 #ifdef TARGET_X86_64
8507     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8508     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8509 #endif
8510     dc->flags = flags;
8511     dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8512                     (flags & HF_INHIBIT_IRQ_MASK));
8513     /* Do not optimize repz jumps at all in icount mode, because
8514        rep movsS instructions are executed along different paths
8515        in the !repz_opt and repz_opt modes; historically the
8516        former was used everywhere except in single-step mode.
8517        This setting disables the jump optimization, so the control
8518        paths become equivalent in normal and single-step execution.
8519        As a result there is no repz jump optimization in
8520        record/replay mode, and there is always an additional
8521        step for ecx=0 when icount is enabled.
8522      */
8523     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8524 #if 0
8525     /* check addseg logic */
8526     if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8527         printf("ERROR addseg\n");
8528 #endif
8529 
8530     dc->T0 = tcg_temp_new();
8531     dc->T1 = tcg_temp_new();
8532     dc->A0 = tcg_temp_new();
8533 
8534     dc->tmp0 = tcg_temp_new();
8535     dc->tmp1_i64 = tcg_temp_new_i64();
8536     dc->tmp2_i32 = tcg_temp_new_i32();
8537     dc->tmp3_i32 = tcg_temp_new_i32();
8538     dc->tmp4 = tcg_temp_new();
8539     dc->ptr0 = tcg_temp_new_ptr();
8540     dc->ptr1 = tcg_temp_new_ptr();
8541     dc->cc_srcT = tcg_temp_local_new();
8542 }
8543 
8544 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8545 {
8546 }
8547 
8548 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8549 {
8550     DisasContext *dc = container_of(dcbase, DisasContext, base);
8551 
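         /* Record pc and cc_op so restore_state_to_opc can recover them.  */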
8552     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8553 }
8554 
8555 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8556                                      const CPUBreakpoint *bp)
8557 {
8558     DisasContext *dc = container_of(dcbase, DisasContext, base);
8559     /* If RF is set, suppress an internally generated breakpoint.  */
8560     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8561     if (bp->flags & flags) {
8562         gen_debug(dc, dc->base.pc_next - dc->cs_base);
8563         dc->base.is_jmp = DISAS_NORETURN;
8564         /* The address covered by the breakpoint must be included in
8565            [tb->pc, tb->pc + tb->size) in order to for it to be
8566            [tb->pc, tb->pc + tb->size) in order for it to be
8567            the generic logic setting tb->size later does the right thing.  */
8568         dc->base.pc_next += 1;
8569         return true;
8570     } else {
8571         return false;
8572     }
8573 }
8574 
8575 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8576 {
8577     DisasContext *dc = container_of(dcbase, DisasContext, base);
8578     target_ulong pc_next;
8579 
8580 #ifdef TARGET_VSYSCALL_PAGE
8581     /*
8582      * Detect entry into the vsyscall page and invoke the syscall.
8583      */
8584     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8585         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8586         return;
8587     }
8588 #endif
8589 
8590     pc_next = disas_insn(dc, cpu);
8591 
8592     if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8593         /* In single-step mode, we generate only one instruction and
8594            then raise an exception. */
8595         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8596            the flag and abort the translation to give the IRQs a
8597            chance to happen. */
8598         dc->base.is_jmp = DISAS_TOO_MANY;
8599     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8600                && ((pc_next & TARGET_PAGE_MASK)
8601                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8602                        & TARGET_PAGE_MASK)
8603                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8604         /* Do not cross a page boundary in icount mode, since that
8605            can cause an exception.  End the block here so that a
8606            boundary is only crossed by the first instruction of a
8607            block.  If the current instruction already crossed it,
8608            that is fine, because no exception has stopped this code.
8609          */
8610         dc->base.is_jmp = DISAS_TOO_MANY;
8611     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8612         dc->base.is_jmp = DISAS_TOO_MANY;
8613     }
8614 
8615     dc->base.pc_next = pc_next;
8616 }
8617 
8618 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8619 {
8620     DisasContext *dc = container_of(dcbase, DisasContext, base);
8621 
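         /* The block ended early (insn limit, page boundary or
            single step): jump to the next insn and close the TB.  */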
8622     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8623         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8624         gen_eob(dc);
8625     }
8626 }
8627 
8628 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8629                               CPUState *cpu)
8630 {
8631     DisasContext *dc = container_of(dcbase, DisasContext, base);
8632 
8633     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8634     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8635 }
8636 
8637 static const TranslatorOps i386_tr_ops = {
8638     .init_disas_context = i386_tr_init_disas_context,
8639     .tb_start           = i386_tr_tb_start,
8640     .insn_start         = i386_tr_insn_start,
8641     .breakpoint_check   = i386_tr_breakpoint_check,
8642     .translate_insn     = i386_tr_translate_insn,
8643     .tb_stop            = i386_tr_tb_stop,
8644     .disas_log          = i386_tr_disas_log,
8645 };
8646 
8647 /* generate intermediate code for basic block 'tb'.  */
8648 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8649 {
8650     DisasContext dc;
8651 
8652     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8653 }
8654 
8655 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8656                           target_ulong *data)
8657 {
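         /* data[] holds what i386_tr_insn_start recorded: data[0] is
            the pc (eip + cs_base), data[1] the cc_op in effect.  */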
8658     int cc_op = data[1];
8659     env->eip = data[0] - tb->cs_base;
8660     if (cc_op != CC_OP_DYNAMIC) {
8661         env->cc_op = cc_op;
8662     }
8663 }
8664