xref: /openbmc/qemu/target/i386/tcg/translate.c (revision 19f4ed36)
/*
 *  i386 translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/translator.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "helper-tcg.h"

#include "trace-tcg.h"
#include "exec/log.h"

#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20

#ifdef TARGET_X86_64
#define CODE64(s) ((s)->code64)
#define REX_X(s) ((s)->rex_x)
#define REX_B(s) ((s)->rex_b)
#else
#define CODE64(s) 0
#define REX_X(s) 0
#define REX_B(s) 0
#endif

#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7

//#define MACRO_TEST   1

/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
static TCGv cpu_seg_base[6];
static TCGv_i64 cpu_bndl[4];
static TCGv_i64 cpu_bndu[4];

#include "exec/gen-icount.h"

typedef struct DisasContext {
    DisasContextBase base;

    /* current insn context */
    int override; /* -1 if no override */
    int prefix;
    MemOp aflag;
    MemOp dflag;
    target_ulong pc_start;
    target_ulong pc; /* pc = eip + cs_base */
    /* current block context */
    target_ulong cs_base; /* base of CS segment */
    int pe;     /* protected mode */
    int code32; /* 32 bit code segment */
#ifdef TARGET_X86_64
    int lma;    /* long mode active */
    int code64; /* 64 bit code segment */
    int rex_x, rex_b;
#endif
    int vex_l;  /* vex vector length */
    int vex_v;  /* vex vvvv register, without 1's complement.  */
    int ss32;   /* 32 bit stack segment */
    CCOp cc_op;  /* current CC operation */
    bool cc_op_dirty;
#ifdef TARGET_X86_64
    bool x86_64_hregs;
#endif
    int addseg; /* non-zero if any of DS, ES or SS has a non-zero base */
    int f_st;   /* currently unused */
    int vm86;   /* vm86 mode */
    int cpl;
    int iopl;
    int tf;     /* TF cpu flag */
    int jmp_opt; /* use direct block chaining for direct jumps */
    int repz_opt; /* optimize jumps within repz instructions */
    int mem_index; /* select memory access functions */
    uint64_t flags; /* all execution flags */
    int popl_esp_hack; /* for correct popl with esp base handling */
    int rip_offset; /* only used in x86_64, but left for simplicity */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    sigjmp_buf jmpbuf;
} DisasContext;

static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);

/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};

/* Bit set if the global variable is live after setting CC_OP to X.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};

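/* Record the new CC_OP value and, using the cc_op_live table above,
   discard any of the CC globals (dst/src/src2/srcT) that the new
   operation no longer keeps live. */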
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}

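/* Flush the lazily tracked cc_op value to the cpu_cc_op global when it
   is dirty, so helpers and subsequent TBs see the correct flag state. */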
static void gen_update_cc_op(DisasContext *s)
{
    if (s->cc_op_dirty) {
        tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
        s->cc_op_dirty = false;
    }
}

#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif

/* In instruction encodings for byte register accesses the
 * register number usually indicates "low 8 bits of register N";
 * however there are some special cases where N 4..7 indicates
 * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".  Return
 * true for this special case, false otherwise.
 */
static inline bool byte_reg_is_xH(DisasContext *s, int reg)
{
    if (reg < 4) {
        return false;
    }
#ifdef TARGET_X86_64
    if (reg >= 8 || s->x86_64_hregs) {
        return false;
    }
#endif
    return true;
}

/* Select the size of a push/pop operation.  */
static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
{
    if (CODE64(s)) {
        return ot == MO_16 ? MO_16 : MO_64;
    } else {
        return ot;
    }
}

/* Select the size of the stack pointer.  */
static inline MemOp mo_stacksize(DisasContext *s)
{
    return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
}

/* Select only size 64 else 32.  Used for SSE operand sizes.  */
static inline MemOp mo_64_32(MemOp ot)
{
#ifdef TARGET_X86_64
    return ot == MO_64 ? MO_64 : MO_32;
#else
    return MO_32;
#endif
}

/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
   byte vs word opcodes.  */
static inline MemOp mo_b_d(int b, MemOp ot)
{
    return b & 1 ? ot : MO_8;
}

/* Select size 8 if lsb of B is clear, else OT capped at 32.
   Used for decoding operand size of port opcodes.  */
static inline MemOp mo_b_d32(int b, MemOp ot)
{
    return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
}

static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch (ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}

static inline
void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
{
    if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
        tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
    } else {
        tcg_gen_mov_tl(t0, cpu_regs[reg]);
    }
}

static void gen_add_A0_im(DisasContext *s, int val)
{
    tcg_gen_addi_tl(s->A0, s->A0, val);
    if (!CODE64(s)) {
        tcg_gen_ext32u_tl(s->A0, s->A0);
    }
}

static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}

static inline
void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
{
    tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
{
    tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
    gen_op_mov_reg_v(s, size, reg, s->tmp0);
}

static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}

static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
{
    if (d == OR_TMP0) {
        gen_op_st_v(s, idx, s->T0, s->A0);
    } else {
        gen_op_mov_reg_v(s, idx, d, s->T0);
    }
}

static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}

/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && s->addseg) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (s->addseg) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}

static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}

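/* Load the direction-flag increment (env->df holds +1 or -1) into T0,
   scaled by the operand size; string ops add this to ESI/EDI each step. */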
static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
{
    tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
    tcg_gen_shli_tl(s->T0, s->T0, ot);
}

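/* Extend SRC into DST (sign or zero extension per SIGN) for sub-word
   sizes; when no extension is needed, SRC is returned unchanged and DST
   is left untouched. */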
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        return src;
    }
}

static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}

static inline
void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
}

static inline
void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
{
    tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
    gen_extu(size, s->tmp0);
    tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
}

static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_inb(v, cpu_env, n);
        break;
    case MO_16:
        gen_helper_inw(v, cpu_env, n);
        break;
    case MO_32:
        gen_helper_inl(v, cpu_env, n);
        break;
    default:
        tcg_abort();
    }
}

static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
{
    switch (ot) {
    case MO_8:
        gen_helper_outb(cpu_env, v, n);
        break;
    case MO_16:
        gen_helper_outw(cpu_env, v, n);
        break;
    case MO_32:
        gen_helper_outl(cpu_env, v, n);
        break;
    default:
        tcg_abort();
    }
}

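/* Validate I/O permission for the port in T0: in protected mode with
   CPL > IOPL, or in vm86 mode, check the TSS I/O permission bitmap via
   helpers; when running as an SVM guest, also test for an I/O intercept. */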
static void gen_check_io(DisasContext *s, MemOp ot, target_ulong cur_eip,
                         uint32_t svm_flags)
{
    target_ulong next_eip;

    if (s->pe && (s->cpl > s->iopl || s->vm86)) {
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        switch (ot) {
        case MO_8:
            gen_helper_check_iob(cpu_env, s->tmp2_i32);
            break;
        case MO_16:
            gen_helper_check_iow(cpu_env, s->tmp2_i32);
            break;
        case MO_32:
            gen_helper_check_iol(cpu_env, s->tmp2_i32);
            break;
        default:
            tcg_abort();
        }
    }
    if (s->flags & HF_GUEST_MASK) {
        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        svm_flags |= (1 << (4 + ot));
        next_eip = s->pc - s->cs_base;
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_svm_check_io(cpu_env, s->tmp2_i32,
                                tcg_const_i32(svm_flags),
                                tcg_const_i32(next_eip - cur_eip));
    }
}

static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}

/* compute all eflags to cc_src */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}

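/* Describes how to test a condition without materializing full EFLAGS:
   compare REG (optionally masked by MASK) against IMM, or against REG2
   when USE_REG2 is set, using COND.  NO_SETCOND indicates REG already
   holds the truth value (possibly needing inversion), so no setcond
   operation is required. */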
typedef struct CCPrepare {
    TCGCond cond;
    TCGv reg;
    TCGv reg2;
    target_ulong imm;
    target_ulong mask;
    bool use_reg2;
    bool no_setcond;
} CCPrepare;

/* compute eflags.C to reg */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}

/* compute eflags.P to reg */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}

/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}

/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}

/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}

/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}

static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}

static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}

/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}

/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}

static inline void gen_stos(DisasContext *s, MemOp ot)
{
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}

static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}

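/* If I/O breakpoints are active (HF_IOBPT_MASK), call the bpt_io helper
   with the port, access size and the EIP of the next instruction so a
   matching I/O breakpoint can be reported. */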
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
    }
}

static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_bpt_io(s, s->tmp2_i32, ot);
}

/* same method as Valgrind: we generate jumps to current or next
   instruction */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single-step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)

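/* Dispatch the 3-bit FPU arithmetic opcode field to the "ST0 op FT0"
   helpers.  Cases 2 and 3 (FCOM/FCOMP) both use the fcom helper here;
   the stack pop for FCOMP is expected to be handled by the caller. */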
static void gen_helper_fp_arith_ST0_FT0(int op)
{
    switch (op) {
    case 0:
        gen_helper_fadd_ST0_FT0(cpu_env);
        break;
    case 1:
        gen_helper_fmul_ST0_FT0(cpu_env);
        break;
    case 2:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 3:
        gen_helper_fcom_ST0_FT0(cpu_env);
        break;
    case 4:
        gen_helper_fsub_ST0_FT0(cpu_env);
        break;
    case 5:
        gen_helper_fsubr_ST0_FT0(cpu_env);
        break;
    case 6:
        gen_helper_fdiv_ST0_FT0(cpu_env);
        break;
    case 7:
        gen_helper_fdivr_ST0_FT0(cpu_env);
        break;
    }
}

/* NOTE the exception in "r" op ordering */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}

static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}

/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch (op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}

/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}

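/* Update the CC state after a variable-count shift: RESULT is the shifted
   value and SHM1 the value shifted by count-1, from which CF is derived.
   Because a zero COUNT must leave the flags untouched, the CC variables
   and CC_OP are stored with movcond and CC_OP becomes dynamic. */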
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

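/* Shift a register or memory operand (OP1 == OR_TMP0 selects memory at A0)
   by the count in T1; IS_ARITH selects SAR over SHR.  The value shifted by
   count-1 is kept in tmp0 for the flag computation above. */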
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}

static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                            int is_right, int is_arith)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        if (is_right) {
            if (is_arith) {
                gen_exts(ot, s->T0);
                tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_sari_tl(s->T0, s->T0, op2);
            } else {
                gen_extu(ot, s->T0);
                tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
                tcg_gen_shri_tl(s->T0, s->T0, op2);
            }
        } else {
            tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
            tcg_gen_shli_tl(s->T0, s->T0, op2);
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* update eflags if non-zero shift */
    if (op2 != 0) {
        tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
        tcg_gen_mov_tl(cpu_cc_dst, s->T0);
        set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    }
}

static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which has the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}

1619 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1620                           int is_right)
1621 {
1622     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1623     int shift;
1624 
1625     /* load */
1626     if (op1 == OR_TMP0) {
1627         gen_op_ld_v(s, ot, s->T0, s->A0);
1628     } else {
1629         gen_op_mov_v_reg(s, ot, s->T0, op1);
1630     }
1631 
1632     op2 &= mask;
1633     if (op2 != 0) {
1634         switch (ot) {
1635 #ifdef TARGET_X86_64
1636         case MO_32:
1637             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1638             if (is_right) {
1639                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1640             } else {
1641                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1642             }
1643             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1644             break;
1645 #endif
1646         default:
1647             if (is_right) {
1648                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1649             } else {
1650                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1651             }
1652             break;
1653         case MO_8:
1654             mask = 7;
1655             goto do_shifts;
1656         case MO_16:
1657             mask = 15;
1658         do_shifts:
1659             shift = op2 & mask;
1660             if (is_right) {
1661                 shift = mask + 1 - shift;
1662             }
1663             gen_extu(ot, s->T0);
1664             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1665             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1666             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1667             break;
1668         }
1669     }
1670 
1671     /* store */
1672     gen_op_st_rm_T0_A0(s, ot, op1);
1673 
1674     if (op2 != 0) {
1675         /* Compute the flags into CC_SRC.  */
1676         gen_compute_eflags(s);
1677 
1678         /* The value that was "rotated out" is now present at the other end
1679            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1680            since we've computed the flags into CC_SRC, these variables are
1681            currently dead.  */
1682         if (is_right) {
1683             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1684             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1685             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1686         } else {
1687             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1688             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1689         }
1690         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1691         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1692         set_cc_op(s, CC_OP_ADCOX);
1693     }
1694 }
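/*
 * TCG has no 8/16-bit rotate ops, so the MO_8/MO_16 path above builds the
 * rotate from two shifts and an OR over the zero-extended value.  A
 * host-side model of the 8-bit case (illustrative sketch only; rol8_model
 * is not part of QEMU):
 *
 *     static uint8_t rol8_model(uint8_t x, int count)
 *     {
 *         int shift = count & 7;
 *         return shift ? (x << shift) | (x >> (8 - shift)) : x;
 *     }
 *
 * e.g. rol8_model(0x96, 4) == 0x69.  A right rotate by N is handled as a
 * left rotate by (width - N), as in the do_shifts code above.
 */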
1695 
1696 /* XXX: add faster immediate = 1 case */
1697 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1698                            int is_right)
1699 {
1700     gen_compute_eflags(s);
1701     assert(s->cc_op == CC_OP_EFLAGS);
1702 
1703     /* load */
1704     if (op1 == OR_TMP0)
1705         gen_op_ld_v(s, ot, s->T0, s->A0);
1706     else
1707         gen_op_mov_v_reg(s, ot, s->T0, op1);
1708 
1709     if (is_right) {
1710         switch (ot) {
1711         case MO_8:
1712             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1713             break;
1714         case MO_16:
1715             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1716             break;
1717         case MO_32:
1718             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1719             break;
1720 #ifdef TARGET_X86_64
1721         case MO_64:
1722             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1723             break;
1724 #endif
1725         default:
1726             tcg_abort();
1727         }
1728     } else {
1729         switch (ot) {
1730         case MO_8:
1731             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1732             break;
1733         case MO_16:
1734             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1735             break;
1736         case MO_32:
1737             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1738             break;
1739 #ifdef TARGET_X86_64
1740         case MO_64:
1741             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1742             break;
1743 #endif
1744         default:
1745             tcg_abort();
1746         }
1747     }
1748     /* store */
1749     gen_op_st_rm_T0_A0(s, ot, op1);
1750 }
1751 
1752 /* XXX: add faster immediate case */
1753 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1754                              bool is_right, TCGv count_in)
1755 {
1756     target_ulong mask = (ot == MO_64 ? 63 : 31);
1757     TCGv count;
1758 
1759     /* load */
1760     if (op1 == OR_TMP0) {
1761         gen_op_ld_v(s, ot, s->T0, s->A0);
1762     } else {
1763         gen_op_mov_v_reg(s, ot, s->T0, op1);
1764     }
1765 
1766     count = tcg_temp_new();
1767     tcg_gen_andi_tl(count, count_in, mask);
1768 
1769     switch (ot) {
1770     case MO_16:
1771         /* Note: we implement the Intel behaviour for shift count > 16.
1772            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1773            portion by constructing it as a 32-bit value.  */
1774         if (is_right) {
1775             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1776             tcg_gen_mov_tl(s->T1, s->T0);
1777             tcg_gen_mov_tl(s->T0, s->tmp0);
1778         } else {
1779             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1780         }
1781         /*
1782          * If TARGET_X86_64 is defined, fall through into the MO_32 case;
1783          * otherwise fall through to the default case.
1784          */
1785     case MO_32:
1786 #ifdef TARGET_X86_64
1787         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1788         tcg_gen_subi_tl(s->tmp0, count, 1);
1789         if (is_right) {
1790             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1791             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1792             tcg_gen_shr_i64(s->T0, s->T0, count);
1793         } else {
1794             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1795             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1796             tcg_gen_shl_i64(s->T0, s->T0, count);
1797             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1798             tcg_gen_shri_i64(s->T0, s->T0, 32);
1799         }
1800         break;
1801 #endif
1802     default:
1803         tcg_gen_subi_tl(s->tmp0, count, 1);
1804         if (is_right) {
1805             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1806 
1807             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1808             tcg_gen_shr_tl(s->T0, s->T0, count);
1809             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1810         } else {
1811             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1812             if (ot == MO_16) {
1813                 /* Only needed if count > 16, for Intel behaviour.  */
1814                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1815                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1816                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1817             }
1818 
1819             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1820             tcg_gen_shl_tl(s->T0, s->T0, count);
1821             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1822         }
1823         tcg_gen_movi_tl(s->tmp4, 0);
1824         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1825                            s->tmp4, s->T1);
1826         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1827         break;
1828     }
1829 
1830     /* store */
1831     gen_op_st_rm_T0_A0(s, ot, op1);
1832 
1833     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1834     tcg_temp_free(count);
1835 }
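/*
 * Worked example for the concatenated path (TARGET_X86_64 build, 32-bit
 * operands): "shld eax, ebx, 8" forms the 64-bit value EAX:EBX, shifts it
 * left by 8 and keeps the high half, i.e.
 *
 *     EAX = (EAX << 8) | (EBX >> 24)
 *
 * so EAX = 0x12345678, EBX = 0x9abcdef0 gives 0x3456789a.  The parallel
 * shift by (count - 1) into tmp0 keeps the last bit shifted out, which
 * gen_shift_flags() turns into CF.
 */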
1836 
1837 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1838 {
1839     if (s != OR_TMP1)
1840         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1841     switch(op) {
1842     case OP_ROL:
1843         gen_rot_rm_T1(s1, ot, d, 0);
1844         break;
1845     case OP_ROR:
1846         gen_rot_rm_T1(s1, ot, d, 1);
1847         break;
1848     case OP_SHL:
1849     case OP_SHL1:
1850         gen_shift_rm_T1(s1, ot, d, 0, 0);
1851         break;
1852     case OP_SHR:
1853         gen_shift_rm_T1(s1, ot, d, 1, 0);
1854         break;
1855     case OP_SAR:
1856         gen_shift_rm_T1(s1, ot, d, 1, 1);
1857         break;
1858     case OP_RCL:
1859         gen_rotc_rm_T1(s1, ot, d, 0);
1860         break;
1861     case OP_RCR:
1862         gen_rotc_rm_T1(s1, ot, d, 1);
1863         break;
1864     }
1865 }
1866 
1867 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1868 {
1869     switch(op) {
1870     case OP_ROL:
1871         gen_rot_rm_im(s1, ot, d, c, 0);
1872         break;
1873     case OP_ROR:
1874         gen_rot_rm_im(s1, ot, d, c, 1);
1875         break;
1876     case OP_SHL:
1877     case OP_SHL1:
1878         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1879         break;
1880     case OP_SHR:
1881         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1882         break;
1883     case OP_SAR:
1884         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1885         break;
1886     default:
1887         /* currently not optimized */
1888         tcg_gen_movi_tl(s1->T1, c);
1889         gen_shift(s1, op, ot, d, OR_TMP1);
1890         break;
1891     }
1892 }
1893 
1894 #define X86_MAX_INSN_LENGTH 15
1895 
1896 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
1897 {
1898     uint64_t pc = s->pc;
1899 
1900     s->pc += num_bytes;
1901     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
1902         /* If the instruction's 16th byte is on a different page than the 1st, a
1903          * page fault on the second page wins over the general protection fault
1904          * caused by the instruction being too long.
1905          * This can happen even if the operand is only one byte long!
1906          */
1907         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
1908             volatile uint8_t unused =
1909                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
1910             (void) unused;
1911         }
1912         siglongjmp(s->jmpbuf, 1);
1913     }
1914 
1915     return pc;
1916 }
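/*
 * Example with 4KiB pages: for an instruction starting at 0x1ff8, the
 * length check fires once s->pc reaches 0x2008, and the instruction's
 * last byte (0x2007) sits on a new page.  The cpu_ldub_code() probe of
 * that page's first byte (0x2000) raises any #PF before the longjmp path
 * raises the #GP for the over-long instruction, giving the fault
 * priority described above.
 */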
1917 
1918 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
1919 {
1920     return translator_ldub(env, advance_pc(env, s, 1));
1921 }
1922 
1923 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
1924 {
1925     return translator_ldsw(env, advance_pc(env, s, 2));
1926 }
1927 
1928 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
1929 {
1930     return translator_lduw(env, advance_pc(env, s, 2));
1931 }
1932 
1933 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
1934 {
1935     return translator_ldl(env, advance_pc(env, s, 4));
1936 }
1937 
1938 #ifdef TARGET_X86_64
1939 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
1940 {
1941     return translator_ldq(env, advance_pc(env, s, 8));
1942 }
1943 #endif
1944 
1945 /* Decompose an address.  */
1946 
1947 typedef struct AddressParts {
1948     int def_seg;
1949     int base;
1950     int index;
1951     int scale;
1952     target_long disp;
1953 } AddressParts;
1954 
1955 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
1956                                     int modrm)
1957 {
1958     int def_seg, base, index, scale, mod, rm;
1959     target_long disp;
1960     bool havesib;
1961 
1962     def_seg = R_DS;
1963     index = -1;
1964     scale = 0;
1965     disp = 0;
1966 
1967     mod = (modrm >> 6) & 3;
1968     rm = modrm & 7;
1969     base = rm | REX_B(s);
1970 
1971     if (mod == 3) {
1972         /* Normally filtered out earlier, but including this path
1973            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
1974         goto done;
1975     }
1976 
1977     switch (s->aflag) {
1978     case MO_64:
1979     case MO_32:
1980         havesib = 0;
1981         if (rm == 4) {
1982             int code = x86_ldub_code(env, s);
1983             scale = (code >> 6) & 3;
1984             index = ((code >> 3) & 7) | REX_X(s);
1985             if (index == 4) {
1986                 index = -1;  /* no index */
1987             }
1988             base = (code & 7) | REX_B(s);
1989             havesib = 1;
1990         }
1991 
1992         switch (mod) {
1993         case 0:
1994             if ((base & 7) == 5) {
1995                 base = -1;
1996                 disp = (int32_t)x86_ldl_code(env, s);
1997                 if (CODE64(s) && !havesib) {
1998                     base = -2;
1999                     disp += s->pc + s->rip_offset;
2000                 }
2001             }
2002             break;
2003         case 1:
2004             disp = (int8_t)x86_ldub_code(env, s);
2005             break;
2006         default:
2007         case 2:
2008             disp = (int32_t)x86_ldl_code(env, s);
2009             break;
2010         }
2011 
2012         /* For correct popl handling with esp.  */
2013         if (base == R_ESP && s->popl_esp_hack) {
2014             disp += s->popl_esp_hack;
2015         }
2016         if (base == R_EBP || base == R_ESP) {
2017             def_seg = R_SS;
2018         }
2019         break;
2020 
2021     case MO_16:
2022         if (mod == 0) {
2023             if (rm == 6) {
2024                 base = -1;
2025                 disp = x86_lduw_code(env, s);
2026                 break;
2027             }
2028         } else if (mod == 1) {
2029             disp = (int8_t)x86_ldub_code(env, s);
2030         } else {
2031             disp = (int16_t)x86_lduw_code(env, s);
2032         }
2033 
2034         switch (rm) {
2035         case 0:
2036             base = R_EBX;
2037             index = R_ESI;
2038             break;
2039         case 1:
2040             base = R_EBX;
2041             index = R_EDI;
2042             break;
2043         case 2:
2044             base = R_EBP;
2045             index = R_ESI;
2046             def_seg = R_SS;
2047             break;
2048         case 3:
2049             base = R_EBP;
2050             index = R_EDI;
2051             def_seg = R_SS;
2052             break;
2053         case 4:
2054             base = R_ESI;
2055             break;
2056         case 5:
2057             base = R_EDI;
2058             break;
2059         case 6:
2060             base = R_EBP;
2061             def_seg = R_SS;
2062             break;
2063         default:
2064         case 7:
2065             base = R_EBX;
2066             break;
2067         }
2068         break;
2069 
2070     default:
2071         tcg_abort();
2072     }
2073 
2074  done:
2075     return (AddressParts){ def_seg, base, index, scale, disp };
2076 }
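/*
 * Worked decode (32-bit address size): for modrm = 0x44, sib = 0x9e,
 * disp8 = 0x08 -- i.e. [esi + ebx*4 + 8]:
 *
 *     modrm: mod = 1, rm = 4       -> SIB byte follows, then disp8
 *     sib:   scale = 2, index = 3 (EBX), base = 6 (ESI)
 *
 * yielding { .def_seg = R_DS, .base = R_ESI, .index = R_EBX, .scale = 2,
 * .disp = 8 }.  An index field of 4 means "no index" (ESP cannot be
 * scaled), and a base of EBP or ESP switches the default segment to SS.
 */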
2077 
2078 /* Compute the address, with a minimum number of TCG ops.  */
2079 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2080 {
2081     TCGv ea = NULL;
2082 
2083     if (a.index >= 0) {
2084         if (a.scale == 0) {
2085             ea = cpu_regs[a.index];
2086         } else {
2087             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2088             ea = s->A0;
2089         }
2090         if (a.base >= 0) {
2091             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2092             ea = s->A0;
2093         }
2094     } else if (a.base >= 0) {
2095         ea = cpu_regs[a.base];
2096     }
2097     if (!ea) {
2098         tcg_gen_movi_tl(s->A0, a.disp);
2099         ea = s->A0;
2100     } else if (a.disp != 0) {
2101         tcg_gen_addi_tl(s->A0, ea, a.disp);
2102         ea = s->A0;
2103     }
2104 
2105     return ea;
2106 }
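/*
 * For the AddressParts example above this emits just three ops,
 * schematically:
 *
 *     shli_tl  A0, EBX, 2       // index << scale
 *     add_tl   A0, A0, ESI      // + base
 *     addi_tl  A0, A0, 8        // + disp
 *
 * while a plain [ebx] costs none at all: cpu_regs[R_EBX] itself is
 * returned as the effective address.
 */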
2107 
2108 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2109 {
2110     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2111     TCGv ea = gen_lea_modrm_1(s, a);
2112     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2113 }
2114 
2115 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2116 {
2117     (void)gen_lea_modrm_0(env, s, modrm);
2118 }
2119 
2120 /* Used for BNDCL, BNDCU, BNDCN.  */
2121 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2122                       TCGCond cond, TCGv_i64 bndv)
2123 {
2124     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2125 
2126     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2127     if (!CODE64(s)) {
2128         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2129     }
2130     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2131     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2132     gen_helper_bndck(cpu_env, s->tmp2_i32);
2133 }
2134 
2135 /* used for LEA and MOV AX, mem */
2136 static void gen_add_A0_ds_seg(DisasContext *s)
2137 {
2138     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2139 }
2140 
2141 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2142    OR_TMP0 */
2143 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2144                            MemOp ot, int reg, int is_store)
2145 {
2146     int mod, rm;
2147 
2148     mod = (modrm >> 6) & 3;
2149     rm = (modrm & 7) | REX_B(s);
2150     if (mod == 3) {
2151         if (is_store) {
2152             if (reg != OR_TMP0)
2153                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2154             gen_op_mov_reg_v(s, ot, rm, s->T0);
2155         } else {
2156             gen_op_mov_v_reg(s, ot, s->T0, rm);
2157             if (reg != OR_TMP0)
2158                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2159         }
2160     } else {
2161         gen_lea_modrm(env, s, modrm);
2162         if (is_store) {
2163             if (reg != OR_TMP0)
2164                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2165             gen_op_st_v(s, ot, s->T0, s->A0);
2166         } else {
2167             gen_op_ld_v(s, ot, s->T0, s->A0);
2168             if (reg != OR_TMP0)
2169                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2170         }
2171     }
2172 }
2173 
2174 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2175 {
2176     uint32_t ret;
2177 
2178     switch (ot) {
2179     case MO_8:
2180         ret = x86_ldub_code(env, s);
2181         break;
2182     case MO_16:
2183         ret = x86_lduw_code(env, s);
2184         break;
2185     case MO_32:
2186 #ifdef TARGET_X86_64
2187     case MO_64:
2188 #endif
2189         ret = x86_ldl_code(env, s);
2190         break;
2191     default:
2192         tcg_abort();
2193     }
2194     return ret;
2195 }
2196 
2197 static inline int insn_const_size(MemOp ot)
2198 {
2199     if (ot <= MO_32) {
2200         return 1 << ot;
2201     } else {
2202         return 4;
2203     }
2204 }
2205 
2206 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2207 {
2208 #ifndef CONFIG_USER_ONLY
2209     return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2210            (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2211 #else
2212     return true;
2213 #endif
2214 }
2215 
2216 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2217 {
2218     target_ulong pc = s->cs_base + eip;
2219 
2220     if (use_goto_tb(s, pc))  {
2221         /* jump to same page: we can use a direct jump */
2222         tcg_gen_goto_tb(tb_num);
2223         gen_jmp_im(s, eip);
2224         tcg_gen_exit_tb(s->base.tb, tb_num);
2225         s->base.is_jmp = DISAS_NORETURN;
2226     } else {
2227         /* jump to another page */
2228         gen_jmp_im(s, eip);
2229         gen_jr(s, s->tmp0);
2230     }
2231 }
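/*
 * Rationale: a tcg_gen_goto_tb() link patches a direct jump between TBs
 * and bypasses the usual lookup, so use_goto_tb() only permits it when
 * the destination stays on a page already tied to this TB; anything else
 * goes through gen_jr()'s lookup-and-goto-ptr path, which revalidates
 * the target through the TB hash lookup.
 */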
2232 
2233 static inline void gen_jcc(DisasContext *s, int b,
2234                            target_ulong val, target_ulong next_eip)
2235 {
2236     TCGLabel *l1, *l2;
2237 
2238     if (s->jmp_opt) {
2239         l1 = gen_new_label();
2240         gen_jcc1(s, b, l1);
2241 
2242         gen_goto_tb(s, 0, next_eip);
2243 
2244         gen_set_label(l1);
2245         gen_goto_tb(s, 1, val);
2246     } else {
2247         l1 = gen_new_label();
2248         l2 = gen_new_label();
2249         gen_jcc1(s, b, l1);
2250 
2251         gen_jmp_im(s, next_eip);
2252         tcg_gen_br(l2);
2253 
2254         gen_set_label(l1);
2255         gen_jmp_im(s, val);
2256         gen_set_label(l2);
2257         gen_eob(s);
2258     }
2259 }
2260 
2261 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2262                         int modrm, int reg)
2263 {
2264     CCPrepare cc;
2265 
2266     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2267 
2268     cc = gen_prepare_cc(s, b, s->T1);
2269     if (cc.mask != -1) {
2270         TCGv t0 = tcg_temp_new();
2271         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2272         cc.reg = t0;
2273     }
2274     if (!cc.use_reg2) {
2275         cc.reg2 = tcg_const_tl(cc.imm);
2276     }
2277 
2278     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2279                        s->T0, cpu_regs[reg]);
2280     gen_op_mov_reg_v(s, ot, reg, s->T0);
2281 
2282     if (cc.mask != -1) {
2283         tcg_temp_free(cc.reg);
2284     }
2285     if (!cc.use_reg2) {
2286         tcg_temp_free(cc.reg2);
2287     }
2288 }
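/*
 * Note the source operand is loaded unconditionally, matching hardware:
 * CMOVcc performs its memory read even when the condition is false.  The
 * movcond then selects between the loaded value (s->T0) and the current
 * destination register, so no branch is needed.
 */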
2289 
2290 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2291 {
2292     tcg_gen_ld32u_tl(s->T0, cpu_env,
2293                      offsetof(CPUX86State,segs[seg_reg].selector));
2294 }
2295 
2296 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2297 {
2298     tcg_gen_ext16u_tl(s->T0, s->T0);
2299     tcg_gen_st32_tl(s->T0, cpu_env,
2300                     offsetof(CPUX86State,segs[seg_reg].selector));
2301     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2302 }
2303 
2304 /* Move T0 to seg_reg and determine whether the CPU state may change.
2305    Never call this function with seg_reg == R_CS. */
2306 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2307 {
2308     if (s->pe && !s->vm86) {
2309         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2310         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2311         /* abort translation because the addseg value may change or
2312            because ss32 may change. For R_SS, translation must always
2313            stop as a special handling must be done to disable hardware
2314            interrupts for the next instruction */
2315         if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) {
2316             s->base.is_jmp = DISAS_TOO_MANY;
2317         }
2318     } else {
2319         gen_op_movl_seg_T0_vm(s, seg_reg);
2320         if (seg_reg == R_SS) {
2321             s->base.is_jmp = DISAS_TOO_MANY;
2322         }
2323     }
2324 }
2325 
2326 static inline int svm_is_rep(int prefixes)
2327 {
2328     return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 8 : 0);
2329 }
2330 
2331 static inline void
2332 gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
2333                               uint32_t type, uint64_t param)
2334 {
2335     /* no SVM activated; fast case */
2336     if (likely(!(s->flags & HF_GUEST_MASK)))
2337         return;
2338     gen_update_cc_op(s);
2339     gen_jmp_im(s, pc_start - s->cs_base);
2340     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
2341                                          tcg_const_i64(param));
2342 }
2343 
2344 static inline void
2345 gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type)
2346 {
2347     gen_svm_check_intercept_param(s, pc_start, type, 0);
2348 }
2349 
2350 static inline void gen_stack_update(DisasContext *s, int addend)
2351 {
2352     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2353 }
2354 
2355 /* Generate a push. It depends on ss32, addseg and dflag.  */
2356 static void gen_push_v(DisasContext *s, TCGv val)
2357 {
2358     MemOp d_ot = mo_pushpop(s, s->dflag);
2359     MemOp a_ot = mo_stacksize(s);
2360     int size = 1 << d_ot;
2361     TCGv new_esp = s->A0;
2362 
2363     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2364 
2365     if (!CODE64(s)) {
2366         if (s->addseg) {
2367             new_esp = s->tmp4;
2368             tcg_gen_mov_tl(new_esp, s->A0);
2369         }
2370         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2371     }
2372 
2373     gen_op_st_v(s, d_ot, val, s->A0);
2374     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2375 }
2376 
2377 /* A two-step pop is necessary for precise exceptions: the load may fault, so ESP is only updated once the load has succeeded. */
2378 static MemOp gen_pop_T0(DisasContext *s)
2379 {
2380     MemOp d_ot = mo_pushpop(s, s->dflag);
2381 
2382     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2383     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2384 
2385     return d_ot;
2386 }
2387 
2388 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2389 {
2390     gen_stack_update(s, 1 << ot);
2391 }
2392 
2393 static inline void gen_stack_A0(DisasContext *s)
2394 {
2395     gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2396 }
2397 
2398 static void gen_pusha(DisasContext *s)
2399 {
2400     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2401     MemOp d_ot = s->dflag;
2402     int size = 1 << d_ot;
2403     int i;
2404 
2405     for (i = 0; i < 8; i++) {
2406         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2407         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2408         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2409     }
2410 
2411     gen_stack_update(s, -8 * size);
2412 }
2413 
2414 static void gen_popa(DisasContext *s)
2415 {
2416     MemOp s_ot = s->ss32 ? MO_32 : MO_16;
2417     MemOp d_ot = s->dflag;
2418     int size = 1 << d_ot;
2419     int i;
2420 
2421     for (i = 0; i < 8; i++) {
2422         /* ESP is not reloaded */
2423         if (7 - i == R_ESP) {
2424             continue;
2425         }
2426         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2427         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2428         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2429         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2430     }
2431 
2432     gen_stack_update(s, 8 * size);
2433 }
2434 
2435 static void gen_enter(DisasContext *s, int esp_addend, int level)
2436 {
2437     MemOp d_ot = mo_pushpop(s, s->dflag);
2438     MemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16;
2439     int size = 1 << d_ot;
2440 
2441     /* Push BP; compute FrameTemp into T1.  */
2442     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2443     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2444     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2445 
2446     level &= 31;
2447     if (level != 0) {
2448         int i;
2449 
2450         /* Copy level-1 pointers from the previous frame.  */
2451         for (i = 1; i < level; ++i) {
2452             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2453             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2454             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2455 
2456             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2457             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2458             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2459         }
2460 
2461         /* Push the current FrameTemp as the last level.  */
2462         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2463         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2464         gen_op_st_v(s, d_ot, s->T1, s->A0);
2465     }
2466 
2467     /* Copy the FrameTemp value to EBP.  */
2468     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2469 
2470     /* Compute the final value of ESP.  */
2471     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2472     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2473 }
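/*
 * Worked example of the above: "enter 8, 2" with 32-bit stack and operand
 * size (size = 4):
 *
 *     push EBP                     ; FrameTemp = new ESP
 *     [FrameTemp - 4] = [EBP - 4]  ; copy level-1 frame pointers
 *     [FrameTemp - 8] = FrameTemp  ; push FrameTemp as the last level
 *     EBP = FrameTemp
 *     ESP = FrameTemp - (8 + 4*2)
 *
 * matching the architectural ENTER semantics for nesting level 2.
 */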
2474 
2475 static void gen_leave(DisasContext *s)
2476 {
2477     MemOp d_ot = mo_pushpop(s, s->dflag);
2478     MemOp a_ot = mo_stacksize(s);
2479 
2480     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2481     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2482 
2483     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2484 
2485     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2486     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2487 }
2488 
2489 /* Similarly, except that the assumption here is that we don't decode
2490    the instruction at all -- either a missing opcode, an unimplemented
2491    feature, or just a bogus instruction stream.  */
2492 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2493 {
2494     gen_illegal_opcode(s);
2495 
2496     if (qemu_loglevel_mask(LOG_UNIMP)) {
2497         FILE *logfile = qemu_log_lock();
2498         target_ulong pc = s->pc_start, end = s->pc;
2499 
2500         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2501         for (; pc < end; ++pc) {
2502             qemu_log(" %02x", cpu_ldub_code(env, pc));
2503         }
2504         qemu_log("\n");
2505         qemu_log_unlock(logfile);
2506     }
2507 }
2508 
2509 /* an interrupt is different from an exception because of the
2510    privilege checks */
2511 static void gen_interrupt(DisasContext *s, int intno,
2512                           target_ulong cur_eip, target_ulong next_eip)
2513 {
2514     gen_update_cc_op(s);
2515     gen_jmp_im(s, cur_eip);
2516     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2517                                tcg_const_i32(next_eip - cur_eip));
2518     s->base.is_jmp = DISAS_NORETURN;
2519 }
2520 
2521 static void gen_debug(DisasContext *s, target_ulong cur_eip)
2522 {
2523     gen_update_cc_op(s);
2524     gen_jmp_im(s, cur_eip);
2525     gen_helper_debug(cpu_env);
2526     s->base.is_jmp = DISAS_NORETURN;
2527 }
2528 
2529 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2530 {
2531     if ((s->flags & mask) == 0) {
2532         TCGv_i32 t = tcg_temp_new_i32();
2533         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2534         tcg_gen_ori_i32(t, t, mask);
2535         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2536         tcg_temp_free_i32(t);
2537         s->flags |= mask;
2538     }
2539 }
2540 
2541 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2542 {
2543     if (s->flags & mask) {
2544         TCGv_i32 t = tcg_temp_new_i32();
2545         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2546         tcg_gen_andi_i32(t, t, ~mask);
2547         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2548         tcg_temp_free_i32(t);
2549         s->flags &= ~mask;
2550     }
2551 }
2552 
2553 /* Clear BND registers during legacy branches.  */
2554 static void gen_bnd_jmp(DisasContext *s)
2555 {
2556     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2557        and if the BNDREGs are known to be in use (non-zero) already.
2558        The helper itself will check BNDPRESERVE at runtime.  */
2559     if ((s->prefix & PREFIX_REPNZ) == 0
2560         && (s->flags & HF_MPX_EN_MASK) != 0
2561         && (s->flags & HF_MPX_IU_MASK) != 0) {
2562         gen_helper_bnd_jmp(cpu_env);
2563     }
2564 }
2565 
2566 /* Generate an end of block. Trace exception is also generated if needed.
2567    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2568    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2569    S->TF.  This is used by the syscall/sysret insns.  */
2570 static void
2571 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2572 {
2573     gen_update_cc_op(s);
2574 
2575     /* If several instructions disable interrupts, only the first does it.  */
2576     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2577         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2578     } else {
2579         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2580     }
2581 
2582     if (s->base.tb->flags & HF_RF_MASK) {
2583         gen_helper_reset_rf(cpu_env);
2584     }
2585     if (s->base.singlestep_enabled) {
2586         gen_helper_debug(cpu_env);
2587     } else if (recheck_tf) {
2588         gen_helper_rechecking_single_step(cpu_env);
2589         tcg_gen_exit_tb(NULL, 0);
2590     } else if (s->tf) {
2591         gen_helper_single_step(cpu_env);
2592     } else if (jr) {
2593         tcg_gen_lookup_and_goto_ptr();
2594     } else {
2595         tcg_gen_exit_tb(NULL, 0);
2596     }
2597     s->base.is_jmp = DISAS_NORETURN;
2598 }
2599 
2600 static inline void
2601 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2602 {
2603     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2604 }
2605 
2606 /* End of block.
2607    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2608 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2609 {
2610     gen_eob_worker(s, inhibit, false);
2611 }
2612 
2613 /* End of block, resetting the inhibit irq flag.  */
2614 static void gen_eob(DisasContext *s)
2615 {
2616     gen_eob_worker(s, false, false);
2617 }
2618 
2619 /* Jump to register */
2620 static void gen_jr(DisasContext *s, TCGv dest)
2621 {
2622     do_gen_eob_worker(s, false, false, true);
2623 }
2624 
2625 /* Generate a jump to eip.  No segment change must happen beforehand, as a
2626    direct jump to the next block may occur. */
2627 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2628 {
2629     gen_update_cc_op(s);
2630     set_cc_op(s, CC_OP_DYNAMIC);
2631     if (s->jmp_opt) {
2632         gen_goto_tb(s, tb_num, eip);
2633     } else {
2634         gen_jmp_im(s, eip);
2635         gen_eob(s);
2636     }
2637 }
2638 
2639 static void gen_jmp(DisasContext *s, target_ulong eip)
2640 {
2641     gen_jmp_tb(s, eip, 0);
2642 }
2643 
2644 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2645 {
2646     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2647     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2648 }
2649 
2650 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2651 {
2652     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2653     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2654 }
2655 
2656 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2657 {
2658     int mem_index = s->mem_index;
2659     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2660     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2661     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2662     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2663     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2664 }
2665 
2666 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2667 {
2668     int mem_index = s->mem_index;
2669     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2670     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2671     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2672     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2673     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2674 }
2675 
2676 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2677 {
2678     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2679     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2680     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2681     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2682 }
2683 
2684 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2685 {
2686     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2687     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2688 }
2689 
2690 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2691 {
2692     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2693     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2694 }
2695 
2696 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2697 {
2698     tcg_gen_movi_i64(s->tmp1_i64, 0);
2699     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2700 }
2701 
2702 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2703 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2704 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2705 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2706 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2707 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2708                                TCGv_i32 val);
2709 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2710 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2711                                TCGv val);
2712 
2713 #define SSE_SPECIAL ((void *)1)
2714 #define SSE_DUMMY ((void *)2)
2715 
2716 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2717 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2718                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2719 
2720 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2721     /* 3DNow! extensions */
2722     [0x0e] = { SSE_DUMMY }, /* femms */
2723     [0x0f] = { SSE_DUMMY }, /* pf... */
2724     /* pure SSE operations */
2725     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2726     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2727     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2728     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2729     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2730     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2731     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2732     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2733 
2734     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2735     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2736     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2737     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2738     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2739     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2740     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2741     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2742     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2743     [0x51] = SSE_FOP(sqrt),
2744     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2745     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2746     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2747     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2748     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2749     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2750     [0x58] = SSE_FOP(add),
2751     [0x59] = SSE_FOP(mul),
2752     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2753                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2754     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2755     [0x5c] = SSE_FOP(sub),
2756     [0x5d] = SSE_FOP(min),
2757     [0x5e] = SSE_FOP(div),
2758     [0x5f] = SSE_FOP(max),
2759 
2760     [0xc2] = SSE_FOP(cmpeq),
2761     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2762                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2763 
2764     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2765     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2766     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2767 
2768     /* MMX ops and their SSE extensions */
2769     [0x60] = MMX_OP2(punpcklbw),
2770     [0x61] = MMX_OP2(punpcklwd),
2771     [0x62] = MMX_OP2(punpckldq),
2772     [0x63] = MMX_OP2(packsswb),
2773     [0x64] = MMX_OP2(pcmpgtb),
2774     [0x65] = MMX_OP2(pcmpgtw),
2775     [0x66] = MMX_OP2(pcmpgtl),
2776     [0x67] = MMX_OP2(packuswb),
2777     [0x68] = MMX_OP2(punpckhbw),
2778     [0x69] = MMX_OP2(punpckhwd),
2779     [0x6a] = MMX_OP2(punpckhdq),
2780     [0x6b] = MMX_OP2(packssdw),
2781     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2782     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2783     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2784     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
2785     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2786                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2787                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2788                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2789     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2790     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2791     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2792     [0x74] = MMX_OP2(pcmpeqb),
2793     [0x75] = MMX_OP2(pcmpeqw),
2794     [0x76] = MMX_OP2(pcmpeql),
2795     [0x77] = { SSE_DUMMY }, /* emms */
2796     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2797     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2798     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2799     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2800     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2801     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2802     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2803     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2804     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2805     [0xd1] = MMX_OP2(psrlw),
2806     [0xd2] = MMX_OP2(psrld),
2807     [0xd3] = MMX_OP2(psrlq),
2808     [0xd4] = MMX_OP2(paddq),
2809     [0xd5] = MMX_OP2(pmullw),
2810     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2811     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2812     [0xd8] = MMX_OP2(psubusb),
2813     [0xd9] = MMX_OP2(psubusw),
2814     [0xda] = MMX_OP2(pminub),
2815     [0xdb] = MMX_OP2(pand),
2816     [0xdc] = MMX_OP2(paddusb),
2817     [0xdd] = MMX_OP2(paddusw),
2818     [0xde] = MMX_OP2(pmaxub),
2819     [0xdf] = MMX_OP2(pandn),
2820     [0xe0] = MMX_OP2(pavgb),
2821     [0xe1] = MMX_OP2(psraw),
2822     [0xe2] = MMX_OP2(psrad),
2823     [0xe3] = MMX_OP2(pavgw),
2824     [0xe4] = MMX_OP2(pmulhuw),
2825     [0xe5] = MMX_OP2(pmulhw),
2826     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2827     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2828     [0xe8] = MMX_OP2(psubsb),
2829     [0xe9] = MMX_OP2(psubsw),
2830     [0xea] = MMX_OP2(pminsw),
2831     [0xeb] = MMX_OP2(por),
2832     [0xec] = MMX_OP2(paddsb),
2833     [0xed] = MMX_OP2(paddsw),
2834     [0xee] = MMX_OP2(pmaxsw),
2835     [0xef] = MMX_OP2(pxor),
2836     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2837     [0xf1] = MMX_OP2(psllw),
2838     [0xf2] = MMX_OP2(pslld),
2839     [0xf3] = MMX_OP2(psllq),
2840     [0xf4] = MMX_OP2(pmuludq),
2841     [0xf5] = MMX_OP2(pmaddwd),
2842     [0xf6] = MMX_OP2(psadbw),
2843     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2844                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2845     [0xf8] = MMX_OP2(psubb),
2846     [0xf9] = MMX_OP2(psubw),
2847     [0xfa] = MMX_OP2(psubl),
2848     [0xfb] = MMX_OP2(psubq),
2849     [0xfc] = MMX_OP2(paddb),
2850     [0xfd] = MMX_OP2(paddw),
2851     [0xfe] = MMX_OP2(paddl),
2852 };
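/*
 * The second index (b1) encodes the mandatory prefix, as computed in
 * gen_sse(): 0 = none, 1 = 0x66, 2 = 0xf3, 3 = 0xf2.  For example, opcode
 * 0f 58 resolves through SSE_FOP(add) to addps, addpd, addss or addsd
 * respectively.
 */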
2853 
2854 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2855     [0 + 2] = MMX_OP2(psrlw),
2856     [0 + 4] = MMX_OP2(psraw),
2857     [0 + 6] = MMX_OP2(psllw),
2858     [8 + 2] = MMX_OP2(psrld),
2859     [8 + 4] = MMX_OP2(psrad),
2860     [8 + 6] = MMX_OP2(pslld),
2861     [16 + 2] = MMX_OP2(psrlq),
2862     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2863     [16 + 6] = MMX_OP2(psllq),
2864     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2865 };
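/*
 * Rows are grouped by 8 per opcode: [0..7] for 0f 71 (word shifts),
 * [8..15] for 0f 72 (dword), [16..23] for 0f 73 (qword), indexed within a
 * row by ModRM.reg (/2 logical right, /4 arithmetic right, /6 left; /3
 * and /7 are the XMM-only psrldq/pslldq).  The second index is again MMX
 * vs XMM.
 */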
2866 
2867 static const SSEFunc_0_epi sse_op_table3ai[] = {
2868     gen_helper_cvtsi2ss,
2869     gen_helper_cvtsi2sd
2870 };
2871 
2872 #ifdef TARGET_X86_64
2873 static const SSEFunc_0_epl sse_op_table3aq[] = {
2874     gen_helper_cvtsq2ss,
2875     gen_helper_cvtsq2sd
2876 };
2877 #endif
2878 
2879 static const SSEFunc_i_ep sse_op_table3bi[] = {
2880     gen_helper_cvttss2si,
2881     gen_helper_cvtss2si,
2882     gen_helper_cvttsd2si,
2883     gen_helper_cvtsd2si
2884 };
2885 
2886 #ifdef TARGET_X86_64
2887 static const SSEFunc_l_ep sse_op_table3bq[] = {
2888     gen_helper_cvttss2sq,
2889     gen_helper_cvtss2sq,
2890     gen_helper_cvttsd2sq,
2891     gen_helper_cvtsd2sq
2892 };
2893 #endif
2894 
2895 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2896     SSE_FOP(cmpeq),
2897     SSE_FOP(cmplt),
2898     SSE_FOP(cmple),
2899     SSE_FOP(cmpunord),
2900     SSE_FOP(cmpneq),
2901     SSE_FOP(cmpnlt),
2902     SSE_FOP(cmpnle),
2903     SSE_FOP(cmpord),
2904 };
2905 
2906 static const SSEFunc_0_epp sse_op_table5[256] = {
2907     [0x0c] = gen_helper_pi2fw,
2908     [0x0d] = gen_helper_pi2fd,
2909     [0x1c] = gen_helper_pf2iw,
2910     [0x1d] = gen_helper_pf2id,
2911     [0x8a] = gen_helper_pfnacc,
2912     [0x8e] = gen_helper_pfpnacc,
2913     [0x90] = gen_helper_pfcmpge,
2914     [0x94] = gen_helper_pfmin,
2915     [0x96] = gen_helper_pfrcp,
2916     [0x97] = gen_helper_pfrsqrt,
2917     [0x9a] = gen_helper_pfsub,
2918     [0x9e] = gen_helper_pfadd,
2919     [0xa0] = gen_helper_pfcmpgt,
2920     [0xa4] = gen_helper_pfmax,
2921     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2922     [0xa7] = gen_helper_movq, /* pfrsqit1 */
2923     [0xaa] = gen_helper_pfsubr,
2924     [0xae] = gen_helper_pfacc,
2925     [0xb0] = gen_helper_pfcmpeq,
2926     [0xb4] = gen_helper_pfmul,
2927     [0xb6] = gen_helper_movq, /* pfrcpit2 */
2928     [0xb7] = gen_helper_pmulhrw_mmx,
2929     [0xbb] = gen_helper_pswapd,
2930     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
2931 };
2932 
2933 struct SSEOpHelper_epp {
2934     SSEFunc_0_epp op[2];
2935     uint32_t ext_mask;
2936 };
2937 
2938 struct SSEOpHelper_eppi {
2939     SSEFunc_0_eppi op[2];
2940     uint32_t ext_mask;
2941 };
2942 
2943 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
2944 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
2945 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
2946 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
2947 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
2948         CPUID_EXT_PCLMULQDQ }
2949 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
2950 
2951 static const struct SSEOpHelper_epp sse_op_table6[256] = {
2952     [0x00] = SSSE3_OP(pshufb),
2953     [0x01] = SSSE3_OP(phaddw),
2954     [0x02] = SSSE3_OP(phaddd),
2955     [0x03] = SSSE3_OP(phaddsw),
2956     [0x04] = SSSE3_OP(pmaddubsw),
2957     [0x05] = SSSE3_OP(phsubw),
2958     [0x06] = SSSE3_OP(phsubd),
2959     [0x07] = SSSE3_OP(phsubsw),
2960     [0x08] = SSSE3_OP(psignb),
2961     [0x09] = SSSE3_OP(psignw),
2962     [0x0a] = SSSE3_OP(psignd),
2963     [0x0b] = SSSE3_OP(pmulhrsw),
2964     [0x10] = SSE41_OP(pblendvb),
2965     [0x14] = SSE41_OP(blendvps),
2966     [0x15] = SSE41_OP(blendvpd),
2967     [0x17] = SSE41_OP(ptest),
2968     [0x1c] = SSSE3_OP(pabsb),
2969     [0x1d] = SSSE3_OP(pabsw),
2970     [0x1e] = SSSE3_OP(pabsd),
2971     [0x20] = SSE41_OP(pmovsxbw),
2972     [0x21] = SSE41_OP(pmovsxbd),
2973     [0x22] = SSE41_OP(pmovsxbq),
2974     [0x23] = SSE41_OP(pmovsxwd),
2975     [0x24] = SSE41_OP(pmovsxwq),
2976     [0x25] = SSE41_OP(pmovsxdq),
2977     [0x28] = SSE41_OP(pmuldq),
2978     [0x29] = SSE41_OP(pcmpeqq),
2979     [0x2a] = SSE41_SPECIAL, /* movntdqa */
2980     [0x2b] = SSE41_OP(packusdw),
2981     [0x30] = SSE41_OP(pmovzxbw),
2982     [0x31] = SSE41_OP(pmovzxbd),
2983     [0x32] = SSE41_OP(pmovzxbq),
2984     [0x33] = SSE41_OP(pmovzxwd),
2985     [0x34] = SSE41_OP(pmovzxwq),
2986     [0x35] = SSE41_OP(pmovzxdq),
2987     [0x37] = SSE42_OP(pcmpgtq),
2988     [0x38] = SSE41_OP(pminsb),
2989     [0x39] = SSE41_OP(pminsd),
2990     [0x3a] = SSE41_OP(pminuw),
2991     [0x3b] = SSE41_OP(pminud),
2992     [0x3c] = SSE41_OP(pmaxsb),
2993     [0x3d] = SSE41_OP(pmaxsd),
2994     [0x3e] = SSE41_OP(pmaxuw),
2995     [0x3f] = SSE41_OP(pmaxud),
2996     [0x40] = SSE41_OP(pmulld),
2997     [0x41] = SSE41_OP(phminposuw),
2998     [0xdb] = AESNI_OP(aesimc),
2999     [0xdc] = AESNI_OP(aesenc),
3000     [0xdd] = AESNI_OP(aesenclast),
3001     [0xde] = AESNI_OP(aesdec),
3002     [0xdf] = AESNI_OP(aesdeclast),
3003 };
3004 
3005 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3006     [0x08] = SSE41_OP(roundps),
3007     [0x09] = SSE41_OP(roundpd),
3008     [0x0a] = SSE41_OP(roundss),
3009     [0x0b] = SSE41_OP(roundsd),
3010     [0x0c] = SSE41_OP(blendps),
3011     [0x0d] = SSE41_OP(blendpd),
3012     [0x0e] = SSE41_OP(pblendw),
3013     [0x0f] = SSSE3_OP(palignr),
3014     [0x14] = SSE41_SPECIAL, /* pextrb */
3015     [0x15] = SSE41_SPECIAL, /* pextrw */
3016     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3017     [0x17] = SSE41_SPECIAL, /* extractps */
3018     [0x20] = SSE41_SPECIAL, /* pinsrb */
3019     [0x21] = SSE41_SPECIAL, /* insertps */
3020     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3021     [0x40] = SSE41_OP(dpps),
3022     [0x41] = SSE41_OP(dppd),
3023     [0x42] = SSE41_OP(mpsadbw),
3024     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3025     [0x60] = SSE42_OP(pcmpestrm),
3026     [0x61] = SSE42_OP(pcmpestri),
3027     [0x62] = SSE42_OP(pcmpistrm),
3028     [0x63] = SSE42_OP(pcmpistri),
3029     [0xdf] = AESNI_OP(aeskeygenassist),
3030 };
3031 
3032 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3033                     target_ulong pc_start, int rex_r)
3034 {
3035     int b1, op1_offset, op2_offset, is_xmm, val;
3036     int modrm, mod, rm, reg;
3037     SSEFunc_0_epp sse_fn_epp;
3038     SSEFunc_0_eppi sse_fn_eppi;
3039     SSEFunc_0_ppi sse_fn_ppi;
3040     SSEFunc_0_eppt sse_fn_eppt;
3041     MemOp ot;
3042 
3043     b &= 0xff;
3044     if (s->prefix & PREFIX_DATA)
3045         b1 = 1;
3046     else if (s->prefix & PREFIX_REPZ)
3047         b1 = 2;
3048     else if (s->prefix & PREFIX_REPNZ)
3049         b1 = 3;
3050     else
3051         b1 = 0;
3052     sse_fn_epp = sse_op_table1[b][b1];
3053     if (!sse_fn_epp) {
3054         goto unknown_op;
3055     }
3056     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3057         is_xmm = 1;
3058     } else {
3059         if (b1 == 0) {
3060             /* MMX case */
3061             is_xmm = 0;
3062         } else {
3063             is_xmm = 1;
3064         }
3065     }
3066     /* simple MMX/SSE operation */
3067     if (s->flags & HF_TS_MASK) {
3068         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3069         return;
3070     }
3071     if (s->flags & HF_EM_MASK) {
3072     illegal_op:
3073         gen_illegal_opcode(s);
3074         return;
3075     }
3076     if (is_xmm
3077         && !(s->flags & HF_OSFXSR_MASK)
3078         && (b != 0x38 && b != 0x3a)) {
3079         goto unknown_op;
3080     }
3081     if (b == 0x0e) {
3082         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3083             /* If we were fully decoding this we might use illegal_op.  */
3084             goto unknown_op;
3085         }
3086         /* femms */
3087         gen_helper_emms(cpu_env);
3088         return;
3089     }
3090     if (b == 0x77) {
3091         /* emms */
3092         gen_helper_emms(cpu_env);
3093         return;
3094     }
3095     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3096        the static cpu state) */
3097     if (!is_xmm) {
3098         gen_helper_enter_mmx(cpu_env);
3099     }
3100 
3101     modrm = x86_ldub_code(env, s);
3102     reg = ((modrm >> 3) & 7);
3103     if (is_xmm)
3104         reg |= rex_r;
3105     mod = (modrm >> 6) & 3;
3106     if (sse_fn_epp == SSE_SPECIAL) {
3107         b |= (b1 << 8);
3108         switch(b) {
3109         case 0x0e7: /* movntq */
3110             if (mod == 3) {
3111                 goto illegal_op;
3112             }
3113             gen_lea_modrm(env, s, modrm);
3114             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3115             break;
3116         case 0x1e7: /* movntdq */
3117         case 0x02b: /* movntps */
3118         case 0x12b: /* movntpd */
3119             if (mod == 3)
3120                 goto illegal_op;
3121             gen_lea_modrm(env, s, modrm);
3122             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3123             break;
3124         case 0x3f0: /* lddqu */
3125             if (mod == 3)
3126                 goto illegal_op;
3127             gen_lea_modrm(env, s, modrm);
3128             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3129             break;
3130         case 0x22b: /* movntss */
3131         case 0x32b: /* movntsd */
3132             if (mod == 3)
3133                 goto illegal_op;
3134             gen_lea_modrm(env, s, modrm);
3135             if (b1 & 1) {
3136                 gen_stq_env_A0(s, offsetof(CPUX86State,
3137                                            xmm_regs[reg].ZMM_Q(0)));
3138             } else {
3139                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3140                     xmm_regs[reg].ZMM_L(0)));
3141                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3142             }
3143             break;
3144         case 0x6e: /* movd mm, ea */
3145 #ifdef TARGET_X86_64
3146             if (s->dflag == MO_64) {
3147                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3148                 tcg_gen_st_tl(s->T0, cpu_env,
3149                               offsetof(CPUX86State, fpregs[reg].mmx));
3150             } else
3151 #endif
3152             {
3153                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3154                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3155                                  offsetof(CPUX86State,fpregs[reg].mmx));
3156                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3157                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3158             }
3159             break;
3160         case 0x16e: /* movd xmm, ea */
3161 #ifdef TARGET_X86_64
3162             if (s->dflag == MO_64) {
3163                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3164                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3165                                  offsetof(CPUX86State,xmm_regs[reg]));
3166                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3167             } else
3168 #endif
3169             {
3170                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3171                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3172                                  offsetof(CPUX86State,xmm_regs[reg]));
3173                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3174                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3175             }
3176             break;
3177         case 0x6f: /* movq mm, ea */
3178             if (mod != 3) {
3179                 gen_lea_modrm(env, s, modrm);
3180                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3181             } else {
3182                 rm = (modrm & 7);
3183                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3184                                offsetof(CPUX86State,fpregs[rm].mmx));
3185                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3186                                offsetof(CPUX86State,fpregs[reg].mmx));
3187             }
3188             break;
3189         case 0x010: /* movups */
3190         case 0x110: /* movupd */
3191         case 0x028: /* movaps */
3192         case 0x128: /* movapd */
3193         case 0x16f: /* movdqa xmm, ea */
3194         case 0x26f: /* movdqu xmm, ea */
3195             if (mod != 3) {
3196                 gen_lea_modrm(env, s, modrm);
3197                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3198             } else {
3199                 rm = (modrm & 7) | REX_B(s);
3200                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3201                             offsetof(CPUX86State,xmm_regs[rm]));
3202             }
3203             break;
3204         case 0x210: /* movss xmm, ea */
3205             if (mod != 3) {
3206                 gen_lea_modrm(env, s, modrm);
3207                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3208                 tcg_gen_st32_tl(s->T0, cpu_env,
3209                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3210                 tcg_gen_movi_tl(s->T0, 0);
3211                 tcg_gen_st32_tl(s->T0, cpu_env,
3212                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3213                 tcg_gen_st32_tl(s->T0, cpu_env,
3214                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3215                 tcg_gen_st32_tl(s->T0, cpu_env,
3216                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3217             } else {
3218                 rm = (modrm & 7) | REX_B(s);
3219                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3220                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3221             }
3222             break;
3223         case 0x310: /* movsd xmm, ea */
3224             if (mod != 3) {
3225                 gen_lea_modrm(env, s, modrm);
3226                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3227                                            xmm_regs[reg].ZMM_Q(0)));
3228                 tcg_gen_movi_tl(s->T0, 0);
3229                 tcg_gen_st32_tl(s->T0, cpu_env,
3230                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3231                 tcg_gen_st32_tl(s->T0, cpu_env,
3232                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3233             } else {
3234                 rm = (modrm & 7) | REX_B(s);
3235                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3236                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3237             }
3238             break;
3239         case 0x012: /* movlps */
3240         case 0x112: /* movlpd */
3241             if (mod != 3) {
3242                 gen_lea_modrm(env, s, modrm);
3243                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3244                                            xmm_regs[reg].ZMM_Q(0)));
3245             } else {
3246                 /* movhlps */
3247                 rm = (modrm & 7) | REX_B(s);
3248                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3249                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3250             }
3251             break;
3252         case 0x212: /* movsldup */
3253             if (mod != 3) {
3254                 gen_lea_modrm(env, s, modrm);
3255                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3256             } else {
3257                 rm = (modrm & 7) | REX_B(s);
3258                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3259                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3260                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3261                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3262             }
3263             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3264                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3265             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3266                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3267             break;
3268         case 0x312: /* movddup */
3269             if (mod != 3) {
3270                 gen_lea_modrm(env, s, modrm);
3271                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3272                                            xmm_regs[reg].ZMM_Q(0)));
3273             } else {
3274                 rm = (modrm & 7) | REX_B(s);
3275                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3276                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3277             }
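                 /* Both forms then duplicate the low quadword into the
                    high quadword. */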
3278             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3279                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3280             break;
3281         case 0x016: /* movhps */
3282         case 0x116: /* movhpd */
3283             if (mod != 3) {
3284                 gen_lea_modrm(env, s, modrm);
3285                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3286                                            xmm_regs[reg].ZMM_Q(1)));
3287             } else {
3288                 /* movlhps */
3289                 rm = (modrm & 7) | REX_B(s);
3290                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3291                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3292             }
3293             break;
3294         case 0x216: /* movshdup */
3295             if (mod != 3) {
3296                 gen_lea_modrm(env, s, modrm);
3297                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3298             } else {
3299                 rm = (modrm & 7) | REX_B(s);
3300                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3301                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3302                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3303                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3304             }
3305             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3306                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3307             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3308                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3309             break;
3310         case 0x178: /* extrq imm (SSE4a) */
3311         case 0x378: /* insertq imm (SSE4a) */
3312             {
3313                 int bit_index, field_length;
3314 
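                     /* SSE4a extrq/insertq, immediate forms: two immediate
                        bytes follow the modrm byte, a 6-bit field length and
                        then a 6-bit bit index into the low quadword. */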
3315                 if (b1 == 1 && reg != 0)
3316                     goto illegal_op;
3317                 field_length = x86_ldub_code(env, s) & 0x3F;
3318                 bit_index = x86_ldub_code(env, s) & 0x3F;
3319                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3320                     offsetof(CPUX86State,xmm_regs[reg]));
3321                 if (b1 == 1)
3322                     gen_helper_extrq_i(cpu_env, s->ptr0,
3323                                        tcg_const_i32(bit_index),
3324                                        tcg_const_i32(field_length));
3325                 else
3326                     gen_helper_insertq_i(cpu_env, s->ptr0,
3327                                          tcg_const_i32(bit_index),
3328                                          tcg_const_i32(field_length));
3329             }
3330             break;
3331         case 0x7e: /* movd ea, mm */
3332 #ifdef TARGET_X86_64
3333             if (s->dflag == MO_64) {
3334                 tcg_gen_ld_i64(s->T0, cpu_env,
3335                                offsetof(CPUX86State,fpregs[reg].mmx));
3336                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3337             } else
3338 #endif
3339             {
3340                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3341                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3342                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3343             }
3344             break;
3345         case 0x17e: /* movd ea, xmm */
3346 #ifdef TARGET_X86_64
3347             if (s->dflag == MO_64) {
3348                 tcg_gen_ld_i64(s->T0, cpu_env,
3349                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3350                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3351             } else
3352 #endif
3353             {
3354                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3355                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3356                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3357             }
3358             break;
3359         case 0x27e: /* movq xmm, ea */
3360             if (mod != 3) {
3361                 gen_lea_modrm(env, s, modrm);
3362                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3363                                            xmm_regs[reg].ZMM_Q(0)));
3364             } else {
3365                 rm = (modrm & 7) | REX_B(s);
3366                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3367                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3368             }
3369             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3370             break;
3371         case 0x7f: /* movq ea, mm */
3372             if (mod != 3) {
3373                 gen_lea_modrm(env, s, modrm);
3374                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3375             } else {
3376                 rm = (modrm & 7);
3377                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3378                             offsetof(CPUX86State,fpregs[reg].mmx));
3379             }
3380             break;
3381         case 0x011: /* movups */
3382         case 0x111: /* movupd */
3383         case 0x029: /* movaps */
3384         case 0x129: /* movapd */
3385         case 0x17f: /* movdqa ea, xmm */
3386         case 0x27f: /* movdqu ea, xmm */
3387             if (mod != 3) {
3388                 gen_lea_modrm(env, s, modrm);
3389                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3390             } else {
3391                 rm = (modrm & 7) | REX_B(s);
3392                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3393                             offsetof(CPUX86State,xmm_regs[reg]));
3394             }
3395             break;
3396         case 0x211: /* movss ea, xmm */
3397             if (mod != 3) {
3398                 gen_lea_modrm(env, s, modrm);
3399                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3400                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3401                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3402             } else {
3403                 rm = (modrm & 7) | REX_B(s);
3404                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3405                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3406             }
3407             break;
3408         case 0x311: /* movsd ea, xmm */
3409             if (mod != 3) {
3410                 gen_lea_modrm(env, s, modrm);
3411                 gen_stq_env_A0(s, offsetof(CPUX86State,
3412                                            xmm_regs[reg].ZMM_Q(0)));
3413             } else {
3414                 rm = (modrm & 7) | REX_B(s);
3415                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3416                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3417             }
3418             break;
3419         case 0x013: /* movlps */
3420         case 0x113: /* movlpd */
3421             if (mod != 3) {
3422                 gen_lea_modrm(env, s, modrm);
3423                 gen_stq_env_A0(s, offsetof(CPUX86State,
3424                                            xmm_regs[reg].ZMM_Q(0)));
3425             } else {
3426                 goto illegal_op;
3427             }
3428             break;
3429         case 0x017: /* movhps */
3430         case 0x117: /* movhpd */
3431             if (mod != 3) {
3432                 gen_lea_modrm(env, s, modrm);
3433                 gen_stq_env_A0(s, offsetof(CPUX86State,
3434                                            xmm_regs[reg].ZMM_Q(1)));
3435             } else {
3436                 goto illegal_op;
3437             }
3438             break;
3439         case 0x71: /* shift mm, imm8 */
3440         case 0x72:
3441         case 0x73:
3442         case 0x171: /* shift xmm, imm8 */
3443         case 0x172:
3444         case 0x173:
3445             if (b1 >= 2) {
3446                 goto unknown_op;
3447             }
3448             val = x86_ldub_code(env, s);
3449             if (is_xmm) {
3450                 tcg_gen_movi_tl(s->T0, val);
3451                 tcg_gen_st32_tl(s->T0, cpu_env,
3452                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3453                 tcg_gen_movi_tl(s->T0, 0);
3454                 tcg_gen_st32_tl(s->T0, cpu_env,
3455                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3456                 op1_offset = offsetof(CPUX86State,xmm_t0);
3457             } else {
3458                 tcg_gen_movi_tl(s->T0, val);
3459                 tcg_gen_st32_tl(s->T0, cpu_env,
3460                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3461                 tcg_gen_movi_tl(s->T0, 0);
3462                 tcg_gen_st32_tl(s->T0, cpu_env,
3463                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3464                 op1_offset = offsetof(CPUX86State,mmx_t0);
3465             }
3466             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3467                                        ((modrm >> 3) & 7)][b1];
3468             if (!sse_fn_epp) {
3469                 goto unknown_op;
3470             }
3471             if (is_xmm) {
3472                 rm = (modrm & 7) | REX_B(s);
3473                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3474             } else {
3475                 rm = (modrm & 7);
3476                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3477             }
3478             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3479             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3480             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3481             break;
3482         case 0x050: /* movmskps */
3483             rm = (modrm & 7) | REX_B(s);
3484             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3485                              offsetof(CPUX86State,xmm_regs[rm]));
3486             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3487             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3488             break;
3489         case 0x150: /* movmskpd */
3490             rm = (modrm & 7) | REX_B(s);
3491             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3492                              offsetof(CPUX86State,xmm_regs[rm]));
3493             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3494             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3495             break;
3496         case 0x02a: /* cvtpi2ps */
3497         case 0x12a: /* cvtpi2pd */
3498             gen_helper_enter_mmx(cpu_env);
3499             if (mod != 3) {
3500                 gen_lea_modrm(env, s, modrm);
3501                 op2_offset = offsetof(CPUX86State,mmx_t0);
3502                 gen_ldq_env_A0(s, op2_offset);
3503             } else {
3504                 rm = (modrm & 7);
3505                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3506             }
3507             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3508             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3509             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3510             switch(b >> 8) {
3511             case 0x0:
3512                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3513                 break;
3514             default:
3515             case 0x1:
3516                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3517                 break;
3518             }
3519             break;
3520         case 0x22a: /* cvtsi2ss */
3521         case 0x32a: /* cvtsi2sd */
3522             ot = mo_64_32(s->dflag);
3523             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3524             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3525             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3526             if (ot == MO_32) {
3527                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3528                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3529                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3530             } else {
3531 #ifdef TARGET_X86_64
3532                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3533                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3534 #else
3535                 goto illegal_op;
3536 #endif
3537             }
3538             break;
3539         case 0x02c: /* cvttps2pi */
3540         case 0x12c: /* cvttpd2pi */
3541         case 0x02d: /* cvtps2pi */
3542         case 0x12d: /* cvtpd2pi */
3543             gen_helper_enter_mmx(cpu_env);
3544             if (mod != 3) {
3545                 gen_lea_modrm(env, s, modrm);
3546                 op2_offset = offsetof(CPUX86State,xmm_t0);
3547                 gen_ldo_env_A0(s, op2_offset);
3548             } else {
3549                 rm = (modrm & 7) | REX_B(s);
3550                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3551             }
3552             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3553             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3554             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3555             switch(b) {
3556             case 0x02c:
3557                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3558                 break;
3559             case 0x12c:
3560                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3561                 break;
3562             case 0x02d:
3563                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3564                 break;
3565             case 0x12d:
3566                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3567                 break;
3568             }
3569             break;
3570         case 0x22c: /* cvttss2si */
3571         case 0x32c: /* cvttsd2si */
3572         case 0x22d: /* cvtss2si */
3573         case 0x32d: /* cvtsd2si */
3574             ot = mo_64_32(s->dflag);
3575             if (mod != 3) {
3576                 gen_lea_modrm(env, s, modrm);
3577                 if ((b >> 8) & 1) {
3578                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3579                 } else {
3580                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3581                     tcg_gen_st32_tl(s->T0, cpu_env,
3582                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3583                 }
3584                 op2_offset = offsetof(CPUX86State,xmm_t0);
3585             } else {
3586                 rm = (modrm & 7) | REX_B(s);
3587                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3588             }
3589             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3590             if (ot == MO_32) {
3591                 SSEFunc_i_ep sse_fn_i_ep =
3592                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3593                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3594                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3595             } else {
3596 #ifdef TARGET_X86_64
3597                 SSEFunc_l_ep sse_fn_l_ep =
3598                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3599                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3600 #else
3601                 goto illegal_op;
3602 #endif
3603             }
3604             gen_op_mov_reg_v(s, ot, reg, s->T0);
3605             break;
3606         case 0xc4: /* pinsrw */
3607         case 0x1c4:
3608             s->rip_offset = 1;
3609             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3610             val = x86_ldub_code(env, s);
3611             if (b1) {
3612                 val &= 7;
3613                 tcg_gen_st16_tl(s->T0, cpu_env,
3614                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3615             } else {
3616                 val &= 3;
3617                 tcg_gen_st16_tl(s->T0, cpu_env,
3618                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3619             }
3620             break;
3621         case 0xc5: /* pextrw */
3622         case 0x1c5:
3623             if (mod != 3)
3624                 goto illegal_op;
3625             ot = mo_64_32(s->dflag);
3626             val = x86_ldub_code(env, s);
3627             if (b1) {
3628                 val &= 7;
3629                 rm = (modrm & 7) | REX_B(s);
3630                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3631                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3632             } else {
3633                 val &= 3;
3634                 rm = (modrm & 7);
3635                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3636                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3637             }
3638             reg = ((modrm >> 3) & 7) | rex_r;
3639             gen_op_mov_reg_v(s, ot, reg, s->T0);
3640             break;
3641         case 0x1d6: /* movq ea, xmm */
3642             if (mod != 3) {
3643                 gen_lea_modrm(env, s, modrm);
3644                 gen_stq_env_A0(s, offsetof(CPUX86State,
3645                                            xmm_regs[reg].ZMM_Q(0)));
3646             } else {
3647                 rm = (modrm & 7) | REX_B(s);
3648                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3649                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3650                 gen_op_movq_env_0(s,
3651                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3652             }
3653             break;
3654         case 0x2d6: /* movq2dq */
3655             gen_helper_enter_mmx(cpu_env);
3656             rm = (modrm & 7);
3657             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3658                         offsetof(CPUX86State,fpregs[rm].mmx));
3659             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3660             break;
3661         case 0x3d6: /* movdq2q */
3662             gen_helper_enter_mmx(cpu_env);
3663             rm = (modrm & 7) | REX_B(s);
3664             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3665                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3666             break;
3667         case 0xd7: /* pmovmskb */
3668         case 0x1d7:
3669             if (mod != 3)
3670                 goto illegal_op;
3671             if (b1) {
3672                 rm = (modrm & 7) | REX_B(s);
3673                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3674                                  offsetof(CPUX86State, xmm_regs[rm]));
3675                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3676             } else {
3677                 rm = (modrm & 7);
3678                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3679                                  offsetof(CPUX86State, fpregs[rm].mmx));
3680                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3681             }
3682             reg = ((modrm >> 3) & 7) | rex_r;
3683             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3684             break;
3685 
3686         case 0x138:
3687         case 0x038:
3688             b = modrm;
3689             if ((b & 0xf0) == 0xf0) {
3690                 goto do_0f_38_fx;
3691             }
3692             modrm = x86_ldub_code(env, s);
3693             rm = modrm & 7;
3694             reg = ((modrm >> 3) & 7) | rex_r;
3695             mod = (modrm >> 6) & 3;
3696             if (b1 >= 2) {
3697                 goto unknown_op;
3698             }
3699 
3700             sse_fn_epp = sse_op_table6[b].op[b1];
3701             if (!sse_fn_epp) {
3702                 goto unknown_op;
3703             }
3704             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3705                 goto illegal_op;
3706 
3707             if (b1) {
3708                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3709                 if (mod == 3) {
3710                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3711                 } else {
3712                     op2_offset = offsetof(CPUX86State,xmm_t0);
3713                     gen_lea_modrm(env, s, modrm);
3714                     switch (b) {
3715                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3716                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3717                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3718                         gen_ldq_env_A0(s, op2_offset +
3719                                         offsetof(ZMMReg, ZMM_Q(0)));
3720                         break;
3721                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3722                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3723                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3724                                             s->mem_index, MO_LEUL);
3725                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3726                                         offsetof(ZMMReg, ZMM_L(0)));
3727                         break;
3728                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3729                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3730                                            s->mem_index, MO_LEUW);
3731                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3732                                         offsetof(ZMMReg, ZMM_W(0)));
3733                         break;
3734                     case 0x2a:            /* movntdqa */
3735                         gen_ldo_env_A0(s, op1_offset);
3736                         return;
3737                     default:
3738                         gen_ldo_env_A0(s, op2_offset);
3739                     }
3740                 }
3741             } else {
3742                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3743                 if (mod == 3) {
3744                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3745                 } else {
3746                     op2_offset = offsetof(CPUX86State,mmx_t0);
3747                     gen_lea_modrm(env, s, modrm);
3748                     gen_ldq_env_A0(s, op2_offset);
3749                 }
3750             }
3751             if (sse_fn_epp == SSE_SPECIAL) {
3752                 goto unknown_op;
3753             }
3754 
3755             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3756             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3757             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3758 
3759             if (b == 0x17) {
3760                 set_cc_op(s, CC_OP_EFLAGS);
3761             }
3762             break;
3763 
3764         case 0x238:
3765         case 0x338:
3766         do_0f_38_fx:
3767             /* Various integer extensions at 0f 38 f[0-f].  */
3768             b = modrm | (b1 << 8);
3769             modrm = x86_ldub_code(env, s);
3770             reg = ((modrm >> 3) & 7) | rex_r;
3771 
3772             switch (b) {
3773             case 0x3f0: /* crc32 Gd,Eb */
3774             case 0x3f1: /* crc32 Gd,Ey */
3775             do_crc32:
3776                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3777                     goto illegal_op;
3778                 }
3779                 if ((b & 0xff) == 0xf0) {
3780                     ot = MO_8;
3781                 } else if (s->dflag != MO_64) {
3782                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3783                 } else {
3784                     ot = MO_64;
3785                 }
3786 
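                     /* The helper folds 8 << ot bits of the operand into the
                        running CRC taken from Gd; the crc32 insn uses the
                        Castagnoli (CRC-32C) polynomial. */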
3787                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3788                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3789                 gen_helper_crc32(s->T0, s->tmp2_i32,
3790                                  s->T0, tcg_const_i32(8 << ot));
3791 
3792                 ot = mo_64_32(s->dflag);
3793                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3794                 break;
3795 
3796             case 0x1f0: /* crc32 or movbe */
3797             case 0x1f1:
3798                 /* For these insns, the f2 prefix is supposed to have
3799                    priority over the 66 prefix, but that's not how b1 was
3800                    computed above, so check for it explicitly here.  */
3801                 if (s->prefix & PREFIX_REPNZ) {
3802                     goto do_crc32;
3803                 }
3804                 /* FALLTHRU */
3805             case 0x0f0: /* movbe Gy,My */
3806             case 0x0f1: /* movbe My,Gy */
3807                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3808                     goto illegal_op;
3809                 }
3810                 if (s->dflag != MO_64) {
3811                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3812                 } else {
3813                     ot = MO_64;
3814                 }
3815 
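                     /* movbe is just a byte-swapping load or store: tagging
                        the TCG memory op with MO_BE makes the backend do the
                        swap. */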
3816                 gen_lea_modrm(env, s, modrm);
3817                 if ((b & 1) == 0) {
3818                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3819                                        s->mem_index, ot | MO_BE);
3820                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3821                 } else {
3822                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3823                                        s->mem_index, ot | MO_BE);
3824                 }
3825                 break;
3826 
3827             case 0x0f2: /* andn Gy, By, Ey */
3828                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3829                     || !(s->prefix & PREFIX_VEX)
3830                     || s->vex_l != 0) {
3831                     goto illegal_op;
3832                 }
3833                 ot = mo_64_32(s->dflag);
3834                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3835                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3836                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3837                 gen_op_update1_cc(s);
3838                 set_cc_op(s, CC_OP_LOGICB + ot);
3839                 break;
3840 
3841             case 0x0f7: /* bextr Gy, Ey, By */
3842                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3843                     || !(s->prefix & PREFIX_VEX)
3844                     || s->vex_l != 0) {
3845                     goto illegal_op;
3846                 }
3847                 ot = mo_64_32(s->dflag);
3848                 {
3849                     TCGv bound, zero;
3850 
3851                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3852                     /* Extract START, and shift the operand.
3853                        Shifts larger than operand size get zeros.  */
3854                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3855                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3856 
3857                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3858                     zero = tcg_const_tl(0);
3859                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3860                                        s->T0, zero);
3861                     tcg_temp_free(zero);
3862 
3863                     /* Extract the LEN into a mask.  Lengths larger than
3864                        operand size get all ones.  */
3865                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3866                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3867                                        s->A0, bound);
3868                     tcg_temp_free(bound);
3869                     tcg_gen_movi_tl(s->T1, 1);
3870                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3871                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3872                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
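                         /* Worked example: with a control word of 0x0402 in
                            By (START = 2, LEN = 4) and a source of 0xf4, T0
                            is now (0xf4 >> 2) & 0xf = 0xd. */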
3873 
3874                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3875                     gen_op_update1_cc(s);
3876                     set_cc_op(s, CC_OP_LOGICB + ot);
3877                 }
3878                 break;
3879 
3880             case 0x0f5: /* bzhi Gy, Ey, By */
3881                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3882                     || !(s->prefix & PREFIX_VEX)
3883                     || s->vex_l != 0) {
3884                     goto illegal_op;
3885                 }
3886                 ot = mo_64_32(s->dflag);
3887                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3888                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3889                 {
3890                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3891                     /* Note that since we're using BMILG (in order to get O
3892                        cleared) we need to store the inverse into C.  */
3893                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3894                                        s->T1, bound);
3895                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3896                                        bound, bound, s->T1);
3897                     tcg_temp_free(bound);
3898                 }
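                     /* A0 = -1 << n is the mask of bits to discard, so the
                        andc below keeps exactly bits [n-1:0] of the source. */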
3899                 tcg_gen_movi_tl(s->A0, -1);
3900                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3901                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3902                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3903                 gen_op_update1_cc(s);
3904                 set_cc_op(s, CC_OP_BMILGB + ot);
3905                 break;
3906 
3907             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3908                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3909                     || !(s->prefix & PREFIX_VEX)
3910                     || s->vex_l != 0) {
3911                     goto illegal_op;
3912                 }
3913                 ot = mo_64_32(s->dflag);
3914                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
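                     /* mulx multiplies rDX (implicit) by Ey; the low half of
                        the product goes to the vvvv-encoded register By and
                        the high half to Gy.  EFLAGS are not touched. */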
3915                 switch (ot) {
3916                 default:
3917                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3918                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3919                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3920                                       s->tmp2_i32, s->tmp3_i32);
3921                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3922                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3923                     break;
3924 #ifdef TARGET_X86_64
3925                 case MO_64:
3926                     tcg_gen_mulu2_i64(s->T0, s->T1,
3927                                       s->T0, cpu_regs[R_EDX]);
3928                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
3929                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
3930                     break;
3931 #endif
3932                 }
3933                 break;
3934 
3935             case 0x3f5: /* pdep Gy, By, Ey */
3936                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3937                     || !(s->prefix & PREFIX_VEX)
3938                     || s->vex_l != 0) {
3939                     goto illegal_op;
3940                 }
3941                 ot = mo_64_32(s->dflag);
3942                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3943                 /* Note that by zero-extending the source operand, we
3944                    automatically handle zero-extending the result.  */
3945                 if (ot == MO_64) {
3946                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3947                 } else {
3948                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3949                 }
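                     /* e.g. pdep with source bits 0b11 and mask 0b1010
                        scatters the two low source bits into the mask's set
                        positions, giving 0b1010. */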
3950                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
3951                 break;
3952 
3953             case 0x2f5: /* pext Gy, By, Ey */
3954                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3955                     || !(s->prefix & PREFIX_VEX)
3956                     || s->vex_l != 0) {
3957                     goto illegal_op;
3958                 }
3959                 ot = mo_64_32(s->dflag);
3960                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3961                 /* Note that by zero-extending the source operand, we
3962                    automatically handle zero-extending the result.  */
3963                 if (ot == MO_64) {
3964                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
3965                 } else {
3966                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
3967                 }
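                     /* pext is the inverse gather: source 0b1010 with mask
                        0b1010 packs the selected bits down to 0b11. */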
3968                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
3969                 break;
3970 
3971             case 0x1f6: /* adcx Gy, Ey */
3972             case 0x2f6: /* adox Gy, Ey */
3973                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
3974                     goto illegal_op;
3975                 } else {
3976                     TCGv carry_in, carry_out, zero;
3977                     int end_op;
3978 
3979                     ot = mo_64_32(s->dflag);
3980                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3981 
3982                     /* Re-use the carry-out from a previous round.  */
3983                     carry_in = NULL;
3984                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
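                         /* adcx keeps its carry (CF) live in cpu_cc_dst and
                            adox keeps OF in cpu_cc_src2; CC_OP_ADCOX means
                            both are live, as in an interleaved carry chain. */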
3985                     switch (s->cc_op) {
3986                     case CC_OP_ADCX:
3987                         if (b == 0x1f6) {
3988                             carry_in = cpu_cc_dst;
3989                             end_op = CC_OP_ADCX;
3990                         } else {
3991                             end_op = CC_OP_ADCOX;
3992                         }
3993                         break;
3994                     case CC_OP_ADOX:
3995                         if (b == 0x1f6) {
3996                             end_op = CC_OP_ADCOX;
3997                         } else {
3998                             carry_in = cpu_cc_src2;
3999                             end_op = CC_OP_ADOX;
4000                         }
4001                         break;
4002                     case CC_OP_ADCOX:
4003                         end_op = CC_OP_ADCOX;
4004                         carry_in = carry_out;
4005                         break;
4006                     default:
4007                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4008                         break;
4009                     }
4010                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4011                     if (!carry_in) {
4012                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4013                             gen_compute_eflags(s);
4014                         }
4015                         carry_in = s->tmp0;
4016                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4017                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4018                     }
4019 
4020                     switch (ot) {
4021 #ifdef TARGET_X86_64
4022                     case MO_32:
4023                         /* If we know TL is 64-bit, and we want a 32-bit
4024                            result, just do everything in 64-bit arithmetic.  */
4025                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4026                         tcg_gen_ext32u_i64(s->T0, s->T0);
4027                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4028                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4029                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
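                             /* Bit 32 of the 64-bit sum is exactly the
                                32-bit carry-out. */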
4030                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4031                         break;
4032 #endif
4033                     default:
4034                         /* Otherwise compute the carry-out in two steps.  */
4035                         zero = tcg_const_tl(0);
4036                         tcg_gen_add2_tl(s->T0, carry_out,
4037                                         s->T0, zero,
4038                                         carry_in, zero);
4039                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4040                                         cpu_regs[reg], carry_out,
4041                                         s->T0, zero);
4042                         tcg_temp_free(zero);
4043                         break;
4044                     }
4045                     set_cc_op(s, end_op);
4046                 }
4047                 break;
4048 
4049             case 0x1f7: /* shlx Gy, Ey, By */
4050             case 0x2f7: /* sarx Gy, Ey, By */
4051             case 0x3f7: /* shrx Gy, Ey, By */
4052                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4053                     || !(s->prefix & PREFIX_VEX)
4054                     || s->vex_l != 0) {
4055                     goto illegal_op;
4056                 }
4057                 ot = mo_64_32(s->dflag);
4058                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4059                 if (ot == MO_64) {
4060                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4061                 } else {
4062                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4063                 }
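                     /* Unlike the legacy shifts, shlx/sarx/shrx never touch
                        EFLAGS, so there is no cc_op update here; the count is
                        masked to 5 or 6 bits just as the hardware does. */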
4064                 if (b == 0x1f7) {
4065                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4066                 } else if (b == 0x2f7) {
4067                     if (ot != MO_64) {
4068                         tcg_gen_ext32s_tl(s->T0, s->T0);
4069                     }
4070                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4071                 } else {
4072                     if (ot != MO_64) {
4073                         tcg_gen_ext32u_tl(s->T0, s->T0);
4074                     }
4075                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4076                 }
4077                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4078                 break;
4079 
4080             case 0x0f3:
4081             case 0x1f3:
4082             case 0x2f3:
4083             case 0x3f3: /* Group 17 */
4084                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4085                     || !(s->prefix & PREFIX_VEX)
4086                     || s->vex_l != 0) {
4087                     goto illegal_op;
4088                 }
4089                 ot = mo_64_32(s->dflag);
4090                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4091 
4092                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
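                     /* blsr: x & (x - 1) clears the lowest set bit (e.g.
                        0b101100 -> 0b101000); blsmsk: x ^ (x - 1) masks up
                        through that bit; blsi: x & -x isolates it. */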
4093                 switch (reg & 7) {
4094                 case 1: /* blsr By,Ey */
4095                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4096                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4097                     break;
4098                 case 2: /* blsmsk By,Ey */
4099                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4100                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4101                     break;
4102                 case 3: /* blsi By, Ey */
4103                     tcg_gen_neg_tl(s->T1, s->T0);
4104                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4105                     break;
4106                 default:
4107                     goto unknown_op;
4108                 }
4109                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4110                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4111                 set_cc_op(s, CC_OP_BMILGB + ot);
4112                 break;
4113 
4114             default:
4115                 goto unknown_op;
4116             }
4117             break;
4118 
4119         case 0x03a:
4120         case 0x13a:
4121             b = modrm;
4122             modrm = x86_ldub_code(env, s);
4123             rm = modrm & 7;
4124             reg = ((modrm >> 3) & 7) | rex_r;
4125             mod = (modrm >> 6) & 3;
4126             if (b1 >= 2) {
4127                 goto unknown_op;
4128             }
4129 
4130             sse_fn_eppi = sse_op_table7[b].op[b1];
4131             if (!sse_fn_eppi) {
4132                 goto unknown_op;
4133             }
4134             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4135                 goto illegal_op;
4136 
4137             s->rip_offset = 1;
4138 
4139             if (sse_fn_eppi == SSE_SPECIAL) {
4140                 ot = mo_64_32(s->dflag);
4141                 rm = (modrm & 7) | REX_B(s);
4142                 if (mod != 3)
4143                     gen_lea_modrm(env, s, modrm);
4144                 reg = ((modrm >> 3) & 7) | rex_r;
4145                 val = x86_ldub_code(env, s);
4146                 switch (b) {
4147                 case 0x14: /* pextrb */
4148                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4149                                             xmm_regs[reg].ZMM_B(val & 15)));
4150                     if (mod == 3) {
4151                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4152                     } else {
4153                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4154                                            s->mem_index, MO_UB);
4155                     }
4156                     break;
4157                 case 0x15: /* pextrw */
4158                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4159                                             xmm_regs[reg].ZMM_W(val & 7)));
4160                     if (mod == 3) {
4161                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4162                     } else {
4163                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4164                                            s->mem_index, MO_LEUW);
4165                     }
4166                     break;
4167                 case 0x16:
4168                     if (ot == MO_32) { /* pextrd */
4169                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4170                                         offsetof(CPUX86State,
4171                                                 xmm_regs[reg].ZMM_L(val & 3)));
4172                         if (mod == 3) {
4173                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4174                         } else {
4175                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4176                                                 s->mem_index, MO_LEUL);
4177                         }
4178                     } else { /* pextrq */
4179 #ifdef TARGET_X86_64
4180                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4181                                         offsetof(CPUX86State,
4182                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4183                         if (mod == 3) {
4184                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4185                         } else {
4186                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4187                                                 s->mem_index, MO_LEQ);
4188                         }
4189 #else
4190                         goto illegal_op;
4191 #endif
4192                     }
4193                     break;
4194                 case 0x17: /* extractps */
4195                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4196                                             xmm_regs[reg].ZMM_L(val & 3)));
4197                     if (mod == 3) {
4198                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4199                     } else {
4200                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4201                                            s->mem_index, MO_LEUL);
4202                     }
4203                     break;
4204                 case 0x20: /* pinsrb */
4205                     if (mod == 3) {
4206                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4207                     } else {
4208                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4209                                            s->mem_index, MO_UB);
4210                     }
4211                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4212                                             xmm_regs[reg].ZMM_B(val & 15)));
4213                     break;
4214                 case 0x21: /* insertps */
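                         /* For the register form, bits 7:6 of the immediate
                            select the source dword; bits 5:4 select the
                            destination slot; the low four bits zero
                            individual dwords of the destination. */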
4215                     if (mod == 3) {
4216                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4217                                         offsetof(CPUX86State,xmm_regs[rm]
4218                                                 .ZMM_L((val >> 6) & 3)));
4219                     } else {
4220                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4221                                             s->mem_index, MO_LEUL);
4222                     }
4223                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4224                                     offsetof(CPUX86State,xmm_regs[reg]
4225                                             .ZMM_L((val >> 4) & 3)));
4226                     if ((val >> 0) & 1)
4227                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4228                                         cpu_env, offsetof(CPUX86State,
4229                                                 xmm_regs[reg].ZMM_L(0)));
4230                     if ((val >> 1) & 1)
4231                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4232                                         cpu_env, offsetof(CPUX86State,
4233                                                 xmm_regs[reg].ZMM_L(1)));
4234                     if ((val >> 2) & 1)
4235                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4236                                         cpu_env, offsetof(CPUX86State,
4237                                                 xmm_regs[reg].ZMM_L(2)));
4238                     if ((val >> 3) & 1)
4239                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4240                                         cpu_env, offsetof(CPUX86State,
4241                                                 xmm_regs[reg].ZMM_L(3)));
4242                     break;
4243                 case 0x22:
4244                     if (ot == MO_32) { /* pinsrd */
4245                         if (mod == 3) {
4246                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4247                         } else {
4248                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4249                                                 s->mem_index, MO_LEUL);
4250                         }
4251                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4252                                         offsetof(CPUX86State,
4253                                                 xmm_regs[reg].ZMM_L(val & 3)));
4254                     } else { /* pinsrq */
4255 #ifdef TARGET_X86_64
4256                         if (mod == 3) {
4257                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4258                         } else {
4259                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4260                                                 s->mem_index, MO_LEQ);
4261                         }
4262                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4263                                         offsetof(CPUX86State,
4264                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4265 #else
4266                         goto illegal_op;
4267 #endif
4268                     }
4269                     break;
4270                 }
4271                 return;
4272             }
4273 
4274             if (b1) {
4275                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4276                 if (mod == 3) {
4277                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4278                 } else {
4279                     op2_offset = offsetof(CPUX86State,xmm_t0);
4280                     gen_lea_modrm(env, s, modrm);
4281                     gen_ldo_env_A0(s, op2_offset);
4282                 }
4283             } else {
4284                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4285                 if (mod == 3) {
4286                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4287                 } else {
4288                     op2_offset = offsetof(CPUX86State,mmx_t0);
4289                     gen_lea_modrm(env, s, modrm);
4290                     gen_ldq_env_A0(s, op2_offset);
4291                 }
4292             }
4293             val = x86_ldub_code(env, s);
4294 
4295             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4296                 set_cc_op(s, CC_OP_EFLAGS);
4297 
4298                 if (s->dflag == MO_64) {
4299                     /* The helper must use entire 64-bit gp registers */
4300                     val |= 1 << 8;
4301                 }
4302             }
4303 
4304             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4305             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4306             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4307             break;
4308 
4309         case 0x33a:
4310             /* Various integer extensions at 0f 3a f[0-f].  */
4311             b = modrm | (b1 << 8);
4312             modrm = x86_ldub_code(env, s);
4313             reg = ((modrm >> 3) & 7) | rex_r;
4314 
4315             switch (b) {
4316             case 0x3f0: /* rorx Gy,Ey, Ib */
4317                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4318                     || !(s->prefix & PREFIX_VEX)
4319                     || s->vex_l != 0) {
4320                     goto illegal_op;
4321                 }
4322                 ot = mo_64_32(s->dflag);
4323                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4324                 b = x86_ldub_code(env, s);
4325                 if (ot == MO_64) {
4326                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4327                 } else {
4328                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4329                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4330                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4331                 }
4332                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4333                 break;
4334 
4335             default:
4336                 goto unknown_op;
4337             }
4338             break;
4339 
4340         default:
4341         unknown_op:
4342             gen_unknown_opcode(env, s);
4343             return;
4344         }
4345     } else {
4346         /* generic MMX or SSE operation */
4347         switch(b) {
4348         case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
4349         case 0xc6: /* shufps/shufpd */
4350         case 0xc2: /* compare insns */
4351             s->rip_offset = 1;
4352             break;
4353         default:
4354             break;
4355         }
4356         if (is_xmm) {
4357             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4358             if (mod != 3) {
4359                 int sz = 4;
4360 
4361                 gen_lea_modrm(env, s, modrm);
4362                 op2_offset = offsetof(CPUX86State,xmm_t0);
4363 
4364                 switch (b) {
4365                 case 0x50 ... 0x5a:
4366                 case 0x5c ... 0x5f:
4367                 case 0xc2:
4368                     /* Most sse scalar operations.  */
4369                     if (b1 == 2) {
4370                         sz = 2;
4371                     } else if (b1 == 3) {
4372                         sz = 3;
4373                     }
4374                     break;
4375 
4376                 case 0x2e:  /* ucomis[sd] */
4377                 case 0x2f:  /* comis[sd] */
4378                     if (b1 == 0) {
4379                         sz = 2;
4380                     } else {
4381                         sz = 3;
4382                     }
4383                     break;
4384                 }
4385 
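                     /* Scalar forms only read 32 or 64 bits from memory, so
                        load just that much into xmm_t0; anything else gets a
                        full 128-bit load. */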
4386                 switch (sz) {
4387                 case 2:
4388                     /* 32 bit access */
4389                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4390                     tcg_gen_st32_tl(s->T0, cpu_env,
4391                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4392                     break;
4393                 case 3:
4394                     /* 64 bit access */
4395                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4396                     break;
4397                 default:
4398                     /* 128 bit access */
4399                     gen_ldo_env_A0(s, op2_offset);
4400                     break;
4401                 }
4402             } else {
4403                 rm = (modrm & 7) | REX_B(s);
4404                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4405             }
4406         } else {
4407             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4408             if (mod != 3) {
4409                 gen_lea_modrm(env, s, modrm);
4410                 op2_offset = offsetof(CPUX86State,mmx_t0);
4411                 gen_ldq_env_A0(s, op2_offset);
4412             } else {
4413                 rm = (modrm & 7);
4414                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4415             }
4416         }
4417         switch(b) {
4418         case 0x0f: /* 3DNow! data insns */
4419             val = x86_ldub_code(env, s);
4420             sse_fn_epp = sse_op_table5[val];
4421             if (!sse_fn_epp) {
4422                 goto unknown_op;
4423             }
4424             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4425                 goto illegal_op;
4426             }
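                 /* 3DNow! encodes the operation in an opcode suffix byte
                    (val, fetched above) rather than in the main opcode. */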
4427             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4428             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4429             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4430             break;
4431         case 0x70: /* pshufw/pshufd/pshuflw/pshufhw */
4432         case 0xc6: /* shufps/shufpd */
4433             val = x86_ldub_code(env, s);
4434             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4435             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4436             /* XXX: introduce a new table? */
4437             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4438             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4439             break;
4440         case 0xc2:
4441             /* compare insns */
4442             val = x86_ldub_code(env, s);
4443             if (val >= 8)
4444                 goto unknown_op;
4445             sse_fn_epp = sse_op_table4[val][b1];
4446 
4447             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4448             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4449             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4450             break;
4451         case 0xf7:
4452             /* maskmovq/maskmovdqu: we must set up A0 for the implicit
                    DS:rDI operand */
4453             if (mod != 3)
4454                 goto illegal_op;
4455             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4456             gen_extu(s->aflag, s->A0);
4457             gen_add_A0_ds_seg(s);
4458 
4459             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4460             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4461             /* XXX: introduce a new table? */
4462             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4463             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4464             break;
4465         default:
4466             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4467             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4468             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4469             break;
4470         }
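             /* (u)comiss/(u)comisd update EFLAGS directly, hence
                CC_OP_EFLAGS.  */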
4471         if (b == 0x2e || b == 0x2f) {
4472             set_cc_op(s, CC_OP_EFLAGS);
4473         }
4474     }
4475 }
4476 
4477 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4478    be stopped.  Returns the next pc value. */
4479 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4480 {
4481     CPUX86State *env = cpu->env_ptr;
4482     int b, prefixes;
4483     int shift;
4484     MemOp ot, aflag, dflag;
4485     int modrm, reg, rm, mod, op, opreg, val;
4486     target_ulong next_eip, tval;
4487     int rex_w, rex_r;
4488     target_ulong pc_start = s->base.pc_next;
4489 
4490     s->pc_start = s->pc = pc_start;
4491     s->override = -1;
4492 #ifdef TARGET_X86_64
4493     s->rex_x = 0;
4494     s->rex_b = 0;
4495     s->x86_64_hregs = false;
4496 #endif
4497     s->rip_offset = 0; /* for relative ip address */
4498     s->vex_l = 0;
4499     s->vex_v = 0;
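         /* advance_pc() siglongjmps back here when instruction decoding runs
            past the 15-byte x86 limit (X86_MAX_INSN_LENGTH); such over-long
            instructions must raise #GP.  */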
4500     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4501         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
4502         return s->pc;
4503     }
4504 
4505     prefixes = 0;
4506     rex_w = -1;
4507     rex_r = 0;
4508 
4509  next_byte:
4510     b = x86_ldub_code(env, s);
4511     /* Collect prefixes.  */
4512     switch (b) {
4513     case 0xf3:
4514         prefixes |= PREFIX_REPZ;
4515         goto next_byte;
4516     case 0xf2:
4517         prefixes |= PREFIX_REPNZ;
4518         goto next_byte;
4519     case 0xf0:
4520         prefixes |= PREFIX_LOCK;
4521         goto next_byte;
4522     case 0x2e:
4523         s->override = R_CS;
4524         goto next_byte;
4525     case 0x36:
4526         s->override = R_SS;
4527         goto next_byte;
4528     case 0x3e:
4529         s->override = R_DS;
4530         goto next_byte;
4531     case 0x26:
4532         s->override = R_ES;
4533         goto next_byte;
4534     case 0x64:
4535         s->override = R_FS;
4536         goto next_byte;
4537     case 0x65:
4538         s->override = R_GS;
4539         goto next_byte;
4540     case 0x66:
4541         prefixes |= PREFIX_DATA;
4542         goto next_byte;
4543     case 0x67:
4544         prefixes |= PREFIX_ADR;
4545         goto next_byte;
4546 #ifdef TARGET_X86_64
4547     case 0x40 ... 0x4f:
4548         if (CODE64(s)) {
4549             /* REX prefix */
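                 /* REX is 0100WRXB: W selects 64-bit operand size; R, X and B
                    extend ModRM.reg, SIB.index and ModRM.rm/base.  R, X and B
                    are pre-shifted into bit 3 so they can simply be OR'd into
                    the 3-bit fields they extend.  */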
4550             rex_w = (b >> 3) & 1;
4551             rex_r = (b & 0x4) << 1;
4552             s->rex_x = (b & 0x2) << 2;
4553             REX_B(s) = (b & 0x1) << 3;
4554             /* select uniform byte register addressing */
4555             s->x86_64_hregs = true;
4556             goto next_byte;
4557         }
4558         break;
4559 #endif
4560     case 0xc5: /* 2-byte VEX */
4561     case 0xc4: /* 3-byte VEX */
4562         /* VEX prefixes can only be used in 32-bit and 64-bit mode;
4563            in 16-bit and vm86 mode the instruction is LES or LDS.  */
4564         if (s->code32 && !s->vm86) {
4565             static const int pp_prefix[4] = {
4566                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4567             };
4568             int vex3, vex2 = x86_ldub_code(env, s);
4569 
4570             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4571                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4572                    otherwise the instruction is LES or LDS.  */
4573                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4574                 break;
4575             }
4576 
4577             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4578             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4579                             | PREFIX_LOCK | PREFIX_DATA)) {
4580                 goto illegal_op;
4581             }
4582 #ifdef TARGET_X86_64
4583             if (s->x86_64_hregs) {
4584                 goto illegal_op;
4585             }
4586 #endif
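                 /* The VEX-encoded R, X, B and vvvv fields are stored
                    inverted, hence the ~ before each extraction below.  */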
4587             rex_r = (~vex2 >> 4) & 8;
4588             if (b == 0xc5) {
4589                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4590                 vex3 = vex2;
4591                 b = x86_ldub_code(env, s) | 0x100;
4592             } else {
4593                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4594 #ifdef TARGET_X86_64
4595                 s->rex_x = (~vex2 >> 3) & 8;
4596                 s->rex_b = (~vex2 >> 2) & 8;
4597 #endif
4598                 vex3 = x86_ldub_code(env, s);
4599                 rex_w = (vex3 >> 7) & 1;
4600                 switch (vex2 & 0x1f) {
4601                 case 0x01: /* Implied 0f leading opcode bytes.  */
4602                     b = x86_ldub_code(env, s) | 0x100;
4603                     break;
4604                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4605                     b = 0x138;
4606                     break;
4607                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4608                     b = 0x13a;
4609                     break;
4610                 default:   /* Reserved for future use.  */
4611                     goto unknown_op;
4612                 }
4613             }
4614             s->vex_v = (~vex3 >> 3) & 0xf;
4615             s->vex_l = (vex3 >> 2) & 1;
4616             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4617         }
4618         break;
4619     }
4620 
4621     /* Post-process prefixes.  */
4622     if (CODE64(s)) {
4623         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4624            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4625            over 0x66 if both are present.  */
4626         dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4627         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4628         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4629     } else {
4630         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4631         if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) {
4632             dflag = MO_32;
4633         } else {
4634             dflag = MO_16;
4635         }
4636         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4637         if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) {
4638             aflag = MO_32;
4639         } else {
4640             aflag = MO_16;
4641         }
4642     }
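         /* Example: in a 32-bit code segment the defaults are 32-bit data
            and addresses and the 0x66/0x67 prefixes flip them to 16-bit;
            in a 16-bit segment it is the reverse.  The XOR above implements
            exactly this "prefix toggles the default" rule.  */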
4643 
4644     s->prefix = prefixes;
4645     s->aflag = aflag;
4646     s->dflag = dflag;
4647 
4648     /* now check op code */
4649  reswitch:
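         /* Two-byte opcodes are dispatched through the same switch with
            bit 8 set (0f xx -> 0x1xx); 0f 38 and 0f 3a become 0x138 and
            0x13a.  */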
4650     switch(b) {
4651     case 0x0f:
4652         /**************************/
4653         /* extended op code */
4654         b = x86_ldub_code(env, s) | 0x100;
4655         goto reswitch;
4656 
4657         /**************************/
4658         /* arith & logic */
4659     case 0x00 ... 0x05:
4660     case 0x08 ... 0x0d:
4661     case 0x10 ... 0x15:
4662     case 0x18 ... 0x1d:
4663     case 0x20 ... 0x25:
4664     case 0x28 ... 0x2d:
4665     case 0x30 ... 0x35:
4666     case 0x38 ... 0x3d:
4667         {
4668             int op, f, val;
4669             op = (b >> 3) & 7;
4670             f = (b >> 1) & 3;
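                 /* For opcodes 0x00..0x3d, bits [5:3] select the ALU op
                    (ADD, OR, ADC, SBB, AND, SUB, XOR, CMP), bits [2:1] the
                    form (0: Ev,Gv  1: Gv,Ev  2: accumulator,immediate) and
                    bit 0 byte vs. full-size operands (see mo_b_d()).  */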
4671 
4672             ot = mo_b_d(b, dflag);
4673 
4674             switch(f) {
4675             case 0: /* OP Ev, Gv */
4676                 modrm = x86_ldub_code(env, s);
4677                 reg = ((modrm >> 3) & 7) | rex_r;
4678                 mod = (modrm >> 6) & 3;
4679                 rm = (modrm & 7) | REX_B(s);
4680                 if (mod != 3) {
4681                     gen_lea_modrm(env, s, modrm);
4682                     opreg = OR_TMP0;
4683                 } else if (op == OP_XORL && rm == reg) {
4684                 xor_zero:
4685                     /* xor reg, reg optimisation */
4686                     set_cc_op(s, CC_OP_CLR);
4687                     tcg_gen_movi_tl(s->T0, 0);
4688                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4689                     break;
4690                 } else {
4691                     opreg = rm;
4692                 }
4693                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4694                 gen_op(s, op, ot, opreg);
4695                 break;
4696             case 1: /* OP Gv, Ev */
4697                 modrm = x86_ldub_code(env, s);
4698                 mod = (modrm >> 6) & 3;
4699                 reg = ((modrm >> 3) & 7) | rex_r;
4700                 rm = (modrm & 7) | REX_B(s);
4701                 if (mod != 3) {
4702                     gen_lea_modrm(env, s, modrm);
4703                     gen_op_ld_v(s, ot, s->T1, s->A0);
4704                 } else if (op == OP_XORL && rm == reg) {
4705                     goto xor_zero;
4706                 } else {
4707                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4708                 }
4709                 gen_op(s, op, ot, reg);
4710                 break;
4711             case 2: /* OP A, Iv */
4712                 val = insn_get(env, s, ot);
4713                 tcg_gen_movi_tl(s->T1, val);
4714                 gen_op(s, op, ot, OR_EAX);
4715                 break;
4716             }
4717         }
4718         break;
4719 
4720     case 0x82:
4721         if (CODE64(s))
4722             goto illegal_op;
4723         /* fall through */
4724     case 0x80: /* GRP1 */
4725     case 0x81:
4726     case 0x83:
4727         {
4728             int val;
4729 
4730             ot = mo_b_d(b, dflag);
4731 
4732             modrm = x86_ldub_code(env, s);
4733             mod = (modrm >> 6) & 3;
4734             rm = (modrm & 7) | REX_B(s);
4735             op = (modrm >> 3) & 7;
4736 
4737             if (mod != 3) {
4738                 if (b == 0x83)
4739                     s->rip_offset = 1;
4740                 else
4741                     s->rip_offset = insn_const_size(ot);
4742                 gen_lea_modrm(env, s, modrm);
4743                 opreg = OR_TMP0;
4744             } else {
4745                 opreg = rm;
4746             }
4747 
4748             switch(b) {
4749             default:
4750             case 0x80:
4751             case 0x81:
4752             case 0x82:
4753                 val = insn_get(env, s, ot);
4754                 break;
4755             case 0x83:
4756                 val = (int8_t)insn_get(env, s, MO_8);
4757                 break;
4758             }
4759             tcg_gen_movi_tl(s->T1, val);
4760             gen_op(s, op, ot, opreg);
4761         }
4762         break;
4763 
4764         /**************************/
4765         /* inc, dec, and other misc arith */
4766     case 0x40 ... 0x47: /* inc Gv */
4767         ot = dflag;
4768         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4769         break;
4770     case 0x48 ... 0x4f: /* dec Gv */
4771         ot = dflag;
4772         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4773         break;
4774     case 0xf6: /* GRP3 */
4775     case 0xf7:
4776         ot = mo_b_d(b, dflag);
4777 
4778         modrm = x86_ldub_code(env, s);
4779         mod = (modrm >> 6) & 3;
4780         rm = (modrm & 7) | REX_B(s);
4781         op = (modrm >> 3) & 7;
4782         if (mod != 3) {
4783             if (op == 0) {
4784                 s->rip_offset = insn_const_size(ot);
4785             }
4786             gen_lea_modrm(env, s, modrm);
4787             /* For those below that handle locked memory, don't load here.  */
4788             if (!(s->prefix & PREFIX_LOCK)
4789                 || op != 2) {
4790                 gen_op_ld_v(s, ot, s->T0, s->A0);
4791             }
4792         } else {
4793             gen_op_mov_v_reg(s, ot, s->T0, rm);
4794         }
4795 
4796         switch(op) {
4797         case 0: /* test */
4798             val = insn_get(env, s, ot);
4799             tcg_gen_movi_tl(s->T1, val);
4800             gen_op_testl_T0_T1_cc(s);
4801             set_cc_op(s, CC_OP_LOGICB + ot);
4802             break;
4803         case 2: /* not */
4804             if (s->prefix & PREFIX_LOCK) {
4805                 if (mod == 3) {
4806                     goto illegal_op;
4807                 }
4808                 tcg_gen_movi_tl(s->T0, ~0);
4809                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4810                                             s->mem_index, ot | MO_LE);
4811             } else {
4812                 tcg_gen_not_tl(s->T0, s->T0);
4813                 if (mod != 3) {
4814                     gen_op_st_v(s, ot, s->T0, s->A0);
4815                 } else {
4816                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4817                 }
4818             }
4819             break;
4820         case 3: /* neg */
4821             if (s->prefix & PREFIX_LOCK) {
4822                 TCGLabel *label1;
4823                 TCGv a0, t0, t1, t2;
4824 
4825                 if (mod == 3) {
4826                     goto illegal_op;
4827                 }
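                     /* There is no atomic negate, so emit a compare-and-swap
                        loop: negate the value read above and retry while some
                        other vCPU modified the location in between.  */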
4828                 a0 = tcg_temp_local_new();
4829                 t0 = tcg_temp_local_new();
4830                 label1 = gen_new_label();
4831 
4832                 tcg_gen_mov_tl(a0, s->A0);
4833                 tcg_gen_mov_tl(t0, s->T0);
4834 
4835                 gen_set_label(label1);
4836                 t1 = tcg_temp_new();
4837                 t2 = tcg_temp_new();
4838                 tcg_gen_mov_tl(t2, t0);
4839                 tcg_gen_neg_tl(t1, t0);
4840                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4841                                           s->mem_index, ot | MO_LE);
4842                 tcg_temp_free(t1);
4843                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4844 
4845                 tcg_temp_free(t2);
4846                 tcg_temp_free(a0);
4847                 tcg_gen_mov_tl(s->T0, t0);
4848                 tcg_temp_free(t0);
4849             } else {
4850                 tcg_gen_neg_tl(s->T0, s->T0);
4851                 if (mod != 3) {
4852                     gen_op_st_v(s, ot, s->T0, s->A0);
4853                 } else {
4854                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4855                 }
4856             }
4857             gen_op_update_neg_cc(s);
4858             set_cc_op(s, CC_OP_SUBB + ot);
4859             break;
4860         case 4: /* mul */
4861             switch(ot) {
4862             case MO_8:
4863                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4864                 tcg_gen_ext8u_tl(s->T0, s->T0);
4865                 tcg_gen_ext8u_tl(s->T1, s->T1);
4866                 /* XXX: use 32 bit mul which could be faster */
4867                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4868                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4869                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4870                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4871                 set_cc_op(s, CC_OP_MULB);
4872                 break;
4873             case MO_16:
4874                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4875                 tcg_gen_ext16u_tl(s->T0, s->T0);
4876                 tcg_gen_ext16u_tl(s->T1, s->T1);
4877                 /* XXX: use 32 bit mul which could be faster */
4878                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4879                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4880                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4881                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4882                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4883                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4884                 set_cc_op(s, CC_OP_MULW);
4885                 break;
4886             default:
4887             case MO_32:
4888                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4889                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4890                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4891                                   s->tmp2_i32, s->tmp3_i32);
4892                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4893                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4894                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4895                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4896                 set_cc_op(s, CC_OP_MULL);
4897                 break;
4898 #ifdef TARGET_X86_64
4899             case MO_64:
4900                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4901                                   s->T0, cpu_regs[R_EAX]);
4902                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4903                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4904                 set_cc_op(s, CC_OP_MULQ);
4905                 break;
4906 #endif
4907             }
4908             break;
4909         case 5: /* imul */
4910             switch(ot) {
4911             case MO_8:
4912                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4913                 tcg_gen_ext8s_tl(s->T0, s->T0);
4914                 tcg_gen_ext8s_tl(s->T1, s->T1);
4915                 /* XXX: use 32 bit mul which could be faster */
4916                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4917                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4918                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4919                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4920                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4921                 set_cc_op(s, CC_OP_MULB);
4922                 break;
4923             case MO_16:
4924                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4925                 tcg_gen_ext16s_tl(s->T0, s->T0);
4926                 tcg_gen_ext16s_tl(s->T1, s->T1);
4927                 /* XXX: use 32 bit mul which could be faster */
4928                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4929                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4930                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4931                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
4932                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4933                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4934                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4935                 set_cc_op(s, CC_OP_MULW);
4936                 break;
4937             default:
4938             case MO_32:
4939                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4940                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4941                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
4942                                   s->tmp2_i32, s->tmp3_i32);
4943                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4944                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4945                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
4946                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4947                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
4948                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
4949                 set_cc_op(s, CC_OP_MULL);
4950                 break;
4951 #ifdef TARGET_X86_64
4952             case MO_64:
4953                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4954                                   s->T0, cpu_regs[R_EAX]);
4955                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4956                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
4957                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
4958                 set_cc_op(s, CC_OP_MULQ);
4959                 break;
4960 #endif
4961             }
4962             break;
4963         case 6: /* div */
4964             switch(ot) {
4965             case MO_8:
4966                 gen_helper_divb_AL(cpu_env, s->T0);
4967                 break;
4968             case MO_16:
4969                 gen_helper_divw_AX(cpu_env, s->T0);
4970                 break;
4971             default:
4972             case MO_32:
4973                 gen_helper_divl_EAX(cpu_env, s->T0);
4974                 break;
4975 #ifdef TARGET_X86_64
4976             case MO_64:
4977                 gen_helper_divq_EAX(cpu_env, s->T0);
4978                 break;
4979 #endif
4980             }
4981             break;
4982         case 7: /* idiv */
4983             switch(ot) {
4984             case MO_8:
4985                 gen_helper_idivb_AL(cpu_env, s->T0);
4986                 break;
4987             case MO_16:
4988                 gen_helper_idivw_AX(cpu_env, s->T0);
4989                 break;
4990             default:
4991             case MO_32:
4992                 gen_helper_idivl_EAX(cpu_env, s->T0);
4993                 break;
4994 #ifdef TARGET_X86_64
4995             case MO_64:
4996                 gen_helper_idivq_EAX(cpu_env, s->T0);
4997                 break;
4998 #endif
4999             }
5000             break;
5001         default:
5002             goto unknown_op;
5003         }
5004         break;
5005 
5006     case 0xfe: /* GRP4 */
5007     case 0xff: /* GRP5 */
5008         ot = mo_b_d(b, dflag);
5009 
5010         modrm = x86_ldub_code(env, s);
5011         mod = (modrm >> 6) & 3;
5012         rm = (modrm & 7) | REX_B(s);
5013         op = (modrm >> 3) & 7;
5014         if (op >= 2 && b == 0xfe) {
5015             goto unknown_op;
5016         }
5017         if (CODE64(s)) {
5018             if (op == 2 || op == 4) {
5019                 /* operand size for jumps is 64 bit */
5020                 ot = MO_64;
5021             } else if (op == 3 || op == 5) {
5022                 ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16;
5023             } else if (op == 6) {
5024                 /* default push size is 64 bit */
5025                 ot = mo_pushpop(s, dflag);
5026             }
5027         }
5028         if (mod != 3) {
5029             gen_lea_modrm(env, s, modrm);
5030             if (op >= 2 && op != 3 && op != 5)
5031                 gen_op_ld_v(s, ot, s->T0, s->A0);
5032         } else {
5033             gen_op_mov_v_reg(s, ot, s->T0, rm);
5034         }
5035 
5036         switch(op) {
5037         case 0: /* inc Ev */
5038             if (mod != 3)
5039                 opreg = OR_TMP0;
5040             else
5041                 opreg = rm;
5042             gen_inc(s, ot, opreg, 1);
5043             break;
5044         case 1: /* dec Ev */
5045             if (mod != 3)
5046                 opreg = OR_TMP0;
5047             else
5048                 opreg = rm;
5049             gen_inc(s, ot, opreg, -1);
5050             break;
5051         case 2: /* call Ev */
5052             /* XXX: optimize the memory case (the zero-extending 'and' is unnecessary) */
5053             if (dflag == MO_16) {
5054                 tcg_gen_ext16u_tl(s->T0, s->T0);
5055             }
5056             next_eip = s->pc - s->cs_base;
5057             tcg_gen_movi_tl(s->T1, next_eip);
5058             gen_push_v(s, s->T1);
5059             gen_op_jmp_v(s->T0);
5060             gen_bnd_jmp(s);
5061             gen_jr(s, s->T0);
5062             break;
5063         case 3: /* lcall Ev */
5064             if (mod == 3) {
5065                 goto illegal_op;
5066             }
5067             gen_op_ld_v(s, ot, s->T1, s->A0);
5068             gen_add_A0_im(s, 1 << ot);
5069             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5070         do_lcall:
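                 /* In protected mode the helper implements the full far-call
                    semantics (privilege checks, call gates, stack switch);
                    in real/vm86 mode it only pushes CS:IP and loads the new
                    CS:IP.  */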
5071             if (s->pe && !s->vm86) {
5072                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5073                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5074                                            tcg_const_i32(dflag - 1),
5075                                            tcg_const_tl(s->pc - s->cs_base));
5076             } else {
5077                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5078                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5079                                       tcg_const_i32(dflag - 1),
5080                                       tcg_const_i32(s->pc - s->cs_base));
5081             }
5082             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5083             gen_jr(s, s->tmp4);
5084             break;
5085         case 4: /* jmp Ev */
5086             if (dflag == MO_16) {
5087                 tcg_gen_ext16u_tl(s->T0, s->T0);
5088             }
5089             gen_op_jmp_v(s->T0);
5090             gen_bnd_jmp(s);
5091             gen_jr(s, s->T0);
5092             break;
5093         case 5: /* ljmp Ev */
5094             if (mod == 3) {
5095                 goto illegal_op;
5096             }
5097             gen_op_ld_v(s, ot, s->T1, s->A0);
5098             gen_add_A0_im(s, 1 << ot);
5099             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5100         do_ljmp:
5101             if (s->pe && !s->vm86) {
5102                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5103                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5104                                           tcg_const_tl(s->pc - s->cs_base));
5105             } else {
5106                 gen_op_movl_seg_T0_vm(s, R_CS);
5107                 gen_op_jmp_v(s->T1);
5108             }
5109             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5110             gen_jr(s, s->tmp4);
5111             break;
5112         case 6: /* push Ev */
5113             gen_push_v(s, s->T0);
5114             break;
5115         default:
5116             goto unknown_op;
5117         }
5118         break;
5119 
5120     case 0x84: /* test Ev, Gv */
5121     case 0x85:
5122         ot = mo_b_d(b, dflag);
5123 
5124         modrm = x86_ldub_code(env, s);
5125         reg = ((modrm >> 3) & 7) | rex_r;
5126 
5127         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5128         gen_op_mov_v_reg(s, ot, s->T1, reg);
5129         gen_op_testl_T0_T1_cc(s);
5130         set_cc_op(s, CC_OP_LOGICB + ot);
5131         break;
5132 
5133     case 0xa8: /* test eAX, Iv */
5134     case 0xa9:
5135         ot = mo_b_d(b, dflag);
5136         val = insn_get(env, s, ot);
5137 
5138         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5139         tcg_gen_movi_tl(s->T1, val);
5140         gen_op_testl_T0_T1_cc(s);
5141         set_cc_op(s, CC_OP_LOGICB + ot);
5142         break;
5143 
5144     case 0x98: /* CWDE/CBW */
5145         switch (dflag) {
5146 #ifdef TARGET_X86_64
5147         case MO_64:
5148             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5149             tcg_gen_ext32s_tl(s->T0, s->T0);
5150             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5151             break;
5152 #endif
5153         case MO_32:
5154             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5155             tcg_gen_ext16s_tl(s->T0, s->T0);
5156             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5157             break;
5158         case MO_16:
5159             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5160             tcg_gen_ext8s_tl(s->T0, s->T0);
5161             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5162             break;
5163         default:
5164             tcg_abort();
5165         }
5166         break;
5167     case 0x99: /* CDQ/CWD */
5168         switch (dflag) {
5169 #ifdef TARGET_X86_64
5170         case MO_64:
5171             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5172             tcg_gen_sari_tl(s->T0, s->T0, 63);
5173             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5174             break;
5175 #endif
5176         case MO_32:
5177             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5178             tcg_gen_ext32s_tl(s->T0, s->T0);
5179             tcg_gen_sari_tl(s->T0, s->T0, 31);
5180             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5181             break;
5182         case MO_16:
5183             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5184             tcg_gen_ext16s_tl(s->T0, s->T0);
5185             tcg_gen_sari_tl(s->T0, s->T0, 15);
5186             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5187             break;
5188         default:
5189             tcg_abort();
5190         }
5191         break;
5192     case 0x1af: /* imul Gv, Ev */
5193     case 0x69: /* imul Gv, Ev, I */
5194     case 0x6b:
5195         ot = dflag;
5196         modrm = x86_ldub_code(env, s);
5197         reg = ((modrm >> 3) & 7) | rex_r;
5198         if (b == 0x69)
5199             s->rip_offset = insn_const_size(ot);
5200         else if (b == 0x6b)
5201             s->rip_offset = 1;
5202         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5203         if (b == 0x69) {
5204             val = insn_get(env, s, ot);
5205             tcg_gen_movi_tl(s->T1, val);
5206         } else if (b == 0x6b) {
5207             val = (int8_t)insn_get(env, s, MO_8);
5208             tcg_gen_movi_tl(s->T1, val);
5209         } else {
5210             gen_op_mov_v_reg(s, ot, s->T1, reg);
5211         }
5212         switch (ot) {
5213 #ifdef TARGET_X86_64
5214         case MO_64:
5215             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5216             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5217             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5218             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5219             break;
5220 #endif
5221         case MO_32:
5222             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5223             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5224             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5225                               s->tmp2_i32, s->tmp3_i32);
5226             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5227             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5228             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5229             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5230             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5231             break;
5232         default:
5233             tcg_gen_ext16s_tl(s->T0, s->T0);
5234             tcg_gen_ext16s_tl(s->T1, s->T1);
5235             /* XXX: use 32 bit mul which could be faster */
5236             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5237             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5238             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5239             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5240             gen_op_mov_reg_v(s, ot, reg, s->T0);
5241             break;
5242         }
5243         set_cc_op(s, CC_OP_MULB + ot);
5244         break;
5245     case 0x1c0:
5246     case 0x1c1: /* xadd Ev, Gv */
5247         ot = mo_b_d(b, dflag);
5248         modrm = x86_ldub_code(env, s);
5249         reg = ((modrm >> 3) & 7) | rex_r;
5250         mod = (modrm >> 6) & 3;
5251         gen_op_mov_v_reg(s, ot, s->T0, reg);
5252         if (mod == 3) {
5253             rm = (modrm & 7) | REX_B(s);
5254             gen_op_mov_v_reg(s, ot, s->T1, rm);
5255             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5256             gen_op_mov_reg_v(s, ot, reg, s->T1);
5257             gen_op_mov_reg_v(s, ot, rm, s->T0);
5258         } else {
5259             gen_lea_modrm(env, s, modrm);
5260             if (s->prefix & PREFIX_LOCK) {
5261                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5262                                             s->mem_index, ot | MO_LE);
5263                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5264             } else {
5265                 gen_op_ld_v(s, ot, s->T1, s->A0);
5266                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5267                 gen_op_st_v(s, ot, s->T0, s->A0);
5268             }
5269             gen_op_mov_reg_v(s, ot, reg, s->T1);
5270         }
5271         gen_op_update2_cc(s);
5272         set_cc_op(s, CC_OP_ADDB + ot);
5273         break;
5274     case 0x1b0:
5275     case 0x1b1: /* cmpxchg Ev, Gv */
5276         {
5277             TCGv oldv, newv, cmpv;
5278 
5279             ot = mo_b_d(b, dflag);
5280             modrm = x86_ldub_code(env, s);
5281             reg = ((modrm >> 3) & 7) | rex_r;
5282             mod = (modrm >> 6) & 3;
5283             oldv = tcg_temp_new();
5284             newv = tcg_temp_new();
5285             cmpv = tcg_temp_new();
5286             gen_op_mov_v_reg(s, ot, newv, reg);
5287             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5288 
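                 /* CMPXCHG: if the accumulator equals the destination, the
                    destination is replaced with reg; otherwise the
                    accumulator is loaded from the destination.  Flags are
                    set as by CMP.  */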
5289             if (s->prefix & PREFIX_LOCK) {
5290                 if (mod == 3) {
5291                     goto illegal_op;
5292                 }
5293                 gen_lea_modrm(env, s, modrm);
5294                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5295                                           s->mem_index, ot | MO_LE);
5296                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5297             } else {
5298                 if (mod == 3) {
5299                     rm = (modrm & 7) | REX_B(s);
5300                     gen_op_mov_v_reg(s, ot, oldv, rm);
5301                 } else {
5302                     gen_lea_modrm(env, s, modrm);
5303                     gen_op_ld_v(s, ot, oldv, s->A0);
5304                     rm = 0; /* avoid warning */
5305                 }
5306                 gen_extu(ot, oldv);
5307                 gen_extu(ot, cmpv);
5308                 /* store value = (old == cmp ? new : old);  */
5309                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5310                 if (mod == 3) {
5311                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5312                     gen_op_mov_reg_v(s, ot, rm, newv);
5313                 } else {
5314                     /* Perform an unconditional store cycle like a physical
5315                        CPU; it must happen before changing the accumulator to
5316                        ensure idempotency if the store faults and the
5317                        instruction is restarted */
5318                     gen_op_st_v(s, ot, newv, s->A0);
5319                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5320                 }
5321             }
5322             tcg_gen_mov_tl(cpu_cc_src, oldv);
5323             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5324             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5325             set_cc_op(s, CC_OP_SUBB + ot);
5326             tcg_temp_free(oldv);
5327             tcg_temp_free(newv);
5328             tcg_temp_free(cmpv);
5329         }
5330         break;
5331     case 0x1c7: /* cmpxchg8b */
5332         modrm = x86_ldub_code(env, s);
5333         mod = (modrm >> 6) & 3;
5334         switch ((modrm >> 3) & 7) {
5335         case 1: /* CMPXCHG8B, CMPXCHG16B */
5336             if (mod == 3) {
5337                 goto illegal_op;
5338             }
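                 /* With the LOCK prefix in a parallel context the atomic
                    helper must be used; otherwise the cheaper unlocked
                    variant suffices.  Both set ZF via CC_OP_EFLAGS.  */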
5339 #ifdef TARGET_X86_64
5340             if (dflag == MO_64) {
5341                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5342                     goto illegal_op;
5343                 }
5344                 gen_lea_modrm(env, s, modrm);
5345                 if ((s->prefix & PREFIX_LOCK) &&
5346                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5347                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5348                 } else {
5349                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5350                 }
5351                 set_cc_op(s, CC_OP_EFLAGS);
5352                 break;
5353             }
5354 #endif
5355             if (!(s->cpuid_features & CPUID_CX8)) {
5356                 goto illegal_op;
5357             }
5358             gen_lea_modrm(env, s, modrm);
5359             if ((s->prefix & PREFIX_LOCK) &&
5360                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5361                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5362             } else {
5363                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5364             }
5365             set_cc_op(s, CC_OP_EFLAGS);
5366             break;
5367 
5368         case 7: /* RDSEED */
5369         case 6: /* RDRAND */
5370             if (mod != 3 ||
5371                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5372                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5373                 goto illegal_op;
5374             }
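                 /* Note that RDSEED falls through to the RDRAND
                    implementation here.  RDRAND reads an external entropy
                    source, so under icount it is treated like I/O: open an
                    I/O window and end the TB afterwards to keep the
                    instruction count deterministic.  */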
5375             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5376                 gen_io_start();
5377             }
5378             gen_helper_rdrand(s->T0, cpu_env);
5379             rm = (modrm & 7) | REX_B(s);
5380             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5381             set_cc_op(s, CC_OP_EFLAGS);
5382             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5383                 gen_jmp(s, s->pc - s->cs_base);
5384             }
5385             break;
5386 
5387         default:
5388             goto illegal_op;
5389         }
5390         break;
5391 
5392         /**************************/
5393         /* push/pop */
5394     case 0x50 ... 0x57: /* push */
5395         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5396         gen_push_v(s, s->T0);
5397         break;
5398     case 0x58 ... 0x5f: /* pop */
5399         ot = gen_pop_T0(s);
5400         /* NOTE: order is important for pop %sp */
5401         gen_pop_update(s, ot);
5402         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5403         break;
5404     case 0x60: /* pusha */
5405         if (CODE64(s))
5406             goto illegal_op;
5407         gen_pusha(s);
5408         break;
5409     case 0x61: /* popa */
5410         if (CODE64(s))
5411             goto illegal_op;
5412         gen_popa(s);
5413         break;
5414     case 0x68: /* push Iv */
5415     case 0x6a:
5416         ot = mo_pushpop(s, dflag);
5417         if (b == 0x68)
5418             val = insn_get(env, s, ot);
5419         else
5420             val = (int8_t)insn_get(env, s, MO_8);
5421         tcg_gen_movi_tl(s->T0, val);
5422         gen_push_v(s, s->T0);
5423         break;
5424     case 0x8f: /* pop Ev */
5425         modrm = x86_ldub_code(env, s);
5426         mod = (modrm >> 6) & 3;
5427         ot = gen_pop_T0(s);
5428         if (mod == 3) {
5429             /* NOTE: order is important for pop %sp */
5430             gen_pop_update(s, ot);
5431             rm = (modrm & 7) | REX_B(s);
5432             gen_op_mov_reg_v(s, ot, rm, s->T0);
5433         } else {
5434             /* NOTE: order is important too for MMU exceptions */
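                 /* If ESP is part of the effective address of a POP into
                    memory, the address must be computed with ESP already
                    incremented; popl_esp_hack makes the address computation
                    add the pop size in that case.  */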
5435             s->popl_esp_hack = 1 << ot;
5436             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5437             s->popl_esp_hack = 0;
5438             gen_pop_update(s, ot);
5439         }
5440         break;
5441     case 0xc8: /* enter */
5442         {
5443             int level;
5444             val = x86_lduw_code(env, s);
5445             level = x86_ldub_code(env, s);
5446             gen_enter(s, val, level);
5447         }
5448         break;
5449     case 0xc9: /* leave */
5450         gen_leave(s);
5451         break;
5452     case 0x06: /* push es */
5453     case 0x0e: /* push cs */
5454     case 0x16: /* push ss */
5455     case 0x1e: /* push ds */
5456         if (CODE64(s))
5457             goto illegal_op;
5458         gen_op_movl_T0_seg(s, b >> 3);
5459         gen_push_v(s, s->T0);
5460         break;
5461     case 0x1a0: /* push fs */
5462     case 0x1a8: /* push gs */
5463         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5464         gen_push_v(s, s->T0);
5465         break;
5466     case 0x07: /* pop es */
5467     case 0x17: /* pop ss */
5468     case 0x1f: /* pop ds */
5469         if (CODE64(s))
5470             goto illegal_op;
5471         reg = b >> 3;
5472         ot = gen_pop_T0(s);
5473         gen_movl_seg_T0(s, reg);
5474         gen_pop_update(s, ot);
5475         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5476         if (s->base.is_jmp) {
5477             gen_jmp_im(s, s->pc - s->cs_base);
5478             if (reg == R_SS) {
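                     /* A load of SS inhibits interrupts until after the
                        next instruction so that SS:(E)SP can be loaded as
                        a pair; clear TF as well so single-step does not
                        fire in between.  */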
5479                 s->tf = 0;
5480                 gen_eob_inhibit_irq(s, true);
5481             } else {
5482                 gen_eob(s);
5483             }
5484         }
5485         break;
5486     case 0x1a1: /* pop fs */
5487     case 0x1a9: /* pop gs */
5488         ot = gen_pop_T0(s);
5489         gen_movl_seg_T0(s, (b >> 3) & 7);
5490         gen_pop_update(s, ot);
5491         if (s->base.is_jmp) {
5492             gen_jmp_im(s, s->pc - s->cs_base);
5493             gen_eob(s);
5494         }
5495         break;
5496 
5497         /**************************/
5498         /* mov */
5499     case 0x88:
5500     case 0x89: /* mov Gv, Ev */
5501         ot = mo_b_d(b, dflag);
5502         modrm = x86_ldub_code(env, s);
5503         reg = ((modrm >> 3) & 7) | rex_r;
5504 
5505         /* generate a generic store */
5506         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5507         break;
5508     case 0xc6:
5509     case 0xc7: /* mov Ev, Iv */
5510         ot = mo_b_d(b, dflag);
5511         modrm = x86_ldub_code(env, s);
5512         mod = (modrm >> 6) & 3;
5513         if (mod != 3) {
5514             s->rip_offset = insn_const_size(ot);
5515             gen_lea_modrm(env, s, modrm);
5516         }
5517         val = insn_get(env, s, ot);
5518         tcg_gen_movi_tl(s->T0, val);
5519         if (mod != 3) {
5520             gen_op_st_v(s, ot, s->T0, s->A0);
5521         } else {
5522             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5523         }
5524         break;
5525     case 0x8a:
5526     case 0x8b: /* mov Ev, Gv */
5527         ot = mo_b_d(b, dflag);
5528         modrm = x86_ldub_code(env, s);
5529         reg = ((modrm >> 3) & 7) | rex_r;
5530 
5531         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5532         gen_op_mov_reg_v(s, ot, reg, s->T0);
5533         break;
5534     case 0x8e: /* mov seg, Gv */
5535         modrm = x86_ldub_code(env, s);
5536         reg = (modrm >> 3) & 7;
5537         if (reg >= 6 || reg == R_CS)
5538             goto illegal_op;
5539         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5540         gen_movl_seg_T0(s, reg);
5541         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5542         if (s->base.is_jmp) {
5543             gen_jmp_im(s, s->pc - s->cs_base);
5544             if (reg == R_SS) {
5545                 s->tf = 0;
5546                 gen_eob_inhibit_irq(s, true);
5547             } else {
5548                 gen_eob(s);
5549             }
5550         }
5551         break;
5552     case 0x8c: /* mov Gv, seg */
5553         modrm = x86_ldub_code(env, s);
5554         reg = (modrm >> 3) & 7;
5555         mod = (modrm >> 6) & 3;
5556         if (reg >= 6)
5557             goto illegal_op;
5558         gen_op_movl_T0_seg(s, reg);
5559         ot = mod == 3 ? dflag : MO_16;
5560         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5561         break;
5562 
5563     case 0x1b6: /* movzbS Gv, Eb */
5564     case 0x1b7: /* movzwS Gv, Ew */
5565     case 0x1be: /* movsbS Gv, Eb */
5566     case 0x1bf: /* movswS Gv, Ew */
5567         {
5568             MemOp d_ot;
5569             MemOp s_ot;
5570 
5571             /* d_ot is the size of destination */
5572             d_ot = dflag;
5573             /* ot is the size of source */
5574             ot = (b & 1) + MO_8;
5575             /* s_ot is the sign+size of source */
5576             s_ot = b & 8 ? MO_SIGN | ot : ot;
5577 
5578             modrm = x86_ldub_code(env, s);
5579             reg = ((modrm >> 3) & 7) | rex_r;
5580             mod = (modrm >> 6) & 3;
5581             rm = (modrm & 7) | REX_B(s);
5582 
5583             if (mod == 3) {
5584                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5585                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5586                 } else {
5587                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5588                     switch (s_ot) {
5589                     case MO_UB:
5590                         tcg_gen_ext8u_tl(s->T0, s->T0);
5591                         break;
5592                     case MO_SB:
5593                         tcg_gen_ext8s_tl(s->T0, s->T0);
5594                         break;
5595                     case MO_UW:
5596                         tcg_gen_ext16u_tl(s->T0, s->T0);
5597                         break;
5598                     default:
5599                     case MO_SW:
5600                         tcg_gen_ext16s_tl(s->T0, s->T0);
5601                         break;
5602                     }
5603                 }
5604                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5605             } else {
5606                 gen_lea_modrm(env, s, modrm);
5607                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5608                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5609             }
5610         }
5611         break;
5612 
5613     case 0x8d: /* lea */
5614         modrm = x86_ldub_code(env, s);
5615         mod = (modrm >> 6) & 3;
5616         if (mod == 3)
5617             goto illegal_op;
5618         reg = ((modrm >> 3) & 7) | rex_r;
5619         {
5620             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5621             TCGv ea = gen_lea_modrm_1(s, a);
5622             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5623             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5624         }
5625         break;
5626 
5627     case 0xa0: /* mov EAX, Ov */
5628     case 0xa1:
5629     case 0xa2: /* mov Ov, EAX */
5630     case 0xa3:
5631         {
5632             target_ulong offset_addr;
5633 
5634             ot = mo_b_d(b, dflag);
5635             switch (s->aflag) {
5636 #ifdef TARGET_X86_64
5637             case MO_64:
5638                 offset_addr = x86_ldq_code(env, s);
5639                 break;
5640 #endif
5641             default:
5642                 offset_addr = insn_get(env, s, s->aflag);
5643                 break;
5644             }
5645             tcg_gen_movi_tl(s->A0, offset_addr);
5646             gen_add_A0_ds_seg(s);
5647             if ((b & 2) == 0) {
5648                 gen_op_ld_v(s, ot, s->T0, s->A0);
5649                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5650             } else {
5651                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5652                 gen_op_st_v(s, ot, s->T0, s->A0);
5653             }
5654         }
5655         break;
5656     case 0xd7: /* xlat */
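             /* XLAT: AL = [seg:(E)BX + ZeroExtend(AL)], default segment DS. */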
5657         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5658         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5659         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5660         gen_extu(s->aflag, s->A0);
5661         gen_add_A0_ds_seg(s);
5662         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5663         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5664         break;
5665     case 0xb0 ... 0xb7: /* mov R, Ib */
5666         val = insn_get(env, s, MO_8);
5667         tcg_gen_movi_tl(s->T0, val);
5668         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5669         break;
5670     case 0xb8 ... 0xbf: /* mov R, Iv */
5671 #ifdef TARGET_X86_64
5672         if (dflag == MO_64) {
5673             uint64_t tmp;
5674             /* 64 bit case */
5675             tmp = x86_ldq_code(env, s);
5676             reg = (b & 7) | REX_B(s);
5677             tcg_gen_movi_tl(s->T0, tmp);
5678             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5679         } else
5680 #endif
5681         {
5682             ot = dflag;
5683             val = insn_get(env, s, ot);
5684             reg = (b & 7) | REX_B(s);
5685             tcg_gen_movi_tl(s->T0, val);
5686             gen_op_mov_reg_v(s, ot, reg, s->T0);
5687         }
5688         break;
5689 
5690     case 0x91 ... 0x97: /* xchg R, EAX */
5691     do_xchg_reg_eax:
5692         ot = dflag;
5693         reg = (b & 7) | REX_B(s);
5694         rm = R_EAX;
5695         goto do_xchg_reg;
5696     case 0x86:
5697     case 0x87: /* xchg Ev, Gv */
5698         ot = mo_b_d(b, dflag);
5699         modrm = x86_ldub_code(env, s);
5700         reg = ((modrm >> 3) & 7) | rex_r;
5701         mod = (modrm >> 6) & 3;
5702         if (mod == 3) {
5703             rm = (modrm & 7) | REX_B(s);
5704         do_xchg_reg:
5705             gen_op_mov_v_reg(s, ot, s->T0, reg);
5706             gen_op_mov_v_reg(s, ot, s->T1, rm);
5707             gen_op_mov_reg_v(s, ot, rm, s->T0);
5708             gen_op_mov_reg_v(s, ot, reg, s->T1);
5709         } else {
5710             gen_lea_modrm(env, s, modrm);
5711             gen_op_mov_v_reg(s, ot, s->T0, reg);
5712             /* for xchg, lock is implicit */
5713             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5714                                    s->mem_index, ot | MO_LE);
5715             gen_op_mov_reg_v(s, ot, reg, s->T1);
5716         }
5717         break;
5718     case 0xc4: /* les Gv */
5719         /* In CODE64 this is VEX3; see above.  */
5720         op = R_ES;
5721         goto do_lxx;
5722     case 0xc5: /* lds Gv */
5723         /* In CODE64 this is VEX2; see above.  */
5724         op = R_DS;
5725         goto do_lxx;
5726     case 0x1b2: /* lss Gv */
5727         op = R_SS;
5728         goto do_lxx;
5729     case 0x1b4: /* lfs Gv */
5730         op = R_FS;
5731         goto do_lxx;
5732     case 0x1b5: /* lgs Gv */
5733         op = R_GS;
5734     do_lxx:
5735         ot = dflag != MO_16 ? MO_32 : MO_16;
5736         modrm = x86_ldub_code(env, s);
5737         reg = ((modrm >> 3) & 7) | rex_r;
5738         mod = (modrm >> 6) & 3;
5739         if (mod == 3)
5740             goto illegal_op;
5741         gen_lea_modrm(env, s, modrm);
5742         gen_op_ld_v(s, ot, s->T1, s->A0);
5743         gen_add_A0_im(s, 1 << ot);
5744         /* load the segment first to handle exceptions properly */
5745         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5746         gen_movl_seg_T0(s, op);
5747         /* then put the data */
5748         gen_op_mov_reg_v(s, ot, reg, s->T1);
5749         if (s->base.is_jmp) {
5750             gen_jmp_im(s, s->pc - s->cs_base);
5751             gen_eob(s);
5752         }
5753         break;
5754 
5755         /************************/
5756         /* shifts */
5757     case 0xc0:
5758     case 0xc1:
5759         /* shift Ev,Ib */
5760         shift = 2;
5761     grp2:
5762         {
5763             ot = mo_b_d(b, dflag);
5764             modrm = x86_ldub_code(env, s);
5765             mod = (modrm >> 6) & 3;
5766             op = (modrm >> 3) & 7;
5767 
5768             if (mod != 3) {
5769                 if (shift == 2) {
5770                     s->rip_offset = 1;
5771                 }
5772                 gen_lea_modrm(env, s, modrm);
5773                 opreg = OR_TMP0;
5774             } else {
5775                 opreg = (modrm & 7) | REX_B(s);
5776             }
5777 
5778             /* simpler op */
5779             if (shift == 0) {
5780                 gen_shift(s, op, ot, opreg, OR_ECX);
5781             } else {
5782                 if (shift == 2) {
5783                     shift = x86_ldub_code(env, s);
5784                 }
5785                 gen_shifti(s, op, ot, opreg, shift);
5786             }
5787         }
5788         break;
5789     case 0xd0:
5790     case 0xd1:
5791         /* shift Ev,1 */
5792         shift = 1;
5793         goto grp2;
5794     case 0xd2:
5795     case 0xd3:
5796         /* shift Ev,cl */
5797         shift = 0;
5798         goto grp2;
5799 
5800     case 0x1a4: /* shld imm */
5801         op = 0;
5802         shift = 1;
5803         goto do_shiftd;
5804     case 0x1a5: /* shld cl */
5805         op = 0;
5806         shift = 0;
5807         goto do_shiftd;
5808     case 0x1ac: /* shrd imm */
5809         op = 1;
5810         shift = 1;
5811         goto do_shiftd;
5812     case 0x1ad: /* shrd cl */
5813         op = 1;
5814         shift = 0;
5815     do_shiftd:
5816         ot = dflag;
5817         modrm = x86_ldub_code(env, s);
5818         mod = (modrm >> 6) & 3;
5819         rm = (modrm & 7) | REX_B(s);
5820         reg = ((modrm >> 3) & 7) | rex_r;
5821         if (mod != 3) {
5822             gen_lea_modrm(env, s, modrm);
5823             opreg = OR_TMP0;
5824         } else {
5825             opreg = rm;
5826         }
5827         gen_op_mov_v_reg(s, ot, s->T1, reg);
5828 
5829         if (shift) {
5830             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5831             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5832             tcg_temp_free(imm);
5833         } else {
5834             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5835         }
5836         break;
5837 
5838         /************************/
5839         /* floats */
5840     case 0xd8 ... 0xdf:
5841         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5842             /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5843             /* XXX: what to do on an illegal op? */
5844             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5845             break;
5846         }
5847         modrm = x86_ldub_code(env, s);
5848         mod = (modrm >> 6) & 3;
5849         rm = modrm & 7;
5850         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
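             /* Build a 6-bit operation index from the low three opcode bits
                (d8..df) and ModRM.reg; the switches below decode the memory
                and register forms from it.  */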
5851         if (mod != 3) {
5852             /* memory op */
5853             gen_lea_modrm(env, s, modrm);
5854             switch(op) {
5855             case 0x00 ... 0x07: /* fxxxs */
5856             case 0x10 ... 0x17: /* fixxxl */
5857             case 0x20 ... 0x27: /* fxxxl */
5858             case 0x30 ... 0x37: /* fixxx */
5859                 {
5860                     int op1;
5861                     op1 = op & 7;
5862 
5863                     switch(op >> 4) {
5864                     case 0:
5865                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5866                                             s->mem_index, MO_LEUL);
5867                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5868                         break;
5869                     case 1:
5870                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5871                                             s->mem_index, MO_LEUL);
5872                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5873                         break;
5874                     case 2:
5875                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5876                                             s->mem_index, MO_LEQ);
5877                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5878                         break;
5879                     case 3:
5880                     default:
5881                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5882                                             s->mem_index, MO_LESW);
5883                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5884                         break;
5885                     }
5886 
5887                     gen_helper_fp_arith_ST0_FT0(op1);
5888                     if (op1 == 3) {
5889                         /* fcomp needs pop */
5890                         gen_helper_fpop(cpu_env);
5891                     }
5892                 }
5893                 break;
5894             case 0x08: /* flds */
5895             case 0x0a: /* fsts */
5896             case 0x0b: /* fstps */
5897             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5898             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5899             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5900                 switch(op & 7) {
5901                 case 0:
5902                     switch(op >> 4) {
5903                     case 0:
5904                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5905                                             s->mem_index, MO_LEUL);
5906                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5907                         break;
5908                     case 1:
5909                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5910                                             s->mem_index, MO_LEUL);
5911                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5912                         break;
5913                     case 2:
5914                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5915                                             s->mem_index, MO_LEQ);
5916                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5917                         break;
5918                     case 3:
5919                     default:
5920                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5921                                             s->mem_index, MO_LESW);
5922                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5923                         break;
5924                     }
5925                     break;
5926                 case 1:
5927                     /* XXX: the corresponding CPUID bit (SSE3, for FISTTP) must be tested! */
5928                     switch(op >> 4) {
5929                     case 1:
5930                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
5931                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5932                                             s->mem_index, MO_LEUL);
5933                         break;
5934                     case 2:
5935                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
5936                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5937                                             s->mem_index, MO_LEQ);
5938                         break;
5939                     case 3:
5940                     default:
5941                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
5942                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5943                                             s->mem_index, MO_LEUW);
5944                         break;
5945                     }
5946                     gen_helper_fpop(cpu_env);
5947                     break;
5948                 default:
5949                     switch(op >> 4) {
5950                     case 0:
5951                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
5952                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5953                                             s->mem_index, MO_LEUL);
5954                         break;
5955                     case 1:
5956                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
5957                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5958                                             s->mem_index, MO_LEUL);
5959                         break;
5960                     case 2:
5961                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
5962                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
5963                                             s->mem_index, MO_LEQ);
5964                         break;
5965                     case 3:
5966                     default:
5967                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
5968                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5969                                             s->mem_index, MO_LEUW);
5970                         break;
5971                     }
5972                     if ((op & 7) == 3)
5973                         gen_helper_fpop(cpu_env);
5974                     break;
5975                 }
5976                 break;
5977             case 0x0c: /* fldenv mem */
5978                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5979                 break;
5980             case 0x0d: /* fldcw mem */
5981                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5982                                     s->mem_index, MO_LEUW);
5983                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
5984                 break;
5985             case 0x0e: /* fnstenv mem */
5986                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
5987                 break;
5988             case 0x0f: /* fnstcw mem */
5989                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
5990                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
5991                                     s->mem_index, MO_LEUW);
5992                 break;
5993             case 0x1d: /* fldt mem */
5994                 gen_helper_fldt_ST0(cpu_env, s->A0);
5995                 break;
5996             case 0x1f: /* fstpt mem */
5997                 gen_helper_fstt_ST0(cpu_env, s->A0);
5998                 gen_helper_fpop(cpu_env);
5999                 break;
6000             case 0x2c: /* frstor mem */
6001                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6002                 break;
6003             case 0x2e: /* fnsave mem */
6004                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6005                 break;
6006             case 0x2f: /* fnstsw mem */
6007                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6008                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6009                                     s->mem_index, MO_LEUW);
6010                 break;
6011             case 0x3c: /* fbld */
6012                 gen_helper_fbld_ST0(cpu_env, s->A0);
6013                 break;
6014             case 0x3e: /* fbstp */
6015                 gen_helper_fbst_ST0(cpu_env, s->A0);
6016                 gen_helper_fpop(cpu_env);
6017                 break;
6018             case 0x3d: /* fildll */
6019                 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6020                 gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6021                 break;
6022             case 0x3f: /* fistpll */
6023                 gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6024                 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6025                 gen_helper_fpop(cpu_env);
6026                 break;
6027             default:
6028                 goto unknown_op;
6029             }
6030         } else {
6031             /* register float ops */
6032             opreg = rm;
6033 
6034             switch(op) {
6035             case 0x08: /* fld sti */
6036                 gen_helper_fpush(cpu_env);
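                     /* after fpush, the old ST(i) has become ST(i + 1) */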
6037                 gen_helper_fmov_ST0_STN(cpu_env,
6038                                         tcg_const_i32((opreg + 1) & 7));
6039                 break;
6040             case 0x09: /* fxchg sti */
6041             case 0x29: /* fxchg4 sti, undocumented op */
6042             case 0x39: /* fxchg7 sti, undocumented op */
6043                 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6044                 break;
6045             case 0x0a: /* grp d9/2 */
6046                 switch(rm) {
6047                 case 0: /* fnop */
6048                     /* check exceptions (FreeBSD FPU probe) */
6049                     gen_helper_fwait(cpu_env);
6050                     break;
6051                 default:
6052                     goto unknown_op;
6053                 }
6054                 break;
6055             case 0x0c: /* grp d9/4 */
6056                 switch(rm) {
6057                 case 0: /* fchs */
6058                     gen_helper_fchs_ST0(cpu_env);
6059                     break;
6060                 case 1: /* fabs */
6061                     gen_helper_fabs_ST0(cpu_env);
6062                     break;
6063                 case 4: /* ftst */
6064                     gen_helper_fldz_FT0(cpu_env);
6065                     gen_helper_fcom_ST0_FT0(cpu_env);
6066                     break;
6067                 case 5: /* fxam */
6068                     gen_helper_fxam_ST0(cpu_env);
6069                     break;
6070                 default:
6071                     goto unknown_op;
6072                 }
6073                 break;
6074             case 0x0d: /* grp d9/5 */
6075                 {
6076                     switch(rm) {
6077                     case 0:
6078                         gen_helper_fpush(cpu_env);
6079                         gen_helper_fld1_ST0(cpu_env);
6080                         break;
6081                     case 1:
6082                         gen_helper_fpush(cpu_env);
6083                         gen_helper_fldl2t_ST0(cpu_env);
6084                         break;
6085                     case 2:
6086                         gen_helper_fpush(cpu_env);
6087                         gen_helper_fldl2e_ST0(cpu_env);
6088                         break;
6089                     case 3:
6090                         gen_helper_fpush(cpu_env);
6091                         gen_helper_fldpi_ST0(cpu_env);
6092                         break;
6093                     case 4:
6094                         gen_helper_fpush(cpu_env);
6095                         gen_helper_fldlg2_ST0(cpu_env);
6096                         break;
6097                     case 5:
6098                         gen_helper_fpush(cpu_env);
6099                         gen_helper_fldln2_ST0(cpu_env);
6100                         break;
6101                     case 6:
6102                         gen_helper_fpush(cpu_env);
6103                         gen_helper_fldz_ST0(cpu_env);
6104                         break;
6105                     default:
6106                         goto unknown_op;
6107                     }
6108                 }
6109                 break;
6110             case 0x0e: /* grp d9/6 */
6111                 switch(rm) {
6112                 case 0: /* f2xm1 */
6113                     gen_helper_f2xm1(cpu_env);
6114                     break;
6115                 case 1: /* fyl2x */
6116                     gen_helper_fyl2x(cpu_env);
6117                     break;
6118                 case 2: /* fptan */
6119                     gen_helper_fptan(cpu_env);
6120                     break;
6121                 case 3: /* fpatan */
6122                     gen_helper_fpatan(cpu_env);
6123                     break;
6124                 case 4: /* fxtract */
6125                     gen_helper_fxtract(cpu_env);
6126                     break;
6127                 case 5: /* fprem1 */
6128                     gen_helper_fprem1(cpu_env);
6129                     break;
6130                 case 6: /* fdecstp */
6131                     gen_helper_fdecstp(cpu_env);
6132                     break;
6133                 default:
6134                 case 7: /* fincstp */
6135                     gen_helper_fincstp(cpu_env);
6136                     break;
6137                 }
6138                 break;
6139             case 0x0f: /* grp d9/7 */
6140                 switch(rm) {
6141                 case 0: /* fprem */
6142                     gen_helper_fprem(cpu_env);
6143                     break;
6144                 case 1: /* fyl2xp1 */
6145                     gen_helper_fyl2xp1(cpu_env);
6146                     break;
6147                 case 2: /* fsqrt */
6148                     gen_helper_fsqrt(cpu_env);
6149                     break;
6150                 case 3: /* fsincos */
6151                     gen_helper_fsincos(cpu_env);
6152                     break;
6153                 case 5: /* fscale */
6154                     gen_helper_fscale(cpu_env);
6155                     break;
6156                 case 4: /* frndint */
6157                     gen_helper_frndint(cpu_env);
6158                     break;
6159                 case 6: /* fsin */
6160                     gen_helper_fsin(cpu_env);
6161                     break;
6162                 default:
6163                 case 7: /* fcos */
6164                     gen_helper_fcos(cpu_env);
6165                     break;
6166                 }
6167                 break;
6168             case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6169             case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6170             case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6171                 {
6172                     int op1;
6173 
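                         /* The /r field selects the operation: 0 fadd, 1 fmul,
                            2 fcom, 3 fcomp, 4 fsub, 5 fsubr, 6 fdiv, 7 fdivr;
                            the STN,ST0 helper swaps each sub/div pair to match
                            the dc/de encodings.  */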
6174                     op1 = op & 7;
6175                     if (op >= 0x20) {
6176                         gen_helper_fp_arith_STN_ST0(op1, opreg);
6177                         if (op >= 0x30)
6178                             gen_helper_fpop(cpu_env);
6179                     } else {
6180                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6181                         gen_helper_fp_arith_ST0_FT0(op1);
6182                     }
6183                 }
6184                 break;
6185             case 0x02: /* fcom */
6186             case 0x22: /* fcom2, undocumented op */
6187                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6188                 gen_helper_fcom_ST0_FT0(cpu_env);
6189                 break;
6190             case 0x03: /* fcomp */
6191             case 0x23: /* fcomp3, undocumented op */
6192             case 0x32: /* fcomp5, undocumented op */
6193                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6194                 gen_helper_fcom_ST0_FT0(cpu_env);
6195                 gen_helper_fpop(cpu_env);
6196                 break;
6197             case 0x15: /* da/5 */
6198                 switch(rm) {
6199                 case 1: /* fucompp */
6200                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6201                     gen_helper_fucom_ST0_FT0(cpu_env);
6202                     gen_helper_fpop(cpu_env);
6203                     gen_helper_fpop(cpu_env);
6204                     break;
6205                 default:
6206                     goto unknown_op;
6207                 }
6208                 break;
6209             case 0x1c:
6210                 switch(rm) {
6211                 case 0: /* feni (287 only, just do nop here) */
6212                     break;
6213                 case 1: /* fdisi (287 only, just do nop here) */
6214                     break;
6215                 case 2: /* fclex */
6216                     gen_helper_fclex(cpu_env);
6217                     break;
6218                 case 3: /* fninit */
6219                     gen_helper_fninit(cpu_env);
6220                     break;
6221                 case 4: /* fsetpm (287 only, just do nop here) */
6222                     break;
6223                 default:
6224                     goto unknown_op;
6225                 }
6226                 break;
6227             case 0x1d: /* fucomi */
6228                 if (!(s->cpuid_features & CPUID_CMOV)) {
6229                     goto illegal_op;
6230                 }
6231                 gen_update_cc_op(s);
6232                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6233                 gen_helper_fucomi_ST0_FT0(cpu_env);
6234                 set_cc_op(s, CC_OP_EFLAGS);
6235                 break;
6236             case 0x1e: /* fcomi */
6237                 if (!(s->cpuid_features & CPUID_CMOV)) {
6238                     goto illegal_op;
6239                 }
6240                 gen_update_cc_op(s);
6241                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6242                 gen_helper_fcomi_ST0_FT0(cpu_env);
6243                 set_cc_op(s, CC_OP_EFLAGS);
6244                 break;
6245             case 0x28: /* ffree sti */
6246                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6247                 break;
6248             case 0x2a: /* fst sti */
6249                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6250                 break;
6251             case 0x2b: /* fstp sti */
6252             case 0x0b: /* fstp1 sti, undocumented op */
6253             case 0x3a: /* fstp8 sti, undocumented op */
6254             case 0x3b: /* fstp9 sti, undocumented op */
6255                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6256                 gen_helper_fpop(cpu_env);
6257                 break;
6258             case 0x2c: /* fucom st(i) */
6259                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6260                 gen_helper_fucom_ST0_FT0(cpu_env);
6261                 break;
6262             case 0x2d: /* fucomp st(i) */
6263                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6264                 gen_helper_fucom_ST0_FT0(cpu_env);
6265                 gen_helper_fpop(cpu_env);
6266                 break;
6267             case 0x33: /* de/3 */
6268                 switch(rm) {
6269                 case 1: /* fcompp */
6270                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6271                     gen_helper_fcom_ST0_FT0(cpu_env);
6272                     gen_helper_fpop(cpu_env);
6273                     gen_helper_fpop(cpu_env);
6274                     break;
6275                 default:
6276                     goto unknown_op;
6277                 }
6278                 break;
6279             case 0x38: /* ffreep sti, undocumented op */
6280                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6281                 gen_helper_fpop(cpu_env);
6282                 break;
6283             case 0x3c: /* df/4 */
6284                 switch(rm) {
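                    /* rm 0 is fnstsw ax: copy the FPU status word to AX */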
6285                 case 0:
6286                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6287                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6288                     gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6289                     break;
6290                 default:
6291                     goto unknown_op;
6292                 }
6293                 break;
6294             case 0x3d: /* fucomip */
6295                 if (!(s->cpuid_features & CPUID_CMOV)) {
6296                     goto illegal_op;
6297                 }
6298                 gen_update_cc_op(s);
6299                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6300                 gen_helper_fucomi_ST0_FT0(cpu_env);
6301                 gen_helper_fpop(cpu_env);
6302                 set_cc_op(s, CC_OP_EFLAGS);
6303                 break;
6304             case 0x3e: /* fcomip */
6305                 if (!(s->cpuid_features & CPUID_CMOV)) {
6306                     goto illegal_op;
6307                 }
6308                 gen_update_cc_op(s);
6309                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6310                 gen_helper_fcomi_ST0_FT0(cpu_env);
6311                 gen_helper_fpop(cpu_env);
6312                 set_cc_op(s, CC_OP_EFLAGS);
6313                 break;
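                 /* FCMOVcc: the da/0..3 forms move ST(i) to ST0 when CF, ZF,
                    CF|ZF or PF is set; the db/0..3 forms are the inverted
                    conditions.  A jump around the fmov, taken when the move
                    condition is false, implements the conditional move.  */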
6314             case 0x10 ... 0x13: /* fcmovxx */
6315             case 0x18 ... 0x1b:
6316                 {
6317                     int op1;
6318                     TCGLabel *l1;
6319                     static const uint8_t fcmov_cc[8] = {
6320                         (JCC_B << 1),
6321                         (JCC_Z << 1),
6322                         (JCC_BE << 1),
6323                         (JCC_P << 1),
6324                     };
6325 
6326                     if (!(s->cpuid_features & CPUID_CMOV)) {
6327                         goto illegal_op;
6328                     }
6329                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6330                     l1 = gen_new_label();
6331                     gen_jcc1_noeob(s, op1, l1);
6332                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6333                     gen_set_label(l1);
6334                 }
6335                 break;
6336             default:
6337                 goto unknown_op;
6338             }
6339         }
6340         break;
6341         /************************/
6342         /* string ops */
6343 
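         /* With a REPZ/REPNZ prefix, the gen_repz_* helpers emit the
            ECX-decrementing loop and take both the current and the next EIP
            so the generated code can re-execute the insn or fall through;
            without a prefix a single iteration is emitted.  */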
6344     case 0xa4: /* movsS */
6345     case 0xa5:
6346         ot = mo_b_d(b, dflag);
6347         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6348             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6349         } else {
6350             gen_movs(s, ot);
6351         }
6352         break;
6353 
6354     case 0xaa: /* stosS */
6355     case 0xab:
6356         ot = mo_b_d(b, dflag);
6357         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6358             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6359         } else {
6360             gen_stos(s, ot);
6361         }
6362         break;
6363     case 0xac: /* lodsS */
6364     case 0xad:
6365         ot = mo_b_d(b, dflag);
6366         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6367             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6368         } else {
6369             gen_lods(s, ot);
6370         }
6371         break;
6372     case 0xae: /* scasS */
6373     case 0xaf:
6374         ot = mo_b_d(b, dflag);
6375         if (prefixes & PREFIX_REPNZ) {
6376             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6377         } else if (prefixes & PREFIX_REPZ) {
6378             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6379         } else {
6380             gen_scas(s, ot);
6381         }
6382         break;
6383 
6384     case 0xa6: /* cmpsS */
6385     case 0xa7:
6386         ot = mo_b_d(b, dflag);
6387         if (prefixes & PREFIX_REPNZ) {
6388             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6389         } else if (prefixes & PREFIX_REPZ) {
6390             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6391         } else {
6392             gen_cmps(s, ot);
6393         }
6394         break;
6395     case 0x6c: /* insS */
6396     case 0x6d:
6397         ot = mo_b_d32(b, dflag);
6398         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6399         gen_check_io(s, ot, pc_start - s->cs_base,
6400                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
6401         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6402             gen_io_start();
6403         }
6404         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6405             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6406             /* jump generated by gen_repz_ins */
6407         } else {
6408             gen_ins(s, ot);
6409             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6410                 gen_jmp(s, s->pc - s->cs_base);
6411             }
6412         }
6413         break;
6414     case 0x6e: /* outsS */
6415     case 0x6f:
6416         ot = mo_b_d32(b, dflag);
6417         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6418         gen_check_io(s, ot, pc_start - s->cs_base,
6419                      svm_is_rep(prefixes) | 4);
6420         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6421             gen_io_start();
6422         }
6423         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6424             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6425             /* jump generated by gen_repz_outs */
6426         } else {
6427             gen_outs(s, ot);
6428             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6429                 gen_jmp(s, s->pc - s->cs_base);
6430             }
6431         }
6432         break;
6433 
6434         /************************/
6435         /* port I/O */
6436 
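         /* gen_check_io validates I/O permission (IOPL, or the TSS I/O bitmap
            in protected mode) and the SVM IOIO intercept before the access;
            with icount, the access is bracketed by gen_io_start and the block
            is ended afterwards so I/O stays deterministic.  */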
6437     case 0xe4:
6438     case 0xe5:
6439         ot = mo_b_d32(b, dflag);
6440         val = x86_ldub_code(env, s);
6441         tcg_gen_movi_tl(s->T0, val);
6442         gen_check_io(s, ot, pc_start - s->cs_base,
6443                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6444         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6445             gen_io_start();
6446         }
6447         tcg_gen_movi_i32(s->tmp2_i32, val);
6448         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6449         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6450         gen_bpt_io(s, s->tmp2_i32, ot);
6451         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6452             gen_jmp(s, s->pc - s->cs_base);
6453         }
6454         break;
6455     case 0xe6:
6456     case 0xe7:
6457         ot = mo_b_d32(b, dflag);
6458         val = x86_ldub_code(env, s);
6459         tcg_gen_movi_tl(s->T0, val);
6460         gen_check_io(s, ot, pc_start - s->cs_base,
6461                      svm_is_rep(prefixes));
6462         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6463 
6464         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6465             gen_io_start();
6466         }
6467         tcg_gen_movi_i32(s->tmp2_i32, val);
6468         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6469         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6470         gen_bpt_io(s, s->tmp2_i32, ot);
6471         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6472             gen_jmp(s, s->pc - s->cs_base);
6473         }
6474         break;
6475     case 0xec:
6476     case 0xed:
6477         ot = mo_b_d32(b, dflag);
6478         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6479         gen_check_io(s, ot, pc_start - s->cs_base,
6480                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
6481         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6482             gen_io_start();
6483         }
6484         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6485         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6486         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6487         gen_bpt_io(s, s->tmp2_i32, ot);
6488         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6489             gen_jmp(s, s->pc - s->cs_base);
6490         }
6491         break;
6492     case 0xee:
6493     case 0xef:
6494         ot = mo_b_d32(b, dflag);
6495         tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]);
6496         gen_check_io(s, ot, pc_start - s->cs_base,
6497                      svm_is_rep(prefixes));
6498         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6499 
6500         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6501             gen_io_start();
6502         }
6503         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
6504         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6505         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6506         gen_bpt_io(s, s->tmp2_i32, ot);
6507         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6508             gen_jmp(s, s->pc - s->cs_base);
6509         }
6510         break;
6511 
6512         /************************/
6513         /* control */
6514     case 0xc2: /* ret im */
6515         val = x86_ldsw_code(env, s);
6516         ot = gen_pop_T0(s);
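             /* Release the popped return address (1 << ot bytes) plus val
                bytes of callee arguments in a single stack adjustment.  */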
6517         gen_stack_update(s, val + (1 << ot));
6518         /* Note that gen_pop_T0 uses a zero-extending load.  */
6519         gen_op_jmp_v(s->T0);
6520         gen_bnd_jmp(s);
6521         gen_jr(s, s->T0);
6522         break;
6523     case 0xc3: /* ret */
6524         ot = gen_pop_T0(s);
6525         gen_pop_update(s, ot);
6526         /* Note that gen_pop_T0 uses a zero-extending load.  */
6527         gen_op_jmp_v(s->T0);
6528         gen_bnd_jmp(s);
6529         gen_jr(s, s->T0);
6530         break;
6531     case 0xca: /* lret im */
6532         val = x86_ldsw_code(env, s);
6533     do_lret:
6534         if (s->pe && !s->vm86) {
6535             gen_update_cc_op(s);
6536             gen_jmp_im(s, pc_start - s->cs_base);
6537             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6538                                       tcg_const_i32(val));
6539         } else {
6540             gen_stack_A0(s);
6541             /* pop offset */
6542             gen_op_ld_v(s, dflag, s->T0, s->A0);
6543             /* NOTE: keeping EIP updated is not a problem in case of
6544                exception */
6545             gen_op_jmp_v(s->T0);
6546             /* pop selector */
6547             gen_add_A0_im(s, 1 << dflag);
6548             gen_op_ld_v(s, dflag, s->T0, s->A0);
6549             gen_op_movl_seg_T0_vm(s, R_CS);
6550             /* add stack offset */
6551             gen_stack_update(s, val + (2 << dflag));
6552         }
6553         gen_eob(s);
6554         break;
6555     case 0xcb: /* lret */
6556         val = 0;
6557         goto do_lret;
6558     case 0xcf: /* iret */
6559         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
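             /* Three flavours: real mode, vm86 (gated on IOPL == 3), and
                protected mode via the full iret helper.  */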
6560         if (!s->pe) {
6561             /* real mode */
6562             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6563             set_cc_op(s, CC_OP_EFLAGS);
6564         } else if (s->vm86) {
6565             if (s->iopl != 3) {
6566                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6567             } else {
6568                 gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6569                 set_cc_op(s, CC_OP_EFLAGS);
6570             }
6571         } else {
6572             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6573                                       tcg_const_i32(s->pc - s->cs_base));
6574             set_cc_op(s, CC_OP_EFLAGS);
6575         }
6576         gen_eob(s);
6577         break;
6578     case 0xe8: /* call im */
6579         {
6580             if (dflag != MO_16) {
6581                 tval = (int32_t)insn_get(env, s, MO_32);
6582             } else {
6583                 tval = (int16_t)insn_get(env, s, MO_16);
6584             }
6585             next_eip = s->pc - s->cs_base;
6586             tval += next_eip;
6587             if (dflag == MO_16) {
6588                 tval &= 0xffff;
6589             } else if (!CODE64(s)) {
6590                 tval &= 0xffffffff;
6591             }
6592             tcg_gen_movi_tl(s->T0, next_eip);
6593             gen_push_v(s, s->T0);
6594             gen_bnd_jmp(s);
6595             gen_jmp(s, tval);
6596         }
6597         break;
6598     case 0x9a: /* lcall im */
6599         {
6600             unsigned int selector, offset;
6601 
6602             if (CODE64(s))
6603                 goto illegal_op;
6604             ot = dflag;
6605             offset = insn_get(env, s, ot);
6606             selector = insn_get(env, s, MO_16);
6607 
6608             tcg_gen_movi_tl(s->T0, selector);
6609             tcg_gen_movi_tl(s->T1, offset);
6610         }
6611         goto do_lcall;
6612     case 0xe9: /* jmp im */
6613         if (dflag != MO_16) {
6614             tval = (int32_t)insn_get(env, s, MO_32);
6615         } else {
6616             tval = (int16_t)insn_get(env, s, MO_16);
6617         }
6618         tval += s->pc - s->cs_base;
6619         if (dflag == MO_16) {
6620             tval &= 0xffff;
6621         } else if (!CODE64(s)) {
6622             tval &= 0xffffffff;
6623         }
6624         gen_bnd_jmp(s);
6625         gen_jmp(s, tval);
6626         break;
6627     case 0xea: /* ljmp im */
6628         {
6629             unsigned int selector, offset;
6630 
6631             if (CODE64(s))
6632                 goto illegal_op;
6633             ot = dflag;
6634             offset = insn_get(env, s, ot);
6635             selector = insn_get(env, s, MO_16);
6636 
6637             tcg_gen_movi_tl(s->T0, selector);
6638             tcg_gen_movi_tl(s->T1, offset);
6639         }
6640         goto do_ljmp;
6641     case 0xeb: /* jmp Jb */
6642         tval = (int8_t)insn_get(env, s, MO_8);
6643         tval += s->pc - s->cs_base;
6644         if (dflag == MO_16) {
6645             tval &= 0xffff;
6646         }
6647         gen_jmp(s, tval);
6648         break;
6649     case 0x70 ... 0x7f: /* jcc Jb */
6650         tval = (int8_t)insn_get(env, s, MO_8);
6651         goto do_jcc;
6652     case 0x180 ... 0x18f: /* jcc Jv */
6653         if (dflag != MO_16) {
6654             tval = (int32_t)insn_get(env, s, MO_32);
6655         } else {
6656             tval = (int16_t)insn_get(env, s, MO_16);
6657         }
6658     do_jcc:
6659         next_eip = s->pc - s->cs_base;
6660         tval += next_eip;
6661         if (dflag == MO_16) {
6662             tval &= 0xffff;
6663         }
6664         gen_bnd_jmp(s);
6665         gen_jcc(s, b, tval, next_eip);
6666         break;
6667 
6668     case 0x190 ... 0x19f: /* setcc Eb */
6669         modrm = x86_ldub_code(env, s);
6670         gen_setcc1(s, b, s->T0);
6671         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6672         break;
6673     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6674         if (!(s->cpuid_features & CPUID_CMOV)) {
6675             goto illegal_op;
6676         }
6677         ot = dflag;
6678         modrm = x86_ldub_code(env, s);
6679         reg = ((modrm >> 3) & 7) | rex_r;
6680         gen_cmovcc1(env, s, ot, b, modrm, reg);
6681         break;
6682 
6683         /************************/
6684         /* flags */
6685     case 0x9c: /* pushf */
6686         gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF);
6687         if (s->vm86 && s->iopl != 3) {
6688             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6689         } else {
6690             gen_update_cc_op(s);
6691             gen_helper_read_eflags(s->T0, cpu_env);
6692             gen_push_v(s, s->T0);
6693         }
6694         break;
6695     case 0x9d: /* popf */
6696         gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF);
6697         if (s->vm86 && s->iopl != 3) {
6698             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
6699         } else {
6700             ot = gen_pop_T0(s);
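                 /* The writable flag set depends on privilege: CPL 0 may
                    change IF and IOPL; 0 < CPL <= IOPL may change IF but not
                    IOPL; otherwise neither IF nor IOPL is changed.  */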
6701             if (s->cpl == 0) {
6702                 if (dflag != MO_16) {
6703                     gen_helper_write_eflags(cpu_env, s->T0,
6704                                             tcg_const_i32((TF_MASK | AC_MASK |
6705                                                            ID_MASK | NT_MASK |
6706                                                            IF_MASK |
6707                                                            IOPL_MASK)));
6708                 } else {
6709                     gen_helper_write_eflags(cpu_env, s->T0,
6710                                             tcg_const_i32((TF_MASK | AC_MASK |
6711                                                            ID_MASK | NT_MASK |
6712                                                            IF_MASK | IOPL_MASK)
6713                                                           & 0xffff));
6714                 }
6715             } else {
6716                 if (s->cpl <= s->iopl) {
6717                     if (dflag != MO_16) {
6718                         gen_helper_write_eflags(cpu_env, s->T0,
6719                                                 tcg_const_i32((TF_MASK |
6720                                                                AC_MASK |
6721                                                                ID_MASK |
6722                                                                NT_MASK |
6723                                                                IF_MASK)));
6724                     } else {
6725                         gen_helper_write_eflags(cpu_env, s->T0,
6726                                                 tcg_const_i32((TF_MASK |
6727                                                                AC_MASK |
6728                                                                ID_MASK |
6729                                                                NT_MASK |
6730                                                                IF_MASK)
6731                                                               & 0xffff));
6732                     }
6733                 } else {
6734                     if (dflag != MO_16) {
6735                         gen_helper_write_eflags(cpu_env, s->T0,
6736                                            tcg_const_i32((TF_MASK | AC_MASK |
6737                                                           ID_MASK | NT_MASK)));
6738                     } else {
6739                         gen_helper_write_eflags(cpu_env, s->T0,
6740                                            tcg_const_i32((TF_MASK | AC_MASK |
6741                                                           ID_MASK | NT_MASK)
6742                                                          & 0xffff));
6743                     }
6744                 }
6745             }
6746             gen_pop_update(s, ot);
6747             set_cc_op(s, CC_OP_EFLAGS);
6748             /* abort translation because TF/AC flag may change */
6749             gen_jmp_im(s, s->pc - s->cs_base);
6750             gen_eob(s);
6751         }
6752         break;
6753     case 0x9e: /* sahf */
6754         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6755             goto illegal_op;
6756         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6757         gen_compute_eflags(s);
6758         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6759         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6760         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6761         break;
6762     case 0x9f: /* lahf */
6763         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6764             goto illegal_op;
6765         gen_compute_eflags(s);
6766         /* Note: gen_compute_eflags() only gives the condition codes */
6767         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6768         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6769         break;
6770     case 0xf5: /* cmc */
6771         gen_compute_eflags(s);
6772         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6773         break;
6774     case 0xf8: /* clc */
6775         gen_compute_eflags(s);
6776         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6777         break;
6778     case 0xf9: /* stc */
6779         gen_compute_eflags(s);
6780         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6781         break;
6782     case 0xfc: /* cld */
6783         tcg_gen_movi_i32(s->tmp2_i32, 1);
6784         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6785         break;
6786     case 0xfd: /* std */
6787         tcg_gen_movi_i32(s->tmp2_i32, -1);
6788         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6789         break;
6790 
6791         /************************/
6792         /* bit operations */
6793     case 0x1ba: /* bt/bts/btr/btc Ev, ib */
6794         ot = dflag;
6795         modrm = x86_ldub_code(env, s);
6796         op = (modrm >> 3) & 7;
6797         mod = (modrm >> 6) & 3;
6798         rm = (modrm & 7) | REX_B(s);
6799         if (mod != 3) {
6800             s->rip_offset = 1;
6801             gen_lea_modrm(env, s, modrm);
6802             if (!(s->prefix & PREFIX_LOCK)) {
6803                 gen_op_ld_v(s, ot, s->T0, s->A0);
6804             }
6805         } else {
6806             gen_op_mov_v_reg(s, ot, s->T0, rm);
6807         }
6808         /* load shift */
6809         val = x86_ldub_code(env, s);
6810         tcg_gen_movi_tl(s->T1, val);
6811         if (op < 4)
6812             goto unknown_op;
6813         op -= 4;
6814         goto bt_op;
6815     case 0x1a3: /* bt Ev, Gv */
6816         op = 0;
6817         goto do_btx;
6818     case 0x1ab: /* bts */
6819         op = 1;
6820         goto do_btx;
6821     case 0x1b3: /* btr */
6822         op = 2;
6823         goto do_btx;
6824     case 0x1bb: /* btc */
6825         op = 3;
6826     do_btx:
6827         ot = dflag;
6828         modrm = x86_ldub_code(env, s);
6829         reg = ((modrm >> 3) & 7) | rex_r;
6830         mod = (modrm >> 6) & 3;
6831         rm = (modrm & 7) | REX_B(s);
6832         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6833         if (mod != 3) {
6834             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6835             /* specific case: the register bit offset may index outside the memory operand, so add its byte displacement to the address */
6836             gen_exts(ot, s->T1);
6837             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6838             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6839             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6840             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6841             if (!(s->prefix & PREFIX_LOCK)) {
6842                 gen_op_ld_v(s, ot, s->T0, s->A0);
6843             }
6844         } else {
6845             gen_op_mov_v_reg(s, ot, s->T0, rm);
6846         }
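         /* Common tail for BT/BTS/BTR/BTC: T0 holds the operand (the load is
            still pending for LOCK-prefixed memory forms), T1 the bit offset
            reduced modulo the operand width, and tmp0 becomes the one-bit
            mask.  */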
6847     bt_op:
6848         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6849         tcg_gen_movi_tl(s->tmp0, 1);
6850         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6851         if (s->prefix & PREFIX_LOCK) {
6852             switch (op) {
6853             case 0: /* bt */
6854                 /* Needs no atomic ops; we surpressed the normal
6855                 /* Needs no atomic ops; we suppressed the normal
6856                 gen_op_ld_v(s, ot, s->T0, s->A0);
6857                 break;
6858             case 1: /* bts */
6859                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6860                                            s->mem_index, ot | MO_LE);
6861                 break;
6862             case 2: /* btr */
6863                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6864                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6865                                             s->mem_index, ot | MO_LE);
6866                 break;
6867             default:
6868             case 3: /* btc */
6869                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6870                                             s->mem_index, ot | MO_LE);
6871                 break;
6872             }
6873             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6874         } else {
6875             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6876             switch (op) {
6877             case 0: /* bt */
6878                 /* Data already loaded; nothing to do.  */
6879                 break;
6880             case 1: /* bts */
6881                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6882                 break;
6883             case 2: /* btr */
6884                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6885                 break;
6886             default:
6887             case 3: /* btc */
6888                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6889                 break;
6890             }
6891             if (op != 0) {
6892                 if (mod != 3) {
6893                     gen_op_st_v(s, ot, s->T0, s->A0);
6894                 } else {
6895                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6896                 }
6897             }
6898         }
6899 
6900         /* Delay all CC updates until after the store above.  Note that
6901            C is the result of the test, Z is unchanged, and the others
6902            are all undefined.  */
6903         switch (s->cc_op) {
6904         case CC_OP_MULB ... CC_OP_MULQ:
6905         case CC_OP_ADDB ... CC_OP_ADDQ:
6906         case CC_OP_ADCB ... CC_OP_ADCQ:
6907         case CC_OP_SUBB ... CC_OP_SUBQ:
6908         case CC_OP_SBBB ... CC_OP_SBBQ:
6909         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6910         case CC_OP_INCB ... CC_OP_INCQ:
6911         case CC_OP_DECB ... CC_OP_DECQ:
6912         case CC_OP_SHLB ... CC_OP_SHLQ:
6913         case CC_OP_SARB ... CC_OP_SARQ:
6914         case CC_OP_BMILGB ... CC_OP_BMILGQ:
6915             /* Z was going to be computed from the non-zero status of CC_DST.
6916                We can get that same Z value (and the new C value) by leaving
6917                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
6918                same width.  */
6919             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
6920             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
6921             break;
6922         default:
6923             /* Otherwise, generate EFLAGS and replace the C bit.  */
6924             gen_compute_eflags(s);
6925             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
6926                                ctz32(CC_C), 1);
6927             break;
6928         }
6929         break;
6930     case 0x1bc: /* bsf / tzcnt */
6931     case 0x1bd: /* bsr / lzcnt */
6932         ot = dflag;
6933         modrm = x86_ldub_code(env, s);
6934         reg = ((modrm >> 3) & 7) | rex_r;
6935         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
6936         gen_extu(ot, s->T0);
6937 
6938         /* Note that lzcnt and tzcnt are in different extensions.  */
6939         if ((prefixes & PREFIX_REPZ)
6940             && (b & 1
6941                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
6942                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
6943             int size = 8 << ot;
6944             /* For lzcnt/tzcnt, C bit is defined related to the input. */
6945             tcg_gen_mov_tl(cpu_cc_src, s->T0);
6946             if (b & 1) {
6947                 /* For lzcnt, reduce the target_ulong result by the
6948                    number of zeros that we expect to find at the top.  */
6949                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
6950                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
6951             } else {
6952                 /* For tzcnt, a zero input must return the operand size.  */
6953                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
6954             }
6955             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
6956             gen_op_update1_cc(s);
6957             set_cc_op(s, CC_OP_BMILGB + ot);
6958         } else {
6959             /* For bsr/bsf, only the Z bit is defined and it is related
6960                to the input and not the result.  */
6961             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
6962             set_cc_op(s, CC_OP_LOGICB + ot);
6963 
6964             /* ??? The manual says that the output is undefined when the
6965                input is zero, but real hardware leaves it unchanged, and
6966                real programs appear to depend on that.  Accomplish this
6967                by passing the output as the value to return upon zero.  */
6968             if (b & 1) {
6969                 /* For bsr, return the bit index of the first 1 bit,
6970                    not the count of leading zeros.  */
6971                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
6972                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
6973                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
6974             } else {
6975                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
6976             }
6977         }
6978         gen_op_mov_reg_v(s, ot, reg, s->T0);
6979         break;
6980         /************************/
6981         /* bcd */
6982     case 0x27: /* daa */
6983         if (CODE64(s))
6984             goto illegal_op;
6985         gen_update_cc_op(s);
6986         gen_helper_daa(cpu_env);
6987         set_cc_op(s, CC_OP_EFLAGS);
6988         break;
6989     case 0x2f: /* das */
6990         if (CODE64(s))
6991             goto illegal_op;
6992         gen_update_cc_op(s);
6993         gen_helper_das(cpu_env);
6994         set_cc_op(s, CC_OP_EFLAGS);
6995         break;
6996     case 0x37: /* aaa */
6997         if (CODE64(s))
6998             goto illegal_op;
6999         gen_update_cc_op(s);
7000         gen_helper_aaa(cpu_env);
7001         set_cc_op(s, CC_OP_EFLAGS);
7002         break;
7003     case 0x3f: /* aas */
7004         if (CODE64(s))
7005             goto illegal_op;
7006         gen_update_cc_op(s);
7007         gen_helper_aas(cpu_env);
7008         set_cc_op(s, CC_OP_EFLAGS);
7009         break;
7010     case 0xd4: /* aam */
7011         if (CODE64(s))
7012             goto illegal_op;
7013         val = x86_ldub_code(env, s);
7014         if (val == 0) {
7015             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7016         } else {
7017             gen_helper_aam(cpu_env, tcg_const_i32(val));
7018             set_cc_op(s, CC_OP_LOGICB);
7019         }
7020         break;
7021     case 0xd5: /* aad */
7022         if (CODE64(s))
7023             goto illegal_op;
7024         val = x86_ldub_code(env, s);
7025         gen_helper_aad(cpu_env, tcg_const_i32(val));
7026         set_cc_op(s, CC_OP_LOGICB);
7027         break;
7028         /************************/
7029         /* misc */
7030     case 0x90: /* nop */
7031         /* XXX: check the LOCK prefix correctly for all insns */
7032         if (prefixes & PREFIX_LOCK) {
7033             goto illegal_op;
7034         }
7035         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7036         if (REX_B(s)) {
7037             goto do_xchg_reg_eax;
7038         }
7039         if (prefixes & PREFIX_REPZ) {
7040             gen_update_cc_op(s);
7041             gen_jmp_im(s, pc_start - s->cs_base);
7042             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7043             s->base.is_jmp = DISAS_NORETURN;
7044         }
7045         break;
7046     case 0x9b: /* fwait */
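             /* WAIT/FWAIT takes #NM when both CR0.MP and CR0.TS are set;
                otherwise it lets any pending unmasked x87 exception be
                delivered.  */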
7047         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7048             (HF_MP_MASK | HF_TS_MASK)) {
7049             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7050         } else {
7051             gen_helper_fwait(cpu_env);
7052         }
7053         break;
7054     case 0xcc: /* int3 */
7055         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7056         break;
7057     case 0xcd: /* int N */
7058         val = x86_ldub_code(env, s);
7059         if (s->vm86 && s->iopl != 3) {
7060             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7061         } else {
7062             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7063         }
7064         break;
7065     case 0xce: /* into */
7066         if (CODE64(s))
7067             goto illegal_op;
7068         gen_update_cc_op(s);
7069         gen_jmp_im(s, pc_start - s->cs_base);
7070         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7071         break;
7072 #ifdef WANT_ICEBP
7073     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7074         gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP);
7075         gen_debug(s, pc_start - s->cs_base);
7076         break;
7077 #endif
7078     case 0xfa: /* cli */
7079         if (!s->vm86) {
7080             if (s->cpl <= s->iopl) {
7081                 gen_helper_cli(cpu_env);
7082             } else {
7083                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7084             }
7085         } else {
7086             if (s->iopl == 3) {
7087                 gen_helper_cli(cpu_env);
7088             } else {
7089                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7090             }
7091         }
7092         break;
7093     case 0xfb: /* sti */
7094         if (s->vm86 ? s->iopl == 3 : s->cpl <= s->iopl) {
7095             gen_helper_sti(cpu_env);
7096             /* interrupts are enabled only after the insn following STI */
7097             gen_jmp_im(s, s->pc - s->cs_base);
7098             gen_eob_inhibit_irq(s, true);
7099         } else {
7100             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7101         }
7102         break;
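         /* BOUND compares a signed array index in the register against the
            two bounds at the memory operand and raises #BR if it is out of
            range.  */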
7103     case 0x62: /* bound */
7104         if (CODE64(s))
7105             goto illegal_op;
7106         ot = dflag;
7107         modrm = x86_ldub_code(env, s);
7108         reg = (modrm >> 3) & 7;
7109         mod = (modrm >> 6) & 3;
7110         if (mod == 3)
7111             goto illegal_op;
7112         gen_op_mov_v_reg(s, ot, s->T0, reg);
7113         gen_lea_modrm(env, s, modrm);
7114         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7115         if (ot == MO_16) {
7116             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7117         } else {
7118             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7119         }
7120         break;
7121     case 0x1c8 ... 0x1cf: /* bswap reg */
7122         reg = (b & 7) | REX_B(s);
7123 #ifdef TARGET_X86_64
7124         if (dflag == MO_64) {
7125             gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7126             tcg_gen_bswap64_i64(s->T0, s->T0);
7127             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7128         } else
7129 #endif
7130         {
7131             gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7132             tcg_gen_ext32u_tl(s->T0, s->T0);
7133             tcg_gen_bswap32_tl(s->T0, s->T0);
7134             gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7135         }
7136         break;
7137     case 0xd6: /* salc */
7138         if (CODE64(s))
7139             goto illegal_op;
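             /* AL = CF ? 0xff : 0x00: materialize CF as 0/1, then negate it
                to spread the bit across the byte.  */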
7140         gen_compute_eflags_c(s, s->T0);
7141         tcg_gen_neg_tl(s->T0, s->T0);
7142         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7143         break;
7144     case 0xe0: /* loopnz */
7145     case 0xe1: /* loopz */
7146     case 0xe2: /* loop */
7147     case 0xe3: /* jecxz */
7148         {
7149             TCGLabel *l1, *l2, *l3;
7150 
7151             tval = (int8_t)insn_get(env, s, MO_8);
7152             next_eip = s->pc - s->cs_base;
7153             tval += next_eip;
7154             if (dflag == MO_16) {
7155                 tval &= 0xffff;
7156             }
7157 
7158             l1 = gen_new_label();
7159             l2 = gen_new_label();
7160             l3 = gen_new_label();
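                 /* l1: branch taken (EIP = tval); l3: condition failed, fall
                    through to the next insn; l2: common end of block.  */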
7161             gen_update_cc_op(s);
7162             b &= 3;
7163             switch(b) {
7164             case 0: /* loopnz */
7165             case 1: /* loopz */
7166                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7167                 gen_op_jz_ecx(s, s->aflag, l3);
7168                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7169                 break;
7170             case 2: /* loop */
7171                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7172                 gen_op_jnz_ecx(s, s->aflag, l1);
7173                 break;
7174             default:
7175             case 3: /* jcxz */
7176                 gen_op_jz_ecx(s, s->aflag, l1);
7177                 break;
7178             }
7179 
7180             gen_set_label(l3);
7181             gen_jmp_im(s, next_eip);
7182             tcg_gen_br(l2);
7183 
7184             gen_set_label(l1);
7185             gen_jmp_im(s, tval);
7186             gen_set_label(l2);
7187             gen_eob(s);
7188         }
7189         break;
7190     case 0x130: /* wrmsr */
7191     case 0x132: /* rdmsr */
7192         if (s->cpl != 0) {
7193             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7194         } else {
7195             gen_update_cc_op(s);
7196             gen_jmp_im(s, pc_start - s->cs_base);
7197             if (b & 2) {
7198                 gen_helper_rdmsr(cpu_env);
7199             } else {
7200                 gen_helper_wrmsr(cpu_env);
7201             }
7202         }
7203         break;
7204     case 0x131: /* rdtsc */
7205         gen_update_cc_op(s);
7206         gen_jmp_im(s, pc_start - s->cs_base);
7207         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7208             gen_io_start();
7209         }
7210         gen_helper_rdtsc(cpu_env);
7211         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7212             gen_jmp(s, s->pc - s->cs_base);
7213         }
7214         break;
7215     case 0x133: /* rdpmc */
7216         gen_update_cc_op(s);
7217         gen_jmp_im(s, pc_start - s->cs_base);
7218         gen_helper_rdpmc(cpu_env);
7219         break;
7220     case 0x134: /* sysenter */
7221         /* On Intel CPUs, SYSENTER is also valid in 64-bit mode */
7222         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7223             goto illegal_op;
7224         if (!s->pe) {
7225             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7226         } else {
7227             gen_helper_sysenter(cpu_env);
7228             gen_eob(s);
7229         }
7230         break;
7231     case 0x135: /* sysexit */
7232         /* On Intel CPUs, SYSEXIT is also valid in 64-bit mode */
7233         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7234             goto illegal_op;
7235         if (!s->pe) {
7236             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7237         } else {
7238             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7239             gen_eob(s);
7240         }
7241         break;
7242 #ifdef TARGET_X86_64
7243     case 0x105: /* syscall */
7244         /* XXX: is it usable in real mode? */
7245         gen_update_cc_op(s);
7246         gen_jmp_im(s, pc_start - s->cs_base);
7247         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7248         /* TF handling for the syscall insn is different. The TF bit is checked
7249            after the syscall insn completes. This allows #DB to not be
7250            generated after one has entered CPL0 if TF is set in FMASK.  */
7251         gen_eob_worker(s, false, true);
7252         break;
7253     case 0x107: /* sysret */
7254         if (!s->pe) {
7255             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7256         } else {
7257             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7258             /* condition codes are modified only in long mode */
7259             if (s->lma) {
7260                 set_cc_op(s, CC_OP_EFLAGS);
7261             }
7262             /* TF handling for the sysret insn is different. The TF bit is
7263                checked after the sysret insn completes. This allows #DB to be
7264                generated "as if" the syscall insn in userspace has just
7265                completed.  */
7266             gen_eob_worker(s, false, true);
7267         }
7268         break;
7269 #endif
7270     case 0x1a2: /* cpuid */
7271         gen_update_cc_op(s);
7272         gen_jmp_im(s, pc_start - s->cs_base);
7273         gen_helper_cpuid(cpu_env);
7274         break;
7275     case 0xf4: /* hlt */
7276         if (s->cpl != 0) {
7277             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7278         } else {
7279             gen_update_cc_op(s);
7280             gen_jmp_im(s, pc_start - s->cs_base);
7281             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7282             s->base.is_jmp = DISAS_NORETURN;
7283         }
7284         break;
7285     case 0x100:
7286         modrm = x86_ldub_code(env, s);
7287         mod = (modrm >> 6) & 3;
7288         op = (modrm >> 3) & 7;
7289         switch(op) {
7290         case 0: /* sldt */
7291             if (!s->pe || s->vm86)
7292                 goto illegal_op;
7293             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
7294             tcg_gen_ld32u_tl(s->T0, cpu_env,
7295                              offsetof(CPUX86State, ldt.selector));
7296             ot = mod == 3 ? dflag : MO_16;
7297             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7298             break;
7299         case 2: /* lldt */
7300             if (!s->pe || s->vm86)
7301                 goto illegal_op;
7302             if (s->cpl != 0) {
7303                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7304             } else {
7305                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
7306                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7307                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7308                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7309             }
7310             break;
7311         case 1: /* str */
7312             if (!s->pe || s->vm86)
7313                 goto illegal_op;
7314             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
7315             tcg_gen_ld32u_tl(s->T0, cpu_env,
7316                              offsetof(CPUX86State, tr.selector));
7317             ot = mod == 3 ? dflag : MO_16;
7318             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7319             break;
7320         case 3: /* ltr */
7321             if (!s->pe || s->vm86)
7322                 goto illegal_op;
7323             if (s->cpl != 0) {
7324                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7325             } else {
7326                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
7327                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7328                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7329                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7330             }
7331             break;
7332         case 4: /* verr */
7333         case 5: /* verw */
7334             if (!s->pe || s->vm86)
7335                 goto illegal_op;
7336             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7337             gen_update_cc_op(s);
7338             if (op == 4) {
7339                 gen_helper_verr(cpu_env, s->T0);
7340             } else {
7341                 gen_helper_verw(cpu_env, s->T0);
7342             }
7343             set_cc_op(s, CC_OP_EFLAGS);
7344             break;
7345         default:
7346             goto unknown_op;
7347         }
7348         break;
7349 
7350     case 0x101:
7351         modrm = x86_ldub_code(env, s);
7352         switch (modrm) {
7353         CASE_MODRM_MEM_OP(0): /* sgdt */
7354             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
7355             gen_lea_modrm(env, s, modrm);
7356             tcg_gen_ld32u_tl(s->T0,
7357                              cpu_env, offsetof(CPUX86State, gdt.limit));
7358             gen_op_st_v(s, MO_16, s->T0, s->A0);
7359             gen_add_A0_im(s, 2);
7360             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
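                 /* The base is stored as 4 bytes, or 8 in 64-bit mode
                    (CODE64(s) + MO_32 == MO_64 there); with a 16-bit
                    operand size only the low 24 bits are kept, matching
                    SGDT's 24-bit base store.  */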
7361             if (dflag == MO_16) {
7362                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7363             }
7364             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7365             break;
7366 
7367         case 0xc8: /* monitor */
7368             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7369                 goto illegal_op;
7370             }
7371             gen_update_cc_op(s);
7372             gen_jmp_im(s, pc_start - s->cs_base);
7373             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7374             gen_extu(s->aflag, s->A0);
7375             gen_add_A0_ds_seg(s);
7376             gen_helper_monitor(cpu_env, s->A0);
7377             break;
7378 
7379         case 0xc9: /* mwait */
7380             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) {
7381                 goto illegal_op;
7382             }
7383             gen_update_cc_op(s);
7384             gen_jmp_im(s, pc_start - s->cs_base);
7385             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7386             gen_eob(s);
7387             break;
7388 
7389         case 0xca: /* clac */
7390             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7391                 || s->cpl != 0) {
7392                 goto illegal_op;
7393             }
7394             gen_helper_clac(cpu_env);
7395             gen_jmp_im(s, s->pc - s->cs_base);
7396             gen_eob(s);
7397             break;
7398 
7399         case 0xcb: /* stac */
7400             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7401                 || s->cpl != 0) {
7402                 goto illegal_op;
7403             }
7404             gen_helper_stac(cpu_env);
7405             gen_jmp_im(s, s->pc - s->cs_base);
7406             gen_eob(s);
7407             break;
7408 
7409         CASE_MODRM_MEM_OP(1): /* sidt */
7410             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
7411             gen_lea_modrm(env, s, modrm);
7412             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7413             gen_op_st_v(s, MO_16, s->T0, s->A0);
7414             gen_add_A0_im(s, 2);
7415             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7416             if (dflag == MO_16) {
7417                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7418             }
7419             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7420             break;
7421 
7422         case 0xd0: /* xgetbv */
7423             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7424                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7425                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7426                 goto illegal_op;
7427             }
7428             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7429             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
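                 /* Split the 64-bit XCR value into EDX:EAX.  */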
7430             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7431             break;
7432 
7433         case 0xd1: /* xsetbv */
7434             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7435                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7436                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7437                 goto illegal_op;
7438             }
7439             if (s->cpl != 0) {
7440                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7441                 break;
7442             }
7443             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7444                                   cpu_regs[R_EDX]);
7445             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7446             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7447             /* End TB because translation flags may change.  */
7448             gen_jmp_im(s, s->pc - s->cs_base);
7449             gen_eob(s);
7450             break;
7451 
7452         case 0xd8: /* VMRUN */
7453             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7454                 goto illegal_op;
7455             }
7456             if (s->cpl != 0) {
7457                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7458                 break;
7459             }
7460             gen_update_cc_op(s);
7461             gen_jmp_im(s, pc_start - s->cs_base);
7462             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7463                              tcg_const_i32(s->pc - pc_start));
7464             tcg_gen_exit_tb(NULL, 0);
7465             s->base.is_jmp = DISAS_NORETURN;
7466             break;
7467 
7468         case 0xd9: /* VMMCALL */
7469             if (!(s->flags & HF_SVME_MASK)) {
7470                 goto illegal_op;
7471             }
7472             gen_update_cc_op(s);
7473             gen_jmp_im(s, pc_start - s->cs_base);
7474             gen_helper_vmmcall(cpu_env);
7475             break;
7476 
7477         case 0xda: /* VMLOAD */
7478             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7479                 goto illegal_op;
7480             }
7481             if (s->cpl != 0) {
7482                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7483                 break;
7484             }
7485             gen_update_cc_op(s);
7486             gen_jmp_im(s, pc_start - s->cs_base);
7487             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7488             break;
7489 
7490         case 0xdb: /* VMSAVE */
7491             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7492                 goto illegal_op;
7493             }
7494             if (s->cpl != 0) {
7495                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7496                 break;
7497             }
7498             gen_update_cc_op(s);
7499             gen_jmp_im(s, pc_start - s->cs_base);
7500             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7501             break;
7502 
7503         case 0xdc: /* STGI */
7504             if ((!(s->flags & HF_SVME_MASK)
7505                    && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7506                 || !s->pe) {
7507                 goto illegal_op;
7508             }
7509             if (s->cpl != 0) {
7510                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7511                 break;
7512             }
7513             gen_update_cc_op(s);
7514             gen_helper_stgi(cpu_env);
7515             gen_jmp_im(s, s->pc - s->cs_base);
7516             gen_eob(s);
7517             break;
7518 
7519         case 0xdd: /* CLGI */
7520             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7521                 goto illegal_op;
7522             }
7523             if (s->cpl != 0) {
7524                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7525                 break;
7526             }
7527             gen_update_cc_op(s);
7528             gen_jmp_im(s, pc_start - s->cs_base);
7529             gen_helper_clgi(cpu_env);
7530             break;
7531 
7532         case 0xde: /* SKINIT */
7533             if ((!(s->flags & HF_SVME_MASK)
7534                  && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7535                 || !s->pe) {
7536                 goto illegal_op;
7537             }
7538             gen_update_cc_op(s);
7539             gen_jmp_im(s, pc_start - s->cs_base);
7540             gen_helper_skinit(cpu_env);
7541             break;
7542 
7543         case 0xdf: /* INVLPGA */
7544             if (!(s->flags & HF_SVME_MASK) || !s->pe) {
7545                 goto illegal_op;
7546             }
7547             if (s->cpl != 0) {
7548                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7549                 break;
7550             }
7551             gen_update_cc_op(s);
7552             gen_jmp_im(s, pc_start - s->cs_base);
7553             gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1));
7554             break;
7555 
7556         CASE_MODRM_MEM_OP(2): /* lgdt */
7557             if (s->cpl != 0) {
7558                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7559                 break;
7560             }
7561             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE);
7562             gen_lea_modrm(env, s, modrm);
7563             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7564             gen_add_A0_im(s, 2);
7565             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7566             if (dflag == MO_16) {
7567                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7568             }
7569             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7570             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7571             break;
7572 
7573         CASE_MODRM_MEM_OP(3): /* lidt */
7574             if (s->cpl != 0) {
7575                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7576                 break;
7577             }
7578             gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE);
7579             gen_lea_modrm(env, s, modrm);
7580             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7581             gen_add_A0_im(s, 2);
7582             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7583             if (dflag == MO_16) {
7584                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7585             }
7586             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7587             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7588             break;
7589 
7590         CASE_MODRM_OP(4): /* smsw */
7591             gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0);
7592             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7593             /*
7594              * In 32-bit mode, the higher 16 bits of the destination
7595              * register are undefined.  In practice CR0[31:0] is stored
7596              * just like in 64-bit mode.
7597              */
7598             mod = (modrm >> 6) & 3;
7599             ot = (mod != 3 ? MO_16 : s->dflag);
7600             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7601             break;
7602         case 0xee: /* rdpkru */
7603             if (prefixes & PREFIX_LOCK) {
7604                 goto illegal_op;
7605             }
7606             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7607             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7608             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7609             break;
7610         case 0xef: /* wrpkru */
7611             if (prefixes & PREFIX_LOCK) {
7612                 goto illegal_op;
7613             }
7614             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7615                                   cpu_regs[R_EDX]);
7616             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7617             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7618             break;
7619         CASE_MODRM_OP(6): /* lmsw */
7620             if (s->cpl != 0) {
7621                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7622                 break;
7623             }
7624             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
7625             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7626             gen_helper_lmsw(cpu_env, s->T0);
7627             gen_jmp_im(s, s->pc - s->cs_base);
7628             gen_eob(s);
7629             break;
7630 
7631         CASE_MODRM_MEM_OP(7): /* invlpg */
7632             if (s->cpl != 0) {
7633                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7634                 break;
7635             }
7636             gen_update_cc_op(s);
7637             gen_jmp_im(s, pc_start - s->cs_base);
7638             gen_lea_modrm(env, s, modrm);
7639             gen_helper_invlpg(cpu_env, s->A0);
7640             gen_jmp_im(s, s->pc - s->cs_base);
7641             gen_eob(s);
7642             break;
7643 
7644         case 0xf8: /* swapgs */
7645 #ifdef TARGET_X86_64
7646             if (CODE64(s)) {
7647                 if (s->cpl != 0) {
7648                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7649                 } else {
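                         /* Exchange the current GS base with the saved
                            MSR_KERNELGSBASE value.  */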
7650                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7651                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7652                                   offsetof(CPUX86State, kernelgsbase));
7653                     tcg_gen_st_tl(s->T0, cpu_env,
7654                                   offsetof(CPUX86State, kernelgsbase));
7655                 }
7656                 break;
7657             }
7658 #endif
7659             goto illegal_op;
7660 
7661         case 0xf9: /* rdtscp */
7662             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7663                 goto illegal_op;
7664             }
7665             gen_update_cc_op(s);
7666             gen_jmp_im(s, pc_start - s->cs_base);
7667             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7668                 gen_io_start();
7669             }
7670             gen_helper_rdtscp(cpu_env);
7671             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7672                 gen_jmp(s, s->pc - s->cs_base);
7673             }
7674             break;
7675 
7676         default:
7677             goto unknown_op;
7678         }
7679         break;
7680 
7681     case 0x108: /* invd */
7682     case 0x109: /* wbinvd */
7683         if (s->cpl != 0) {
7684             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
7685         } else {
7686             gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7687             /* nothing to do */
7688         }
7689         break;
7690     case 0x63: /* arpl or movslS (x86_64) */
7691 #ifdef TARGET_X86_64
7692         if (CODE64(s)) {
7693             int d_ot;
7694             /* d_ot is the size of the destination */
7695             d_ot = dflag;
7696 
7697             modrm = x86_ldub_code(env, s);
7698             reg = ((modrm >> 3) & 7) | rex_r;
7699             mod = (modrm >> 6) & 3;
7700             rm = (modrm & 7) | REX_B(s);
7701 
7702             if (mod == 3) {
7703                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7704                 /* sign extend */
7705                 if (d_ot == MO_64) {
7706                     tcg_gen_ext32s_tl(s->T0, s->T0);
7707                 }
7708                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7709             } else {
7710                 gen_lea_modrm(env, s, modrm);
7711                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7712                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7713             }
7714         } else
7715 #endif
7716         {
7717             TCGLabel *label1;
7718             TCGv t0, t1, t2, a0;
7719 
7720             if (!s->pe || s->vm86)
7721                 goto illegal_op;
7722             t0 = tcg_temp_local_new();
7723             t1 = tcg_temp_local_new();
7724             t2 = tcg_temp_local_new();
7725             ot = MO_16;
7726             modrm = x86_ldub_code(env, s);
7727             reg = (modrm >> 3) & 7;
7728             mod = (modrm >> 6) & 3;
7729             rm = modrm & 7;
7730             if (mod != 3) {
7731                 gen_lea_modrm(env, s, modrm);
7732                 gen_op_ld_v(s, ot, t0, s->A0);
7733                 a0 = tcg_temp_local_new();
7734                 tcg_gen_mov_tl(a0, s->A0);
7735             } else {
7736                 gen_op_mov_v_reg(s, ot, t0, rm);
7737                 a0 = NULL;
7738             }
7739             gen_op_mov_v_reg(s, ot, t1, reg);
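                 /* ARPL: if RPL(t0) < RPL(t1), raise t0's RPL to t1's
                    and set ZF (t2 = CC_Z, merged into cc_src below);
                    otherwise leave the destination unchanged, ZF clear.  */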
7740             tcg_gen_andi_tl(s->tmp0, t0, 3);
7741             tcg_gen_andi_tl(t1, t1, 3);
7742             tcg_gen_movi_tl(t2, 0);
7743             label1 = gen_new_label();
7744             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7745             tcg_gen_andi_tl(t0, t0, ~3);
7746             tcg_gen_or_tl(t0, t0, t1);
7747             tcg_gen_movi_tl(t2, CC_Z);
7748             gen_set_label(label1);
7749             if (mod != 3) {
7750                 gen_op_st_v(s, ot, t0, a0);
7751                 tcg_temp_free(a0);
7752             } else {
7753                 gen_op_mov_reg_v(s, ot, rm, t0);
7754             }
7755             gen_compute_eflags(s);
7756             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7757             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7758             tcg_temp_free(t0);
7759             tcg_temp_free(t1);
7760             tcg_temp_free(t2);
7761         }
7762         break;
7763     case 0x102: /* lar */
7764     case 0x103: /* lsl */
7765         {
7766             TCGLabel *label1;
7767             TCGv t0;
7768             if (!s->pe || s->vm86)
7769                 goto illegal_op;
7770             ot = dflag != MO_16 ? MO_32 : MO_16;
7771             modrm = x86_ldub_code(env, s);
7772             reg = ((modrm >> 3) & 7) | rex_r;
7773             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7774             t0 = tcg_temp_local_new();
7775             gen_update_cc_op(s);
7776             if (b == 0x102) {
7777                 gen_helper_lar(t0, cpu_env, s->T0);
7778             } else {
7779                 gen_helper_lsl(t0, cpu_env, s->T0);
7780             }
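                 /* The helper sets ZF in cpu_cc_src on success; the
                    destination is written only in that case.  */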
7781             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7782             label1 = gen_new_label();
7783             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7784             gen_op_mov_reg_v(s, ot, reg, t0);
7785             gen_set_label(label1);
7786             set_cc_op(s, CC_OP_EFLAGS);
7787             tcg_temp_free(t0);
7788         }
7789         break;
7790     case 0x118:
7791         modrm = x86_ldub_code(env, s);
7792         mod = (modrm >> 6) & 3;
7793         op = (modrm >> 3) & 7;
7794         switch(op) {
7795         case 0: /* prefetchnta */
7796         case 1: /* prefetcht0 */
7797         case 2: /* prefetcht1 */
7798         case 3: /* prefetcht2 */
7799             if (mod == 3)
7800                 goto illegal_op;
7801             gen_nop_modrm(env, s, modrm);
7802             /* nothing more to do */
7803             break;
7804         default: /* nop (multi byte) */
7805             gen_nop_modrm(env, s, modrm);
7806             break;
7807         }
7808         break;
7809     case 0x11a:
7810         modrm = x86_ldub_code(env, s);
7811         if (s->flags & HF_MPX_EN_MASK) {
7812             mod = (modrm >> 6) & 3;
7813             reg = ((modrm >> 3) & 7) | rex_r;
7814             if (prefixes & PREFIX_REPZ) {
7815                 /* bndcl */
7816                 if (reg >= 4
7817                     || (prefixes & PREFIX_LOCK)
7818                     || s->aflag == MO_16) {
7819                     goto illegal_op;
7820                 }
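                     /* #BR if the address is below the lower bound.  */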
7821                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7822             } else if (prefixes & PREFIX_REPNZ) {
7823                 /* bndcu */
7824                 if (reg >= 4
7825                     || (prefixes & PREFIX_LOCK)
7826                     || s->aflag == MO_16) {
7827                     goto illegal_op;
7828                 }
7829                 TCGv_i64 notu = tcg_temp_new_i64();
7830                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7831                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7832                 tcg_temp_free_i64(notu);
7833             } else if (prefixes & PREFIX_DATA) {
7834                 /* bndmov -- from reg/mem */
7835                 if (reg >= 4 || s->aflag == MO_16) {
7836                     goto illegal_op;
7837                 }
7838                 if (mod == 3) {
7839                     int reg2 = (modrm & 7) | REX_B(s);
7840                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7841                         goto illegal_op;
7842                     }
7843                     if (s->flags & HF_MPX_IU_MASK) {
7844                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7845                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7846                     }
7847                 } else {
7848                     gen_lea_modrm(env, s, modrm);
7849                     if (CODE64(s)) {
7850                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7851                                             s->mem_index, MO_LEQ);
7852                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7853                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7854                                             s->mem_index, MO_LEQ);
7855                     } else {
7856                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7857                                             s->mem_index, MO_LEUL);
7858                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7859                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7860                                             s->mem_index, MO_LEUL);
7861                     }
7862                     /* bnd registers are now in use */
7863                     gen_set_hflag(s, HF_MPX_IU_MASK);
7864                 }
7865             } else if (mod != 3) {
7866                 /* bndldx */
7867                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7868                 if (reg >= 4
7869                     || (prefixes & PREFIX_LOCK)
7870                     || s->aflag == MO_16
7871                     || a.base < -1) {
7872                     goto illegal_op;
7873                 }
7874                 if (a.base >= 0) {
7875                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7876                 } else {
7877                     tcg_gen_movi_tl(s->A0, 0);
7878                 }
7879                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7880                 if (a.index >= 0) {
7881                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7882                 } else {
7883                     tcg_gen_movi_tl(s->T0, 0);
7884                 }
7885                 if (CODE64(s)) {
7886                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7887                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7888                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7889                 } else {
7890                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7891                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7892                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7893                 }
7894                 gen_set_hflag(s, HF_MPX_IU_MASK);
7895             }
7896         }
7897         gen_nop_modrm(env, s, modrm);
7898         break;
7899     case 0x11b:
7900         modrm = x86_ldub_code(env, s);
7901         if (s->flags & HF_MPX_EN_MASK) {
7902             mod = (modrm >> 6) & 3;
7903             reg = ((modrm >> 3) & 7) | rex_r;
7904             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7905                 /* bndmk */
7906                 if (reg >= 4
7907                     || (prefixes & PREFIX_LOCK)
7908                     || s->aflag == MO_16) {
7909                     goto illegal_op;
7910                 }
7911                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7912                 if (a.base >= 0) {
7913                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7914                     if (!CODE64(s)) {
7915                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7916                     }
7917                 } else if (a.base == -1) {
7918                     /* no base register: the lower bound is 0 */
7919                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7920                 } else {
7921                     /* rip-relative generates #ud */
7922                     goto illegal_op;
7923                 }
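                     /* The upper bound is kept in 1's-complement form:
                        BNDCU undoes the complement, BNDCN does not.  */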
7924                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7925                 if (!CODE64(s)) {
7926                     tcg_gen_ext32u_tl(s->A0, s->A0);
7927                 }
7928                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7929                 /* bnd registers are now in use */
7930                 gen_set_hflag(s, HF_MPX_IU_MASK);
7931                 break;
7932             } else if (prefixes & PREFIX_REPNZ) {
7933                 /* bndcn */
7934                 if (reg >= 4
7935                     || (prefixes & PREFIX_LOCK)
7936                     || s->aflag == MO_16) {
7937                     goto illegal_op;
7938                 }
7939                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7940             } else if (prefixes & PREFIX_DATA) {
7941                 /* bndmov -- to reg/mem */
7942                 if (reg >= 4 || s->aflag == MO_16) {
7943                     goto illegal_op;
7944                 }
7945                 if (mod == 3) {
7946                     int reg2 = (modrm & 7) | REX_B(s);
7947                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7948                         goto illegal_op;
7949                     }
7950                     if (s->flags & HF_MPX_IU_MASK) {
7951                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
7952                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
7953                     }
7954                 } else {
7955                     gen_lea_modrm(env, s, modrm);
7956                     if (CODE64(s)) {
7957                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7958                                             s->mem_index, MO_LEQ);
7959                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7960                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7961                                             s->mem_index, MO_LEQ);
7962                     } else {
7963                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
7964                                             s->mem_index, MO_LEUL);
7965                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7966                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
7967                                             s->mem_index, MO_LEUL);
7968                     }
7969                 }
7970             } else if (mod != 3) {
7971                 /* bndstx */
7972                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7973                 if (reg >= 4
7974                     || (prefixes & PREFIX_LOCK)
7975                     || s->aflag == MO_16
7976                     || a.base < -1) {
7977                     goto illegal_op;
7978                 }
7979                 if (a.base >= 0) {
7980                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7981                 } else {
7982                     tcg_gen_movi_tl(s->A0, 0);
7983                 }
7984                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7985                 if (a.index >= 0) {
7986                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7987                 } else {
7988                     tcg_gen_movi_tl(s->T0, 0);
7989                 }
7990                 if (CODE64(s)) {
7991                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
7992                                         cpu_bndl[reg], cpu_bndu[reg]);
7993                 } else {
7994                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
7995                                         cpu_bndl[reg], cpu_bndu[reg]);
7996                 }
7997             }
7998         }
7999         gen_nop_modrm(env, s, modrm);
8000         break;
8001     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8002         modrm = x86_ldub_code(env, s);
8003         gen_nop_modrm(env, s, modrm);
8004         break;
8005     case 0x120: /* mov reg, crN */
8006     case 0x122: /* mov crN, reg */
8007         if (s->cpl != 0) {
8008             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8009         } else {
8010             modrm = x86_ldub_code(env, s);
8011             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8012              * AMD documentation (24594.pdf) and testing of
8013              * Intel 386 and 486 processors all show that the mod bits
8014              * are assumed to be 1's, regardless of actual values.
8015              */
8016             rm = (modrm & 7) | REX_B(s);
8017             reg = ((modrm >> 3) & 7) | rex_r;
8018             if (CODE64(s))
8019                 ot = MO_64;
8020             else
8021                 ot = MO_32;
8022             if ((prefixes & PREFIX_LOCK) && (reg == 0) &&
8023                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8024                 reg = 8;
8025             }
8026             switch(reg) {
8027             case 0:
8028             case 2:
8029             case 3:
8030             case 4:
8031             case 8:
8032                 gen_update_cc_op(s);
8033                 gen_jmp_im(s, pc_start - s->cs_base);
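                     /* Bit 1 of the opcode selects the direction:
                        0x122 writes the control register, 0x120 reads it.  */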
8034                 if (b & 2) {
8035                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8036                         gen_io_start();
8037                     }
8038                     gen_op_mov_v_reg(s, ot, s->T0, rm);
8039                     gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
8040                                          s->T0);
8041                     gen_jmp_im(s, s->pc - s->cs_base);
8042                     gen_eob(s);
8043                 } else {
8044                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8045                         gen_io_start();
8046                     }
8047                     gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg));
8048                     gen_op_mov_reg_v(s, ot, rm, s->T0);
8049                     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8050                         gen_jmp(s, s->pc - s->cs_base);
8051                     }
8052                 }
8053                 break;
8054             default:
8055                 goto unknown_op;
8056             }
8057         }
8058         break;
8059     case 0x121: /* mov reg, drN */
8060     case 0x123: /* mov drN, reg */
8061         if (s->cpl != 0) {
8062             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8063         } else {
8064             modrm = x86_ldub_code(env, s);
8065             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8066              * AMD documentation (24594.pdf) and testing of
8067              * Intel 386 and 486 processors all show that the mod bits
8068              * are assumed to be 1's, regardless of actual values.
8069              */
8070             rm = (modrm & 7) | REX_B(s);
8071             reg = ((modrm >> 3) & 7) | rex_r;
8072             if (CODE64(s))
8073                 ot = MO_64;
8074             else
8075                 ot = MO_32;
8076             if (reg >= 8) {
8077                 goto illegal_op;
8078             }
8079             if (b & 2) {
8080                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
8081                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8082                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8083                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8084                 gen_jmp_im(s, s->pc - s->cs_base);
8085                 gen_eob(s);
8086             } else {
8087                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg);
8088                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8089                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8090                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8091             }
8092         }
8093         break;
8094     case 0x106: /* clts */
8095         if (s->cpl != 0) {
8096             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
8097         } else {
8098             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
8099             gen_helper_clts(cpu_env);
8100             /* abort block because static cpu state changed */
8101             gen_jmp_im(s, s->pc - s->cs_base);
8102             gen_eob(s);
8103         }
8104         break;
8105     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8106     case 0x1c3: /* MOVNTI reg, mem */
8107         if (!(s->cpuid_features & CPUID_SSE2))
8108             goto illegal_op;
8109         ot = mo_64_32(dflag);
8110         modrm = x86_ldub_code(env, s);
8111         mod = (modrm >> 6) & 3;
8112         if (mod == 3)
8113             goto illegal_op;
8114         reg = ((modrm >> 3) & 7) | rex_r;
8115         /* generate a generic store */
8116         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8117         break;
8118     case 0x1ae:
8119         modrm = x86_ldub_code(env, s);
8120         switch (modrm) {
8121         CASE_MODRM_MEM_OP(0): /* fxsave */
8122             if (!(s->cpuid_features & CPUID_FXSR)
8123                 || (prefixes & PREFIX_LOCK)) {
8124                 goto illegal_op;
8125             }
8126             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8127                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8128                 break;
8129             }
8130             gen_lea_modrm(env, s, modrm);
8131             gen_helper_fxsave(cpu_env, s->A0);
8132             break;
8133 
8134         CASE_MODRM_MEM_OP(1): /* fxrstor */
8135             if (!(s->cpuid_features & CPUID_FXSR)
8136                 || (prefixes & PREFIX_LOCK)) {
8137                 goto illegal_op;
8138             }
8139             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8140                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8141                 break;
8142             }
8143             gen_lea_modrm(env, s, modrm);
8144             gen_helper_fxrstor(cpu_env, s->A0);
8145             break;
8146 
8147         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8148             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8149                 goto illegal_op;
8150             }
8151             if (s->flags & HF_TS_MASK) {
8152                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8153                 break;
8154             }
8155             gen_lea_modrm(env, s, modrm);
8156             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8157             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8158             break;
8159 
8160         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8161             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8162                 goto illegal_op;
8163             }
8164             if (s->flags & HF_TS_MASK) {
8165                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8166                 break;
8167             }
8168             gen_helper_update_mxcsr(cpu_env);
8169             gen_lea_modrm(env, s, modrm);
8170             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8171             gen_op_st_v(s, MO_32, s->T0, s->A0);
8172             break;
8173 
8174         CASE_MODRM_MEM_OP(4): /* xsave */
8175             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8176                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8177                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8178                 goto illegal_op;
8179             }
8180             gen_lea_modrm(env, s, modrm);
8181             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8182                                   cpu_regs[R_EDX]);
8183             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8184             break;
8185 
8186         CASE_MODRM_MEM_OP(5): /* xrstor */
8187             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8188                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8189                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8190                 goto illegal_op;
8191             }
8192             gen_lea_modrm(env, s, modrm);
8193             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8194                                   cpu_regs[R_EDX]);
8195             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8196             /* XRSTOR is how MPX is enabled, which changes how
8197                we translate.  Thus we need to end the TB.  */
8198             gen_update_cc_op(s);
8199             gen_jmp_im(s, s->pc - s->cs_base);
8200             gen_eob(s);
8201             break;
8202 
8203         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8204             if (prefixes & PREFIX_LOCK) {
8205                 goto illegal_op;
8206             }
8207             if (prefixes & PREFIX_DATA) {
8208                 /* clwb */
8209                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8210                     goto illegal_op;
8211                 }
8212                 gen_nop_modrm(env, s, modrm);
8213             } else {
8214                 /* xsaveopt */
8215                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8216                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8217                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8218                     goto illegal_op;
8219                 }
8220                 gen_lea_modrm(env, s, modrm);
8221                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8222                                       cpu_regs[R_EDX]);
8223                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8224             }
8225             break;
8226 
8227         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8228             if (prefixes & PREFIX_LOCK) {
8229                 goto illegal_op;
8230             }
8231             if (prefixes & PREFIX_DATA) {
8232                 /* clflushopt */
8233                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8234                     goto illegal_op;
8235                 }
8236             } else {
8237                 /* clflush */
8238                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8239                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8240                     goto illegal_op;
8241                 }
8242             }
8243             gen_nop_modrm(env, s, modrm);
8244             break;
8245 
8246         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8247         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8248         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8249         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8250             if (CODE64(s)
8251                 && (prefixes & PREFIX_REPZ)
8252                 && !(prefixes & PREFIX_LOCK)
8253                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8254                 TCGv base, treg, src, dst;
8255 
8256                 /* Preserve hflags bits by testing CR4 at runtime.  */
8257                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8258                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8259 
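                     /* Bit 0 of the reg field selects FS vs GS; bit 1
                        selects the write forms (/2, /3) over the reads.  */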
8260                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8261                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8262 
8263                 if (modrm & 0x10) {
8264                     /* wr*base */
8265                     dst = base, src = treg;
8266                 } else {
8267                     /* rd*base */
8268                     dst = treg, src = base;
8269                 }
8270 
8271                 if (s->dflag == MO_32) {
8272                     tcg_gen_ext32u_tl(dst, src);
8273                 } else {
8274                     tcg_gen_mov_tl(dst, src);
8275                 }
8276                 break;
8277             }
8278             goto unknown_op;
8279 
8280         case 0xf8: /* sfence / pcommit */
8281             if (prefixes & PREFIX_DATA) {
8282                 /* pcommit */
8283                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8284                     || (prefixes & PREFIX_LOCK)) {
8285                     goto illegal_op;
8286                 }
8287                 break;
8288             }
8289             /* fallthru */
8290         case 0xf9 ... 0xff: /* sfence */
8291             if (!(s->cpuid_features & CPUID_SSE)
8292                 || (prefixes & PREFIX_LOCK)) {
8293                 goto illegal_op;
8294             }
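                 /* SFENCE orders only stores: a store-store barrier.  */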
8295             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8296             break;
8297         case 0xe8 ... 0xef: /* lfence */
8298             if (!(s->cpuid_features & CPUID_SSE)
8299                 || (prefixes & PREFIX_LOCK)) {
8300                 goto illegal_op;
8301             }
8302             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8303             break;
8304         case 0xf0 ... 0xf7: /* mfence */
8305             if (!(s->cpuid_features & CPUID_SSE2)
8306                 || (prefixes & PREFIX_LOCK)) {
8307                 goto illegal_op;
8308             }
8309             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8310             break;
8311 
8312         default:
8313             goto unknown_op;
8314         }
8315         break;
8316 
8317     case 0x10d: /* 3DNow! prefetch(w) */
8318         modrm = x86_ldub_code(env, s);
8319         mod = (modrm >> 6) & 3;
8320         if (mod == 3)
8321             goto illegal_op;
8322         gen_nop_modrm(env, s, modrm);
8323         break;
8324     case 0x1aa: /* rsm */
8325         gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM);
8326         if (!(s->flags & HF_SMM_MASK))
8327             goto illegal_op;
8328         gen_update_cc_op(s);
8329         gen_jmp_im(s, s->pc - s->cs_base);
8330         gen_helper_rsm(cpu_env);
8331         gen_eob(s);
8332         break;
8333     case 0x1b8: /* SSE4.2 popcnt */
8334         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8335              PREFIX_REPZ)
8336             goto illegal_op;
8337         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8338             goto illegal_op;
8339 
8340         modrm = x86_ldub_code(env, s);
8341         reg = ((modrm >> 3) & 7) | rex_r;
8342 
8343         if (s->prefix & PREFIX_DATA) {
8344             ot = MO_16;
8345         } else {
8346             ot = mo_64_32(dflag);
8347         }
8348 
8349         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8350         gen_extu(ot, s->T0);
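             /* CC_OP_POPCNT derives ZF from cpu_cc_src (the
                zero-extended source); all other flags read as zero.  */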
8351         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8352         tcg_gen_ctpop_tl(s->T0, s->T0);
8353         gen_op_mov_reg_v(s, ot, reg, s->T0);
8354 
8355         set_cc_op(s, CC_OP_POPCNT);
8356         break;
8357     case 0x10e ... 0x10f:
8358         /* 3DNow! instructions, ignore prefixes */
8359         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8360         /* fall through */
8361     case 0x110 ... 0x117:
8362     case 0x128 ... 0x12f:
8363     case 0x138 ... 0x13a:
8364     case 0x150 ... 0x179:
8365     case 0x17c ... 0x17f:
8366     case 0x1c2:
8367     case 0x1c4 ... 0x1c6:
8368     case 0x1d0 ... 0x1fe:
8369         gen_sse(env, s, b, pc_start, rex_r);
8370         break;
8371     default:
8372         goto unknown_op;
8373     }
8374     return s->pc;
8375  illegal_op:
8376     gen_illegal_opcode(s);
8377     return s->pc;
8378  unknown_op:
8379     gen_unknown_opcode(env, s);
8380     return s->pc;
8381 }
8382 
8383 void tcg_x86_init(void)
8384 {
8385     static const char reg_names[CPU_NB_REGS][4] = {
8386 #ifdef TARGET_X86_64
8387         [R_EAX] = "rax",
8388         [R_EBX] = "rbx",
8389         [R_ECX] = "rcx",
8390         [R_EDX] = "rdx",
8391         [R_ESI] = "rsi",
8392         [R_EDI] = "rdi",
8393         [R_EBP] = "rbp",
8394         [R_ESP] = "rsp",
8395         [8]  = "r8",
8396         [9]  = "r9",
8397         [10] = "r10",
8398         [11] = "r11",
8399         [12] = "r12",
8400         [13] = "r13",
8401         [14] = "r14",
8402         [15] = "r15",
8403 #else
8404         [R_EAX] = "eax",
8405         [R_EBX] = "ebx",
8406         [R_ECX] = "ecx",
8407         [R_EDX] = "edx",
8408         [R_ESI] = "esi",
8409         [R_EDI] = "edi",
8410         [R_EBP] = "ebp",
8411         [R_ESP] = "esp",
8412 #endif
8413     };
8414     static const char seg_base_names[6][8] = {
8415         [R_CS] = "cs_base",
8416         [R_DS] = "ds_base",
8417         [R_ES] = "es_base",
8418         [R_FS] = "fs_base",
8419         [R_GS] = "gs_base",
8420         [R_SS] = "ss_base",
8421     };
8422     static const char bnd_regl_names[4][8] = {
8423         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8424     };
8425     static const char bnd_regu_names[4][8] = {
8426         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8427     };
8428     int i;
8429 
8430     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8431                                        offsetof(CPUX86State, cc_op), "cc_op");
8432     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8433                                     "cc_dst");
8434     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8435                                     "cc_src");
8436     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8437                                      "cc_src2");
8438 
8439     for (i = 0; i < CPU_NB_REGS; ++i) {
8440         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8441                                          offsetof(CPUX86State, regs[i]),
8442                                          reg_names[i]);
8443     }
8444 
8445     for (i = 0; i < 6; ++i) {
8446         cpu_seg_base[i]
8447             = tcg_global_mem_new(cpu_env,
8448                                  offsetof(CPUX86State, segs[i].base),
8449                                  seg_base_names[i]);
8450     }
8451 
8452     for (i = 0; i < 4; ++i) {
8453         cpu_bndl[i]
8454             = tcg_global_mem_new_i64(cpu_env,
8455                                      offsetof(CPUX86State, bnd_regs[i].lb),
8456                                      bnd_regl_names[i]);
8457         cpu_bndu[i]
8458             = tcg_global_mem_new_i64(cpu_env,
8459                                      offsetof(CPUX86State, bnd_regs[i].ub),
8460                                      bnd_regu_names[i]);
8461     }
8462 }
8463 
8464 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8465 {
8466     DisasContext *dc = container_of(dcbase, DisasContext, base);
8467     CPUX86State *env = cpu->env_ptr;
8468     uint32_t flags = dc->base.tb->flags;
8469     target_ulong cs_base = dc->base.tb->cs_base;
8470 
8471     dc->pe = (flags >> HF_PE_SHIFT) & 1;
8472     dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
8473     dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
8474     dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1;
8475     dc->f_st = 0;
8476     dc->vm86 = (flags >> VM_SHIFT) & 1;
8477     dc->cpl = (flags >> HF_CPL_SHIFT) & 3;
8478     dc->iopl = (flags >> IOPL_SHIFT) & 3;
8479     dc->tf = (flags >> TF_SHIFT) & 1;
8480     dc->cc_op = CC_OP_DYNAMIC;
8481     dc->cc_op_dirty = false;
8482     dc->cs_base = cs_base;
8483     dc->popl_esp_hack = 0;
8484     /* select memory access functions */
8485     dc->mem_index = 0;
8486 #ifdef CONFIG_SOFTMMU
8487     dc->mem_index = cpu_mmu_index(env, false);
8488 #endif
8489     dc->cpuid_features = env->features[FEAT_1_EDX];
8490     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8491     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8492     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8493     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8494     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8495 #ifdef TARGET_X86_64
8496     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
8497     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
8498 #endif
8499     dc->flags = flags;
8500     dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled ||
8501                     (flags & HF_INHIBIT_IRQ_MASK));
8502     /* Do not optimize repz jumps at all in icount mode, because
8503        rep movsS instructions are executed along different paths in
8504        the !repz_opt and repz_opt modes.  The first one was always
8505        used except in single-step mode, so this setting disables the
8506        jump optimization and makes the control paths equivalent in
8507        normal-run and single-step modes.
8508        Now there will be no jump optimization for repz in
8509        record/replay modes and there will always be an
8510        additional step for ecx=0 when icount is enabled.
8511      */
8512     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8513 #if 0
8514     /* check addseg logic */
8515     if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32))
8516         printf("ERROR addseg\n");
8517 #endif
8518 
8519     dc->T0 = tcg_temp_new();
8520     dc->T1 = tcg_temp_new();
8521     dc->A0 = tcg_temp_new();
8522 
8523     dc->tmp0 = tcg_temp_new();
8524     dc->tmp1_i64 = tcg_temp_new_i64();
8525     dc->tmp2_i32 = tcg_temp_new_i32();
8526     dc->tmp3_i32 = tcg_temp_new_i32();
8527     dc->tmp4 = tcg_temp_new();
8528     dc->ptr0 = tcg_temp_new_ptr();
8529     dc->ptr1 = tcg_temp_new_ptr();
8530     dc->cc_srcT = tcg_temp_local_new();
8531 }
8532 
8533 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8534 {
8535 }
8536 
8537 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8538 {
8539     DisasContext *dc = container_of(dcbase, DisasContext, base);
8540 
8541     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8542 }
8543 
8544 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8545                                      const CPUBreakpoint *bp)
8546 {
8547     DisasContext *dc = container_of(dcbase, DisasContext, base);
8548     /* If RF is set, suppress an internally generated breakpoint.  */
8549     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8550     if (bp->flags & flags) {
8551         gen_debug(dc, dc->base.pc_next - dc->cs_base);
8552         dc->base.is_jmp = DISAS_NORETURN;
8553         /* The address covered by the breakpoint must be included in
8554            [tb->pc, tb->pc + tb->size) in order for it to be
8555            properly cleared -- thus we increment the PC here so that
8556            the generic logic setting tb->size later does the right thing.  */
8557         dc->base.pc_next += 1;
8558         return true;
8559     } else {
8560         return false;
8561     }
8562 }
8563 
8564 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8565 {
8566     DisasContext *dc = container_of(dcbase, DisasContext, base);
8567     target_ulong pc_next;
8568 
8569 #ifdef TARGET_VSYSCALL_PAGE
8570     /*
8571      * Detect entry into the vsyscall page and invoke the syscall.
8572      */
8573     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8574         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8575         return;
8576     }
8577 #endif
8578 
8579     pc_next = disas_insn(dc, cpu);
8580 
8581     if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) {
8582         /* If in single-step mode, we generate only one instruction
8583            and raise an exception. */
8584         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8585            the flag and abort the translation to give the IRQs a
8586            chance to happen. */
8587         dc->base.is_jmp = DISAS_TOO_MANY;
8588     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8589                && ((pc_next & TARGET_PAGE_MASK)
8590                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8591                        & TARGET_PAGE_MASK)
8592                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8593         /* Do not cross a page boundary in icount mode, since doing
8594            so can cause an exception.  Stop only when the boundary
8595            would be crossed by the first instruction in the block.
8596            If the current instruction has already crossed it, that is
8597            fine, because an exception has not stopped this code.
8598          */
8599         dc->base.is_jmp = DISAS_TOO_MANY;
8600     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8601         dc->base.is_jmp = DISAS_TOO_MANY;
8602     }
8603 
8604     dc->base.pc_next = pc_next;
8605 }
8606 
8607 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8608 {
8609     DisasContext *dc = container_of(dcbase, DisasContext, base);
8610 
8611     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8612         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8613         gen_eob(dc);
8614     }
8615 }
8616 
8617 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8618                               CPUState *cpu)
8619 {
8620     DisasContext *dc = container_of(dcbase, DisasContext, base);
8621 
8622     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8623     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8624 }
8625 
8626 static const TranslatorOps i386_tr_ops = {
8627     .init_disas_context = i386_tr_init_disas_context,
8628     .tb_start           = i386_tr_tb_start,
8629     .insn_start         = i386_tr_insn_start,
8630     .breakpoint_check   = i386_tr_breakpoint_check,
8631     .translate_insn     = i386_tr_translate_insn,
8632     .tb_stop            = i386_tr_tb_stop,
8633     .disas_log          = i386_tr_disas_log,
8634 };
8635 
8636 /* generate intermediate code for basic block 'tb'.  */
8637 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8638 {
8639     DisasContext dc;
8640 
8641     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8642 }
8643 
8644 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8645                           target_ulong *data)
8646 {
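         /* data[] mirrors tcg_gen_insn_start(): data[0] is the pc
            (eip + cs_base), data[1] the cc_op.  */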
8647     int cc_op = data[1];
8648     env->eip = data[0] - tb->cs_base;
8649     if (cc_op != CC_OP_DYNAMIC) {
8650         env->cc_op = cc_op;
8651     }
8652 }
8653