xref: /openbmc/qemu/target/i386/tcg/translate.c (revision 812b31d3)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "trace-tcg.h"
34 #include "exec/log.h"
35 
36 #define PREFIX_REPZ   0x01
37 #define PREFIX_REPNZ  0x02
38 #define PREFIX_LOCK   0x04
39 #define PREFIX_DATA   0x08
40 #define PREFIX_ADR    0x10
41 #define PREFIX_VEX    0x20
42 #define PREFIX_REX    0x40
43 
44 #ifdef TARGET_X86_64
45 # define ctztl  ctz64
46 # define clztl  clz64
47 #else
48 # define ctztl  ctz32
49 # define clztl  clz32
50 #endif
51 
52 /* For a switch indexed by MODRM, match all memory operands for a given OP.  */
53 #define CASE_MODRM_MEM_OP(OP) \
54     case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
55     case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
56     case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7
57 
58 #define CASE_MODRM_OP(OP) \
59     case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
60     case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
61     case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
62     case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
63 
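/*
 * Editor's sketch (not part of the original file): the CASE_MODRM_*
 * macros above assume the standard modrm layout, with mod in bits 7:6,
 * the reg/opcode field in bits 5:3 and r/m in bits 2:0.
 */
static inline void example_modrm_fields(uint8_t modrm,
                                        int *mod, int *op, int *rm)
{
    *mod = (modrm >> 6) & 3; /* 0..2 are the memory forms, 3 is register */
    *op  = (modrm >> 3) & 7; /* the OP selected by CASE_MODRM_*_OP(OP) */
    *rm  = modrm & 7;        /* all eight values, hence the "| 0 ... | 7" */
}
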
64 //#define MACRO_TEST   1
65 
66 /* global register indexes */
67 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
68 static TCGv_i32 cpu_cc_op;
69 static TCGv cpu_regs[CPU_NB_REGS];
70 static TCGv cpu_seg_base[6];
71 static TCGv_i64 cpu_bndl[4];
72 static TCGv_i64 cpu_bndu[4];
73 
74 #include "exec/gen-icount.h"
75 
76 typedef struct DisasContext {
77     DisasContextBase base;
78 
79     target_ulong pc;       /* pc = eip + cs_base */
80     target_ulong pc_start; /* pc at TB entry */
81     target_ulong cs_base;  /* base of CS segment */
82 
83     MemOp aflag;
84     MemOp dflag;
85 
86     int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
87     uint8_t prefix;
88 
89 #ifndef CONFIG_USER_ONLY
90     uint8_t cpl;   /* code priv level */
91     uint8_t iopl;  /* i/o priv level */
92 #endif
93     uint8_t vex_l;  /* vex vector length */
94     uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
95     uint8_t popl_esp_hack; /* for correct popl with esp base handling */
96     uint8_t rip_offset; /* only used in x86_64, but left for simplicity */
97 
98 #ifdef TARGET_X86_64
99     uint8_t rex_r;
100     uint8_t rex_x;
101     uint8_t rex_b;
102     bool rex_w;
103 #endif
104     bool jmp_opt; /* use direct block chaining for direct jumps */
105     bool repz_opt; /* optimize jumps within repz instructions */
106     bool cc_op_dirty;
107 
108     CCOp cc_op;  /* current CC operation */
109     int mem_index; /* select memory access functions */
110     uint32_t flags; /* all execution flags */
111     int cpuid_features;
112     int cpuid_ext_features;
113     int cpuid_ext2_features;
114     int cpuid_ext3_features;
115     int cpuid_7_0_ebx_features;
116     int cpuid_xsave_features;
117 
118     /* TCG local temps */
119     TCGv cc_srcT;
120     TCGv A0;
121     TCGv T0;
122     TCGv T1;
123 
124     /* TCG local register indexes (only used inside old micro ops) */
125     TCGv tmp0;
126     TCGv tmp4;
127     TCGv_ptr ptr0;
128     TCGv_ptr ptr1;
129     TCGv_i32 tmp2_i32;
130     TCGv_i32 tmp3_i32;
131     TCGv_i64 tmp1_i64;
132 
133     sigjmp_buf jmpbuf;
134 } DisasContext;
135 
136 /* The environment in which user-only runs is constrained. */
137 #ifdef CONFIG_USER_ONLY
138 #define PE(S)     true
139 #define CPL(S)    3
140 #define IOPL(S)   0
141 #define SVME(S)   false
142 #define GUEST(S)  false
143 #else
144 #define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
145 #define CPL(S)    ((S)->cpl)
146 #define IOPL(S)   ((S)->iopl)
147 #define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
148 #define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
149 #endif
150 #if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
151 #define VM86(S)   false
152 #define CODE32(S) true
153 #define SS32(S)   true
154 #define ADDSEG(S) false
155 #else
156 #define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
157 #define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
158 #define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
159 #define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
160 #endif
161 #if !defined(TARGET_X86_64)
162 #define CODE64(S) false
163 #define LMA(S)    false
164 #elif defined(CONFIG_USER_ONLY)
165 #define CODE64(S) true
166 #define LMA(S)    true
167 #else
168 #define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
169 #define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
170 #endif
171 
172 #ifdef TARGET_X86_64
173 #define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
174 #define REX_W(S)       ((S)->rex_w)
175 #define REX_R(S)       ((S)->rex_r + 0)
176 #define REX_X(S)       ((S)->rex_x + 0)
177 #define REX_B(S)       ((S)->rex_b + 0)
178 #else
179 #define REX_PREFIX(S)  false
180 #define REX_W(S)       false
181 #define REX_R(S)       0
182 #define REX_X(S)       0
183 #define REX_B(S)       0
184 #endif
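
/*
 * Editor's sketch: how these accessors are typically combined with the
 * modrm fields to form 4-bit register numbers in 64-bit mode.  rex_r,
 * rex_x and rex_b are assumed to hold the REX bit already shifted into
 * place (0 or 8), so a plain OR suffices:
 *
 *     int reg = ((modrm >> 3) & 7) | REX_R(s);   // modrm.reg, extended
 *     int rm  = (modrm & 7) | REX_B(s);          // modrm.rm, extended
 *
 * On targets without TARGET_X86_64 the stubs above are constant 0 and
 * the expressions decay to the plain 3-bit fields.
 */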
185 
186 /*
187  * Many sysemu-only helpers are not reachable for user-only.
188  * Define stub generators here, so that we need not either sprinkle
189  * ifdefs through the translator, nor provide the helper function.
190  */
191 #define STUB_HELPER(NAME, ...) \
192     static inline void gen_helper_##NAME(__VA_ARGS__) \
193     { qemu_build_not_reached(); }
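
/*
 * For example, STUB_HELPER(clgi, TCGv_env env) below expands to
 *
 *     static inline void gen_helper_clgi(TCGv_env env)
 *     { qemu_build_not_reached(); }
 *
 * so a user-only build still compiles calls to the helper, while
 * asserting at build time that those calls are optimized away.
 */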
194 
195 #ifdef CONFIG_USER_ONLY
196 STUB_HELPER(clgi, TCGv_env env)
197 STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
198 STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
199 STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
200 STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
201 STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
202 STUB_HELPER(monitor, TCGv_env env, TCGv addr)
203 STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
204 STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
205 STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
206 STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
207 STUB_HELPER(rdmsr, TCGv_env env)
208 STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
209 STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
210 STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
211 STUB_HELPER(stgi, TCGv_env env)
212 STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
213 STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
214 STUB_HELPER(vmmcall, TCGv_env env)
215 STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
216 STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
217 STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
218 STUB_HELPER(wrmsr, TCGv_env env)
219 #endif
220 
221 static void gen_eob(DisasContext *s);
222 static void gen_jr(DisasContext *s, TCGv dest);
223 static void gen_jmp(DisasContext *s, target_ulong eip);
224 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
225 static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
226 static void gen_exception_gpf(DisasContext *s);
227 
228 /* i386 arith/logic operations */
229 enum {
230     OP_ADDL,
231     OP_ORL,
232     OP_ADCL,
233     OP_SBBL,
234     OP_ANDL,
235     OP_SUBL,
236     OP_XORL,
237     OP_CMPL,
238 };
239 
240 /* i386 shift ops */
241 enum {
242     OP_ROL,
243     OP_ROR,
244     OP_RCL,
245     OP_RCR,
246     OP_SHL,
247     OP_SHR,
248     OP_SHL1, /* undocumented */
249     OP_SAR = 7,
250 };
251 
252 enum {
253     JCC_O,
254     JCC_B,
255     JCC_Z,
256     JCC_BE,
257     JCC_S,
258     JCC_P,
259     JCC_L,
260     JCC_LE,
261 };
262 
263 enum {
264     /* I386 int registers */
265     OR_EAX,   /* MUST be even numbered */
266     OR_ECX,
267     OR_EDX,
268     OR_EBX,
269     OR_ESP,
270     OR_EBP,
271     OR_ESI,
272     OR_EDI,
273 
274     OR_TMP0 = 16,    /* temporary operand register */
275     OR_TMP1,
276     OR_A0, /* temporary register used when doing address evaluation */
277 };
278 
279 enum {
280     USES_CC_DST  = 1,
281     USES_CC_SRC  = 2,
282     USES_CC_SRC2 = 4,
283     USES_CC_SRCT = 8,
284 };
285 
286 /* Bit set if the global variable is live after setting CC_OP to X.  */
287 static const uint8_t cc_op_live[CC_OP_NB] = {
288     [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
289     [CC_OP_EFLAGS] = USES_CC_SRC,
290     [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
291     [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
292     [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
293     [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
294     [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
295     [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
296     [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
297     [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
298     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
299     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
300     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
301     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
302     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
303     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
304     [CC_OP_CLR] = 0,
305     [CC_OP_POPCNT] = USES_CC_SRC,
306 };
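
/*
 * Worked example (editor's note): switching from CC_OP_SUBB to
 * CC_OP_LOGICB gives
 *
 *     dead = (DST | SRC | SRCT) & ~DST = USES_CC_SRC | USES_CC_SRCT
 *
 * so set_cc_op() below discards cpu_cc_src and s->cc_srcT while keeping
 * cpu_cc_dst, which the LOGIC computation still needs.
 */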
307 
308 static void set_cc_op(DisasContext *s, CCOp op)
309 {
310     int dead;
311 
312     if (s->cc_op == op) {
313         return;
314     }
315 
316     /* Discard CC computation that will no longer be used.  */
317     dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
318     if (dead & USES_CC_DST) {
319         tcg_gen_discard_tl(cpu_cc_dst);
320     }
321     if (dead & USES_CC_SRC) {
322         tcg_gen_discard_tl(cpu_cc_src);
323     }
324     if (dead & USES_CC_SRC2) {
325         tcg_gen_discard_tl(cpu_cc_src2);
326     }
327     if (dead & USES_CC_SRCT) {
328         tcg_gen_discard_tl(s->cc_srcT);
329     }
330 
331     if (op == CC_OP_DYNAMIC) {
332         /* The DYNAMIC setting is translator only, and should never be
333            stored.  Thus we always consider it clean.  */
334         s->cc_op_dirty = false;
335     } else {
336         /* Discard any computed CC_OP value (see shifts).  */
337         if (s->cc_op == CC_OP_DYNAMIC) {
338             tcg_gen_discard_i32(cpu_cc_op);
339         }
340         s->cc_op_dirty = true;
341     }
342     s->cc_op = op;
343 }
344 
345 static void gen_update_cc_op(DisasContext *s)
346 {
347     if (s->cc_op_dirty) {
348         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
349         s->cc_op_dirty = false;
350     }
351 }
352 
353 #ifdef TARGET_X86_64
354 
355 #define NB_OP_SIZES 4
356 
357 #else /* !TARGET_X86_64 */
358 
359 #define NB_OP_SIZES 3
360 
361 #endif /* !TARGET_X86_64 */
362 
363 #if defined(HOST_WORDS_BIGENDIAN)
364 #define REG_B_OFFSET (sizeof(target_ulong) - 1)
365 #define REG_H_OFFSET (sizeof(target_ulong) - 2)
366 #define REG_W_OFFSET (sizeof(target_ulong) - 2)
367 #define REG_L_OFFSET (sizeof(target_ulong) - 4)
368 #define REG_LH_OFFSET (sizeof(target_ulong) - 8)
369 #else
370 #define REG_B_OFFSET 0
371 #define REG_H_OFFSET 1
372 #define REG_W_OFFSET 0
373 #define REG_L_OFFSET 0
374 #define REG_LH_OFFSET 4
375 #endif
376 
377 /* In instruction encodings for byte register accesses the
378  * register number usually indicates "low 8 bits of register N";
379  * however there are some special cases where N in 4..7 indicates
380  * [AH, CH, DH, BH], i.e. "bits 15..8 of register N-4".  Return
381  * true for this special case, false otherwise.
382  */
383 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
384 {
385     /* Any time the REX prefix is present, byte registers are uniform */
386     if (reg < 4 || REX_PREFIX(s)) {
387         return false;
388     }
389     return true;
390 }
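
/*
 * Example (editor's note): reg = 4 with MO_8 names AH (bits 15:8 of EAX)
 * in legacy encodings, but SPL (the low byte of RSP) whenever any REX
 * prefix is present; hence the check is on the prefix, not on the
 * register number alone.
 */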
391 
392 /* Select the size of a push/pop operation.  */
393 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
394 {
395     if (CODE64(s)) {
396         return ot == MO_16 ? MO_16 : MO_64;
397     } else {
398         return ot;
399     }
400 }
401 
402 /* Select the size of the stack pointer.  */
403 static inline MemOp mo_stacksize(DisasContext *s)
404 {
405     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
406 }
407 
408 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
409 static inline MemOp mo_64_32(MemOp ot)
410 {
411 #ifdef TARGET_X86_64
412     return ot == MO_64 ? MO_64 : MO_32;
413 #else
414     return MO_32;
415 #endif
416 }
417 
418 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
419    byte vs word opcodes.  */
420 static inline MemOp mo_b_d(int b, MemOp ot)
421 {
422     return b & 1 ? ot : MO_8;
423 }
424 
425 /* Select size 8 if lsb of B is clear, else OT capped at 32.
426    Used for decoding operand size of port opcodes.  */
427 static inline MemOp mo_b_d32(int b, MemOp ot)
428 {
429     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
430 }
431 
432 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
433 {
434     switch (ot) {
435     case MO_8:
436         if (!byte_reg_is_xH(s, reg)) {
437             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
438         } else {
439             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
440         }
441         break;
442     case MO_16:
443         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
444         break;
445     case MO_32:
446         /* For x86_64, this sets the upper half of the register to zero.
447            For i386, this is equivalent to a mov. */
448         tcg_gen_ext32u_tl(cpu_regs[reg], t0);
449         break;
450 #ifdef TARGET_X86_64
451     case MO_64:
452         tcg_gen_mov_tl(cpu_regs[reg], t0);
453         break;
454 #endif
455     default:
456         tcg_abort();
457     }
458 }
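
/*
 * Example (editor's note): an MO_8 store to reg 5 with no REX prefix
 * writes CH, i.e. deposits into bits 15:8 of cpu_regs[1] (ECX); the same
 * store under a REX prefix targets BPL, bits 7:0 of cpu_regs[5].
 */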
459 
460 static inline
461 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
462 {
463     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
464         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
465     } else {
466         tcg_gen_mov_tl(t0, cpu_regs[reg]);
467     }
468 }
469 
470 static void gen_add_A0_im(DisasContext *s, int val)
471 {
472     tcg_gen_addi_tl(s->A0, s->A0, val);
473     if (!CODE64(s)) {
474         tcg_gen_ext32u_tl(s->A0, s->A0);
475     }
476 }
477 
478 static inline void gen_op_jmp_v(TCGv dest)
479 {
480     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
481 }
482 
483 static inline
484 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
485 {
486     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
487     gen_op_mov_reg_v(s, size, reg, s->tmp0);
488 }
489 
490 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
491 {
492     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
493     gen_op_mov_reg_v(s, size, reg, s->tmp0);
494 }
495 
496 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
497 {
498     tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
499 }
500 
501 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
502 {
503     tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
504 }
505 
506 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
507 {
508     if (d == OR_TMP0) {
509         gen_op_st_v(s, idx, s->T0, s->A0);
510     } else {
511         gen_op_mov_reg_v(s, idx, d, s->T0);
512     }
513 }
514 
515 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
516 {
517     tcg_gen_movi_tl(s->tmp0, pc);
518     gen_op_jmp_v(s->tmp0);
519 }
520 
521 /* Compute SEG:REG into A0.  SEG is selected from the override segment
522    (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
523    indicate no override.  */
524 static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
525                           int def_seg, int ovr_seg)
526 {
527     switch (aflag) {
528 #ifdef TARGET_X86_64
529     case MO_64:
530         if (ovr_seg < 0) {
531             tcg_gen_mov_tl(s->A0, a0);
532             return;
533         }
534         break;
535 #endif
536     case MO_32:
537         /* 32 bit address */
538         if (ovr_seg < 0 && ADDSEG(s)) {
539             ovr_seg = def_seg;
540         }
541         if (ovr_seg < 0) {
542             tcg_gen_ext32u_tl(s->A0, a0);
543             return;
544         }
545         break;
546     case MO_16:
547         /* 16 bit address */
548         tcg_gen_ext16u_tl(s->A0, a0);
549         a0 = s->A0;
550         if (ovr_seg < 0) {
551             if (ADDSEG(s)) {
552                 ovr_seg = def_seg;
553             } else {
554                 return;
555             }
556         }
557         break;
558     default:
559         tcg_abort();
560     }
561 
562     if (ovr_seg >= 0) {
563         TCGv seg = cpu_seg_base[ovr_seg];
564 
565         if (aflag == MO_64) {
566             tcg_gen_add_tl(s->A0, a0, seg);
567         } else if (CODE64(s)) {
568             tcg_gen_ext32u_tl(s->A0, a0);
569             tcg_gen_add_tl(s->A0, s->A0, seg);
570         } else {
571             tcg_gen_add_tl(s->A0, a0, seg);
572             tcg_gen_ext32u_tl(s->A0, s->A0);
573         }
574     }
575 }
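
/*
 * Editor's sketch: a plain-C model of the MO_16-with-segment path above
 * for the !CODE64 case (the real code emits the equivalent TCG ops):
 */
static inline uint32_t example_lea16(uint32_t seg_base, uint32_t a0)
{
    /* 16-bit effective address plus segment base, truncated to 32 bits */
    return (seg_base + (a0 & 0xffffu)) & 0xffffffffu;
}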
576 
577 static inline void gen_string_movl_A0_ESI(DisasContext *s)
578 {
579     gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
580 }
581 
582 static inline void gen_string_movl_A0_EDI(DisasContext *s)
583 {
584     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
585 }
586 
587 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
588 {
589     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
590     tcg_gen_shli_tl(s->T0, s->T0, ot);
591 }
592 
593 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
594 {
595     switch (size) {
596     case MO_8:
597         if (sign) {
598             tcg_gen_ext8s_tl(dst, src);
599         } else {
600             tcg_gen_ext8u_tl(dst, src);
601         }
602         return dst;
603     case MO_16:
604         if (sign) {
605             tcg_gen_ext16s_tl(dst, src);
606         } else {
607             tcg_gen_ext16u_tl(dst, src);
608         }
609         return dst;
610 #ifdef TARGET_X86_64
611     case MO_32:
612         if (sign) {
613             tcg_gen_ext32s_tl(dst, src);
614         } else {
615             tcg_gen_ext32u_tl(dst, src);
616         }
617         return dst;
618 #endif
619     default:
620         return src;
621     }
622 }
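
/*
 * Example (editor's note): gen_ext_tl(dst, src, MO_8, false) emits a
 * zero-extension of the low byte into dst and returns dst; a size that
 * already matches the target width (MO_32 on i386, MO_64 on x86_64)
 * falls through to the default case and returns src unchanged.
 */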
623 
624 static void gen_extu(MemOp ot, TCGv reg)
625 {
626     gen_ext_tl(reg, reg, ot, false);
627 }
628 
629 static void gen_exts(MemOp ot, TCGv reg)
630 {
631     gen_ext_tl(reg, reg, ot, true);
632 }
633 
634 static inline
635 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
636 {
637     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
638     gen_extu(size, s->tmp0);
639     tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
640 }
641 
642 static inline
643 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
644 {
645     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
646     gen_extu(size, s->tmp0);
647     tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
648 }
649 
650 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
651 {
652     switch (ot) {
653     case MO_8:
654         gen_helper_inb(v, cpu_env, n);
655         break;
656     case MO_16:
657         gen_helper_inw(v, cpu_env, n);
658         break;
659     case MO_32:
660         gen_helper_inl(v, cpu_env, n);
661         break;
662     default:
663         tcg_abort();
664     }
665 }
666 
667 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
668 {
669     switch (ot) {
670     case MO_8:
671         gen_helper_outb(cpu_env, v, n);
672         break;
673     case MO_16:
674         gen_helper_outw(cpu_env, v, n);
675         break;
676     case MO_32:
677         gen_helper_outl(cpu_env, v, n);
678         break;
679     default:
680         tcg_abort();
681     }
682 }
683 
684 /*
685  * Validate that access to [port, port + (1 << ot)) is allowed.
686  * Raise #GP, or trigger a VMM exit, if not.
687  */
688 static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
689                          uint32_t svm_flags)
690 {
691 #ifdef CONFIG_USER_ONLY
692     /*
693      * We do not implement the ioperm(2) syscall, so the TSS check
694      * will always fail.
695      */
696     gen_exception_gpf(s);
697     return false;
698 #else
699     if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
700         gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
701     }
702     if (GUEST(s)) {
703         target_ulong cur_eip = s->base.pc_next - s->cs_base;
704         target_ulong next_eip = s->pc - s->cs_base;
705 
706         gen_update_cc_op(s);
707         gen_jmp_im(s, cur_eip);
708         if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
709             svm_flags |= SVM_IOIO_REP_MASK;
710         }
711         svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
712         gen_helper_svm_check_io(cpu_env, port,
713                                 tcg_constant_i32(svm_flags),
714                                 tcg_constant_i32(next_eip - cur_eip));
715     }
716     return true;
717 #endif
718 }
719 
720 static inline void gen_movs(DisasContext *s, MemOp ot)
721 {
722     gen_string_movl_A0_ESI(s);
723     gen_op_ld_v(s, ot, s->T0, s->A0);
724     gen_string_movl_A0_EDI(s);
725     gen_op_st_v(s, ot, s->T0, s->A0);
726     gen_op_movl_T0_Dshift(s, ot);
727     gen_op_add_reg_T0(s, s->aflag, R_ESI);
728     gen_op_add_reg_T0(s, s->aflag, R_EDI);
729 }
730 
731 static void gen_op_update1_cc(DisasContext *s)
732 {
733     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
734 }
735 
736 static void gen_op_update2_cc(DisasContext *s)
737 {
738     tcg_gen_mov_tl(cpu_cc_src, s->T1);
739     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
740 }
741 
742 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
743 {
744     tcg_gen_mov_tl(cpu_cc_src2, reg);
745     tcg_gen_mov_tl(cpu_cc_src, s->T1);
746     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
747 }
748 
749 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
750 {
751     tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
752 }
753 
754 static void gen_op_update_neg_cc(DisasContext *s)
755 {
756     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
757     tcg_gen_neg_tl(cpu_cc_src, s->T0);
758     tcg_gen_movi_tl(s->cc_srcT, 0);
759 }
760 
761 /* compute all eflags to cc_src */
762 static void gen_compute_eflags(DisasContext *s)
763 {
764     TCGv zero, dst, src1, src2;
765     int live, dead;
766 
767     if (s->cc_op == CC_OP_EFLAGS) {
768         return;
769     }
770     if (s->cc_op == CC_OP_CLR) {
771         tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
772         set_cc_op(s, CC_OP_EFLAGS);
773         return;
774     }
775 
776     zero = NULL;
777     dst = cpu_cc_dst;
778     src1 = cpu_cc_src;
779     src2 = cpu_cc_src2;
780 
781     /* Take care to not read values that are not live.  */
782     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
783     dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
784     if (dead) {
785         zero = tcg_const_tl(0);
786         if (dead & USES_CC_DST) {
787             dst = zero;
788         }
789         if (dead & USES_CC_SRC) {
790             src1 = zero;
791         }
792         if (dead & USES_CC_SRC2) {
793             src2 = zero;
794         }
795     }
796 
797     gen_update_cc_op(s);
798     gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
799     set_cc_op(s, CC_OP_EFLAGS);
800 
801     if (dead) {
802         tcg_temp_free(zero);
803     }
804 }
805 
806 typedef struct CCPrepare {
807     TCGCond cond;
808     TCGv reg;
809     TCGv reg2;
810     target_ulong imm;
811     target_ulong mask;
812     bool use_reg2;
813     bool no_setcond;
814 } CCPrepare;
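
/*
 * Editor's note on the contract: consumers (gen_setcc1 and gen_jcc1
 * below) first AND .reg with .mask when mask != -1, then evaluate
 *
 *     cond(.reg, .reg2)  if .use_reg2, else
 *     cond(.reg, .imm)
 *
 * With .no_setcond set, .reg already holds the result and only
 * TCG_COND_EQ needs an explicit inversion.
 */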
815 
816 /* compute eflags.C to reg */
817 static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
818 {
819     TCGv t0, t1;
820     int size, shift;
821 
822     switch (s->cc_op) {
823     case CC_OP_SUBB ... CC_OP_SUBQ:
824         /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
825         size = s->cc_op - CC_OP_SUBB;
826         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
827         /* If no temporary was used, be careful not to alias t1 and t0.  */
828         t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
829         tcg_gen_mov_tl(t0, s->cc_srcT);
830         gen_extu(size, t0);
831         goto add_sub;
832 
833     case CC_OP_ADDB ... CC_OP_ADDQ:
834         /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
835         size = s->cc_op - CC_OP_ADDB;
836         t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
837         t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
838     add_sub:
839         return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
840                              .reg2 = t1, .mask = -1, .use_reg2 = true };
841 
842     case CC_OP_LOGICB ... CC_OP_LOGICQ:
843     case CC_OP_CLR:
844     case CC_OP_POPCNT:
845         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
846 
847     case CC_OP_INCB ... CC_OP_INCQ:
848     case CC_OP_DECB ... CC_OP_DECQ:
849         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
850                              .mask = -1, .no_setcond = true };
851 
852     case CC_OP_SHLB ... CC_OP_SHLQ:
853         /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
854         size = s->cc_op - CC_OP_SHLB;
855         shift = (8 << size) - 1;
856         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
857                              .mask = (target_ulong)1 << shift };
858 
859     case CC_OP_MULB ... CC_OP_MULQ:
860         return (CCPrepare) { .cond = TCG_COND_NE,
861                              .reg = cpu_cc_src, .mask = -1 };
862 
863     case CC_OP_BMILGB ... CC_OP_BMILGQ:
864         size = s->cc_op - CC_OP_BMILGB;
865         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
866         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
867 
868     case CC_OP_ADCX:
869     case CC_OP_ADCOX:
870         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
871                              .mask = -1, .no_setcond = true };
872 
873     case CC_OP_EFLAGS:
874     case CC_OP_SARB ... CC_OP_SARQ:
875         /* CC_SRC & 1 */
876         return (CCPrepare) { .cond = TCG_COND_NE,
877                              .reg = cpu_cc_src, .mask = CC_C };
878 
879     default:
880        /* The need to compute only C from CC_OP_DYNAMIC is important
881           in efficiently implementing e.g. INC at the start of a TB.  */
882        gen_update_cc_op(s);
883        gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
884                                cpu_cc_src2, cpu_cc_op);
885        return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
886                             .mask = -1, .no_setcond = true };
887     }
888 }
889 
890 /* compute eflags.P to reg */
891 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
892 {
893     gen_compute_eflags(s);
894     return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
895                          .mask = CC_P };
896 }
897 
898 /* compute eflags.S to reg */
899 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
900 {
901     switch (s->cc_op) {
902     case CC_OP_DYNAMIC:
903         gen_compute_eflags(s);
904         /* FALLTHRU */
905     case CC_OP_EFLAGS:
906     case CC_OP_ADCX:
907     case CC_OP_ADOX:
908     case CC_OP_ADCOX:
909         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
910                              .mask = CC_S };
911     case CC_OP_CLR:
912     case CC_OP_POPCNT:
913         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
914     default:
915         {
916             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
917             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
918             return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
919         }
920     }
921 }
922 
923 /* compute eflags.O to reg */
924 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
925 {
926     switch (s->cc_op) {
927     case CC_OP_ADOX:
928     case CC_OP_ADCOX:
929         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
930                              .mask = -1, .no_setcond = true };
931     case CC_OP_CLR:
932     case CC_OP_POPCNT:
933         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
934     default:
935         gen_compute_eflags(s);
936         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
937                              .mask = CC_O };
938     }
939 }
940 
941 /* compute eflags.Z to reg */
942 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
943 {
944     switch (s->cc_op) {
945     case CC_OP_DYNAMIC:
946         gen_compute_eflags(s);
947         /* FALLTHRU */
948     case CC_OP_EFLAGS:
949     case CC_OP_ADCX:
950     case CC_OP_ADOX:
951     case CC_OP_ADCOX:
952         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
953                              .mask = CC_Z };
954     case CC_OP_CLR:
955         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
956     case CC_OP_POPCNT:
957         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
958                              .mask = -1 };
959     default:
960         {
961             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
962             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
963             return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
964         }
965     }
966 }
967 
968 /* Perform a conditional store into register 'reg' according to jump opcode
969    value 'b'.  In the fast case, T0 is guaranteed not to be used. */
970 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
971 {
972     int inv, jcc_op, cond;
973     MemOp size;
974     CCPrepare cc;
975     TCGv t0;
976 
977     inv = b & 1;
978     jcc_op = (b >> 1) & 7;
979 
980     switch (s->cc_op) {
981     case CC_OP_SUBB ... CC_OP_SUBQ:
982         /* We optimize relational operators for the cmp/jcc case.  */
983         size = s->cc_op - CC_OP_SUBB;
984         switch (jcc_op) {
985         case JCC_BE:
986             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
987             gen_extu(size, s->tmp4);
988             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
989             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
990                                .reg2 = t0, .mask = -1, .use_reg2 = true };
991             break;
992 
993         case JCC_L:
994             cond = TCG_COND_LT;
995             goto fast_jcc_l;
996         case JCC_LE:
997             cond = TCG_COND_LE;
998         fast_jcc_l:
999             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
1000             gen_exts(size, s->tmp4);
1001             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
1002             cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
1003                                .reg2 = t0, .mask = -1, .use_reg2 = true };
1004             break;
1005 
1006         default:
1007             goto slow_jcc;
1008         }
1009         break;
1010 
1011     default:
1012     slow_jcc:
1013         /* This actually generates good code for JC, JZ and JS.  */
1014         switch (jcc_op) {
1015         case JCC_O:
1016             cc = gen_prepare_eflags_o(s, reg);
1017             break;
1018         case JCC_B:
1019             cc = gen_prepare_eflags_c(s, reg);
1020             break;
1021         case JCC_Z:
1022             cc = gen_prepare_eflags_z(s, reg);
1023             break;
1024         case JCC_BE:
1025             gen_compute_eflags(s);
1026             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1027                                .mask = CC_Z | CC_C };
1028             break;
1029         case JCC_S:
1030             cc = gen_prepare_eflags_s(s, reg);
1031             break;
1032         case JCC_P:
1033             cc = gen_prepare_eflags_p(s, reg);
1034             break;
1035         case JCC_L:
1036             gen_compute_eflags(s);
1037             if (reg == cpu_cc_src) {
1038                 reg = s->tmp0;
1039             }
1040             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1041             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1042             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1043                                .mask = CC_S };
1044             break;
1045         default:
1046         case JCC_LE:
1047             gen_compute_eflags(s);
1048             if (reg == cpu_cc_src) {
1049                 reg = s->tmp0;
1050             }
1051             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1052             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1053             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1054                                .mask = CC_S | CC_Z };
1055             break;
1056         }
1057         break;
1058     }
1059 
1060     if (inv) {
1061         cc.cond = tcg_invert_cond(cc.cond);
1062     }
1063     return cc;
1064 }
1065 
1066 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1067 {
1068     CCPrepare cc = gen_prepare_cc(s, b, reg);
1069 
1070     if (cc.no_setcond) {
1071         if (cc.cond == TCG_COND_EQ) {
1072             tcg_gen_xori_tl(reg, cc.reg, 1);
1073         } else {
1074             tcg_gen_mov_tl(reg, cc.reg);
1075         }
1076         return;
1077     }
1078 
1079     if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1080         cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1081         tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1082         tcg_gen_andi_tl(reg, reg, 1);
1083         return;
1084     }
1085     if (cc.mask != -1) {
1086         tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1087         cc.reg = reg;
1088     }
1089     if (cc.use_reg2) {
1090         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1091     } else {
1092         tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1093     }
1094 }
1095 
1096 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1097 {
1098     gen_setcc1(s, JCC_B << 1, reg);
1099 }
1100 
1101 /* Generate a conditional jump to label 'l1' according to jump opcode
1102    value 'b'.  In the fast case, T0 is guaranteed not to be used. */
1103 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1104 {
1105     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1106 
1107     if (cc.mask != -1) {
1108         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1109         cc.reg = s->T0;
1110     }
1111     if (cc.use_reg2) {
1112         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1113     } else {
1114         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1115     }
1116 }
1117 
1118 /* Generate a conditional jump to label 'l1' according to jump opcode
1119    value 'b'.  In the fast case, T0 is guaranteed not to be used.
1120    A translation block must end soon.  */
1121 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1122 {
1123     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1124 
1125     gen_update_cc_op(s);
1126     if (cc.mask != -1) {
1127         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1128         cc.reg = s->T0;
1129     }
1130     set_cc_op(s, CC_OP_DYNAMIC);
1131     if (cc.use_reg2) {
1132         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1133     } else {
1134         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1135     }
1136 }
1137 
1138 /* XXX: does not work with gdbstub "ice" single step - not a
1139    serious problem */
1140 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1141 {
1142     TCGLabel *l1 = gen_new_label();
1143     TCGLabel *l2 = gen_new_label();
1144     gen_op_jnz_ecx(s, s->aflag, l1);
1145     gen_set_label(l2);
1146     gen_jmp_tb(s, next_eip, 1);
1147     gen_set_label(l1);
1148     return l2;
1149 }
1150 
1151 static inline void gen_stos(DisasContext *s, MemOp ot)
1152 {
1153     gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1154     gen_string_movl_A0_EDI(s);
1155     gen_op_st_v(s, ot, s->T0, s->A0);
1156     gen_op_movl_T0_Dshift(s, ot);
1157     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1158 }
1159 
1160 static inline void gen_lods(DisasContext *s, MemOp ot)
1161 {
1162     gen_string_movl_A0_ESI(s);
1163     gen_op_ld_v(s, ot, s->T0, s->A0);
1164     gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1165     gen_op_movl_T0_Dshift(s, ot);
1166     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1167 }
1168 
1169 static inline void gen_scas(DisasContext *s, MemOp ot)
1170 {
1171     gen_string_movl_A0_EDI(s);
1172     gen_op_ld_v(s, ot, s->T1, s->A0);
1173     gen_op(s, OP_CMPL, ot, R_EAX);
1174     gen_op_movl_T0_Dshift(s, ot);
1175     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1176 }
1177 
1178 static inline void gen_cmps(DisasContext *s, MemOp ot)
1179 {
1180     gen_string_movl_A0_EDI(s);
1181     gen_op_ld_v(s, ot, s->T1, s->A0);
1182     gen_string_movl_A0_ESI(s);
1183     gen_op(s, OP_CMPL, ot, OR_TMP0);
1184     gen_op_movl_T0_Dshift(s, ot);
1185     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1186     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1187 }
1188 
1189 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1190 {
1191     if (s->flags & HF_IOBPT_MASK) {
1192 #ifdef CONFIG_USER_ONLY
1193         /* user-mode cpu should not be in IOBPT mode */
1194         g_assert_not_reached();
1195 #else
1196         TCGv_i32 t_size = tcg_const_i32(1 << ot);
1197         TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1198 
1199         gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1200         tcg_temp_free_i32(t_size);
1201         tcg_temp_free(t_next);
1202 #endif /* CONFIG_USER_ONLY */
1203     }
1204 }
1205 
1206 static inline void gen_ins(DisasContext *s, MemOp ot)
1207 {
1208     gen_string_movl_A0_EDI(s);
1209     /* Note: we must do this dummy write first to be restartable in
1210        case of a page fault. */
1211     tcg_gen_movi_tl(s->T0, 0);
1212     gen_op_st_v(s, ot, s->T0, s->A0);
1213     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1214     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1215     gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1216     gen_op_st_v(s, ot, s->T0, s->A0);
1217     gen_op_movl_T0_Dshift(s, ot);
1218     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1219     gen_bpt_io(s, s->tmp2_i32, ot);
1220 }
1221 
1222 static inline void gen_outs(DisasContext *s, MemOp ot)
1223 {
1224     gen_string_movl_A0_ESI(s);
1225     gen_op_ld_v(s, ot, s->T0, s->A0);
1226 
1227     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1228     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1229     tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1230     gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1231     gen_op_movl_T0_Dshift(s, ot);
1232     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1233     gen_bpt_io(s, s->tmp2_i32, ot);
1234 }
1235 
1236 /* Same method as Valgrind: we generate jumps to the current or next
1237    instruction. */
1238 #define GEN_REPZ(op)                                                          \
1239 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
1240                                  target_ulong cur_eip, target_ulong next_eip) \
1241 {                                                                             \
1242     TCGLabel *l2;                                                             \
1243     gen_update_cc_op(s);                                                      \
1244     l2 = gen_jz_ecx_string(s, next_eip);                                      \
1245     gen_ ## op(s, ot);                                                        \
1246     gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
1247     /* a loop would cause two single-step exceptions if ECX == 1            \
1248        before the rep string instruction */                                  \
1249     if (s->repz_opt)                                                          \
1250         gen_op_jz_ecx(s, s->aflag, l2);                                       \
1251     gen_jmp(s, cur_eip);                                                      \
1252 }
1253 
1254 #define GEN_REPZ2(op)                                                         \
1255 static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
1256                                    target_ulong cur_eip,                      \
1257                                    target_ulong next_eip,                     \
1258                                    int nz)                                    \
1259 {                                                                             \
1260     TCGLabel *l2;                                                             \
1261     gen_update_cc_op(s);                                                      \
1262     l2 = gen_jz_ecx_string(s, next_eip);                                      \
1263     gen_ ## op(s, ot);                                                        \
1264     gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
1265     gen_update_cc_op(s);                                                      \
1266     gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
1267     if (s->repz_opt)                                                          \
1268         gen_op_jz_ecx(s, s->aflag, l2);                                       \
1269     gen_jmp(s, cur_eip);                                                      \
1270 }
1271 
1272 GEN_REPZ(movs)
1273 GEN_REPZ(stos)
1274 GEN_REPZ(lods)
1275 GEN_REPZ(ins)
1276 GEN_REPZ(outs)
1277 GEN_REPZ2(scas)
1278 GEN_REPZ2(cmps)
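
/*
 * For reference (editor's expansion sketch), GEN_REPZ(movs) defines
 * gen_repz_movs(), whose generated code behaves roughly like:
 *
 *     if (ECX == 0) goto next_insn;      // via gen_jz_ecx_string
 *     <movs body>; ECX -= 1;
 *     if (repz_opt && ECX == 0) goto next_insn;
 *     goto current_insn;                 // re-execute the rep instruction
 *
 * i.e. one string iteration per executed TB, with the loop closed by
 * jumping back to the instruction itself.
 */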
1279 
1280 static void gen_helper_fp_arith_ST0_FT0(int op)
1281 {
1282     switch (op) {
1283     case 0:
1284         gen_helper_fadd_ST0_FT0(cpu_env);
1285         break;
1286     case 1:
1287         gen_helper_fmul_ST0_FT0(cpu_env);
1288         break;
1289     case 2:
1290         gen_helper_fcom_ST0_FT0(cpu_env);
1291         break;
1292     case 3:
1293         gen_helper_fcom_ST0_FT0(cpu_env);
1294         break;
1295     case 4:
1296         gen_helper_fsub_ST0_FT0(cpu_env);
1297         break;
1298     case 5:
1299         gen_helper_fsubr_ST0_FT0(cpu_env);
1300         break;
1301     case 6:
1302         gen_helper_fdiv_ST0_FT0(cpu_env);
1303         break;
1304     case 7:
1305         gen_helper_fdivr_ST0_FT0(cpu_env);
1306         break;
1307     }
1308 }
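
/*
 * Editor's note: cases 2 and 3 (FCOM/FCOMP) intentionally emit the same
 * compare helper; the extra stack pop for the "p" form is generated by
 * the caller rather than here.
 */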
1309 
1310 /* NOTE: the "r" (reversed) op ordering is swapped relative to ST0_FT0 */
1311 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1312 {
1313     TCGv_i32 tmp = tcg_const_i32(opreg);
1314     switch (op) {
1315     case 0:
1316         gen_helper_fadd_STN_ST0(cpu_env, tmp);
1317         break;
1318     case 1:
1319         gen_helper_fmul_STN_ST0(cpu_env, tmp);
1320         break;
1321     case 4:
1322         gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1323         break;
1324     case 5:
1325         gen_helper_fsub_STN_ST0(cpu_env, tmp);
1326         break;
1327     case 6:
1328         gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1329         break;
1330     case 7:
1331         gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1332         break;
1333     }
1334 }
1335 
1336 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1337 {
1338     gen_update_cc_op(s);
1339     gen_jmp_im(s, cur_eip);
1340     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1341     s->base.is_jmp = DISAS_NORETURN;
1342 }
1343 
1344 /* Generate #UD for the current instruction.  The assumption here is that
1345    the instruction is known, but it isn't allowed in the current cpu mode.  */
1346 static void gen_illegal_opcode(DisasContext *s)
1347 {
1348     gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1349 }
1350 
1351 /* Generate #GP for the current instruction. */
1352 static void gen_exception_gpf(DisasContext *s)
1353 {
1354     gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
1355 }
1356 
1357 /* Check for cpl == 0; if not, raise #GP and return false. */
1358 static bool check_cpl0(DisasContext *s)
1359 {
1360     if (CPL(s) == 0) {
1361         return true;
1362     }
1363     gen_exception_gpf(s);
1364     return false;
1365 }
1366 
1367 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1368 static bool check_vm86_iopl(DisasContext *s)
1369 {
1370     if (!VM86(s) || IOPL(s) == 3) {
1371         return true;
1372     }
1373     gen_exception_gpf(s);
1374     return false;
1375 }
1376 
1377 /* Check for iopl allowing access; if not, raise #GP and return false. */
1378 static bool check_iopl(DisasContext *s)
1379 {
1380     if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1381         return true;
1382     }
1383     gen_exception_gpf(s);
1384     return false;
1385 }
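
/*
 * Example (editor's note): with IOPL=0 in protected mode, check_iopl()
 * permits only CPL 0, so e.g. CLI/STI at CPL 3 raise #GP; in vm86 mode
 * only IOPL=3 allows them.
 */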
1386 
1387 /* if d == OR_TMP0, it means memory operand (address in A0) */
1388 static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
1389 {
1390     if (d != OR_TMP0) {
1391         if (s1->prefix & PREFIX_LOCK) {
1392             /* Lock prefix when destination is not memory.  */
1393             gen_illegal_opcode(s1);
1394             return;
1395         }
1396         gen_op_mov_v_reg(s1, ot, s1->T0, d);
1397     } else if (!(s1->prefix & PREFIX_LOCK)) {
1398         gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1399     }
1400     switch (op) {
1401     case OP_ADCL:
1402         gen_compute_eflags_c(s1, s1->tmp4);
1403         if (s1->prefix & PREFIX_LOCK) {
1404             tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
1405             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1406                                         s1->mem_index, ot | MO_LE);
1407         } else {
1408             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1409             tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
1410             gen_op_st_rm_T0_A0(s1, ot, d);
1411         }
1412         gen_op_update3_cc(s1, s1->tmp4);
1413         set_cc_op(s1, CC_OP_ADCB + ot);
1414         break;
1415     case OP_SBBL:
1416         gen_compute_eflags_c(s1, s1->tmp4);
1417         if (s1->prefix & PREFIX_LOCK) {
1418             tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
1419             tcg_gen_neg_tl(s1->T0, s1->T0);
1420             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1421                                         s1->mem_index, ot | MO_LE);
1422         } else {
1423             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1424             tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
1425             gen_op_st_rm_T0_A0(s1, ot, d);
1426         }
1427         gen_op_update3_cc(s1, s1->tmp4);
1428         set_cc_op(s1, CC_OP_SBBB + ot);
1429         break;
1430     case OP_ADDL:
1431         if (s1->prefix & PREFIX_LOCK) {
1432             tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
1433                                         s1->mem_index, ot | MO_LE);
1434         } else {
1435             tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
1436             gen_op_st_rm_T0_A0(s1, ot, d);
1437         }
1438         gen_op_update2_cc(s1);
1439         set_cc_op(s1, CC_OP_ADDB + ot);
1440         break;
1441     case OP_SUBL:
1442         if (s1->prefix & PREFIX_LOCK) {
1443             tcg_gen_neg_tl(s1->T0, s1->T1);
1444             tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
1445                                         s1->mem_index, ot | MO_LE);
1446             tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
1447         } else {
1448             tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1449             tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
1450             gen_op_st_rm_T0_A0(s1, ot, d);
1451         }
1452         gen_op_update2_cc(s1);
1453         set_cc_op(s1, CC_OP_SUBB + ot);
1454         break;
1455     default:
1456     case OP_ANDL:
1457         if (s1->prefix & PREFIX_LOCK) {
1458             tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
1459                                         s1->mem_index, ot | MO_LE);
1460         } else {
1461             tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
1462             gen_op_st_rm_T0_A0(s1, ot, d);
1463         }
1464         gen_op_update1_cc(s1);
1465         set_cc_op(s1, CC_OP_LOGICB + ot);
1466         break;
1467     case OP_ORL:
1468         if (s1->prefix & PREFIX_LOCK) {
1469             tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
1470                                        s1->mem_index, ot | MO_LE);
1471         } else {
1472             tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
1473             gen_op_st_rm_T0_A0(s1, ot, d);
1474         }
1475         gen_op_update1_cc(s1);
1476         set_cc_op(s1, CC_OP_LOGICB + ot);
1477         break;
1478     case OP_XORL:
1479         if (s1->prefix & PREFIX_LOCK) {
1480             tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
1481                                         s1->mem_index, ot | MO_LE);
1482         } else {
1483             tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
1484             gen_op_st_rm_T0_A0(s1, ot, d);
1485         }
1486         gen_op_update1_cc(s1);
1487         set_cc_op(s1, CC_OP_LOGICB + ot);
1488         break;
1489     case OP_CMPL:
1490         tcg_gen_mov_tl(cpu_cc_src, s1->T1);
1491         tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
1492         tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
1493         set_cc_op(s1, CC_OP_SUBB + ot);
1494         break;
1495     }
1496 }
1497 
1498 /* if d == OR_TMP0, it means memory operand (address in A0) */
1499 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1500 {
1501     if (s1->prefix & PREFIX_LOCK) {
1502         if (d != OR_TMP0) {
1503             /* Lock prefix when destination is not memory */
1504             gen_illegal_opcode(s1);
1505             return;
1506         }
1507         tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1508         tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1509                                     s1->mem_index, ot | MO_LE);
1510     } else {
1511         if (d != OR_TMP0) {
1512             gen_op_mov_v_reg(s1, ot, s1->T0, d);
1513         } else {
1514             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1515         }
1516         tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1517         gen_op_st_rm_T0_A0(s1, ot, d);
1518     }
1519 
1520     gen_compute_eflags_c(s1, cpu_cc_src);
1521     tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1522     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1523 }
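
/*
 * Editor's note: INC/DEC leave CF untouched, so gen_inc() materializes
 * the current carry into cpu_cc_src above before switching CC_OP to
 * CC_OP_INCB/CC_OP_DECB, whose flag computation reads it back unchanged.
 */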
1524 
1525 static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
1526                             TCGv shm1, TCGv count, bool is_right)
1527 {
1528     TCGv_i32 z32, s32, oldop;
1529     TCGv z_tl;
1530 
1531     /* Store the results into the CC variables.  If we know that the
1532        variable must be dead, store unconditionally.  Otherwise we
1533        must take care not to disrupt the current contents.  */
1534     z_tl = tcg_const_tl(0);
1535     if (cc_op_live[s->cc_op] & USES_CC_DST) {
1536         tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
1537                            result, cpu_cc_dst);
1538     } else {
1539         tcg_gen_mov_tl(cpu_cc_dst, result);
1540     }
1541     if (cc_op_live[s->cc_op] & USES_CC_SRC) {
1542         tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
1543                            shm1, cpu_cc_src);
1544     } else {
1545         tcg_gen_mov_tl(cpu_cc_src, shm1);
1546     }
1547     tcg_temp_free(z_tl);
1548 
1549     /* Get the two potential CC_OP values into temporaries.  */
1550     tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1551     if (s->cc_op == CC_OP_DYNAMIC) {
1552         oldop = cpu_cc_op;
1553     } else {
1554         tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
1555         oldop = s->tmp3_i32;
1556     }
1557 
1558     /* Conditionally store the CC_OP value.  */
1559     z32 = tcg_const_i32(0);
1560     s32 = tcg_temp_new_i32();
1561     tcg_gen_trunc_tl_i32(s32, count);
1562     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
1563     tcg_temp_free_i32(z32);
1564     tcg_temp_free_i32(s32);
1565 
1566     /* The CC_OP value is no longer predictable.  */
1567     set_cc_op(s, CC_OP_DYNAMIC);
1568 }
1569 
1570 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1571                             int is_right, int is_arith)
1572 {
1573     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1574 
1575     /* load */
1576     if (op1 == OR_TMP0) {
1577         gen_op_ld_v(s, ot, s->T0, s->A0);
1578     } else {
1579         gen_op_mov_v_reg(s, ot, s->T0, op1);
1580     }
1581 
1582     tcg_gen_andi_tl(s->T1, s->T1, mask);
1583     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1584 
1585     if (is_right) {
1586         if (is_arith) {
1587             gen_exts(ot, s->T0);
1588             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1589             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1590         } else {
1591             gen_extu(ot, s->T0);
1592             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1593             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1594         }
1595     } else {
1596         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1597         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1598     }
1599 
1600     /* store */
1601     gen_op_st_rm_T0_A0(s, ot, op1);
1602 
1603     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1604 }
1605 
1606 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1607                             int is_right, int is_arith)
1608 {
1609     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1610 
1611     /* load */
1612     if (op1 == OR_TMP0)
1613         gen_op_ld_v(s, ot, s->T0, s->A0);
1614     else
1615         gen_op_mov_v_reg(s, ot, s->T0, op1);
1616 
1617     op2 &= mask;
1618     if (op2 != 0) {
1619         if (is_right) {
1620             if (is_arith) {
1621                 gen_exts(ot, s->T0);
1622                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1623                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1624             } else {
1625                 gen_extu(ot, s->T0);
1626                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1627                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1628             }
1629         } else {
1630             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1631             tcg_gen_shli_tl(s->T0, s->T0, op2);
1632         }
1633     }
1634 
1635     /* store */
1636     gen_op_st_rm_T0_A0(s, ot, op1);
1637 
1638     /* update eflags if the shift count is non-zero */
1639     if (op2 != 0) {
1640         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1641         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1642         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1643     }
1644 }
1645 
1646 static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
1647 {
1648     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1649     TCGv_i32 t0, t1;
1650 
1651     /* load */
1652     if (op1 == OR_TMP0) {
1653         gen_op_ld_v(s, ot, s->T0, s->A0);
1654     } else {
1655         gen_op_mov_v_reg(s, ot, s->T0, op1);
1656     }
1657 
1658     tcg_gen_andi_tl(s->T1, s->T1, mask);
1659 
1660     switch (ot) {
1661     case MO_8:
1662         /* Replicate the 8-bit input so that a 32-bit rotate works.  */
1663         tcg_gen_ext8u_tl(s->T0, s->T0);
1664         tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
1665         goto do_long;
1666     case MO_16:
1667         /* Replicate the 16-bit input so that a 32-bit rotate works.  */
1668         tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
1669         goto do_long;
1670     do_long:
1671 #ifdef TARGET_X86_64
1672     case MO_32:
1673         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1674         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
1675         if (is_right) {
1676             tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1677         } else {
1678             tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
1679         }
1680         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1681         break;
1682 #endif
1683     default:
1684         if (is_right) {
1685             tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
1686         } else {
1687             tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
1688         }
1689         break;
1690     }
1691 
1692     /* store */
1693     gen_op_st_rm_T0_A0(s, ot, op1);
1694 
1695     /* We'll need the flags computed into CC_SRC.  */
1696     gen_compute_eflags(s);
1697 
1698     /* The value that was "rotated out" is now present at the other end
1699        of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1700        since we've computed the flags into CC_SRC, these variables are
1701        currently dead.  */
1702     if (is_right) {
1703         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1704         tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1705         tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1706     } else {
1707         tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1708         tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1709     }
1710     tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1711     tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1712 
1713     /* Now conditionally store the new CC_OP value.  If the shift count
1714        is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
1715        Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
1716        exactly as we computed above.  */
1717     t0 = tcg_const_i32(0);
1718     t1 = tcg_temp_new_i32();
1719     tcg_gen_trunc_tl_i32(t1, s->T1);
1720     tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
1721     tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
1722     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
1723                         s->tmp2_i32, s->tmp3_i32);
1724     tcg_temp_free_i32(t0);
1725     tcg_temp_free_i32(t1);
1726 
1727     /* The CC_OP value is no longer predictable.  */
1728     set_cc_op(s, CC_OP_DYNAMIC);
1729 }
1730 
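/* Rotate a register or memory operand by an immediate count.  Unlike
   gen_rot_rm_T1, the zero-count case is known at translation time, so
   the flag update can be emitted (or skipped) statically.  */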
1731 static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1732                           int is_right)
1733 {
1734     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1735     int shift;
1736 
1737     /* load */
1738     if (op1 == OR_TMP0) {
1739         gen_op_ld_v(s, ot, s->T0, s->A0);
1740     } else {
1741         gen_op_mov_v_reg(s, ot, s->T0, op1);
1742     }
1743 
1744     op2 &= mask;
1745     if (op2 != 0) {
1746         switch (ot) {
1747 #ifdef TARGET_X86_64
1748         case MO_32:
1749             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
1750             if (is_right) {
1751                 tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
1752             } else {
1753                 tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
1754             }
1755             tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
1756             break;
1757 #endif
1758         default:
1759             if (is_right) {
1760                 tcg_gen_rotri_tl(s->T0, s->T0, op2);
1761             } else {
1762                 tcg_gen_rotli_tl(s->T0, s->T0, op2);
1763             }
1764             break;
1765         case MO_8:
1766             mask = 7;
1767             goto do_shifts;
1768         case MO_16:
1769             mask = 15;
1770         do_shifts:
1771             shift = op2 & mask;
1772             if (is_right) {
1773                 shift = mask + 1 - shift;
1774             }
1775             gen_extu(ot, s->T0);
1776             tcg_gen_shli_tl(s->tmp0, s->T0, shift);
1777             tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
1778             tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
1779             break;
1780         }
1781     }
1782 
1783     /* store */
1784     gen_op_st_rm_T0_A0(s, ot, op1);
1785 
1786     if (op2 != 0) {
1787         /* Compute the flags into CC_SRC.  */
1788         gen_compute_eflags(s);
1789 
1790         /* The value that was "rotated out" is now present at the other end
1791            of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
1792            since we've computed the flags into CC_SRC, these variables are
1793            currently dead.  */
1794         if (is_right) {
1795             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
1796             tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
1797             tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
1798         } else {
1799             tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
1800             tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
1801         }
1802         tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
1803         tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
1804         set_cc_op(s, CC_OP_ADCOX);
1805     }
1806 }
1807 
1808 /* XXX: add faster immediate = 1 case */
1809 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1810                            int is_right)
1811 {
1812     gen_compute_eflags(s);
1813     assert(s->cc_op == CC_OP_EFLAGS);
1814 
1815     /* load */
1816     if (op1 == OR_TMP0)
1817         gen_op_ld_v(s, ot, s->T0, s->A0);
1818     else
1819         gen_op_mov_v_reg(s, ot, s->T0, op1);
1820 
1821     if (is_right) {
1822         switch (ot) {
1823         case MO_8:
1824             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1825             break;
1826         case MO_16:
1827             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1828             break;
1829         case MO_32:
1830             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1831             break;
1832 #ifdef TARGET_X86_64
1833         case MO_64:
1834             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1835             break;
1836 #endif
1837         default:
1838             tcg_abort();
1839         }
1840     } else {
1841         switch (ot) {
1842         case MO_8:
1843             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1844             break;
1845         case MO_16:
1846             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1847             break;
1848         case MO_32:
1849             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1850             break;
1851 #ifdef TARGET_X86_64
1852         case MO_64:
1853             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1854             break;
1855 #endif
1856         default:
1857             tcg_abort();
1858         }
1859     }
1860     /* store */
1861     gen_op_st_rm_T0_A0(s, ot, op1);
1862 }
1863 
1864 /* XXX: add faster immediate case */
1865 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1866                              bool is_right, TCGv count_in)
1867 {
1868     target_ulong mask = (ot == MO_64 ? 63 : 31);
1869     TCGv count;
1870 
1871     /* load */
1872     if (op1 == OR_TMP0) {
1873         gen_op_ld_v(s, ot, s->T0, s->A0);
1874     } else {
1875         gen_op_mov_v_reg(s, ot, s->T0, op1);
1876     }
1877 
1878     count = tcg_temp_new();
1879     tcg_gen_andi_tl(count, count_in, mask);
1880 
1881     switch (ot) {
1882     case MO_16:
1883         /* Note: we implement the Intel behaviour for shift count > 16.
1884            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1885            portion by constructing it as a 32-bit value.  */
1886         if (is_right) {
1887             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1888             tcg_gen_mov_tl(s->T1, s->T0);
1889             tcg_gen_mov_tl(s->T0, s->tmp0);
1890         } else {
1891             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1892         }
1893         /*
1894          * If TARGET_X86_64 is defined then fall through into the MO_32
1895          * case, otherwise fall through to the default case.
1896          */
1897     case MO_32:
1898 #ifdef TARGET_X86_64
1899         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1900         tcg_gen_subi_tl(s->tmp0, count, 1);
1901         if (is_right) {
1902             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1903             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1904             tcg_gen_shr_i64(s->T0, s->T0, count);
1905         } else {
1906             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1907             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1908             tcg_gen_shl_i64(s->T0, s->T0, count);
1909             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1910             tcg_gen_shri_i64(s->T0, s->T0, 32);
1911         }
1912         break;
1913 #endif
1914     default:
1915         tcg_gen_subi_tl(s->tmp0, count, 1);
1916         if (is_right) {
1917             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1918 
1919             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1920             tcg_gen_shr_tl(s->T0, s->T0, count);
1921             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1922         } else {
1923             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1924             if (ot == MO_16) {
1925                 /* Only needed if count > 16, for Intel behaviour.  */
1926                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1927                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1928                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1929             }
1930 
1931             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1932             tcg_gen_shl_tl(s->T0, s->T0, count);
1933             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1934         }
1935         tcg_gen_movi_tl(s->tmp4, 0);
1936         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1937                            s->tmp4, s->T1);
1938         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1939         break;
1940     }
1941 
1942     /* store */
1943     gen_op_st_rm_T0_A0(s, ot, op1);
1944 
1945     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1946     tcg_temp_free(count);
1947 }
1948 
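/* Dispatch a shift or rotate with a variable count.  The count is
   loaded from register 's' into T1 unless the caller has already
   placed it there (s == OR_TMP1).  */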
1949 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1950 {
1951     if (s != OR_TMP1)
1952         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1953     switch(op) {
1954     case OP_ROL:
1955         gen_rot_rm_T1(s1, ot, d, 0);
1956         break;
1957     case OP_ROR:
1958         gen_rot_rm_T1(s1, ot, d, 1);
1959         break;
1960     case OP_SHL:
1961     case OP_SHL1:
1962         gen_shift_rm_T1(s1, ot, d, 0, 0);
1963         break;
1964     case OP_SHR:
1965         gen_shift_rm_T1(s1, ot, d, 1, 0);
1966         break;
1967     case OP_SAR:
1968         gen_shift_rm_T1(s1, ot, d, 1, 1);
1969         break;
1970     case OP_RCL:
1971         gen_rotc_rm_T1(s1, ot, d, 0);
1972         break;
1973     case OP_RCR:
1974         gen_rotc_rm_T1(s1, ot, d, 1);
1975         break;
1976     }
1977 }
1978 
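/* Dispatch a shift or rotate with an immediate count.  RCL and RCR
   have no immediate-count generator yet, so they fall back to the
   variable-count path through OR_TMP1.  */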
1979 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1980 {
1981     switch(op) {
1982     case OP_ROL:
1983         gen_rot_rm_im(s1, ot, d, c, 0);
1984         break;
1985     case OP_ROR:
1986         gen_rot_rm_im(s1, ot, d, c, 1);
1987         break;
1988     case OP_SHL:
1989     case OP_SHL1:
1990         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1991         break;
1992     case OP_SHR:
1993         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1994         break;
1995     case OP_SAR:
1996         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1997         break;
1998     default:
1999         /* currently not optimized */
2000         tcg_gen_movi_tl(s1->T1, c);
2001         gen_shift(s1, op, ot, d, OR_TMP1);
2002         break;
2003     }
2004 }
2005 
2006 #define X86_MAX_INSN_LENGTH 15
2007 
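/* Advance s->pc by NUM_BYTES and return its previous value.  If the
   instruction grows beyond X86_MAX_INSN_LENGTH bytes, translation is
   aborted via siglongjmp.  */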
2008 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2009 {
2010     uint64_t pc = s->pc;
2011 
2012     s->pc += num_bytes;
2013     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2014         /* If the instruction's 16th byte is on a different page than the 1st, a
2015          * page fault on the second page wins over the general protection fault
2016          * caused by the instruction being too long.
2017          * This can happen even if the operand is only one byte long!
2018          */
2019         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2020             volatile uint8_t unused =
2021                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2022             (void) unused;
2023         }
2024         siglongjmp(s->jmpbuf, 1);
2025     }
2026 
2027     return pc;
2028 }
2029 
2030 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2031 {
2032     return translator_ldub(env, advance_pc(env, s, 1));
2033 }
2034 
2035 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2036 {
2037     return translator_ldsw(env, advance_pc(env, s, 2));
2038 }
2039 
2040 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2041 {
2042     return translator_lduw(env, advance_pc(env, s, 2));
2043 }
2044 
2045 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2046 {
2047     return translator_ldl(env, advance_pc(env, s, 4));
2048 }
2049 
2050 #ifdef TARGET_X86_64
2051 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
2052 {
2053     return translator_ldq(env, advance_pc(env, s, 8));
2054 }
2055 #endif
2056 
2057 /* Decompose an address.  */
2058 
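/* In the decomposed result, base and index are register numbers, with
   -1 meaning "absent"; a base of -2 marks a RIP-relative address whose
   target has already been folded into disp.  */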
2059 typedef struct AddressParts {
2060     int def_seg;
2061     int base;
2062     int index;
2063     int scale;
2064     target_long disp;
2065 } AddressParts;
2066 
2067 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2068                                     int modrm)
2069 {
2070     int def_seg, base, index, scale, mod, rm;
2071     target_long disp;
2072     bool havesib;
2073 
2074     def_seg = R_DS;
2075     index = -1;
2076     scale = 0;
2077     disp = 0;
2078 
2079     mod = (modrm >> 6) & 3;
2080     rm = modrm & 7;
2081     base = rm | REX_B(s);
2082 
2083     if (mod == 3) {
2084         /* Normally filtered out earlier, but including this path
2085            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2086         goto done;
2087     }
2088 
2089     switch (s->aflag) {
2090     case MO_64:
2091     case MO_32:
2092         havesib = 0;
2093         if (rm == 4) {
2094             int code = x86_ldub_code(env, s);
2095             scale = (code >> 6) & 3;
2096             index = ((code >> 3) & 7) | REX_X(s);
2097             if (index == 4) {
2098                 index = -1;  /* no index */
2099             }
2100             base = (code & 7) | REX_B(s);
2101             havesib = 1;
2102         }
2103 
2104         switch (mod) {
2105         case 0:
2106             if ((base & 7) == 5) {
2107                 base = -1;
2108                 disp = (int32_t)x86_ldl_code(env, s);
2109                 if (CODE64(s) && !havesib) {
2110                     base = -2;
2111                     disp += s->pc + s->rip_offset;
2112                 }
2113             }
2114             break;
2115         case 1:
2116             disp = (int8_t)x86_ldub_code(env, s);
2117             break;
2118         default:
2119         case 2:
2120             disp = (int32_t)x86_ldl_code(env, s);
2121             break;
2122         }
2123 
2124         /* For correct popl handling with esp.  */
2125         if (base == R_ESP && s->popl_esp_hack) {
2126             disp += s->popl_esp_hack;
2127         }
2128         if (base == R_EBP || base == R_ESP) {
2129             def_seg = R_SS;
2130         }
2131         break;
2132 
2133     case MO_16:
2134         if (mod == 0) {
2135             if (rm == 6) {
2136                 base = -1;
2137                 disp = x86_lduw_code(env, s);
2138                 break;
2139             }
2140         } else if (mod == 1) {
2141             disp = (int8_t)x86_ldub_code(env, s);
2142         } else {
2143             disp = (int16_t)x86_lduw_code(env, s);
2144         }
2145 
2146         switch (rm) {
2147         case 0:
2148             base = R_EBX;
2149             index = R_ESI;
2150             break;
2151         case 1:
2152             base = R_EBX;
2153             index = R_EDI;
2154             break;
2155         case 2:
2156             base = R_EBP;
2157             index = R_ESI;
2158             def_seg = R_SS;
2159             break;
2160         case 3:
2161             base = R_EBP;
2162             index = R_EDI;
2163             def_seg = R_SS;
2164             break;
2165         case 4:
2166             base = R_ESI;
2167             break;
2168         case 5:
2169             base = R_EDI;
2170             break;
2171         case 6:
2172             base = R_EBP;
2173             def_seg = R_SS;
2174             break;
2175         default:
2176         case 7:
2177             base = R_EBX;
2178             break;
2179         }
2180         break;
2181 
2182     default:
2183         tcg_abort();
2184     }
2185 
2186  done:
2187     return (AddressParts){ def_seg, base, index, scale, disp };
2188 }
2189 
2190 /* Compute the address, with a minimum number of TCG ops.  */
2191 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2192 {
2193     TCGv ea = NULL;
2194 
2195     if (a.index >= 0) {
2196         if (a.scale == 0) {
2197             ea = cpu_regs[a.index];
2198         } else {
2199             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2200             ea = s->A0;
2201         }
2202         if (a.base >= 0) {
2203             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2204             ea = s->A0;
2205         }
2206     } else if (a.base >= 0) {
2207         ea = cpu_regs[a.base];
2208     }
2209     if (!ea) {
2210         tcg_gen_movi_tl(s->A0, a.disp);
2211         ea = s->A0;
2212     } else if (a.disp != 0) {
2213         tcg_gen_addi_tl(s->A0, ea, a.disp);
2214         ea = s->A0;
2215     }
2216 
2217     return ea;
2218 }
2219 
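/* Decode a modrm memory operand and leave the full linear address,
   including any segment base, in A0.  */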
2220 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2221 {
2222     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2223     TCGv ea = gen_lea_modrm_1(s, a);
2224     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2225 }
2226 
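/* Decode and discard a memory operand without generating any code,
   as needed for the multi-byte NOP family.  */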
2227 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2228 {
2229     (void)gen_lea_modrm_0(env, s, modrm);
2230 }
2231 
2232 /* Used for BNDCL, BNDCU, BNDCN.  */
2233 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2234                       TCGCond cond, TCGv_i64 bndv)
2235 {
2236     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2237 
2238     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2239     if (!CODE64(s)) {
2240         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2241     }
2242     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2243     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2244     gen_helper_bndck(cpu_env, s->tmp2_i32);
2245 }
2246 
2247 /* used for LEA and MOV AX, mem */
2248 static void gen_add_A0_ds_seg(DisasContext *s)
2249 {
2250     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2251 }
2252 
2253 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2254    OR_TMP0 */
2255 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2256                            MemOp ot, int reg, int is_store)
2257 {
2258     int mod, rm;
2259 
2260     mod = (modrm >> 6) & 3;
2261     rm = (modrm & 7) | REX_B(s);
2262     if (mod == 3) {
2263         if (is_store) {
2264             if (reg != OR_TMP0)
2265                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2266             gen_op_mov_reg_v(s, ot, rm, s->T0);
2267         } else {
2268             gen_op_mov_v_reg(s, ot, s->T0, rm);
2269             if (reg != OR_TMP0)
2270                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2271         }
2272     } else {
2273         gen_lea_modrm(env, s, modrm);
2274         if (is_store) {
2275             if (reg != OR_TMP0)
2276                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2277             gen_op_st_v(s, ot, s->T0, s->A0);
2278         } else {
2279             gen_op_ld_v(s, ot, s->T0, s->A0);
2280             if (reg != OR_TMP0)
2281                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2282         }
2283     }
2284 }
2285 
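/* Fetch an immediate operand of size OT.  Note that even MO_64
   operands are fetched as 32 bits here; widening is up to the
   caller.  */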
2286 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2287 {
2288     uint32_t ret;
2289 
2290     switch (ot) {
2291     case MO_8:
2292         ret = x86_ldub_code(env, s);
2293         break;
2294     case MO_16:
2295         ret = x86_lduw_code(env, s);
2296         break;
2297     case MO_32:
2298 #ifdef TARGET_X86_64
2299     case MO_64:
2300 #endif
2301         ret = x86_ldl_code(env, s);
2302         break;
2303     default:
2304         tcg_abort();
2305     }
2306     return ret;
2307 }
2308 
2309 static inline int insn_const_size(MemOp ot)
2310 {
2311     if (ot <= MO_32) {
2312         return 1 << ot;
2313     } else {
2314         return 4;
2315     }
2316 }
2317 
2318 static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
2319 {
2320 #ifndef CONFIG_USER_ONLY
2321     return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
2322            (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
2323 #else
2324     return true;
2325 #endif
2326 }
2327 
2328 static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2329 {
2330     target_ulong pc = s->cs_base + eip;
2331 
2332     if (use_goto_tb(s, pc)) {
2333         /* jump to same page: we can use a direct jump */
2334         tcg_gen_goto_tb(tb_num);
2335         gen_jmp_im(s, eip);
2336         tcg_gen_exit_tb(s->base.tb, tb_num);
2337         s->base.is_jmp = DISAS_NORETURN;
2338     } else {
2339         /* jump to another page */
2340         gen_jmp_im(s, eip);
2341         gen_jr(s, s->tmp0);
2342     }
2343 }
2344 
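/* Conditional jump to VAL, falling through to NEXT_EIP.  Direct block
   chaining is used when the TB permits it.  */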
2345 static inline void gen_jcc(DisasContext *s, int b,
2346                            target_ulong val, target_ulong next_eip)
2347 {
2348     TCGLabel *l1, *l2;
2349 
2350     if (s->jmp_opt) {
2351         l1 = gen_new_label();
2352         gen_jcc1(s, b, l1);
2353 
2354         gen_goto_tb(s, 0, next_eip);
2355 
2356         gen_set_label(l1);
2357         gen_goto_tb(s, 1, val);
2358     } else {
2359         l1 = gen_new_label();
2360         l2 = gen_new_label();
2361         gen_jcc1(s, b, l1);
2362 
2363         gen_jmp_im(s, next_eip);
2364         tcg_gen_br(l2);
2365 
2366         gen_set_label(l1);
2367         gen_jmp_im(s, val);
2368         gen_set_label(l2);
2369         gen_eob(s);
2370     }
2371 }
2372 
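/* CMOVcc: load the r/m operand and conditionally move it into REG.  */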
2373 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2374                         int modrm, int reg)
2375 {
2376     CCPrepare cc;
2377 
2378     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2379 
2380     cc = gen_prepare_cc(s, b, s->T1);
2381     if (cc.mask != -1) {
2382         TCGv t0 = tcg_temp_new();
2383         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2384         cc.reg = t0;
2385     }
2386     if (!cc.use_reg2) {
2387         cc.reg2 = tcg_const_tl(cc.imm);
2388     }
2389 
2390     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2391                        s->T0, cpu_regs[reg]);
2392     gen_op_mov_reg_v(s, ot, reg, s->T0);
2393 
2394     if (cc.mask != -1) {
2395         tcg_temp_free(cc.reg);
2396     }
2397     if (!cc.use_reg2) {
2398         tcg_temp_free(cc.reg2);
2399     }
2400 }
2401 
2402 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2403 {
2404     tcg_gen_ld32u_tl(s->T0, cpu_env,
2405                      offsetof(CPUX86State,segs[seg_reg].selector));
2406 }
2407 
2408 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2409 {
2410     tcg_gen_ext16u_tl(s->T0, s->T0);
2411     tcg_gen_st32_tl(s->T0, cpu_env,
2412                     offsetof(CPUX86State,segs[seg_reg].selector));
2413     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2414 }
2415 
2416 /* Move T0 to seg_reg and check whether the CPU state may change.
2417    Never call this function with seg_reg == R_CS.  */
2418 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2419 {
2420     if (PE(s) && !VM86(s)) {
2421         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2422         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2423         /* abort translation because the addseg value may change or
2424            because ss32 may change. For R_SS, translation must always
2425            stop as a special handling must be done to disable hardware
2426            interrupts for the next instruction */
2427         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2428             s->base.is_jmp = DISAS_TOO_MANY;
2429         }
2430     } else {
2431         gen_op_movl_seg_T0_vm(s, seg_reg);
2432         if (seg_reg == R_SS) {
2433             s->base.is_jmp = DISAS_TOO_MANY;
2434         }
2435     }
2436 }
2437 
2438 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2439 {
2440     /* no SVM activated; fast case */
2441     if (likely(!GUEST(s))) {
2442         return;
2443     }
2444     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2445 }
2446 
2447 static inline void gen_stack_update(DisasContext *s, int addend)
2448 {
2449     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2450 }
2451 
2452 /* Generate a push. It depends on ss32, addseg and dflag.  */
2453 static void gen_push_v(DisasContext *s, TCGv val)
2454 {
2455     MemOp d_ot = mo_pushpop(s, s->dflag);
2456     MemOp a_ot = mo_stacksize(s);
2457     int size = 1 << d_ot;
2458     TCGv new_esp = s->A0;
2459 
2460     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2461 
2462     if (!CODE64(s)) {
2463         if (ADDSEG(s)) {
2464             new_esp = s->tmp4;
2465             tcg_gen_mov_tl(new_esp, s->A0);
2466         }
2467         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2468     }
2469 
2470     gen_op_st_v(s, d_ot, val, s->A0);
2471     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2472 }
2473 
2474 /* A two-step pop is necessary for precise exceptions.  */
2475 static MemOp gen_pop_T0(DisasContext *s)
2476 {
2477     MemOp d_ot = mo_pushpop(s, s->dflag);
2478 
2479     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2480     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2481 
2482     return d_ot;
2483 }
2484 
2485 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2486 {
2487     gen_stack_update(s, 1 << ot);
2488 }
2489 
2490 static inline void gen_stack_A0(DisasContext *s)
2491 {
2492     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2493 }
2494 
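/* PUSHA/PUSHAD.  The stored ESP is its value from before the
   instruction, since the register is only updated once all eight
   stores are done.  */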
2495 static void gen_pusha(DisasContext *s)
2496 {
2497     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2498     MemOp d_ot = s->dflag;
2499     int size = 1 << d_ot;
2500     int i;
2501 
2502     for (i = 0; i < 8; i++) {
2503         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2504         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2505         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2506     }
2507 
2508     gen_stack_update(s, -8 * size);
2509 }
2510 
2511 static void gen_popa(DisasContext *s)
2512 {
2513     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2514     MemOp d_ot = s->dflag;
2515     int size = 1 << d_ot;
2516     int i;
2517 
2518     for (i = 0; i < 8; i++) {
2519         /* ESP is not reloaded */
2520         if (7 - i == R_ESP) {
2521             continue;
2522         }
2523         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2524         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2525         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2526         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2527     }
2528 
2529     gen_stack_update(s, 8 * size);
2530 }
2531 
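/* ENTER: push EBP, copy LEVEL-1 saved frame pointers from the old
   frame, push the new frame pointer, then reserve ESP_ADDEND bytes
   of locals.  */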
2532 static void gen_enter(DisasContext *s, int esp_addend, int level)
2533 {
2534     MemOp d_ot = mo_pushpop(s, s->dflag);
2535     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2536     int size = 1 << d_ot;
2537 
2538     /* Push BP; compute FrameTemp into T1.  */
2539     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2540     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2541     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2542 
2543     level &= 31;
2544     if (level != 0) {
2545         int i;
2546 
2547         /* Copy level-1 pointers from the previous frame.  */
2548         for (i = 1; i < level; ++i) {
2549             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2550             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2551             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2552 
2553             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2554             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2555             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2556         }
2557 
2558         /* Push the current FrameTemp as the last level.  */
2559         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2560         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2561         gen_op_st_v(s, d_ot, s->T1, s->A0);
2562     }
2563 
2564     /* Copy the FrameTemp value to EBP.  */
2565     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2566 
2567     /* Compute the final value of ESP.  */
2568     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2569     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2570 }
2571 
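/* LEAVE: equivalent to "mov esp, ebp; pop ebp".  */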
2572 static void gen_leave(DisasContext *s)
2573 {
2574     MemOp d_ot = mo_pushpop(s, s->dflag);
2575     MemOp a_ot = mo_stacksize(s);
2576 
2577     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2578     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2579 
2580     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2581 
2582     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2583     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2584 }
2585 
2586 /* Similarly, except that the assumption here is that we don't decode
2587    the instruction at all -- either a missing opcode, an unimplemented
2588    feature, or just a bogus instruction stream.  */
2589 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2590 {
2591     gen_illegal_opcode(s);
2592 
2593     if (qemu_loglevel_mask(LOG_UNIMP)) {
2594         FILE *logfile = qemu_log_lock();
2595         target_ulong pc = s->pc_start, end = s->pc;
2596 
2597         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2598         for (; pc < end; ++pc) {
2599             qemu_log(" %02x", cpu_ldub_code(env, pc));
2600         }
2601         qemu_log("\n");
2602         qemu_log_unlock(logfile);
2603     }
2604 }
2605 
2606 /* an interrupt is different from an exception because of the
2607    privilege checks */
2608 static void gen_interrupt(DisasContext *s, int intno,
2609                           target_ulong cur_eip, target_ulong next_eip)
2610 {
2611     gen_update_cc_op(s);
2612     gen_jmp_im(s, cur_eip);
2613     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2614                                tcg_const_i32(next_eip - cur_eip));
2615     s->base.is_jmp = DISAS_NORETURN;
2616 }
2617 
2618 static void gen_debug(DisasContext *s)
2619 {
2620     gen_update_cc_op(s);
2621     gen_jmp_im(s, s->base.pc_next - s->cs_base);
2622     gen_helper_debug(cpu_env);
2623     s->base.is_jmp = DISAS_NORETURN;
2624 }
2625 
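/* Set bits in env->hflags, both in the generated code and in the
   translator's cached copy, skipping the store when the bits are
   already known to be set.  gen_reset_hflag below is the mirror
   operation.  */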
2626 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2627 {
2628     if ((s->flags & mask) == 0) {
2629         TCGv_i32 t = tcg_temp_new_i32();
2630         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2631         tcg_gen_ori_i32(t, t, mask);
2632         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2633         tcg_temp_free_i32(t);
2634         s->flags |= mask;
2635     }
2636 }
2637 
2638 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2639 {
2640     if (s->flags & mask) {
2641         TCGv_i32 t = tcg_temp_new_i32();
2642         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2643         tcg_gen_andi_i32(t, t, ~mask);
2644         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2645         tcg_temp_free_i32(t);
2646         s->flags &= ~mask;
2647     }
2648 }
2649 
2650 /* Clear BND registers during legacy branches.  */
2651 static void gen_bnd_jmp(DisasContext *s)
2652 {
2653     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2654        and if the BNDREGs are known to be in use (non-zero) already.
2655        The helper itself will check BNDPRESERVE at runtime.  */
2656     if ((s->prefix & PREFIX_REPNZ) == 0
2657         && (s->flags & HF_MPX_EN_MASK) != 0
2658         && (s->flags & HF_MPX_IU_MASK) != 0) {
2659         gen_helper_bnd_jmp(cpu_env);
2660     }
2661 }
2662 
2663 /* Generate an end of block. Trace exception is also generated if needed.
2664    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2665    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2666    S->TF.  This is used by the syscall/sysret insns.  */
2667 static void
2668 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2669 {
2670     gen_update_cc_op(s);
2671 
2672     /* If several instructions disable interrupts, only the first does it.  */
2673     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2674         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2675     } else {
2676         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2677     }
2678 
2679     if (s->base.tb->flags & HF_RF_MASK) {
2680         gen_helper_reset_rf(cpu_env);
2681     }
2682     if (s->base.singlestep_enabled) {
2683         gen_helper_debug(cpu_env);
2684     } else if (recheck_tf) {
2685         gen_helper_rechecking_single_step(cpu_env);
2686         tcg_gen_exit_tb(NULL, 0);
2687     } else if (s->flags & HF_TF_MASK) {
2688         gen_helper_single_step(cpu_env);
2689     } else if (jr) {
2690         tcg_gen_lookup_and_goto_ptr();
2691     } else {
2692         tcg_gen_exit_tb(NULL, 0);
2693     }
2694     s->base.is_jmp = DISAS_NORETURN;
2695 }
2696 
2697 static inline void
2698 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2699 {
2700     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2701 }
2702 
2703 /* End of block.
2704    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2705 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2706 {
2707     gen_eob_worker(s, inhibit, false);
2708 }
2709 
2710 /* End of block, resetting the inhibit irq flag.  */
2711 static void gen_eob(DisasContext *s)
2712 {
2713     gen_eob_worker(s, false, false);
2714 }
2715 
2716 /* Jump to register */
2717 static void gen_jr(DisasContext *s, TCGv dest)
2718 {
2719     do_gen_eob_worker(s, false, false, true);
2720 }
2721 
2722 /* Generate a jump to eip.  No segment change must happen beforehand,
2723    as a direct jump to the next block may occur.  */
2724 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2725 {
2726     gen_update_cc_op(s);
2727     set_cc_op(s, CC_OP_DYNAMIC);
2728     if (s->jmp_opt) {
2729         gen_goto_tb(s, tb_num, eip);
2730     } else {
2731         gen_jmp_im(s, eip);
2732         gen_eob(s);
2733     }
2734 }
2735 
2736 static void gen_jmp(DisasContext *s, target_ulong eip)
2737 {
2738     gen_jmp_tb(s, eip, 0);
2739 }
2740 
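/* Helpers to move 64-bit and 128-bit vector values between guest
   memory at A0 and fields of CPUX86State.  */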
2741 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2742 {
2743     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2744     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2745 }
2746 
2747 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2748 {
2749     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2750     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
2751 }
2752 
2753 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2754 {
2755     int mem_index = s->mem_index;
2756     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2758     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2759     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2760     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2761 }
2762 
2763 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2764 {
2765     int mem_index = s->mem_index;
2766     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2767     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
2768     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2769     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2770     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
2771 }
2772 
2773 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2774 {
2775     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2776     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2777     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2778     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2779 }
2780 
2781 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2782 {
2783     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2784     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2785 }
2786 
2787 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2788 {
2789     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2790     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2791 }
2792 
2793 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2794 {
2795     tcg_gen_movi_i64(s->tmp1_i64, 0);
2796     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2797 }
2798 
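/* Prototypes for SSE/MMX helpers.  The suffix encodes the signature:
   a leading '0', 'i' or 'l' gives the return type (void, i32, i64),
   then 'e' = env pointer, 'p' = register pointer, 'i' = i32,
   'l' = i64, 't' = target-sized value.  */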
2799 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2800 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2801 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2802 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2803 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2804 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2805                                TCGv_i32 val);
2806 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2807 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2808                                TCGv val);
2809 
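/* Sentinels for the dispatch tables below: SSE_SPECIAL marks opcodes
   handled by dedicated code in gen_sse(), while SSE_DUMMY is a
   non-NULL placeholder for entries (femms, 3DNow!, emms) whose real
   handling bypasses the table.  */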
2810 #define SSE_SPECIAL ((void *)1)
2811 #define SSE_DUMMY ((void *)2)
2812 
2813 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
2814 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
2815                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2816 
2817 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2818     /* 3DNow! extensions */
2819     [0x0e] = { SSE_DUMMY }, /* femms */
2820     [0x0f] = { SSE_DUMMY }, /* pf... */
2821     /* pure SSE operations */
2822     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2823     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2824     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2825     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2826     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2827     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2828     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2829     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2830 
2831     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2832     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2833     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2834     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2835     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2836     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2837     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2838     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2839     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2840     [0x51] = SSE_FOP(sqrt),
2841     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2842     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2843     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2844     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2845     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2846     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2847     [0x58] = SSE_FOP(add),
2848     [0x59] = SSE_FOP(mul),
2849     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2850                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2851     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2852     [0x5c] = SSE_FOP(sub),
2853     [0x5d] = SSE_FOP(min),
2854     [0x5e] = SSE_FOP(div),
2855     [0x5f] = SSE_FOP(max),
2856 
2857     [0xc2] = SSE_FOP(cmpeq),
2858     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2859                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2860 
2861     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2862     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2863     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2864 
2865     /* MMX ops and their SSE extensions */
2866     [0x60] = MMX_OP2(punpcklbw),
2867     [0x61] = MMX_OP2(punpcklwd),
2868     [0x62] = MMX_OP2(punpckldq),
2869     [0x63] = MMX_OP2(packsswb),
2870     [0x64] = MMX_OP2(pcmpgtb),
2871     [0x65] = MMX_OP2(pcmpgtw),
2872     [0x66] = MMX_OP2(pcmpgtl),
2873     [0x67] = MMX_OP2(packuswb),
2874     [0x68] = MMX_OP2(punpckhbw),
2875     [0x69] = MMX_OP2(punpckhwd),
2876     [0x6a] = MMX_OP2(punpckhdq),
2877     [0x6b] = MMX_OP2(packssdw),
2878     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2879     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2880     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2881     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2882     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2883                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2884                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2885                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2886     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2887     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2888     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2889     [0x74] = MMX_OP2(pcmpeqb),
2890     [0x75] = MMX_OP2(pcmpeqw),
2891     [0x76] = MMX_OP2(pcmpeql),
2892     [0x77] = { SSE_DUMMY }, /* emms */
2893     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2894     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2895     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2896     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2897     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, movq */
2898     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2899     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2900     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2901     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2902     [0xd1] = MMX_OP2(psrlw),
2903     [0xd2] = MMX_OP2(psrld),
2904     [0xd3] = MMX_OP2(psrlq),
2905     [0xd4] = MMX_OP2(paddq),
2906     [0xd5] = MMX_OP2(pmullw),
2907     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movq2dq, movdq2q */
2908     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2909     [0xd8] = MMX_OP2(psubusb),
2910     [0xd9] = MMX_OP2(psubusw),
2911     [0xda] = MMX_OP2(pminub),
2912     [0xdb] = MMX_OP2(pand),
2913     [0xdc] = MMX_OP2(paddusb),
2914     [0xdd] = MMX_OP2(paddusw),
2915     [0xde] = MMX_OP2(pmaxub),
2916     [0xdf] = MMX_OP2(pandn),
2917     [0xe0] = MMX_OP2(pavgb),
2918     [0xe1] = MMX_OP2(psraw),
2919     [0xe2] = MMX_OP2(psrad),
2920     [0xe3] = MMX_OP2(pavgw),
2921     [0xe4] = MMX_OP2(pmulhuw),
2922     [0xe5] = MMX_OP2(pmulhw),
2923     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2924     [0xe7] = { SSE_SPECIAL, SSE_SPECIAL },  /* movntq, movntdq */
2925     [0xe8] = MMX_OP2(psubsb),
2926     [0xe9] = MMX_OP2(psubsw),
2927     [0xea] = MMX_OP2(pminsw),
2928     [0xeb] = MMX_OP2(por),
2929     [0xec] = MMX_OP2(paddsb),
2930     [0xed] = MMX_OP2(paddsw),
2931     [0xee] = MMX_OP2(pmaxsw),
2932     [0xef] = MMX_OP2(pxor),
2933     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2934     [0xf1] = MMX_OP2(psllw),
2935     [0xf2] = MMX_OP2(pslld),
2936     [0xf3] = MMX_OP2(psllq),
2937     [0xf4] = MMX_OP2(pmuludq),
2938     [0xf5] = MMX_OP2(pmaddwd),
2939     [0xf6] = MMX_OP2(psadbw),
2940     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2941                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2942     [0xf8] = MMX_OP2(psubb),
2943     [0xf9] = MMX_OP2(psubw),
2944     [0xfa] = MMX_OP2(psubl),
2945     [0xfb] = MMX_OP2(psubq),
2946     [0xfc] = MMX_OP2(paddb),
2947     [0xfd] = MMX_OP2(paddw),
2948     [0xfe] = MMX_OP2(paddl),
2949 };
2950 
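/* Shift-by-immediate group (0F 71/72/73), indexed by
   8 * (opcode - 0x71) + the modrm reg field; the second index selects
   the MMX or XMM helper.  */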
2951 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2952     [0 + 2] = MMX_OP2(psrlw),
2953     [0 + 4] = MMX_OP2(psraw),
2954     [0 + 6] = MMX_OP2(psllw),
2955     [8 + 2] = MMX_OP2(psrld),
2956     [8 + 4] = MMX_OP2(psrad),
2957     [8 + 6] = MMX_OP2(pslld),
2958     [16 + 2] = MMX_OP2(psrlq),
2959     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2960     [16 + 6] = MMX_OP2(psllq),
2961     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2962 };
2963 
2964 static const SSEFunc_0_epi sse_op_table3ai[] = {
2965     gen_helper_cvtsi2ss,
2966     gen_helper_cvtsi2sd
2967 };
2968 
2969 #ifdef TARGET_X86_64
2970 static const SSEFunc_0_epl sse_op_table3aq[] = {
2971     gen_helper_cvtsq2ss,
2972     gen_helper_cvtsq2sd
2973 };
2974 #endif
2975 
2976 static const SSEFunc_i_ep sse_op_table3bi[] = {
2977     gen_helper_cvttss2si,
2978     gen_helper_cvtss2si,
2979     gen_helper_cvttsd2si,
2980     gen_helper_cvtsd2si
2981 };
2982 
2983 #ifdef TARGET_X86_64
2984 static const SSEFunc_l_ep sse_op_table3bq[] = {
2985     gen_helper_cvttss2sq,
2986     gen_helper_cvtss2sq,
2987     gen_helper_cvttsd2sq,
2988     gen_helper_cvtsd2sq
2989 };
2990 #endif
2991 
2992 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2993     SSE_FOP(cmpeq),
2994     SSE_FOP(cmplt),
2995     SSE_FOP(cmple),
2996     SSE_FOP(cmpunord),
2997     SSE_FOP(cmpneq),
2998     SSE_FOP(cmpnlt),
2999     SSE_FOP(cmpnle),
3000     SSE_FOP(cmpord),
3001 };
3002 
3003 static const SSEFunc_0_epp sse_op_table5[256] = {
3004     [0x0c] = gen_helper_pi2fw,
3005     [0x0d] = gen_helper_pi2fd,
3006     [0x1c] = gen_helper_pf2iw,
3007     [0x1d] = gen_helper_pf2id,
3008     [0x8a] = gen_helper_pfnacc,
3009     [0x8e] = gen_helper_pfpnacc,
3010     [0x90] = gen_helper_pfcmpge,
3011     [0x94] = gen_helper_pfmin,
3012     [0x96] = gen_helper_pfrcp,
3013     [0x97] = gen_helper_pfrsqrt,
3014     [0x9a] = gen_helper_pfsub,
3015     [0x9e] = gen_helper_pfadd,
3016     [0xa0] = gen_helper_pfcmpgt,
3017     [0xa4] = gen_helper_pfmax,
3018     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
3019     [0xa7] = gen_helper_movq, /* pfrsqit1 */
3020     [0xaa] = gen_helper_pfsubr,
3021     [0xae] = gen_helper_pfacc,
3022     [0xb0] = gen_helper_pfcmpeq,
3023     [0xb4] = gen_helper_pfmul,
3024     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3025     [0xb7] = gen_helper_pmulhrw_mmx,
3026     [0xbb] = gen_helper_pswapd,
3027     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3028 };
3029 
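/* Table entries that also carry the CPUID feature bit which must be
   present for the instruction to be accepted.  */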
3030 struct SSEOpHelper_epp {
3031     SSEFunc_0_epp op[2];
3032     uint32_t ext_mask;
3033 };
3034 
3035 struct SSEOpHelper_eppi {
3036     SSEFunc_0_eppi op[2];
3037     uint32_t ext_mask;
3038 };
3039 
3040 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
3041 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
3042 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
3043 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
3044 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
3045         CPUID_EXT_PCLMULQDQ }
3046 #define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3047 
3048 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3049     [0x00] = SSSE3_OP(pshufb),
3050     [0x01] = SSSE3_OP(phaddw),
3051     [0x02] = SSSE3_OP(phaddd),
3052     [0x03] = SSSE3_OP(phaddsw),
3053     [0x04] = SSSE3_OP(pmaddubsw),
3054     [0x05] = SSSE3_OP(phsubw),
3055     [0x06] = SSSE3_OP(phsubd),
3056     [0x07] = SSSE3_OP(phsubsw),
3057     [0x08] = SSSE3_OP(psignb),
3058     [0x09] = SSSE3_OP(psignw),
3059     [0x0a] = SSSE3_OP(psignd),
3060     [0x0b] = SSSE3_OP(pmulhrsw),
3061     [0x10] = SSE41_OP(pblendvb),
3062     [0x14] = SSE41_OP(blendvps),
3063     [0x15] = SSE41_OP(blendvpd),
3064     [0x17] = SSE41_OP(ptest),
3065     [0x1c] = SSSE3_OP(pabsb),
3066     [0x1d] = SSSE3_OP(pabsw),
3067     [0x1e] = SSSE3_OP(pabsd),
3068     [0x20] = SSE41_OP(pmovsxbw),
3069     [0x21] = SSE41_OP(pmovsxbd),
3070     [0x22] = SSE41_OP(pmovsxbq),
3071     [0x23] = SSE41_OP(pmovsxwd),
3072     [0x24] = SSE41_OP(pmovsxwq),
3073     [0x25] = SSE41_OP(pmovsxdq),
3074     [0x28] = SSE41_OP(pmuldq),
3075     [0x29] = SSE41_OP(pcmpeqq),
3076     [0x2a] = SSE41_SPECIAL, /* movntdqa */
3077     [0x2b] = SSE41_OP(packusdw),
3078     [0x30] = SSE41_OP(pmovzxbw),
3079     [0x31] = SSE41_OP(pmovzxbd),
3080     [0x32] = SSE41_OP(pmovzxbq),
3081     [0x33] = SSE41_OP(pmovzxwd),
3082     [0x34] = SSE41_OP(pmovzxwq),
3083     [0x35] = SSE41_OP(pmovzxdq),
3084     [0x37] = SSE42_OP(pcmpgtq),
3085     [0x38] = SSE41_OP(pminsb),
3086     [0x39] = SSE41_OP(pminsd),
3087     [0x3a] = SSE41_OP(pminuw),
3088     [0x3b] = SSE41_OP(pminud),
3089     [0x3c] = SSE41_OP(pmaxsb),
3090     [0x3d] = SSE41_OP(pmaxsd),
3091     [0x3e] = SSE41_OP(pmaxuw),
3092     [0x3f] = SSE41_OP(pmaxud),
3093     [0x40] = SSE41_OP(pmulld),
3094     [0x41] = SSE41_OP(phminposuw),
3095     [0xdb] = AESNI_OP(aesimc),
3096     [0xdc] = AESNI_OP(aesenc),
3097     [0xdd] = AESNI_OP(aesenclast),
3098     [0xde] = AESNI_OP(aesdec),
3099     [0xdf] = AESNI_OP(aesdeclast),
3100 };
3101 
3102 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3103     [0x08] = SSE41_OP(roundps),
3104     [0x09] = SSE41_OP(roundpd),
3105     [0x0a] = SSE41_OP(roundss),
3106     [0x0b] = SSE41_OP(roundsd),
3107     [0x0c] = SSE41_OP(blendps),
3108     [0x0d] = SSE41_OP(blendpd),
3109     [0x0e] = SSE41_OP(pblendw),
3110     [0x0f] = SSSE3_OP(palignr),
3111     [0x14] = SSE41_SPECIAL, /* pextrb */
3112     [0x15] = SSE41_SPECIAL, /* pextrw */
3113     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3114     [0x17] = SSE41_SPECIAL, /* extractps */
3115     [0x20] = SSE41_SPECIAL, /* pinsrb */
3116     [0x21] = SSE41_SPECIAL, /* insertps */
3117     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3118     [0x40] = SSE41_OP(dpps),
3119     [0x41] = SSE41_OP(dppd),
3120     [0x42] = SSE41_OP(mpsadbw),
3121     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3122     [0x60] = SSE42_OP(pcmpestrm),
3123     [0x61] = SSE42_OP(pcmpestri),
3124     [0x62] = SSE42_OP(pcmpistrm),
3125     [0x63] = SSE42_OP(pcmpistri),
3126     [0xdf] = AESNI_OP(aeskeygenassist),
3127 };
3128 
3129 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3130                     target_ulong pc_start)
3131 {
3132     int b1, op1_offset, op2_offset, is_xmm, val;
3133     int modrm, mod, rm, reg;
3134     SSEFunc_0_epp sse_fn_epp;
3135     SSEFunc_0_eppi sse_fn_eppi;
3136     SSEFunc_0_ppi sse_fn_ppi;
3137     SSEFunc_0_eppt sse_fn_eppt;
3138     MemOp ot;
3139 
3140     b &= 0xff;
3141     if (s->prefix & PREFIX_DATA)
3142         b1 = 1;
3143     else if (s->prefix & PREFIX_REPZ)
3144         b1 = 2;
3145     else if (s->prefix & PREFIX_REPNZ)
3146         b1 = 3;
3147     else
3148         b1 = 0;
3149     sse_fn_epp = sse_op_table1[b][b1];
3150     if (!sse_fn_epp) {
3151         goto unknown_op;
3152     }
3153     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3154         is_xmm = 1;
3155     } else {
3156         if (b1 == 0) {
3157             /* MMX case */
3158             is_xmm = 0;
3159         } else {
3160             is_xmm = 1;
3161         }
3162     }
3163     /* simple MMX/SSE operation */
3164     if (s->flags & HF_TS_MASK) {
3165         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3166         return;
3167     }
3168     if (s->flags & HF_EM_MASK) {
3169     illegal_op:
3170         gen_illegal_opcode(s);
3171         return;
3172     }
3173     if (is_xmm
3174         && !(s->flags & HF_OSFXSR_MASK)
3175         && (b != 0x38 && b != 0x3a)) {
3176         goto unknown_op;
3177     }
3178     if (b == 0x0e) {
3179         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3180             /* If we were fully decoding this we might use illegal_op.  */
3181             goto unknown_op;
3182         }
3183         /* femms */
3184         gen_helper_emms(cpu_env);
3185         return;
3186     }
3187     if (b == 0x77) {
3188         /* emms */
3189         gen_helper_emms(cpu_env);
3190         return;
3191     }
3192     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3193        the static cpu state) */
3194     if (!is_xmm) {
3195         gen_helper_enter_mmx(cpu_env);
3196     }
3197 
3198     modrm = x86_ldub_code(env, s);
3199     reg = ((modrm >> 3) & 7);
3200     if (is_xmm) {
3201         reg |= REX_R(s);
3202     }
3203     mod = (modrm >> 6) & 3;
3204     if (sse_fn_epp == SSE_SPECIAL) {
3205         b |= (b1 << 8);
3206         switch(b) {
3207         case 0x0e7: /* movntq */
3208             if (mod == 3) {
3209                 goto illegal_op;
3210             }
3211             gen_lea_modrm(env, s, modrm);
3212             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3213             break;
3214         case 0x1e7: /* movntdq */
3215         case 0x02b: /* movntps */
3216         case 0x12b: /* movntpd */
3217             if (mod == 3)
3218                 goto illegal_op;
3219             gen_lea_modrm(env, s, modrm);
3220             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3221             break;
3222         case 0x3f0: /* lddqu */
3223             if (mod == 3)
3224                 goto illegal_op;
3225             gen_lea_modrm(env, s, modrm);
3226             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3227             break;
3228         case 0x22b: /* movntss */
3229         case 0x32b: /* movntsd */
3230             if (mod == 3)
3231                 goto illegal_op;
3232             gen_lea_modrm(env, s, modrm);
3233             if (b1 & 1) {
3234                 gen_stq_env_A0(s, offsetof(CPUX86State,
3235                                            xmm_regs[reg].ZMM_Q(0)));
3236             } else {
3237                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3238                     xmm_regs[reg].ZMM_L(0)));
3239                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3240             }
3241             break;
3242         case 0x6e: /* movd mm, ea */
3243 #ifdef TARGET_X86_64
3244             if (s->dflag == MO_64) {
3245                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3246                 tcg_gen_st_tl(s->T0, cpu_env,
3247                               offsetof(CPUX86State, fpregs[reg].mmx));
3248             } else
3249 #endif
3250             {
3251                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3252                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3253                                  offsetof(CPUX86State,fpregs[reg].mmx));
3254                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3255                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3256             }
3257             break;
3258         case 0x16e: /* movd xmm, ea */
3259 #ifdef TARGET_X86_64
3260             if (s->dflag == MO_64) {
3261                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3262                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3263                                  offsetof(CPUX86State,xmm_regs[reg]));
3264                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3265             } else
3266 #endif
3267             {
3268                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3269                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3270                                  offsetof(CPUX86State,xmm_regs[reg]));
3271                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3272                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3273             }
3274             break;
3275         case 0x6f: /* movq mm, ea */
3276             if (mod != 3) {
3277                 gen_lea_modrm(env, s, modrm);
3278                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3279             } else {
3280                 rm = (modrm & 7);
3281                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3282                                offsetof(CPUX86State,fpregs[rm].mmx));
3283                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3284                                offsetof(CPUX86State,fpregs[reg].mmx));
3285             }
3286             break;
3287         case 0x010: /* movups */
3288         case 0x110: /* movupd */
3289         case 0x028: /* movaps */
3290         case 0x128: /* movapd */
3291         case 0x16f: /* movdqa xmm, ea */
3292         case 0x26f: /* movdqu xmm, ea */
3293             if (mod != 3) {
3294                 gen_lea_modrm(env, s, modrm);
3295                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3296             } else {
3297                 rm = (modrm & 7) | REX_B(s);
3298                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3299                             offsetof(CPUX86State,xmm_regs[rm]));
3300             }
3301             break;
3302         case 0x210: /* movss xmm, ea */
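                 /* A memory-source movss zeroes the upper three dwords of
                    the destination; the register-to-register form below
                    only moves the low dword.  */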
3303             if (mod != 3) {
3304                 gen_lea_modrm(env, s, modrm);
3305                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3306                 tcg_gen_st32_tl(s->T0, cpu_env,
3307                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3308                 tcg_gen_movi_tl(s->T0, 0);
3309                 tcg_gen_st32_tl(s->T0, cpu_env,
3310                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3311                 tcg_gen_st32_tl(s->T0, cpu_env,
3312                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3313                 tcg_gen_st32_tl(s->T0, cpu_env,
3314                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3315             } else {
3316                 rm = (modrm & 7) | REX_B(s);
3317                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3318                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3319             }
3320             break;
3321         case 0x310: /* movsd xmm, ea */
3322             if (mod != 3) {
3323                 gen_lea_modrm(env, s, modrm);
3324                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3325                                            xmm_regs[reg].ZMM_Q(0)));
3326                 tcg_gen_movi_tl(s->T0, 0);
3327                 tcg_gen_st32_tl(s->T0, cpu_env,
3328                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3329                 tcg_gen_st32_tl(s->T0, cpu_env,
3330                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3331             } else {
3332                 rm = (modrm & 7) | REX_B(s);
3333                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3334                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3335             }
3336             break;
3337         case 0x012: /* movlps */
3338         case 0x112: /* movlpd */
3339             if (mod != 3) {
3340                 gen_lea_modrm(env, s, modrm);
3341                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3342                                            xmm_regs[reg].ZMM_Q(0)));
3343             } else {
3344                 /* movhlps */
3345                 rm = (modrm & 7) | REX_B(s);
3346                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3347                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3348             }
3349             break;
3350         case 0x212: /* movsldup */
3351             if (mod != 3) {
3352                 gen_lea_modrm(env, s, modrm);
3353                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3354             } else {
3355                 rm = (modrm & 7) | REX_B(s);
3356                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3357                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3358                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3359                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3360             }
3361             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3362                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3363             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3364                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3365             break;
3366         case 0x312: /* movddup */
3367             if (mod != 3) {
3368                 gen_lea_modrm(env, s, modrm);
3369                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3370                                            xmm_regs[reg].ZMM_Q(0)));
3371             } else {
3372                 rm = (modrm & 7) | REX_B(s);
3373                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3374                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3375             }
3376             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3377                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3378             break;
3379         case 0x016: /* movhps */
3380         case 0x116: /* movhpd */
3381             if (mod != 3) {
3382                 gen_lea_modrm(env, s, modrm);
3383                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3384                                            xmm_regs[reg].ZMM_Q(1)));
3385             } else {
3386                 /* movlhps */
3387                 rm = (modrm & 7) | REX_B(s);
3388                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3389                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3390             }
3391             break;
3392         case 0x216: /* movshdup */
3393             if (mod != 3) {
3394                 gen_lea_modrm(env, s, modrm);
3395                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3396             } else {
3397                 rm = (modrm & 7) | REX_B(s);
3398                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3399                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3400                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3401                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3402             }
3403             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3404                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3405             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3406                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3407             break;
3408         case 0x178:
3409         case 0x378:
3410             {
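                     /* SSE4a extrq (66 prefix, modrm reg must be 0) and
                        insertq (f2 prefix) immediate forms: two trailing
                        immediate bytes supply the field length and the
                        bit index, in that order.  */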
3411                 int bit_index, field_length;
3412 
3413                 if (b1 == 1 && reg != 0)
3414                     goto illegal_op;
3415                 field_length = x86_ldub_code(env, s) & 0x3F;
3416                 bit_index = x86_ldub_code(env, s) & 0x3F;
3417                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3418                     offsetof(CPUX86State,xmm_regs[reg]));
3419                 if (b1 == 1)
3420                     gen_helper_extrq_i(cpu_env, s->ptr0,
3421                                        tcg_const_i32(bit_index),
3422                                        tcg_const_i32(field_length));
3423                 else
3424                     gen_helper_insertq_i(cpu_env, s->ptr0,
3425                                          tcg_const_i32(bit_index),
3426                                          tcg_const_i32(field_length));
3427             }
3428             break;
3429         case 0x7e: /* movd ea, mm */
3430 #ifdef TARGET_X86_64
3431             if (s->dflag == MO_64) {
3432                 tcg_gen_ld_i64(s->T0, cpu_env,
3433                                offsetof(CPUX86State,fpregs[reg].mmx));
3434                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3435             } else
3436 #endif
3437             {
3438                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3439                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3440                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3441             }
3442             break;
3443         case 0x17e: /* movd ea, xmm */
3444 #ifdef TARGET_X86_64
3445             if (s->dflag == MO_64) {
3446                 tcg_gen_ld_i64(s->T0, cpu_env,
3447                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3448                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3449             } else
3450 #endif
3451             {
3452                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3453                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3454                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3455             }
3456             break;
3457         case 0x27e: /* movq xmm, ea */
3458             if (mod != 3) {
3459                 gen_lea_modrm(env, s, modrm);
3460                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3461                                            xmm_regs[reg].ZMM_Q(0)));
3462             } else {
3463                 rm = (modrm & 7) | REX_B(s);
3464                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3465                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3466             }
3467             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3468             break;
3469         case 0x7f: /* movq ea, mm */
3470             if (mod != 3) {
3471                 gen_lea_modrm(env, s, modrm);
3472                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3473             } else {
3474                 rm = (modrm & 7);
3475                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3476                             offsetof(CPUX86State,fpregs[reg].mmx));
3477             }
3478             break;
3479         case 0x011: /* movups */
3480         case 0x111: /* movupd */
3481         case 0x029: /* movaps */
3482         case 0x129: /* movapd */
3483         case 0x17f: /* movdqa ea, xmm */
3484         case 0x27f: /* movdqu ea, xmm */
3485             if (mod != 3) {
3486                 gen_lea_modrm(env, s, modrm);
3487                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3488             } else {
3489                 rm = (modrm & 7) | REX_B(s);
3490                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3491                             offsetof(CPUX86State,xmm_regs[reg]));
3492             }
3493             break;
3494         case 0x211: /* movss ea, xmm */
3495             if (mod != 3) {
3496                 gen_lea_modrm(env, s, modrm);
3497                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3498                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3499                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3500             } else {
3501                 rm = (modrm & 7) | REX_B(s);
3502                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3503                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3504             }
3505             break;
3506         case 0x311: /* movsd ea, xmm */
3507             if (mod != 3) {
3508                 gen_lea_modrm(env, s, modrm);
3509                 gen_stq_env_A0(s, offsetof(CPUX86State,
3510                                            xmm_regs[reg].ZMM_Q(0)));
3511             } else {
3512                 rm = (modrm & 7) | REX_B(s);
3513                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3514                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3515             }
3516             break;
3517         case 0x013: /* movlps */
3518         case 0x113: /* movlpd */
3519             if (mod != 3) {
3520                 gen_lea_modrm(env, s, modrm);
3521                 gen_stq_env_A0(s, offsetof(CPUX86State,
3522                                            xmm_regs[reg].ZMM_Q(0)));
3523             } else {
3524                 goto illegal_op;
3525             }
3526             break;
3527         case 0x017: /* movhps */
3528         case 0x117: /* movhpd */
3529             if (mod != 3) {
3530                 gen_lea_modrm(env, s, modrm);
3531                 gen_stq_env_A0(s, offsetof(CPUX86State,
3532                                            xmm_regs[reg].ZMM_Q(1)));
3533             } else {
3534                 goto illegal_op;
3535             }
3536             break;
3537         case 0x71: /* shift mm, im */
3538         case 0x72:
3539         case 0x73:
3540         case 0x171: /* shift xmm, im */
3541         case 0x172:
3542         case 0x173:
3543             if (b1 >= 2) {
3544                 goto unknown_op;
3545             }
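                 /* Groups 12/13/14 (0f 71/72/73): the modrm reg field
                    selects the shift operation, and the immediate count
                    is staged in mmx_t0/xmm_t0 so the ordinary two-operand
                    shift helpers can be reused.  */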
3546             val = x86_ldub_code(env, s);
3547             if (is_xmm) {
3548                 tcg_gen_movi_tl(s->T0, val);
3549                 tcg_gen_st32_tl(s->T0, cpu_env,
3550                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3551                 tcg_gen_movi_tl(s->T0, 0);
3552                 tcg_gen_st32_tl(s->T0, cpu_env,
3553                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3554                 op1_offset = offsetof(CPUX86State,xmm_t0);
3555             } else {
3556                 tcg_gen_movi_tl(s->T0, val);
3557                 tcg_gen_st32_tl(s->T0, cpu_env,
3558                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3559                 tcg_gen_movi_tl(s->T0, 0);
3560                 tcg_gen_st32_tl(s->T0, cpu_env,
3561                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3562                 op1_offset = offsetof(CPUX86State,mmx_t0);
3563             }
3564             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3565                                        ((modrm >> 3) & 7)][b1];
3566             if (!sse_fn_epp) {
3567                 goto unknown_op;
3568             }
3569             if (is_xmm) {
3570                 rm = (modrm & 7) | REX_B(s);
3571                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3572             } else {
3573                 rm = (modrm & 7);
3574                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3575             }
3576             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3577             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3578             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3579             break;
3580         case 0x050: /* movmskps */
3581             rm = (modrm & 7) | REX_B(s);
3582             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3583                              offsetof(CPUX86State,xmm_regs[rm]));
3584             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3585             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3586             break;
3587         case 0x150: /* movmskpd */
3588             rm = (modrm & 7) | REX_B(s);
3589             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3590                              offsetof(CPUX86State,xmm_regs[rm]));
3591             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3592             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3593             break;
3594         case 0x02a: /* cvtpi2ps */
3595         case 0x12a: /* cvtpi2pd */
3596             gen_helper_enter_mmx(cpu_env);
3597             if (mod != 3) {
3598                 gen_lea_modrm(env, s, modrm);
3599                 op2_offset = offsetof(CPUX86State,mmx_t0);
3600                 gen_ldq_env_A0(s, op2_offset);
3601             } else {
3602                 rm = (modrm & 7);
3603                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3604             }
3605             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3606             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3607             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3608             switch(b >> 8) {
3609             case 0x0:
3610                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3611                 break;
3612             default:
3613             case 0x1:
3614                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3615                 break;
3616             }
3617             break;
3618         case 0x22a: /* cvtsi2ss */
3619         case 0x32a: /* cvtsi2sd */
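                 /* Bit 8 of b selects the f3 (cvtsi2ss) vs f2 (cvtsi2sd)
                    form; REX.W (dflag == MO_64) switches the integer
                    source from 32 to 64 bits.  */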
3620             ot = mo_64_32(s->dflag);
3621             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3622             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3623             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3624             if (ot == MO_32) {
3625                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3626                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3627                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3628             } else {
3629 #ifdef TARGET_X86_64
3630                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3631                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3632 #else
3633                 goto illegal_op;
3634 #endif
3635             }
3636             break;
3637         case 0x02c: /* cvttps2pi */
3638         case 0x12c: /* cvttpd2pi */
3639         case 0x02d: /* cvtps2pi */
3640         case 0x12d: /* cvtpd2pi */
3641             gen_helper_enter_mmx(cpu_env);
3642             if (mod != 3) {
3643                 gen_lea_modrm(env, s, modrm);
3644                 op2_offset = offsetof(CPUX86State,xmm_t0);
3645                 gen_ldo_env_A0(s, op2_offset);
3646             } else {
3647                 rm = (modrm & 7) | REX_B(s);
3648                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3649             }
3650             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3651             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3652             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3653             switch(b) {
3654             case 0x02c:
3655                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3656                 break;
3657             case 0x12c:
3658                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3659                 break;
3660             case 0x02d:
3661                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3662                 break;
3663             case 0x12d:
3664                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3665                 break;
3666             }
3667             break;
3668         case 0x22c: /* cvttss2si */
3669         case 0x32c: /* cvttsd2si */
3670         case 0x22d: /* cvtss2si */
3671         case 0x32d: /* cvtsd2si */
3672             ot = mo_64_32(s->dflag);
3673             if (mod != 3) {
3674                 gen_lea_modrm(env, s, modrm);
3675                 if ((b >> 8) & 1) {
3676                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3677                 } else {
3678                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3679                     tcg_gen_st32_tl(s->T0, cpu_env,
3680                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3681                 }
3682                 op2_offset = offsetof(CPUX86State,xmm_t0);
3683             } else {
3684                 rm = (modrm & 7) | REX_B(s);
3685                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3686             }
3687             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
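                 /* Table index: bit 0 set for the non-truncating cvt
                    forms, bit 1 set for the f2 (sd) forms.  */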
3688             if (ot == MO_32) {
3689                 SSEFunc_i_ep sse_fn_i_ep =
3690                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3691                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3692                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3693             } else {
3694 #ifdef TARGET_X86_64
3695                 SSEFunc_l_ep sse_fn_l_ep =
3696                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3697                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3698 #else
3699                 goto illegal_op;
3700 #endif
3701             }
3702             gen_op_mov_reg_v(s, ot, reg, s->T0);
3703             break;
3704         case 0xc4: /* pinsrw */
3705         case 0x1c4:
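                 /* The imm8 selector still follows the modrm byte, so
                    rip_offset must account for it when a rip-relative
                    address is computed.  */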
3706             s->rip_offset = 1;
3707             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3708             val = x86_ldub_code(env, s);
3709             if (b1) {
3710                 val &= 7;
3711                 tcg_gen_st16_tl(s->T0, cpu_env,
3712                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3713             } else {
3714                 val &= 3;
3715                 tcg_gen_st16_tl(s->T0, cpu_env,
3716                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3717             }
3718             break;
3719         case 0xc5: /* pextrw */
3720         case 0x1c5:
3721             if (mod != 3)
3722                 goto illegal_op;
3723             ot = mo_64_32(s->dflag);
3724             val = x86_ldub_code(env, s);
3725             if (b1) {
3726                 val &= 7;
3727                 rm = (modrm & 7) | REX_B(s);
3728                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3729                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3730             } else {
3731                 val &= 3;
3732                 rm = (modrm & 7);
3733                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3734                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3735             }
3736             reg = ((modrm >> 3) & 7) | REX_R(s);
3737             gen_op_mov_reg_v(s, ot, reg, s->T0);
3738             break;
3739         case 0x1d6: /* movq ea, xmm */
3740             if (mod != 3) {
3741                 gen_lea_modrm(env, s, modrm);
3742                 gen_stq_env_A0(s, offsetof(CPUX86State,
3743                                            xmm_regs[reg].ZMM_Q(0)));
3744             } else {
3745                 rm = (modrm & 7) | REX_B(s);
3746                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3747                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3748                 gen_op_movq_env_0(s,
3749                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3750             }
3751             break;
3752         case 0x2d6: /* movq2dq */
3753             gen_helper_enter_mmx(cpu_env);
3754             rm = (modrm & 7);
3755             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3756                         offsetof(CPUX86State,fpregs[rm].mmx));
3757             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3758             break;
3759         case 0x3d6: /* movdq2q */
3760             gen_helper_enter_mmx(cpu_env);
3761             rm = (modrm & 7) | REX_B(s);
3762             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3763                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3764             break;
3765         case 0xd7: /* pmovmskb */
3766         case 0x1d7:
3767             if (mod != 3)
3768                 goto illegal_op;
3769             if (b1) {
3770                 rm = (modrm & 7) | REX_B(s);
3771                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3772                                  offsetof(CPUX86State, xmm_regs[rm]));
3773                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3774             } else {
3775                 rm = (modrm & 7);
3776                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3777                                  offsetof(CPUX86State, fpregs[rm].mmx));
3778                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3779             }
3780             reg = ((modrm >> 3) & 7) | REX_R(s);
3781             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3782             break;
3783 
3784         case 0x138:
3785         case 0x038:
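                 /* Three-byte 0f 38 opcodes: the byte fetched as "modrm"
                    above is really the third opcode byte.  The f0-ff
                    block holds the integer extensions handled at
                    do_0f_38_fx below.  */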
3786             b = modrm;
3787             if ((b & 0xf0) == 0xf0) {
3788                 goto do_0f_38_fx;
3789             }
3790             modrm = x86_ldub_code(env, s);
3791             rm = modrm & 7;
3792             reg = ((modrm >> 3) & 7) | REX_R(s);
3793             mod = (modrm >> 6) & 3;
3794             if (b1 >= 2) {
3795                 goto unknown_op;
3796             }
3797 
3798             sse_fn_epp = sse_op_table6[b].op[b1];
3799             if (!sse_fn_epp) {
3800                 goto unknown_op;
3801             }
3802             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3803                 goto illegal_op;
3804 
3805             if (b1) {
3806                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3807                 if (mod == 3) {
3808                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3809                 } else {
3810                     op2_offset = offsetof(CPUX86State,xmm_t0);
3811                     gen_lea_modrm(env, s, modrm);
3812                     switch (b) {
3813                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3814                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3815                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3816                         gen_ldq_env_A0(s, op2_offset +
3817                                         offsetof(ZMMReg, ZMM_Q(0)));
3818                         break;
3819                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3820                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3821                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3822                                             s->mem_index, MO_LEUL);
3823                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3824                                         offsetof(ZMMReg, ZMM_L(0)));
3825                         break;
3826                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3827                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3828                                            s->mem_index, MO_LEUW);
3829                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3830                                         offsetof(ZMMReg, ZMM_W(0)));
3831                         break;
3832                     case 0x2a:            /* movntdqa */
3833                         gen_ldo_env_A0(s, op1_offset);
3834                         return;
3835                     default:
3836                         gen_ldo_env_A0(s, op2_offset);
3837                     }
3838                 }
3839             } else {
3840                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3841                 if (mod == 3) {
3842                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3843                 } else {
3844                     op2_offset = offsetof(CPUX86State,mmx_t0);
3845                     gen_lea_modrm(env, s, modrm);
3846                     gen_ldq_env_A0(s, op2_offset);
3847                 }
3848             }
3849             if (sse_fn_epp == SSE_SPECIAL) {
3850                 goto unknown_op;
3851             }
3852 
3853             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3854             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3855             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3856 
3857             if (b == 0x17) {
3858                 set_cc_op(s, CC_OP_EFLAGS);
3859             }
3860             break;
3861 
3862         case 0x238:
3863         case 0x338:
3864         do_0f_38_fx:
3865             /* Various integer extensions at 0f 38 f[0-f].  */
3866             b = modrm | (b1 << 8);
3867             modrm = x86_ldub_code(env, s);
3868             reg = ((modrm >> 3) & 7) | REX_R(s);
3869 
3870             switch (b) {
3871             case 0x3f0: /* crc32 Gd,Eb */
3872             case 0x3f1: /* crc32 Gd,Ey */
3873             do_crc32:
3874                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3875                     goto illegal_op;
3876                 }
3877                 if ((b & 0xff) == 0xf0) {
3878                     ot = MO_8;
3879                 } else if (s->dflag != MO_64) {
3880                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3881                 } else {
3882                     ot = MO_64;
3883                 }
3884 
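                     /* The running CRC lives in the low 32 bits of the
                        destination; the helper folds in 8 << ot source
                        bits using the CRC-32C (Castagnoli) polynomial.  */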
3885                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3886                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3887                 gen_helper_crc32(s->T0, s->tmp2_i32,
3888                                  s->T0, tcg_const_i32(8 << ot));
3889 
3890                 ot = mo_64_32(s->dflag);
3891                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3892                 break;
3893 
3894             case 0x1f0: /* crc32 or movbe */
3895             case 0x1f1:
3896                 /* For these insns, the f3 prefix is supposed to have
3897                    priority over the 66 prefix, but that is not how b1
3898                    was computed above.  */
3899                 if (s->prefix & PREFIX_REPNZ) {
3900                     goto do_crc32;
3901                 }
3902                 /* FALLTHRU */
3903             case 0x0f0: /* movbe Gy,My */
3904             case 0x0f1: /* movbe My,Gy */
3905                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3906                     goto illegal_op;
3907                 }
3908                 if (s->dflag != MO_64) {
3909                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3910                 } else {
3911                     ot = MO_64;
3912                 }
3913 
3914                 gen_lea_modrm(env, s, modrm);
3915                 if ((b & 1) == 0) {
3916                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3917                                        s->mem_index, ot | MO_BE);
3918                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3919                 } else {
3920                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3921                                        s->mem_index, ot | MO_BE);
3922                 }
3923                 break;
3924 
3925             case 0x0f2: /* andn Gy, By, Ey */
3926                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3927                     || !(s->prefix & PREFIX_VEX)
3928                     || s->vex_l != 0) {
3929                     goto illegal_op;
3930                 }
3931                 ot = mo_64_32(s->dflag);
3932                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3933                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3934                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3935                 gen_op_update1_cc(s);
3936                 set_cc_op(s, CC_OP_LOGICB + ot);
3937                 break;
3938 
3939             case 0x0f7: /* bextr Gy, Ey, By */
3940                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3941                     || !(s->prefix & PREFIX_VEX)
3942                     || s->vex_l != 0) {
3943                     goto illegal_op;
3944                 }
3945                 ot = mo_64_32(s->dflag);
3946                 {
3947                     TCGv bound, zero;
3948 
3949                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3950                     /* Extract START, and shift the operand.
3951                        Shifts larger than operand size get zeros.  */
3952                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3953                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3954 
3955                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3956                     zero = tcg_const_tl(0);
3957                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3958                                        s->T0, zero);
3959                     tcg_temp_free(zero);
3960 
3961                     /* Extract the LEN into a mask.  Lengths larger than
3962                        operand size get all ones.  */
3963                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3964                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3965                                        s->A0, bound);
3966                     tcg_temp_free(bound);
3967                     tcg_gen_movi_tl(s->T1, 1);
3968                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3969                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3970                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3971 
3972                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3973                     gen_op_update1_cc(s);
3974                     set_cc_op(s, CC_OP_LOGICB + ot);
3975                 }
3976                 break;
3977 
3978             case 0x0f5: /* bzhi Gy, Ey, By */
3979                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3980                     || !(s->prefix & PREFIX_VEX)
3981                     || s->vex_l != 0) {
3982                     goto illegal_op;
3983                 }
3984                 ot = mo_64_32(s->dflag);
3985                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3986                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3987                 {
3988                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3989                     /* Note that since we're using BMILG (in order to get O
3990                        cleared) we need to store the inverse into C.  */
3991                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3992                                        s->T1, bound);
3993                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3994                                        bound, bound, s->T1);
3995                     tcg_temp_free(bound);
3996                 }
3997                 tcg_gen_movi_tl(s->A0, -1);
3998                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3999                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
4000                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4001                 gen_op_update1_cc(s);
4002                 set_cc_op(s, CC_OP_BMILGB + ot);
4003                 break;
4004 
4005             case 0x3f6: /* mulx By, Gy, rdx, Ey */
4006                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4007                     || !(s->prefix & PREFIX_VEX)
4008                     || s->vex_l != 0) {
4009                     goto illegal_op;
4010                 }
4011                 ot = mo_64_32(s->dflag);
4012                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
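                     /* mulx: unsigned rDX * Ey; the low half of the
                        product goes to the vvvv register and the high
                        half to the modrm reg.  No flags are written, so
                        no cc_op update is needed.  */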
4013                 switch (ot) {
4014                 default:
4015                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4016                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
4017                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4018                                       s->tmp2_i32, s->tmp3_i32);
4019                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
4020                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
4021                     break;
4022 #ifdef TARGET_X86_64
4023                 case MO_64:
4024                     tcg_gen_mulu2_i64(s->T0, s->T1,
4025                                       s->T0, cpu_regs[R_EDX]);
4026                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4027                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4028                     break;
4029 #endif
4030                 }
4031                 break;
4032 
4033             case 0x3f5: /* pdep Gy, By, Ey */
4034                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4035                     || !(s->prefix & PREFIX_VEX)
4036                     || s->vex_l != 0) {
4037                     goto illegal_op;
4038                 }
4039                 ot = mo_64_32(s->dflag);
4040                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
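                     /* pdep deposits the low-order bits of By into the
                        bit positions selected by the mask Ey.  */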
4041                 /* Note that by zero-extending the source operand, we
4042                    automatically handle zero-extending the result.  */
4043                 if (ot == MO_64) {
4044                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4045                 } else {
4046                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4047                 }
4048                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4049                 break;
4050 
4051             case 0x2f5: /* pext Gy, By, Ey */
4052                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4053                     || !(s->prefix & PREFIX_VEX)
4054                     || s->vex_l != 0) {
4055                     goto illegal_op;
4056                 }
4057                 ot = mo_64_32(s->dflag);
4058                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
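                     /* pext gathers the bits of By selected by the mask
                        Ey into the low-order bits of the result.  */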
4059                 /* Note that by zero-extending the source operand, we
4060                    automatically handle zero-extending the result.  */
4061                 if (ot == MO_64) {
4062                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4063                 } else {
4064                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4065                 }
4066                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4067                 break;
4068 
4069             case 0x1f6: /* adcx Gy, Ey */
4070             case 0x2f6: /* adox Gy, Ey */
4071                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4072                     goto illegal_op;
4073                 } else {
4074                     TCGv carry_in, carry_out, zero;
4075                     int end_op;
4076 
4077                     ot = mo_64_32(s->dflag);
4078                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4079 
4080                     /* Re-use the carry-out from a previous round.  */
4081                     carry_in = NULL;
4082                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
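                         /* CC_OP_ADCX keeps C in cc_dst, CC_OP_ADOX keeps
                            O in cc_src2, and CC_OP_ADCOX both, so chained
                            adcx/adox rounds can hand the flag along
                            without materializing EFLAGS.  */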
4083                     switch (s->cc_op) {
4084                     case CC_OP_ADCX:
4085                         if (b == 0x1f6) {
4086                             carry_in = cpu_cc_dst;
4087                             end_op = CC_OP_ADCX;
4088                         } else {
4089                             end_op = CC_OP_ADCOX;
4090                         }
4091                         break;
4092                     case CC_OP_ADOX:
4093                         if (b == 0x1f6) {
4094                             end_op = CC_OP_ADCOX;
4095                         } else {
4096                             carry_in = cpu_cc_src2;
4097                             end_op = CC_OP_ADOX;
4098                         }
4099                         break;
4100                     case CC_OP_ADCOX:
4101                         end_op = CC_OP_ADCOX;
4102                         carry_in = carry_out;
4103                         break;
4104                     default:
4105                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4106                         break;
4107                     }
4108                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4109                     if (!carry_in) {
4110                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4111                             gen_compute_eflags(s);
4112                         }
4113                         carry_in = s->tmp0;
4114                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4115                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4116                     }
4117 
4118                     switch (ot) {
4119 #ifdef TARGET_X86_64
4120                     case MO_32:
4121                         /* If we know TL is 64-bit, and we want a 32-bit
4122                            result, just do everything in 64-bit arithmetic.  */
4123                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4124                         tcg_gen_ext32u_i64(s->T0, s->T0);
4125                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4126                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4127                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4128                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4129                         break;
4130 #endif
4131                     default:
4132                         /* Otherwise compute the carry-out in two steps.  */
4133                         zero = tcg_const_tl(0);
4134                         tcg_gen_add2_tl(s->T0, carry_out,
4135                                         s->T0, zero,
4136                                         carry_in, zero);
4137                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4138                                         cpu_regs[reg], carry_out,
4139                                         s->T0, zero);
4140                         tcg_temp_free(zero);
4141                         break;
4142                     }
4143                     set_cc_op(s, end_op);
4144                 }
4145                 break;
4146 
4147             case 0x1f7: /* shlx Gy, Ey, By */
4148             case 0x2f7: /* sarx Gy, Ey, By */
4149             case 0x3f7: /* shrx Gy, Ey, By */
4150                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4151                     || !(s->prefix & PREFIX_VEX)
4152                     || s->vex_l != 0) {
4153                     goto illegal_op;
4154                 }
4155                 ot = mo_64_32(s->dflag);
4156                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4157                 if (ot == MO_64) {
4158                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4159                 } else {
4160                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4161                 }
4162                 if (b == 0x1f7) {
4163                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4164                 } else if (b == 0x2f7) {
4165                     if (ot != MO_64) {
4166                         tcg_gen_ext32s_tl(s->T0, s->T0);
4167                     }
4168                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4169                 } else {
4170                     if (ot != MO_64) {
4171                         tcg_gen_ext32u_tl(s->T0, s->T0);
4172                     }
4173                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4174                 }
4175                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4176                 break;
4177 
4178             case 0x0f3:
4179             case 0x1f3:
4180             case 0x2f3:
4181             case 0x3f3: /* Group 17 */
4182                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4183                     || !(s->prefix & PREFIX_VEX)
4184                     || s->vex_l != 0) {
4185                     goto illegal_op;
4186                 }
4187                 ot = mo_64_32(s->dflag);
4188                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4189 
4190                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
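                     /* Group 17: the modrm reg field selects the
                        operation.  These are the usual lowest-set-bit
                        identities: blsr = x & (x - 1), blsmsk =
                        x ^ (x - 1), blsi = x & -x.  */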
4191                 switch (reg & 7) {
4192                 case 1: /* blsr By,Ey */
4193                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4194                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4195                     break;
4196                 case 2: /* blsmsk By,Ey */
4197                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4198                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4199                     break;
4200                 case 3: /* blsi By, Ey */
4201                     tcg_gen_neg_tl(s->T1, s->T0);
4202                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4203                     break;
4204                 default:
4205                     goto unknown_op;
4206                 }
4207                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4208                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4209                 set_cc_op(s, CC_OP_BMILGB + ot);
4210                 break;
4211 
4212             default:
4213                 goto unknown_op;
4214             }
4215             break;
4216 
4217         case 0x03a:
4218         case 0x13a:
4219             b = modrm;
4220             modrm = x86_ldub_code(env, s);
4221             rm = modrm & 7;
4222             reg = ((modrm >> 3) & 7) | REX_R(s);
4223             mod = (modrm >> 6) & 3;
4224             if (b1 >= 2) {
4225                 goto unknown_op;
4226             }
4227 
4228             sse_fn_eppi = sse_op_table7[b].op[b1];
4229             if (!sse_fn_eppi) {
4230                 goto unknown_op;
4231             }
4232             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4233                 goto illegal_op;
4234 
4235             s->rip_offset = 1;
4236 
4237             if (sse_fn_eppi == SSE_SPECIAL) {
4238                 ot = mo_64_32(s->dflag);
4239                 rm = (modrm & 7) | REX_B(s);
4240                 if (mod != 3)
4241                     gen_lea_modrm(env, s, modrm);
4242                 reg = ((modrm >> 3) & 7) | REX_R(s);
4243                 val = x86_ldub_code(env, s);
4244                 switch (b) {
4245                 case 0x14: /* pextrb */
4246                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4247                                             xmm_regs[reg].ZMM_B(val & 15)));
4248                     if (mod == 3) {
4249                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4250                     } else {
4251                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4252                                            s->mem_index, MO_UB);
4253                     }
4254                     break;
4255                 case 0x15: /* pextrw */
4256                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4257                                             xmm_regs[reg].ZMM_W(val & 7)));
4258                     if (mod == 3) {
4259                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4260                     } else {
4261                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4262                                            s->mem_index, MO_LEUW);
4263                     }
4264                     break;
4265                 case 0x16:
4266                     if (ot == MO_32) { /* pextrd */
4267                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4268                                         offsetof(CPUX86State,
4269                                                 xmm_regs[reg].ZMM_L(val & 3)));
4270                         if (mod == 3) {
4271                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4272                         } else {
4273                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4274                                                 s->mem_index, MO_LEUL);
4275                         }
4276                     } else { /* pextrq */
4277 #ifdef TARGET_X86_64
4278                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4279                                         offsetof(CPUX86State,
4280                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4281                         if (mod == 3) {
4282                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4283                         } else {
4284                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4285                                                 s->mem_index, MO_LEQ);
4286                         }
4287 #else
4288                         goto illegal_op;
4289 #endif
4290                     }
4291                     break;
4292                 case 0x17: /* extractps */
4293                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4294                                             xmm_regs[reg].ZMM_L(val & 3)));
4295                     if (mod == 3) {
4296                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4297                     } else {
4298                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4299                                            s->mem_index, MO_LEUL);
4300                     }
4301                     break;
4302                 case 0x20: /* pinsrb */
4303                     if (mod == 3) {
4304                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4305                     } else {
4306                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4307                                            s->mem_index, MO_UB);
4308                     }
4309                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4310                                             xmm_regs[reg].ZMM_B(val & 15)));
4311                     break;
4312                 case 0x21: /* insertps */
4313                     if (mod == 3) {
4314                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4315                                         offsetof(CPUX86State,xmm_regs[rm]
4316                                                 .ZMM_L((val >> 6) & 3)));
4317                     } else {
4318                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4319                                             s->mem_index, MO_LEUL);
4320                     }
4321                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4322                                     offsetof(CPUX86State,xmm_regs[reg]
4323                                             .ZMM_L((val >> 4) & 3)));
4324                     if ((val >> 0) & 1)
4325                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4326                                         cpu_env, offsetof(CPUX86State,
4327                                                 xmm_regs[reg].ZMM_L(0)));
4328                     if ((val >> 1) & 1)
4329                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4330                                         cpu_env, offsetof(CPUX86State,
4331                                                 xmm_regs[reg].ZMM_L(1)));
4332                     if ((val >> 2) & 1)
4333                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4334                                         cpu_env, offsetof(CPUX86State,
4335                                                 xmm_regs[reg].ZMM_L(2)));
4336                     if ((val >> 3) & 1)
4337                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4338                                         cpu_env, offsetof(CPUX86State,
4339                                                 xmm_regs[reg].ZMM_L(3)));
4340                     break;
4341                 case 0x22:
4342                     if (ot == MO_32) { /* pinsrd */
4343                         if (mod == 3) {
4344                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4345                         } else {
4346                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4347                                                 s->mem_index, MO_LEUL);
4348                         }
4349                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4350                                         offsetof(CPUX86State,
4351                                                 xmm_regs[reg].ZMM_L(val & 3)));
4352                     } else { /* pinsrq */
4353 #ifdef TARGET_X86_64
4354                         if (mod == 3) {
4355                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4356                         } else {
4357                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4358                                                 s->mem_index, MO_LEQ);
4359                         }
4360                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4361                                         offsetof(CPUX86State,
4362                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4363 #else
4364                         goto illegal_op;
4365 #endif
4366                     }
4367                     break;
4368                 }
4369                 return;
4370             }
4371 
4372             if (b1) {
4373                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4374                 if (mod == 3) {
4375                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4376                 } else {
4377                     op2_offset = offsetof(CPUX86State,xmm_t0);
4378                     gen_lea_modrm(env, s, modrm);
4379                     gen_ldo_env_A0(s, op2_offset);
4380                 }
4381             } else {
4382                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4383                 if (mod == 3) {
4384                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4385                 } else {
4386                     op2_offset = offsetof(CPUX86State,mmx_t0);
4387                     gen_lea_modrm(env, s, modrm);
4388                     gen_ldq_env_A0(s, op2_offset);
4389                 }
4390             }
4391             val = x86_ldub_code(env, s);
4392 
4393             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4394                 set_cc_op(s, CC_OP_EFLAGS);
4395 
4396                 if (s->dflag == MO_64) {
4397                     /* The helper must use entire 64-bit gp registers */
4398                     val |= 1 << 8;
4399                 }
4400             }
4401 
4402             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4403             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4404             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4405             break;
4406 
4407         case 0x33a:
4408             /* Various integer extensions at 0f 3a f[0-f].  */
4409             b = modrm | (b1 << 8);
4410             modrm = x86_ldub_code(env, s);
4411             reg = ((modrm >> 3) & 7) | REX_R(s);
4412 
4413             switch (b) {
4414             case 0x3f0: /* rorx Gy, Ey, Ib */
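                     /* rorx rotates right by an immediate count and,
                        unlike ror, leaves the flags untouched, so no
                        cc_op bookkeeping is needed.  */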
4415                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4416                     || !(s->prefix & PREFIX_VEX)
4417                     || s->vex_l != 0) {
4418                     goto illegal_op;
4419                 }
4420                 ot = mo_64_32(s->dflag);
4421                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4422                 b = x86_ldub_code(env, s);
4423                 if (ot == MO_64) {
4424                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4425                 } else {
4426                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4427                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4428                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4429                 }
4430                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4431                 break;
4432 
4433             default:
4434                 goto unknown_op;
4435             }
4436             break;
4437 
4438         default:
4439         unknown_op:
4440             gen_unknown_opcode(env, s);
4441             return;
4442         }
4443     } else {
4444         /* generic MMX or SSE operation */
4445         switch(b) {
4446         case 0x70: /* pshufx insn */
4447         case 0xc6: /* shufps/shufpd */
4448         case 0xc2: /* compare insns */
4449             s->rip_offset = 1;
4450             break;
4451         default:
4452             break;
4453         }
4454         if (is_xmm) {
4455             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4456             if (mod != 3) {
4457                 int sz = 4;
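                     /* sz is log2 of the load size: 2 = dword, 3 = qword,
                        4 = the full 16-byte vector.  Scalar and
                        comis/ucomis forms load only their actual operand
                        so no bytes beyond it are accessed.  */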
4458 
4459                 gen_lea_modrm(env, s, modrm);
4460                 op2_offset = offsetof(CPUX86State,xmm_t0);
4461 
4462                 switch (b) {
4463                 case 0x50 ... 0x5a:
4464                 case 0x5c ... 0x5f:
4465                 case 0xc2:
4466                     /* Most sse scalar operations.  */
4467                     if (b1 == 2) {
4468                         sz = 2;
4469                     } else if (b1 == 3) {
4470                         sz = 3;
4471                     }
4472                     break;
4473 
4474                 case 0x2e:  /* ucomis[sd] */
4475                 case 0x2f:  /* comis[sd] */
4476                     if (b1 == 0) {
4477                         sz = 2;
4478                     } else {
4479                         sz = 3;
4480                     }
4481                     break;
4482                 }
4483 
4484                 switch (sz) {
4485                 case 2:
4486                     /* 32 bit access */
4487                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4488                     tcg_gen_st32_tl(s->T0, cpu_env,
4489                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4490                     break;
4491                 case 3:
4492                     /* 64 bit access */
4493                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4494                     break;
4495                 default:
4496                     /* 128 bit access */
4497                     gen_ldo_env_A0(s, op2_offset);
4498                     break;
4499                 }
4500             } else {
4501                 rm = (modrm & 7) | REX_B(s);
4502                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4503             }
4504         } else {
4505             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4506             if (mod != 3) {
4507                 gen_lea_modrm(env, s, modrm);
4508                 op2_offset = offsetof(CPUX86State,mmx_t0);
4509                 gen_ldq_env_A0(s, op2_offset);
4510             } else {
4511                 rm = (modrm & 7);
4512                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4513             }
4514         }
4515         switch(b) {
4516         case 0x0f: /* 3DNow! data insns */
4517             val = x86_ldub_code(env, s);
4518             sse_fn_epp = sse_op_table5[val];
4519             if (!sse_fn_epp) {
4520                 goto unknown_op;
4521             }
4522             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4523                 goto illegal_op;
4524             }
4525             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4526             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4527             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4528             break;
4529         case 0x70: /* pshufw/pshufd/pshufhw/pshuflw */
4530         case 0xc6: /* shufps/shufpd */
4531             val = x86_ldub_code(env, s);
4532             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4533             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4534             /* XXX: introduce a new table? */
4535             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4536             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4537             break;
4538         case 0xc2:
4539             /* compare insns */
4540             val = x86_ldub_code(env, s);
4541             if (val >= 8)
4542                 goto unknown_op;
4543             sse_fn_epp = sse_op_table4[val][b1];
4544 
4545             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4546             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4547             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4548             break;
4549         case 0xf7:
4550             /* maskmov: we must prepare A0 (the implicit DS:rDI destination) */
4551             if (mod != 3)
4552                 goto illegal_op;
4553             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4554             gen_extu(s->aflag, s->A0);
4555             gen_add_A0_ds_seg(s);
4556 
4557             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4558             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4559             /* XXX: introduce a new table? */
4560             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4561             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4562             break;
4563         default:
4564             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4565             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4566             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4567             break;
4568         }
4569         if (b == 0x2e || b == 0x2f) {
4570             set_cc_op(s, CC_OP_EFLAGS);
4571         }
4572     }
4573 }
4574 
4575 /* convert one instruction. s->base.is_jmp is set if the translation must
4576    be stopped. Return the next pc value */
4577 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4578 {
4579     CPUX86State *env = cpu->env_ptr;
4580     int b, prefixes;
4581     int shift;
4582     MemOp ot, aflag, dflag;
4583     int modrm, reg, rm, mod, op, opreg, val;
4584     target_ulong next_eip, tval;
4585     target_ulong pc_start = s->base.pc_next;
4586 
4587     s->pc_start = s->pc = pc_start;
4588     s->override = -1;
4589 #ifdef TARGET_X86_64
4590     s->rex_w = false;
4591     s->rex_r = 0;
4592     s->rex_x = 0;
4593     s->rex_b = 0;
4594 #endif
4595     s->rip_offset = 0; /* for relative ip address */
4596     s->vex_l = 0;
4597     s->vex_v = 0;
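         /* The code fetch helpers siglongjmp() back here if the instruction
            being decoded exceeds the maximum instruction length, in which
            case #GP is raised.  */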
4598     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4599         gen_exception_gpf(s);
4600         return s->pc;
4601     }
4602 
4603     prefixes = 0;
4604 
4605  next_byte:
4606     b = x86_ldub_code(env, s);
4607     /* Collect prefixes.  */
4608     switch (b) {
4609     case 0xf3:
4610         prefixes |= PREFIX_REPZ;
4611         goto next_byte;
4612     case 0xf2:
4613         prefixes |= PREFIX_REPNZ;
4614         goto next_byte;
4615     case 0xf0:
4616         prefixes |= PREFIX_LOCK;
4617         goto next_byte;
4618     case 0x2e:
4619         s->override = R_CS;
4620         goto next_byte;
4621     case 0x36:
4622         s->override = R_SS;
4623         goto next_byte;
4624     case 0x3e:
4625         s->override = R_DS;
4626         goto next_byte;
4627     case 0x26:
4628         s->override = R_ES;
4629         goto next_byte;
4630     case 0x64:
4631         s->override = R_FS;
4632         goto next_byte;
4633     case 0x65:
4634         s->override = R_GS;
4635         goto next_byte;
4636     case 0x66:
4637         prefixes |= PREFIX_DATA;
4638         goto next_byte;
4639     case 0x67:
4640         prefixes |= PREFIX_ADR;
4641         goto next_byte;
4642 #ifdef TARGET_X86_64
4643     case 0x40 ... 0x4f:
4644         if (CODE64(s)) {
4645             /* REX prefix */
4646             prefixes |= PREFIX_REX;
4647             s->rex_w = (b >> 3) & 1;
4648             s->rex_r = (b & 0x4) << 1;
4649             s->rex_x = (b & 0x2) << 2;
4650             s->rex_b = (b & 0x1) << 3;
4651             goto next_byte;
4652         }
4653         break;
4654 #endif
4655     case 0xc5: /* 2-byte VEX */
4656     case 0xc4: /* 3-byte VEX */
4657         /* VEX prefixes are valid only in 32-bit and 64-bit code segments,
4658            and never in vm86 mode; otherwise these bytes are LES or LDS.  */
4659         if (CODE32(s) && !VM86(s)) {
4660             static const int pp_prefix[4] = {
4661                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4662             };
4663             int vex3, vex2 = x86_ldub_code(env, s);
4664 
4665             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4666                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4667                    otherwise the instruction is LES or LDS.  */
4668                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4669                 break;
4670             }
4671 
4672             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4673             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4674                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4675                 goto illegal_op;
4676             }
4677 #ifdef TARGET_X86_64
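                 /* VEX.R is stored inverted; extract it into bit 3 for REX_R().  */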
4678             s->rex_r = (~vex2 >> 4) & 8;
4679 #endif
4680             if (b == 0xc5) {
4681                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4682                 vex3 = vex2;
4683                 b = x86_ldub_code(env, s) | 0x100;
4684             } else {
4685                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4686                 vex3 = x86_ldub_code(env, s);
4687 #ifdef TARGET_X86_64
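                     /* VEX.X and VEX.B are stored inverted; VEX.W is not.  */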
4688                 s->rex_x = (~vex2 >> 3) & 8;
4689                 s->rex_b = (~vex2 >> 2) & 8;
4690                 s->rex_w = (vex3 >> 7) & 1;
4691 #endif
4692                 switch (vex2 & 0x1f) {
4693                 case 0x01: /* Implied 0f leading opcode bytes.  */
4694                     b = x86_ldub_code(env, s) | 0x100;
4695                     break;
4696                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4697                     b = 0x138;
4698                     break;
4699                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4700                     b = 0x13a;
4701                     break;
4702                 default:   /* Reserved for future use.  */
4703                     goto unknown_op;
4704                 }
4705             }
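                 /* VEX.vvvv is stored inverted; undo the one's complement here.  */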
4706             s->vex_v = (~vex3 >> 3) & 0xf;
4707             s->vex_l = (vex3 >> 2) & 1;
4708             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4709         }
4710         break;
4711     }
4712 
4713     /* Post-process prefixes.  */
4714     if (CODE64(s)) {
4715         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4716            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4717            over 0x66 if both are present.  */
4718         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4719         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4720         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4721     } else {
4722         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4723         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4724             dflag = MO_32;
4725         } else {
4726             dflag = MO_16;
4727         }
4728         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4729         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4730             aflag = MO_32;
4731         } else {
4732             aflag = MO_16;
4733         }
4734     }
4735 
4736     s->prefix = prefixes;
4737     s->aflag = aflag;
4738     s->dflag = dflag;
4739 
4740     /* now check op code */
4741  reswitch:
4742     switch(b) {
4743     case 0x0f:
4744         /**************************/
4745         /* extended op code */
4746         b = x86_ldub_code(env, s) | 0x100;
4747         goto reswitch;
4748 
4749         /**************************/
4750         /* arith & logic */
4751     case 0x00 ... 0x05:
4752     case 0x08 ... 0x0d:
4753     case 0x10 ... 0x15:
4754     case 0x18 ... 0x1d:
4755     case 0x20 ... 0x25:
4756     case 0x28 ... 0x2d:
4757     case 0x30 ... 0x35:
4758     case 0x38 ... 0x3d:
4759         {
4760             int op, f, val;
4761             op = (b >> 3) & 7;
4762             f = (b >> 1) & 3;
4763 
4764             ot = mo_b_d(b, dflag);
4765 
4766             switch(f) {
4767             case 0: /* OP Ev, Gv */
4768                 modrm = x86_ldub_code(env, s);
4769                 reg = ((modrm >> 3) & 7) | REX_R(s);
4770                 mod = (modrm >> 6) & 3;
4771                 rm = (modrm & 7) | REX_B(s);
4772                 if (mod != 3) {
4773                     gen_lea_modrm(env, s, modrm);
4774                     opreg = OR_TMP0;
4775                 } else if (op == OP_XORL && rm == reg) {
4776                 xor_zero:
4777                     /* xor reg, reg optimisation */
4778                     set_cc_op(s, CC_OP_CLR);
4779                     tcg_gen_movi_tl(s->T0, 0);
4780                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4781                     break;
4782                 } else {
4783                     opreg = rm;
4784                 }
4785                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4786                 gen_op(s, op, ot, opreg);
4787                 break;
4788             case 1: /* OP Gv, Ev */
4789                 modrm = x86_ldub_code(env, s);
4790                 mod = (modrm >> 6) & 3;
4791                 reg = ((modrm >> 3) & 7) | REX_R(s);
4792                 rm = (modrm & 7) | REX_B(s);
4793                 if (mod != 3) {
4794                     gen_lea_modrm(env, s, modrm);
4795                     gen_op_ld_v(s, ot, s->T1, s->A0);
4796                 } else if (op == OP_XORL && rm == reg) {
4797                     goto xor_zero;
4798                 } else {
4799                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4800                 }
4801                 gen_op(s, op, ot, reg);
4802                 break;
4803             case 2: /* OP A, Iv */
4804                 val = insn_get(env, s, ot);
4805                 tcg_gen_movi_tl(s->T1, val);
4806                 gen_op(s, op, ot, OR_EAX);
4807                 break;
4808             }
4809         }
4810         break;
4811 
4812     case 0x82:
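             /* 0x82 is a legacy alias of 0x80; it is undefined in 64-bit mode.  */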
4813         if (CODE64(s))
4814             goto illegal_op;
4815         /* fall through */
4816     case 0x80: /* GRP1 */
4817     case 0x81:
4818     case 0x83:
4819         {
4820             int val;
4821 
4822             ot = mo_b_d(b, dflag);
4823 
4824             modrm = x86_ldub_code(env, s);
4825             mod = (modrm >> 6) & 3;
4826             rm = (modrm & 7) | REX_B(s);
4827             op = (modrm >> 3) & 7;
4828 
4829             if (mod != 3) {
4830                 if (b == 0x83)
4831                     s->rip_offset = 1;
4832                 else
4833                     s->rip_offset = insn_const_size(ot);
4834                 gen_lea_modrm(env, s, modrm);
4835                 opreg = OR_TMP0;
4836             } else {
4837                 opreg = rm;
4838             }
4839 
4840             switch(b) {
4841             default:
4842             case 0x80:
4843             case 0x81:
4844             case 0x82:
4845                 val = insn_get(env, s, ot);
4846                 break;
4847             case 0x83:
4848                 val = (int8_t)insn_get(env, s, MO_8);
4849                 break;
4850             }
4851             tcg_gen_movi_tl(s->T1, val);
4852             gen_op(s, op, ot, opreg);
4853         }
4854         break;
4855 
4856         /**************************/
4857         /* inc, dec, and other misc arith */
4858     case 0x40 ... 0x47: /* inc Gv */
4859         ot = dflag;
4860         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4861         break;
4862     case 0x48 ... 0x4f: /* dec Gv */
4863         ot = dflag;
4864         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4865         break;
4866     case 0xf6: /* GRP3 */
4867     case 0xf7:
4868         ot = mo_b_d(b, dflag);
4869 
4870         modrm = x86_ldub_code(env, s);
4871         mod = (modrm >> 6) & 3;
4872         rm = (modrm & 7) | REX_B(s);
4873         op = (modrm >> 3) & 7;
4874         if (mod != 3) {
4875             if (op == 0) {
4876                 s->rip_offset = insn_const_size(ot);
4877             }
4878             gen_lea_modrm(env, s, modrm);
4879             /* For those below that handle locked memory, don't load here.  */
4880             if (!(s->prefix & PREFIX_LOCK)
4881                 || op != 2) {
4882                 gen_op_ld_v(s, ot, s->T0, s->A0);
4883             }
4884         } else {
4885             gen_op_mov_v_reg(s, ot, s->T0, rm);
4886         }
4887 
4888         switch(op) {
4889         case 0: /* test */
4890             val = insn_get(env, s, ot);
4891             tcg_gen_movi_tl(s->T1, val);
4892             gen_op_testl_T0_T1_cc(s);
4893             set_cc_op(s, CC_OP_LOGICB + ot);
4894             break;
4895         case 2: /* not */
4896             if (s->prefix & PREFIX_LOCK) {
4897                 if (mod == 3) {
4898                     goto illegal_op;
4899                 }
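                     /* NOT x == x XOR -1, so a locked NOT can be implemented
                        as an atomic xor with all ones.  */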
4900                 tcg_gen_movi_tl(s->T0, ~0);
4901                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4902                                             s->mem_index, ot | MO_LE);
4903             } else {
4904                 tcg_gen_not_tl(s->T0, s->T0);
4905                 if (mod != 3) {
4906                     gen_op_st_v(s, ot, s->T0, s->A0);
4907                 } else {
4908                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4909                 }
4910             }
4911             break;
4912         case 3: /* neg */
4913             if (s->prefix & PREFIX_LOCK) {
4914                 TCGLabel *label1;
4915                 TCGv a0, t0, t1, t2;
4916 
4917                 if (mod == 3) {
4918                     goto illegal_op;
4919                 }
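                     /* There is no atomic-neg operation: loop, using cmpxchg
                        to install the negated value and retrying if another
                        cpu has modified the memory location in the meantime.  */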
4920                 a0 = tcg_temp_local_new();
4921                 t0 = tcg_temp_local_new();
4922                 label1 = gen_new_label();
4923 
4924                 tcg_gen_mov_tl(a0, s->A0);
4925                 tcg_gen_mov_tl(t0, s->T0);
4926 
4927                 gen_set_label(label1);
4928                 t1 = tcg_temp_new();
4929                 t2 = tcg_temp_new();
4930                 tcg_gen_mov_tl(t2, t0);
4931                 tcg_gen_neg_tl(t1, t0);
4932                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4933                                           s->mem_index, ot | MO_LE);
4934                 tcg_temp_free(t1);
4935                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4936 
4937                 tcg_temp_free(t2);
4938                 tcg_temp_free(a0);
4939                 tcg_gen_mov_tl(s->T0, t0);
4940                 tcg_temp_free(t0);
4941             } else {
4942                 tcg_gen_neg_tl(s->T0, s->T0);
4943                 if (mod != 3) {
4944                     gen_op_st_v(s, ot, s->T0, s->A0);
4945                 } else {
4946                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4947                 }
4948             }
4949             gen_op_update_neg_cc(s);
4950             set_cc_op(s, CC_OP_SUBB + ot);
4951             break;
4952         case 4: /* mul */
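                 /* In each width, CC_SRC ends up non-zero iff the high half
                    of the product is non-zero, i.e. iff MUL must set CF/OF.  */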
4953             switch(ot) {
4954             case MO_8:
4955                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4956                 tcg_gen_ext8u_tl(s->T0, s->T0);
4957                 tcg_gen_ext8u_tl(s->T1, s->T1);
4958                 /* XXX: use 32 bit mul which could be faster */
4959                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4960                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4961                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4962                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4963                 set_cc_op(s, CC_OP_MULB);
4964                 break;
4965             case MO_16:
4966                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4967                 tcg_gen_ext16u_tl(s->T0, s->T0);
4968                 tcg_gen_ext16u_tl(s->T1, s->T1);
4969                 /* XXX: use 32 bit mul which could be faster */
4970                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4971                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4972                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4973                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4974                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4975                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4976                 set_cc_op(s, CC_OP_MULW);
4977                 break;
4978             default:
4979             case MO_32:
4980                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4981                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4982                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4983                                   s->tmp2_i32, s->tmp3_i32);
4984                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4985                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4986                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4987                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4988                 set_cc_op(s, CC_OP_MULL);
4989                 break;
4990 #ifdef TARGET_X86_64
4991             case MO_64:
4992                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4993                                   s->T0, cpu_regs[R_EAX]);
4994                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4995                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4996                 set_cc_op(s, CC_OP_MULQ);
4997                 break;
4998 #endif
4999             }
5000             break;
5001         case 5: /* imul */
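                 /* In each width, CC_SRC ends up non-zero iff the high half of
                    the product differs from the sign extension of the low half,
                    i.e. iff IMUL must set CF/OF.  */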
5002             switch(ot) {
5003             case MO_8:
5004                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5005                 tcg_gen_ext8s_tl(s->T0, s->T0);
5006                 tcg_gen_ext8s_tl(s->T1, s->T1);
5007                 /* XXX: use 32 bit mul which could be faster */
5008                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5009                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5010                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5011                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5012                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5013                 set_cc_op(s, CC_OP_MULB);
5014                 break;
5015             case MO_16:
5016                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5017                 tcg_gen_ext16s_tl(s->T0, s->T0);
5018                 tcg_gen_ext16s_tl(s->T1, s->T1);
5019                 /* XXX: use 32 bit mul which could be faster */
5020                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5021                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5022                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5023                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5024                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5025                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5026                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5027                 set_cc_op(s, CC_OP_MULW);
5028                 break;
5029             default:
5030             case MO_32:
5031                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5032                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5033                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5034                                   s->tmp2_i32, s->tmp3_i32);
5035                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5036                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5037                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5038                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5039                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5040                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5041                 set_cc_op(s, CC_OP_MULL);
5042                 break;
5043 #ifdef TARGET_X86_64
5044             case MO_64:
5045                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5046                                   s->T0, cpu_regs[R_EAX]);
5047                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5048                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5049                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5050                 set_cc_op(s, CC_OP_MULQ);
5051                 break;
5052 #endif
5053             }
5054             break;
5055         case 6: /* div */
5056             switch(ot) {
5057             case MO_8:
5058                 gen_helper_divb_AL(cpu_env, s->T0);
5059                 break;
5060             case MO_16:
5061                 gen_helper_divw_AX(cpu_env, s->T0);
5062                 break;
5063             default:
5064             case MO_32:
5065                 gen_helper_divl_EAX(cpu_env, s->T0);
5066                 break;
5067 #ifdef TARGET_X86_64
5068             case MO_64:
5069                 gen_helper_divq_EAX(cpu_env, s->T0);
5070                 break;
5071 #endif
5072             }
5073             break;
5074         case 7: /* idiv */
5075             switch(ot) {
5076             case MO_8:
5077                 gen_helper_idivb_AL(cpu_env, s->T0);
5078                 break;
5079             case MO_16:
5080                 gen_helper_idivw_AX(cpu_env, s->T0);
5081                 break;
5082             default:
5083             case MO_32:
5084                 gen_helper_idivl_EAX(cpu_env, s->T0);
5085                 break;
5086 #ifdef TARGET_X86_64
5087             case MO_64:
5088                 gen_helper_idivq_EAX(cpu_env, s->T0);
5089                 break;
5090 #endif
5091             }
5092             break;
5093         default:
5094             goto unknown_op;
5095         }
5096         break;
5097 
5098     case 0xfe: /* GRP4 */
5099     case 0xff: /* GRP5 */
5100         ot = mo_b_d(b, dflag);
5101 
5102         modrm = x86_ldub_code(env, s);
5103         mod = (modrm >> 6) & 3;
5104         rm = (modrm & 7) | REX_B(s);
5105         op = (modrm >> 3) & 7;
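             /* GRP4 (0xfe) defines only INC and DEC; the other ops belong to GRP5.  */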
5106         if (op >= 2 && b == 0xfe) {
5107             goto unknown_op;
5108         }
5109         if (CODE64(s)) {
5110             if (op == 2 || op == 4) {
5111                 /* operand size for jumps is 64 bit */
5112                 ot = MO_64;
5113             } else if (op == 3 || op == 5) {
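                     /* far pointer offset: 32-bit, or 64-bit with REX.W */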
5114                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5115             } else if (op == 6) {
5116                 /* default push size is 64 bit */
5117                 ot = mo_pushpop(s, dflag);
5118             }
5119         }
5120         if (mod != 3) {
5121             gen_lea_modrm(env, s, modrm);
5122             if (op >= 2 && op != 3 && op != 5)
5123                 gen_op_ld_v(s, ot, s->T0, s->A0);
5124         } else {
5125             gen_op_mov_v_reg(s, ot, s->T0, rm);
5126         }
5127 
5128         switch(op) {
5129         case 0: /* inc Ev */
5130             if (mod != 3)
5131                 opreg = OR_TMP0;
5132             else
5133                 opreg = rm;
5134             gen_inc(s, ot, opreg, 1);
5135             break;
5136         case 1: /* dec Ev */
5137             if (mod != 3)
5138                 opreg = OR_TMP0;
5139             else
5140                 opreg = rm;
5141             gen_inc(s, ot, opreg, -1);
5142             break;
5143         case 2: /* call Ev */
5144             /* XXX: optimize the memory case: the load is already zero-extended, making the 'and' unnecessary */
5145             if (dflag == MO_16) {
5146                 tcg_gen_ext16u_tl(s->T0, s->T0);
5147             }
5148             next_eip = s->pc - s->cs_base;
5149             tcg_gen_movi_tl(s->T1, next_eip);
5150             gen_push_v(s, s->T1);
5151             gen_op_jmp_v(s->T0);
5152             gen_bnd_jmp(s);
5153             gen_jr(s, s->T0);
5154             break;
5155         case 3: /* lcall Ev */
5156             if (mod == 3) {
5157                 goto illegal_op;
5158             }
5159             gen_op_ld_v(s, ot, s->T1, s->A0);
5160             gen_add_A0_im(s, 1 << ot);
5161             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5162         do_lcall:
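                 /* In protected mode the helper performs the privilege and
                    gate checks and may switch stacks; in real/vm86 mode a
                    plain far call frame is pushed.  */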
5163             if (PE(s) && !VM86(s)) {
5164                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5165                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5166                                            tcg_const_i32(dflag - 1),
5167                                            tcg_const_tl(s->pc - s->cs_base));
5168             } else {
5169                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5170                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5171                                       tcg_const_i32(dflag - 1),
5172                                       tcg_const_i32(s->pc - s->cs_base));
5173             }
5174             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5175             gen_jr(s, s->tmp4);
5176             break;
5177         case 4: /* jmp Ev */
5178             if (dflag == MO_16) {
5179                 tcg_gen_ext16u_tl(s->T0, s->T0);
5180             }
5181             gen_op_jmp_v(s->T0);
5182             gen_bnd_jmp(s);
5183             gen_jr(s, s->T0);
5184             break;
5185         case 5: /* ljmp Ev */
5186             if (mod == 3) {
5187                 goto illegal_op;
5188             }
5189             gen_op_ld_v(s, ot, s->T1, s->A0);
5190             gen_add_A0_im(s, 1 << ot);
5191             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5192         do_ljmp:
5193             if (PE(s) && !VM86(s)) {
5194                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5195                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5196                                           tcg_const_tl(s->pc - s->cs_base));
5197             } else {
5198                 gen_op_movl_seg_T0_vm(s, R_CS);
5199                 gen_op_jmp_v(s->T1);
5200             }
5201             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5202             gen_jr(s, s->tmp4);
5203             break;
5204         case 6: /* push Ev */
5205             gen_push_v(s, s->T0);
5206             break;
5207         default:
5208             goto unknown_op;
5209         }
5210         break;
5211 
5212     case 0x84: /* test Ev, Gv */
5213     case 0x85:
5214         ot = mo_b_d(b, dflag);
5215 
5216         modrm = x86_ldub_code(env, s);
5217         reg = ((modrm >> 3) & 7) | REX_R(s);
5218 
5219         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5220         gen_op_mov_v_reg(s, ot, s->T1, reg);
5221         gen_op_testl_T0_T1_cc(s);
5222         set_cc_op(s, CC_OP_LOGICB + ot);
5223         break;
5224 
5225     case 0xa8: /* test eAX, Iv */
5226     case 0xa9:
5227         ot = mo_b_d(b, dflag);
5228         val = insn_get(env, s, ot);
5229 
5230         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5231         tcg_gen_movi_tl(s->T1, val);
5232         gen_op_testl_T0_T1_cc(s);
5233         set_cc_op(s, CC_OP_LOGICB + ot);
5234         break;
5235 
5236     case 0x98: /* CWDE/CBW */
5237         switch (dflag) {
5238 #ifdef TARGET_X86_64
5239         case MO_64:
5240             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5241             tcg_gen_ext32s_tl(s->T0, s->T0);
5242             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5243             break;
5244 #endif
5245         case MO_32:
5246             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5247             tcg_gen_ext16s_tl(s->T0, s->T0);
5248             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5249             break;
5250         case MO_16:
5251             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5252             tcg_gen_ext8s_tl(s->T0, s->T0);
5253             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5254             break;
5255         default:
5256             tcg_abort();
5257         }
5258         break;
5259     case 0x99: /* CDQ/CWD */
5260         switch (dflag) {
5261 #ifdef TARGET_X86_64
5262         case MO_64:
5263             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5264             tcg_gen_sari_tl(s->T0, s->T0, 63);
5265             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5266             break;
5267 #endif
5268         case MO_32:
5269             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5270             tcg_gen_ext32s_tl(s->T0, s->T0);
5271             tcg_gen_sari_tl(s->T0, s->T0, 31);
5272             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5273             break;
5274         case MO_16:
5275             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5276             tcg_gen_ext16s_tl(s->T0, s->T0);
5277             tcg_gen_sari_tl(s->T0, s->T0, 15);
5278             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5279             break;
5280         default:
5281             tcg_abort();
5282         }
5283         break;
5284     case 0x1af: /* imul Gv, Ev */
5285     case 0x69: /* imul Gv, Ev, I */
5286     case 0x6b:
5287         ot = dflag;
5288         modrm = x86_ldub_code(env, s);
5289         reg = ((modrm >> 3) & 7) | REX_R(s);
5290         if (b == 0x69)
5291             s->rip_offset = insn_const_size(ot);
5292         else if (b == 0x6b)
5293             s->rip_offset = 1;
5294         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5295         if (b == 0x69) {
5296             val = insn_get(env, s, ot);
5297             tcg_gen_movi_tl(s->T1, val);
5298         } else if (b == 0x6b) {
5299             val = (int8_t)insn_get(env, s, MO_8);
5300             tcg_gen_movi_tl(s->T1, val);
5301         } else {
5302             gen_op_mov_v_reg(s, ot, s->T1, reg);
5303         }
5304         switch (ot) {
5305 #ifdef TARGET_X86_64
5306         case MO_64:
5307             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5308             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5309             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5310             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5311             break;
5312 #endif
5313         case MO_32:
5314             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5315             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5316             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5317                               s->tmp2_i32, s->tmp3_i32);
5318             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5319             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5320             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5321             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5322             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5323             break;
5324         default:
5325             tcg_gen_ext16s_tl(s->T0, s->T0);
5326             tcg_gen_ext16s_tl(s->T1, s->T1);
5327             /* XXX: use 32 bit mul which could be faster */
5328             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5329             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5330             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5331             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5332             gen_op_mov_reg_v(s, ot, reg, s->T0);
5333             break;
5334         }
5335         set_cc_op(s, CC_OP_MULB + ot);
5336         break;
5337     case 0x1c0:
5338     case 0x1c1: /* xadd Ev, Gv */
5339         ot = mo_b_d(b, dflag);
5340         modrm = x86_ldub_code(env, s);
5341         reg = ((modrm >> 3) & 7) | REX_R(s);
5342         mod = (modrm >> 6) & 3;
5343         gen_op_mov_v_reg(s, ot, s->T0, reg);
5344         if (mod == 3) {
5345             rm = (modrm & 7) | REX_B(s);
5346             gen_op_mov_v_reg(s, ot, s->T1, rm);
5347             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5348             gen_op_mov_reg_v(s, ot, reg, s->T1);
5349             gen_op_mov_reg_v(s, ot, rm, s->T0);
5350         } else {
5351             gen_lea_modrm(env, s, modrm);
5352             if (s->prefix & PREFIX_LOCK) {
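                     /* fetch_add returns the old value in T1; recompute the
                        sum into T0 so the flags update below sees the result.  */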
5353                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5354                                             s->mem_index, ot | MO_LE);
5355                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5356             } else {
5357                 gen_op_ld_v(s, ot, s->T1, s->A0);
5358                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5359                 gen_op_st_v(s, ot, s->T0, s->A0);
5360             }
5361             gen_op_mov_reg_v(s, ot, reg, s->T1);
5362         }
5363         gen_op_update2_cc(s);
5364         set_cc_op(s, CC_OP_ADDB + ot);
5365         break;
5366     case 0x1b0:
5367     case 0x1b1: /* cmpxchg Ev, Gv */
5368         {
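                 /* CMPXCHG: compare EAX with Ev; if equal, store Gv into Ev,
                    otherwise load Ev into EAX.  Flags are set as for
                    "cmp EAX, Ev".  */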
5369             TCGv oldv, newv, cmpv;
5370 
5371             ot = mo_b_d(b, dflag);
5372             modrm = x86_ldub_code(env, s);
5373             reg = ((modrm >> 3) & 7) | REX_R(s);
5374             mod = (modrm >> 6) & 3;
5375             oldv = tcg_temp_new();
5376             newv = tcg_temp_new();
5377             cmpv = tcg_temp_new();
5378             gen_op_mov_v_reg(s, ot, newv, reg);
5379             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5380 
5381             if (s->prefix & PREFIX_LOCK) {
5382                 if (mod == 3) {
5383                     goto illegal_op;
5384                 }
5385                 gen_lea_modrm(env, s, modrm);
5386                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5387                                           s->mem_index, ot | MO_LE);
5388                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5389             } else {
5390                 if (mod == 3) {
5391                     rm = (modrm & 7) | REX_B(s);
5392                     gen_op_mov_v_reg(s, ot, oldv, rm);
5393                 } else {
5394                     gen_lea_modrm(env, s, modrm);
5395                     gen_op_ld_v(s, ot, oldv, s->A0);
5396                     rm = 0; /* avoid warning */
5397                 }
5398                 gen_extu(ot, oldv);
5399                 gen_extu(ot, cmpv);
5400                 /* store value = (old == cmp ? new : old);  */
5401                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5402                 if (mod == 3) {
5403                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5404                     gen_op_mov_reg_v(s, ot, rm, newv);
5405                 } else {
5406                     /* Perform an unconditional store cycle like a physical
5407                        CPU; it must happen before the accumulator is changed,
5408                        so the instruction stays idempotent if the store
5409                        faults and the instruction is restarted */
5410                     gen_op_st_v(s, ot, newv, s->A0);
5411                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5412                 }
5413             }
5414             tcg_gen_mov_tl(cpu_cc_src, oldv);
5415             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5416             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5417             set_cc_op(s, CC_OP_SUBB + ot);
5418             tcg_temp_free(oldv);
5419             tcg_temp_free(newv);
5420             tcg_temp_free(cmpv);
5421         }
5422         break;
5423     case 0x1c7: /* cmpxchg8b */
5424         modrm = x86_ldub_code(env, s);
5425         mod = (modrm >> 6) & 3;
5426         switch ((modrm >> 3) & 7) {
5427         case 1: /* CMPXCHG8, CMPXCHG16 */
5428             if (mod == 3) {
5429                 goto illegal_op;
5430             }
5431 #ifdef TARGET_X86_64
5432             if (dflag == MO_64) {
5433                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5434                     goto illegal_op;
5435                 }
5436                 gen_lea_modrm(env, s, modrm);
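                     /* The atomic helper is only needed when other cpus may
                        run in parallel and observe the access; otherwise the
                        unlocked variant suffices.  */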
5437                 if ((s->prefix & PREFIX_LOCK) &&
5438                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5439                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5440                 } else {
5441                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5442                 }
5443                 set_cc_op(s, CC_OP_EFLAGS);
5444                 break;
5445             }
5446 #endif
5447             if (!(s->cpuid_features & CPUID_CX8)) {
5448                 goto illegal_op;
5449             }
5450             gen_lea_modrm(env, s, modrm);
5451             if ((s->prefix & PREFIX_LOCK) &&
5452                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5453                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5454             } else {
5455                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5456             }
5457             set_cc_op(s, CC_OP_EFLAGS);
5458             break;
5459 
5460         case 7: /* RDSEED */
5461         case 6: /* RDRAND */
5462             if (mod != 3 ||
5463                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5464                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5465                 goto illegal_op;
5466             }
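                 /* RDRAND is nondeterministic, so under icount it is treated
                    like an I/O operation and the TB is ended afterwards.  */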
5467             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5468                 gen_io_start();
5469             }
5470             gen_helper_rdrand(s->T0, cpu_env);
5471             rm = (modrm & 7) | REX_B(s);
5472             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5473             set_cc_op(s, CC_OP_EFLAGS);
5474             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5475                 gen_jmp(s, s->pc - s->cs_base);
5476             }
5477             break;
5478 
5479         default:
5480             goto illegal_op;
5481         }
5482         break;
5483 
5484         /**************************/
5485         /* push/pop */
5486     case 0x50 ... 0x57: /* push */
5487         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5488         gen_push_v(s, s->T0);
5489         break;
5490     case 0x58 ... 0x5f: /* pop */
5491         ot = gen_pop_T0(s);
5492         /* NOTE: order is important for pop %sp */
5493         gen_pop_update(s, ot);
5494         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5495         break;
5496     case 0x60: /* pusha */
5497         if (CODE64(s))
5498             goto illegal_op;
5499         gen_pusha(s);
5500         break;
5501     case 0x61: /* popa */
5502         if (CODE64(s))
5503             goto illegal_op;
5504         gen_popa(s);
5505         break;
5506     case 0x68: /* push Iv */
5507     case 0x6a:
5508         ot = mo_pushpop(s, dflag);
5509         if (b == 0x68)
5510             val = insn_get(env, s, ot);
5511         else
5512             val = (int8_t)insn_get(env, s, MO_8);
5513         tcg_gen_movi_tl(s->T0, val);
5514         gen_push_v(s, s->T0);
5515         break;
5516     case 0x8f: /* pop Ev */
5517         modrm = x86_ldub_code(env, s);
5518         mod = (modrm >> 6) & 3;
5519         ot = gen_pop_T0(s);
5520         if (mod == 3) {
5521             /* NOTE: order is important for pop %sp */
5522             gen_pop_update(s, ot);
5523             rm = (modrm & 7) | REX_B(s);
5524             gen_op_mov_reg_v(s, ot, rm, s->T0);
5525         } else {
5526             /* NOTE: order is important too for MMU exceptions */
5527             s->popl_esp_hack = 1 << ot;
5528             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5529             s->popl_esp_hack = 0;
5530             gen_pop_update(s, ot);
5531         }
5532         break;
5533     case 0xc8: /* enter */
5534         {
5535             int level;
5536             val = x86_lduw_code(env, s);
5537             level = x86_ldub_code(env, s);
5538             gen_enter(s, val, level);
5539         }
5540         break;
5541     case 0xc9: /* leave */
5542         gen_leave(s);
5543         break;
5544     case 0x06: /* push es */
5545     case 0x0e: /* push cs */
5546     case 0x16: /* push ss */
5547     case 0x1e: /* push ds */
5548         if (CODE64(s))
5549             goto illegal_op;
5550         gen_op_movl_T0_seg(s, b >> 3);
5551         gen_push_v(s, s->T0);
5552         break;
5553     case 0x1a0: /* push fs */
5554     case 0x1a8: /* push gs */
5555         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5556         gen_push_v(s, s->T0);
5557         break;
5558     case 0x07: /* pop es */
5559     case 0x17: /* pop ss */
5560     case 0x1f: /* pop ds */
5561         if (CODE64(s))
5562             goto illegal_op;
5563         reg = b >> 3;
5564         ot = gen_pop_T0(s);
5565         gen_movl_seg_T0(s, reg);
5566         gen_pop_update(s, ot);
5567         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5568         if (s->base.is_jmp) {
5569             gen_jmp_im(s, s->pc - s->cs_base);
5570             if (reg == R_SS) {
5571                 s->flags &= ~HF_TF_MASK;
5572                 gen_eob_inhibit_irq(s, true);
5573             } else {
5574                 gen_eob(s);
5575             }
5576         }
5577         break;
5578     case 0x1a1: /* pop fs */
5579     case 0x1a9: /* pop gs */
5580         ot = gen_pop_T0(s);
5581         gen_movl_seg_T0(s, (b >> 3) & 7);
5582         gen_pop_update(s, ot);
5583         if (s->base.is_jmp) {
5584             gen_jmp_im(s, s->pc - s->cs_base);
5585             gen_eob(s);
5586         }
5587         break;
5588 
5589         /**************************/
5590         /* mov */
5591     case 0x88:
5592     case 0x89: /* mov Gv, Ev */
5593         ot = mo_b_d(b, dflag);
5594         modrm = x86_ldub_code(env, s);
5595         reg = ((modrm >> 3) & 7) | REX_R(s);
5596 
5597         /* generate a generic store */
5598         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5599         break;
5600     case 0xc6:
5601     case 0xc7: /* mov Ev, Iv */
5602         ot = mo_b_d(b, dflag);
5603         modrm = x86_ldub_code(env, s);
5604         mod = (modrm >> 6) & 3;
5605         if (mod != 3) {
5606             s->rip_offset = insn_const_size(ot);
5607             gen_lea_modrm(env, s, modrm);
5608         }
5609         val = insn_get(env, s, ot);
5610         tcg_gen_movi_tl(s->T0, val);
5611         if (mod != 3) {
5612             gen_op_st_v(s, ot, s->T0, s->A0);
5613         } else {
5614             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5615         }
5616         break;
5617     case 0x8a:
5618     case 0x8b: /* mov Ev, Gv */
5619         ot = mo_b_d(b, dflag);
5620         modrm = x86_ldub_code(env, s);
5621         reg = ((modrm >> 3) & 7) | REX_R(s);
5622 
5623         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5624         gen_op_mov_reg_v(s, ot, reg, s->T0);
5625         break;
5626     case 0x8e: /* mov seg, Gv */
5627         modrm = x86_ldub_code(env, s);
5628         reg = (modrm >> 3) & 7;
5629         if (reg >= 6 || reg == R_CS)
5630             goto illegal_op;
5631         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5632         gen_movl_seg_T0(s, reg);
5633         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5634         if (s->base.is_jmp) {
5635             gen_jmp_im(s, s->pc - s->cs_base);
5636             if (reg == R_SS) {
5637                 s->flags &= ~HF_TF_MASK;
5638                 gen_eob_inhibit_irq(s, true);
5639             } else {
5640                 gen_eob(s);
5641             }
5642         }
5643         break;
5644     case 0x8c: /* mov Gv, seg */
5645         modrm = x86_ldub_code(env, s);
5646         reg = (modrm >> 3) & 7;
5647         mod = (modrm >> 6) & 3;
5648         if (reg >= 6)
5649             goto illegal_op;
5650         gen_op_movl_T0_seg(s, reg);
5651         ot = mod == 3 ? dflag : MO_16;
5652         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5653         break;
5654 
5655     case 0x1b6: /* movzbS Gv, Eb */
5656     case 0x1b7: /* movzwS Gv, Ew */
5657     case 0x1be: /* movsbS Gv, Eb */
5658     case 0x1bf: /* movswS Gv, Ew */
5659         {
5660             MemOp d_ot;
5661             MemOp s_ot;
5662 
5663             /* d_ot is the size of destination */
5664             d_ot = dflag;
5665             /* ot is the size of source */
5666             ot = (b & 1) + MO_8;
5667             /* s_ot is the sign+size of source */
5668             s_ot = b & 8 ? MO_SIGN | ot : ot;
5669 
5670             modrm = x86_ldub_code(env, s);
5671             reg = ((modrm >> 3) & 7) | REX_R(s);
5672             mod = (modrm >> 6) & 3;
5673             rm = (modrm & 7) | REX_B(s);
5674 
5675             if (mod == 3) {
5676                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
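                         /* movsb from AH/CH/DH/BH: sign-extend bits 8..15 of
                            the corresponding base register.  */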
5677                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5678                 } else {
5679                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5680                     switch (s_ot) {
5681                     case MO_UB:
5682                         tcg_gen_ext8u_tl(s->T0, s->T0);
5683                         break;
5684                     case MO_SB:
5685                         tcg_gen_ext8s_tl(s->T0, s->T0);
5686                         break;
5687                     case MO_UW:
5688                         tcg_gen_ext16u_tl(s->T0, s->T0);
5689                         break;
5690                     default:
5691                     case MO_SW:
5692                         tcg_gen_ext16s_tl(s->T0, s->T0);
5693                         break;
5694                     }
5695                 }
5696                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5697             } else {
5698                 gen_lea_modrm(env, s, modrm);
5699                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5700                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5701             }
5702         }
5703         break;
5704 
5705     case 0x8d: /* lea */
5706         modrm = x86_ldub_code(env, s);
5707         mod = (modrm >> 6) & 3;
5708         if (mod == 3)
5709             goto illegal_op;
5710         reg = ((modrm >> 3) & 7) | REX_R(s);
5711         {
5712             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5713             TCGv ea = gen_lea_modrm_1(s, a);
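                 /* LEA yields the offset only; -1/-1 means no segment base is applied.  */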
5714             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5715             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5716         }
5717         break;
5718 
5719     case 0xa0: /* mov EAX, Ov */
5720     case 0xa1:
5721     case 0xa2: /* mov Ov, EAX */
5722     case 0xa3:
5723         {
5724             target_ulong offset_addr;
5725 
5726             ot = mo_b_d(b, dflag);
5727             switch (s->aflag) {
5728 #ifdef TARGET_X86_64
5729             case MO_64:
5730                 offset_addr = x86_ldq_code(env, s);
5731                 break;
5732 #endif
5733             default:
5734                 offset_addr = insn_get(env, s, s->aflag);
5735                 break;
5736             }
5737             tcg_gen_movi_tl(s->A0, offset_addr);
5738             gen_add_A0_ds_seg(s);
5739             if ((b & 2) == 0) {
5740                 gen_op_ld_v(s, ot, s->T0, s->A0);
5741                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5742             } else {
5743                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5744                 gen_op_st_v(s, ot, s->T0, s->A0);
5745             }
5746         }
5747         break;
5748     case 0xd7: /* xlat */
5749         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5750         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5751         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5752         gen_extu(s->aflag, s->A0);
5753         gen_add_A0_ds_seg(s);
5754         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5755         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5756         break;
5757     case 0xb0 ... 0xb7: /* mov R, Ib */
5758         val = insn_get(env, s, MO_8);
5759         tcg_gen_movi_tl(s->T0, val);
5760         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5761         break;
5762     case 0xb8 ... 0xbf: /* mov R, Iv */
5763 #ifdef TARGET_X86_64
5764         if (dflag == MO_64) {
5765             uint64_t tmp;
5766             /* 64 bit case */
5767             tmp = x86_ldq_code(env, s);
5768             reg = (b & 7) | REX_B(s);
5769             tcg_gen_movi_tl(s->T0, tmp);
5770             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5771         } else
5772 #endif
5773         {
5774             ot = dflag;
5775             val = insn_get(env, s, ot);
5776             reg = (b & 7) | REX_B(s);
5777             tcg_gen_movi_tl(s->T0, val);
5778             gen_op_mov_reg_v(s, ot, reg, s->T0);
5779         }
5780         break;
5781 
5782     case 0x91 ... 0x97: /* xchg R, EAX */
5783     do_xchg_reg_eax:
5784         ot = dflag;
5785         reg = (b & 7) | REX_B(s);
5786         rm = R_EAX;
5787         goto do_xchg_reg;
5788     case 0x86:
5789     case 0x87: /* xchg Ev, Gv */
5790         ot = mo_b_d(b, dflag);
5791         modrm = x86_ldub_code(env, s);
5792         reg = ((modrm >> 3) & 7) | REX_R(s);
5793         mod = (modrm >> 6) & 3;
5794         if (mod == 3) {
5795             rm = (modrm & 7) | REX_B(s);
5796         do_xchg_reg:
5797             gen_op_mov_v_reg(s, ot, s->T0, reg);
5798             gen_op_mov_v_reg(s, ot, s->T1, rm);
5799             gen_op_mov_reg_v(s, ot, rm, s->T0);
5800             gen_op_mov_reg_v(s, ot, reg, s->T1);
5801         } else {
5802             gen_lea_modrm(env, s, modrm);
5803             gen_op_mov_v_reg(s, ot, s->T0, reg);
5804             /* for xchg, lock is implicit */
5805             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5806                                    s->mem_index, ot | MO_LE);
5807             gen_op_mov_reg_v(s, ot, reg, s->T1);
5808         }
5809         break;
5810     case 0xc4: /* les Gv */
5811         /* In CODE64 this is VEX3; see above.  */
5812         op = R_ES;
5813         goto do_lxx;
5814     case 0xc5: /* lds Gv */
5815         /* In CODE64 this is VEX2; see above.  */
5816         op = R_DS;
5817         goto do_lxx;
5818     case 0x1b2: /* lss Gv */
5819         op = R_SS;
5820         goto do_lxx;
5821     case 0x1b4: /* lfs Gv */
5822         op = R_FS;
5823         goto do_lxx;
5824     case 0x1b5: /* lgs Gv */
5825         op = R_GS;
5826     do_lxx:
5827         ot = dflag != MO_16 ? MO_32 : MO_16;
5828         modrm = x86_ldub_code(env, s);
5829         reg = ((modrm >> 3) & 7) | REX_R(s);
5830         mod = (modrm >> 6) & 3;
5831         if (mod == 3)
5832             goto illegal_op;
5833         gen_lea_modrm(env, s, modrm);
5834         gen_op_ld_v(s, ot, s->T1, s->A0);
5835         gen_add_A0_im(s, 1 << ot);
5836         /* load the segment first to handle exceptions properly */
5837         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5838         gen_movl_seg_T0(s, op);
5839         /* then put the data */
5840         gen_op_mov_reg_v(s, ot, reg, s->T1);
5841         if (s->base.is_jmp) {
5842             gen_jmp_im(s, s->pc - s->cs_base);
5843             gen_eob(s);
5844         }
5845         break;
5846 
5847         /************************/
5848         /* shifts */
5849     case 0xc0:
5850     case 0xc1:
5851         /* shift Ev,Ib */
5852         shift = 2;
5853     grp2:
5854         {
5855             ot = mo_b_d(b, dflag);
5856             modrm = x86_ldub_code(env, s);
5857             mod = (modrm >> 6) & 3;
5858             op = (modrm >> 3) & 7;
5859 
5860             if (mod != 3) {
5861                 if (shift == 2) {
5862                     s->rip_offset = 1;
5863                 }
5864                 gen_lea_modrm(env, s, modrm);
5865                 opreg = OR_TMP0;
5866             } else {
5867                 opreg = (modrm & 7) | REX_B(s);
5868             }
5869 
5870             /* the shift count is in CL (shift == 0), an imm8 (shift == 2), or 1 */
5871             if (shift == 0) {
5872                 gen_shift(s, op, ot, opreg, OR_ECX);
5873             } else {
5874                 if (shift == 2) {
5875                     shift = x86_ldub_code(env, s);
5876                 }
5877                 gen_shifti(s, op, ot, opreg, shift);
5878             }
5879         }
5880         break;
5881     case 0xd0:
5882     case 0xd1:
5883         /* shift Ev,1 */
5884         shift = 1;
5885         goto grp2;
5886     case 0xd2:
5887     case 0xd3:
5888         /* shift Ev,cl */
5889         shift = 0;
5890         goto grp2;
5891 
5892     case 0x1a4: /* shld imm */
5893         op = 0;
5894         shift = 1;
5895         goto do_shiftd;
5896     case 0x1a5: /* shld cl */
5897         op = 0;
5898         shift = 0;
5899         goto do_shiftd;
5900     case 0x1ac: /* shrd imm */
5901         op = 1;
5902         shift = 1;
5903         goto do_shiftd;
5904     case 0x1ad: /* shrd cl */
5905         op = 1;
5906         shift = 0;
5907     do_shiftd:
5908         ot = dflag;
5909         modrm = x86_ldub_code(env, s);
5910         mod = (modrm >> 6) & 3;
5911         rm = (modrm & 7) | REX_B(s);
5912         reg = ((modrm >> 3) & 7) | REX_R(s);
5913         if (mod != 3) {
5914             gen_lea_modrm(env, s, modrm);
5915             opreg = OR_TMP0;
5916         } else {
5917             opreg = rm;
5918         }
5919         gen_op_mov_v_reg(s, ot, s->T1, reg);
5920 
5921         if (shift) {
5922             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5923             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5924             tcg_temp_free(imm);
5925         } else {
5926             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5927         }
5928         break;
5929 
5930         /************************/
5931         /* floats */
5932     case 0xd8 ... 0xdf:
5933         if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5934             /* if CR0.EM or CR0.TS is set, generate an FPU exception */
5935             /* XXX: what to do if illegal op? */
5936             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5937             break;
5938         }
5939         modrm = x86_ldub_code(env, s);
5940         mod = (modrm >> 6) & 3;
5941         rm = modrm & 7;
5942         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
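             /* Index the FPU switch below by the low ESC opcode bits and the modrm reg field.  */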
5943         if (mod != 3) {
5944             /* memory op */
5945             gen_lea_modrm(env, s, modrm);
5946             switch(op) {
5947             case 0x00 ... 0x07: /* fxxxs */
5948             case 0x10 ... 0x17: /* fixxxl */
5949             case 0x20 ... 0x27: /* fxxxl */
5950             case 0x30 ... 0x37: /* fixxx */
5951                 {
5952                     int op1;
5953                     op1 = op & 7;
5954 
5955                     switch(op >> 4) {
5956                     case 0:
5957                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5958                                             s->mem_index, MO_LEUL);
5959                         gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5960                         break;
5961                     case 1:
5962                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5963                                             s->mem_index, MO_LEUL);
5964                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5965                         break;
5966                     case 2:
5967                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5968                                             s->mem_index, MO_LEQ);
5969                         gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5970                         break;
5971                     case 3:
5972                     default:
5973                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5974                                             s->mem_index, MO_LESW);
5975                         gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5976                         break;
5977                     }
5978 
5979                     gen_helper_fp_arith_ST0_FT0(op1);
5980                     if (op1 == 3) {
5981                         /* fcomp needs pop */
5982                         gen_helper_fpop(cpu_env);
5983                     }
5984                 }
5985                 break;
5986             case 0x08: /* flds */
5987             case 0x0a: /* fsts */
5988             case 0x0b: /* fstps */
5989             case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5990             case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5991             case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5992                 switch(op & 7) {
5993                 case 0:
5994                     switch(op >> 4) {
5995                     case 0:
5996                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5997                                             s->mem_index, MO_LEUL);
5998                         gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5999                         break;
6000                     case 1:
6001                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6002                                             s->mem_index, MO_LEUL);
6003                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6004                         break;
6005                     case 2:
6006                         tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6007                                             s->mem_index, MO_LEQ);
6008                         gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
6009                         break;
6010                     case 3:
6011                     default:
6012                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6013                                             s->mem_index, MO_LESW);
6014                         gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
6015                         break;
6016                     }
6017                     break;
6018                 case 1:
6019                     /* XXX: the corresponding CPUID bit (SSE3, which
                            added FISTTP) must be tested! */
6020                     switch(op >> 4) {
6021                     case 1:
6022                         gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6023                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6024                                             s->mem_index, MO_LEUL);
6025                         break;
6026                     case 2:
6027                         gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6028                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6029                                             s->mem_index, MO_LEQ);
6030                         break;
6031                     case 3:
6032                     default:
6033                         gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6034                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6035                                             s->mem_index, MO_LEUW);
6036                         break;
6037                     }
6038                     gen_helper_fpop(cpu_env);
6039                     break;
6040                 default:
6041                     switch(op >> 4) {
6042                     case 0:
6043                         gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6044                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6045                                             s->mem_index, MO_LEUL);
6046                         break;
6047                     case 1:
6048                         gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6049                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6050                                             s->mem_index, MO_LEUL);
6051                         break;
6052                     case 2:
6053                         gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6054                         tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6055                                             s->mem_index, MO_LEQ);
6056                         break;
6057                     case 3:
6058                     default:
6059                         gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6060                         tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6061                                             s->mem_index, MO_LEUW);
6062                         break;
6063                     }
6064                     if ((op & 7) == 3)
6065                         gen_helper_fpop(cpu_env);
6066                     break;
6067                 }
6068                 break;
6069             case 0x0c: /* fldenv mem */
6070                 gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6071                 break;
6072             case 0x0d: /* fldcw mem */
6073                 tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6074                                     s->mem_index, MO_LEUW);
6075                 gen_helper_fldcw(cpu_env, s->tmp2_i32);
6076                 break;
6077             case 0x0e: /* fnstenv mem */
6078                 gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6079                 break;
6080             case 0x0f: /* fnstcw mem */
6081                 gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6082                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6083                                     s->mem_index, MO_LEUW);
6084                 break;
6085             case 0x1d: /* fldt mem */
6086                 gen_helper_fldt_ST0(cpu_env, s->A0);
6087                 break;
6088             case 0x1f: /* fstpt mem */
6089                 gen_helper_fstt_ST0(cpu_env, s->A0);
6090                 gen_helper_fpop(cpu_env);
6091                 break;
6092             case 0x2c: /* frstor mem */
6093                 gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6094                 break;
6095             case 0x2e: /* fnsave mem */
6096                 gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
6097                 break;
6098             case 0x2f: /* fnstsw mem */
6099                 gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6100                 tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6101                                     s->mem_index, MO_LEUW);
6102                 break;
6103             case 0x3c: /* fbld */
6104                 gen_helper_fbld_ST0(cpu_env, s->A0);
6105                 break;
6106             case 0x3e: /* fbstp */
6107                 gen_helper_fbst_ST0(cpu_env, s->A0);
6108                 gen_helper_fpop(cpu_env);
6109                 break;
6110             case 0x3d: /* fildll */
6111                 tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6112                 gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6113                 break;
6114             case 0x3f: /* fistpll */
6115                 gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6116                 tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
6117                 gen_helper_fpop(cpu_env);
6118                 break;
6119             default:
6120                 goto unknown_op;
6121             }
6122         } else {
6123             /* register float ops */
6124             opreg = rm;
6125 
6126             switch(op) {
6127             case 0x08: /* fld sti */
6128                 gen_helper_fpush(cpu_env);
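                 /* fpush has already moved TOP, so the source ST(i) is
                    now at index (opreg + 1) & 7.  */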
6129                 gen_helper_fmov_ST0_STN(cpu_env,
6130                                         tcg_const_i32((opreg + 1) & 7));
6131                 break;
6132             case 0x09: /* fxchg sti */
6133             case 0x29: /* fxchg4 sti, undocumented op */
6134             case 0x39: /* fxchg7 sti, undocumented op */
6135                 gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6136                 break;
6137             case 0x0a: /* grp d9/2 */
6138                 switch(rm) {
6139                 case 0: /* fnop */
6140                     /* check exceptions (FreeBSD FPU probe) */
6141                     gen_helper_fwait(cpu_env);
6142                     break;
6143                 default:
6144                     goto unknown_op;
6145                 }
6146                 break;
6147             case 0x0c: /* grp d9/4 */
6148                 switch(rm) {
6149                 case 0: /* fchs */
6150                     gen_helper_fchs_ST0(cpu_env);
6151                     break;
6152                 case 1: /* fabs */
6153                     gen_helper_fabs_ST0(cpu_env);
6154                     break;
6155                 case 4: /* ftst */
6156                     gen_helper_fldz_FT0(cpu_env);
6157                     gen_helper_fcom_ST0_FT0(cpu_env);
6158                     break;
6159                 case 5: /* fxam */
6160                     gen_helper_fxam_ST0(cpu_env);
6161                     break;
6162                 default:
6163                     goto unknown_op;
6164                 }
6165                 break;
6166             case 0x0d: /* grp d9/5 */
6167                 {
6168                     switch(rm) {
6169                     case 0:
6170                         gen_helper_fpush(cpu_env);
6171                         gen_helper_fld1_ST0(cpu_env);
6172                         break;
6173                     case 1:
6174                         gen_helper_fpush(cpu_env);
6175                         gen_helper_fldl2t_ST0(cpu_env);
6176                         break;
6177                     case 2:
6178                         gen_helper_fpush(cpu_env);
6179                         gen_helper_fldl2e_ST0(cpu_env);
6180                         break;
6181                     case 3:
6182                         gen_helper_fpush(cpu_env);
6183                         gen_helper_fldpi_ST0(cpu_env);
6184                         break;
6185                     case 4:
6186                         gen_helper_fpush(cpu_env);
6187                         gen_helper_fldlg2_ST0(cpu_env);
6188                         break;
6189                     case 5:
6190                         gen_helper_fpush(cpu_env);
6191                         gen_helper_fldln2_ST0(cpu_env);
6192                         break;
6193                     case 6:
6194                         gen_helper_fpush(cpu_env);
6195                         gen_helper_fldz_ST0(cpu_env);
6196                         break;
6197                     default:
6198                         goto unknown_op;
6199                     }
6200                 }
6201                 break;
6202             case 0x0e: /* grp d9/6 */
6203                 switch(rm) {
6204                 case 0: /* f2xm1 */
6205                     gen_helper_f2xm1(cpu_env);
6206                     break;
6207                 case 1: /* fyl2x */
6208                     gen_helper_fyl2x(cpu_env);
6209                     break;
6210                 case 2: /* fptan */
6211                     gen_helper_fptan(cpu_env);
6212                     break;
6213                 case 3: /* fpatan */
6214                     gen_helper_fpatan(cpu_env);
6215                     break;
6216                 case 4: /* fxtract */
6217                     gen_helper_fxtract(cpu_env);
6218                     break;
6219                 case 5: /* fprem1 */
6220                     gen_helper_fprem1(cpu_env);
6221                     break;
6222                 case 6: /* fdecstp */
6223                     gen_helper_fdecstp(cpu_env);
6224                     break;
6225                 default:
6226                 case 7: /* fincstp */
6227                     gen_helper_fincstp(cpu_env);
6228                     break;
6229                 }
6230                 break;
6231             case 0x0f: /* grp d9/7 */
6232                 switch(rm) {
6233                 case 0: /* fprem */
6234                     gen_helper_fprem(cpu_env);
6235                     break;
6236                 case 1: /* fyl2xp1 */
6237                     gen_helper_fyl2xp1(cpu_env);
6238                     break;
6239                 case 2: /* fsqrt */
6240                     gen_helper_fsqrt(cpu_env);
6241                     break;
6242                 case 3: /* fsincos */
6243                     gen_helper_fsincos(cpu_env);
6244                     break;
6245                 case 5: /* fscale */
6246                     gen_helper_fscale(cpu_env);
6247                     break;
6248                 case 4: /* frndint */
6249                     gen_helper_frndint(cpu_env);
6250                     break;
6251                 case 6: /* fsin */
6252                     gen_helper_fsin(cpu_env);
6253                     break;
6254                 default:
6255                 case 7: /* fcos */
6256                     gen_helper_fcos(cpu_env);
6257                     break;
6258                 }
6259                 break;
6260             case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6261             case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6262             case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6263                 {
6264                     int op1;
6265 
6266                     op1 = op & 7;
6267                     if (op >= 0x20) {
6268                         gen_helper_fp_arith_STN_ST0(op1, opreg);
6269                         if (op >= 0x30)
6270                             gen_helper_fpop(cpu_env);
6271                     } else {
6272                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6273                         gen_helper_fp_arith_ST0_FT0(op1);
6274                     }
6275                 }
6276                 break;
6277             case 0x02: /* fcom */
6278             case 0x22: /* fcom2, undocumented op */
6279                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6280                 gen_helper_fcom_ST0_FT0(cpu_env);
6281                 break;
6282             case 0x03: /* fcomp */
6283             case 0x23: /* fcomp3, undocumented op */
6284             case 0x32: /* fcomp5, undocumented op */
6285                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6286                 gen_helper_fcom_ST0_FT0(cpu_env);
6287                 gen_helper_fpop(cpu_env);
6288                 break;
6289             case 0x15: /* da/5 */
6290                 switch(rm) {
6291                 case 1: /* fucompp */
6292                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6293                     gen_helper_fucom_ST0_FT0(cpu_env);
6294                     gen_helper_fpop(cpu_env);
6295                     gen_helper_fpop(cpu_env);
6296                     break;
6297                 default:
6298                     goto unknown_op;
6299                 }
6300                 break;
6301             case 0x1c:
6302                 switch(rm) {
6303                 case 0: /* feni (287 only, just do nop here) */
6304                     break;
6305                 case 1: /* fdisi (287 only, just do nop here) */
6306                     break;
6307                 case 2: /* fclex */
6308                     gen_helper_fclex(cpu_env);
6309                     break;
6310                 case 3: /* fninit */
6311                     gen_helper_fninit(cpu_env);
6312                     break;
6313                 case 4: /* fsetpm (287 only, just do nop here) */
6314                     break;
6315                 default:
6316                     goto unknown_op;
6317                 }
6318                 break;
6319             case 0x1d: /* fucomi */
6320                 if (!(s->cpuid_features & CPUID_CMOV)) {
6321                     goto illegal_op;
6322                 }
6323                 gen_update_cc_op(s);
6324                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6325                 gen_helper_fucomi_ST0_FT0(cpu_env);
6326                 set_cc_op(s, CC_OP_EFLAGS);
6327                 break;
6328             case 0x1e: /* fcomi */
6329                 if (!(s->cpuid_features & CPUID_CMOV)) {
6330                     goto illegal_op;
6331                 }
6332                 gen_update_cc_op(s);
6333                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6334                 gen_helper_fcomi_ST0_FT0(cpu_env);
6335                 set_cc_op(s, CC_OP_EFLAGS);
6336                 break;
6337             case 0x28: /* ffree sti */
6338                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6339                 break;
6340             case 0x2a: /* fst sti */
6341                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6342                 break;
6343             case 0x2b: /* fstp sti */
6344             case 0x0b: /* fstp1 sti, undocumented op */
6345             case 0x3a: /* fstp8 sti, undocumented op */
6346             case 0x3b: /* fstp9 sti, undocumented op */
6347                 gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6348                 gen_helper_fpop(cpu_env);
6349                 break;
6350             case 0x2c: /* fucom st(i) */
6351                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6352                 gen_helper_fucom_ST0_FT0(cpu_env);
6353                 break;
6354             case 0x2d: /* fucomp st(i) */
6355                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6356                 gen_helper_fucom_ST0_FT0(cpu_env);
6357                 gen_helper_fpop(cpu_env);
6358                 break;
6359             case 0x33: /* de/3 */
6360                 switch(rm) {
6361                 case 1: /* fcompp */
6362                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6363                     gen_helper_fcom_ST0_FT0(cpu_env);
6364                     gen_helper_fpop(cpu_env);
6365                     gen_helper_fpop(cpu_env);
6366                     break;
6367                 default:
6368                     goto unknown_op;
6369                 }
6370                 break;
6371             case 0x38: /* ffreep sti, undocumented op */
6372                 gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6373                 gen_helper_fpop(cpu_env);
6374                 break;
6375             case 0x3c: /* df/4 */
6376                 switch(rm) {
6377                 case 0:
6378                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6379                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6380                     gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6381                     break;
6382                 default:
6383                     goto unknown_op;
6384                 }
6385                 break;
6386             case 0x3d: /* fucomip */
6387                 if (!(s->cpuid_features & CPUID_CMOV)) {
6388                     goto illegal_op;
6389                 }
6390                 gen_update_cc_op(s);
6391                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6392                 gen_helper_fucomi_ST0_FT0(cpu_env);
6393                 gen_helper_fpop(cpu_env);
6394                 set_cc_op(s, CC_OP_EFLAGS);
6395                 break;
6396             case 0x3e: /* fcomip */
6397                 if (!(s->cpuid_features & CPUID_CMOV)) {
6398                     goto illegal_op;
6399                 }
6400                 gen_update_cc_op(s);
6401                 gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6402                 gen_helper_fcomi_ST0_FT0(cpu_env);
6403                 gen_helper_fpop(cpu_env);
6404                 set_cc_op(s, CC_OP_EFLAGS);
6405                 break;
6406             case 0x10 ... 0x13: /* fcmovxx */
6407             case 0x18 ... 0x1b:
6408                 {
6409                     int op1;
6410                     TCGLabel *l1;
6411                     static const uint8_t fcmov_cc[8] = {
6412                         (JCC_B << 1),
6413                         (JCC_Z << 1),
6414                         (JCC_BE << 1),
6415                         (JCC_P << 1),
6416                     };
6417 
6418                     if (!(s->cpuid_features & CPUID_CMOV)) {
6419                         goto illegal_op;
6420                     }
6421                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6422                     l1 = gen_new_label();
6423                     gen_jcc1_noeob(s, op1, l1);
6424                     gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6425                     gen_set_label(l1);
6426                 }
6427                 break;
6428             default:
6429                 goto unknown_op;
6430             }
6431         }
6432         break;
6433         /************************/
6434         /* string ops */
6435 
6436     case 0xa4: /* movsS */
6437     case 0xa5:
6438         ot = mo_b_d(b, dflag);
6439         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6440             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6441         } else {
6442             gen_movs(s, ot);
6443         }
6444         break;
6445 
6446     case 0xaa: /* stosS */
6447     case 0xab:
6448         ot = mo_b_d(b, dflag);
6449         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6450             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6451         } else {
6452             gen_stos(s, ot);
6453         }
6454         break;
6455     case 0xac: /* lodsS */
6456     case 0xad:
6457         ot = mo_b_d(b, dflag);
6458         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6459             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6460         } else {
6461             gen_lods(s, ot);
6462         }
6463         break;
6464     case 0xae: /* scasS */
6465     case 0xaf:
6466         ot = mo_b_d(b, dflag);
6467         if (prefixes & PREFIX_REPNZ) {
6468             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6469         } else if (prefixes & PREFIX_REPZ) {
6470             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6471         } else {
6472             gen_scas(s, ot);
6473         }
6474         break;
6475 
6476     case 0xa6: /* cmpsS */
6477     case 0xa7:
6478         ot = mo_b_d(b, dflag);
6479         if (prefixes & PREFIX_REPNZ) {
6480             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6481         } else if (prefixes & PREFIX_REPZ) {
6482             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6483         } else {
6484             gen_cmps(s, ot);
6485         }
6486         break;
6487     case 0x6c: /* insS */
6488     case 0x6d:
6489         ot = mo_b_d32(b, dflag);
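         /* The I/O port number is the low 16 bits of EDX.  */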
6490         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6491         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6492         if (!gen_check_io(s, ot, s->tmp2_i32,
6493                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6494             break;
6495         }
6496         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6497             gen_io_start();
6498         }
6499         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6500             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6501             /* jump generated by gen_repz_ins */
6502         } else {
6503             gen_ins(s, ot);
6504             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6505                 gen_jmp(s, s->pc - s->cs_base);
6506             }
6507         }
6508         break;
6509     case 0x6e: /* outsS */
6510     case 0x6f:
6511         ot = mo_b_d32(b, dflag);
6512         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6513         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6514         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6515             break;
6516         }
6517         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6518             gen_io_start();
6519         }
6520         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6521             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6522             /* jump generated by gen_repz_outs */
6523         } else {
6524             gen_outs(s, ot);
6525             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6526                 gen_jmp(s, s->pc - s->cs_base);
6527             }
6528         }
6529         break;
6530 
6531         /************************/
6532         /* port I/O */
6533 
6534     case 0xe4:
6535     case 0xe5:
6536         ot = mo_b_d32(b, dflag);
6537         val = x86_ldub_code(env, s);
6538         tcg_gen_movi_i32(s->tmp2_i32, val);
6539         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6540             break;
6541         }
6542         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6543             gen_io_start();
6544         }
6545         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6546         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6547         gen_bpt_io(s, s->tmp2_i32, ot);
6548         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6549             gen_jmp(s, s->pc - s->cs_base);
6550         }
6551         break;
6552     case 0xe6:
6553     case 0xe7:
6554         ot = mo_b_d32(b, dflag);
6555         val = x86_ldub_code(env, s);
6556         tcg_gen_movi_i32(s->tmp2_i32, val);
6557         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6558             break;
6559         }
6560         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6561             gen_io_start();
6562         }
6563         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6564         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6565         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6566         gen_bpt_io(s, s->tmp2_i32, ot);
6567         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6568             gen_jmp(s, s->pc - s->cs_base);
6569         }
6570         break;
6571     case 0xec:
6572     case 0xed:
6573         ot = mo_b_d32(b, dflag);
6574         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6575         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6576         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6577             break;
6578         }
6579         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6580             gen_io_start();
6581         }
6582         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6583         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6584         gen_bpt_io(s, s->tmp2_i32, ot);
6585         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6586             gen_jmp(s, s->pc - s->cs_base);
6587         }
6588         break;
6589     case 0xee:
6590     case 0xef:
6591         ot = mo_b_d32(b, dflag);
6592         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6593         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6594         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6595             break;
6596         }
6597         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6598             gen_io_start();
6599         }
6600         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6601         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6602         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6603         gen_bpt_io(s, s->tmp2_i32, ot);
6604         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6605             gen_jmp(s, s->pc - s->cs_base);
6606         }
6607         break;
6608 
6609         /************************/
6610         /* control */
6611     case 0xc2: /* ret im */
6612         val = x86_ldsw_code(env, s);
6613         ot = gen_pop_T0(s);
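         /* Pop the return address and additionally release VAL bytes of
            stack; 1 << ot accounts for the address itself.  */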
6614         gen_stack_update(s, val + (1 << ot));
6615         /* Note that gen_pop_T0 uses a zero-extending load.  */
6616         gen_op_jmp_v(s->T0);
6617         gen_bnd_jmp(s);
6618         gen_jr(s, s->T0);
6619         break;
6620     case 0xc3: /* ret */
6621         ot = gen_pop_T0(s);
6622         gen_pop_update(s, ot);
6623         /* Note that gen_pop_T0 uses a zero-extending load.  */
6624         gen_op_jmp_v(s->T0);
6625         gen_bnd_jmp(s);
6626         gen_jr(s, s->T0);
6627         break;
6628     case 0xca: /* lret im */
6629         val = x86_ldsw_code(env, s);
6630     do_lret:
6631         if (PE(s) && !VM86(s)) {
6632             gen_update_cc_op(s);
6633             gen_jmp_im(s, pc_start - s->cs_base);
6634             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6635                                       tcg_const_i32(val));
6636         } else {
6637             gen_stack_A0(s);
6638             /* pop offset */
6639             gen_op_ld_v(s, dflag, s->T0, s->A0);
6640             /* NOTE: keeping EIP updated is not a problem even if an
6641                exception occurs */
6642             gen_op_jmp_v(s->T0);
6643             /* pop selector */
6644             gen_add_A0_im(s, 1 << dflag);
6645             gen_op_ld_v(s, dflag, s->T0, s->A0);
6646             gen_op_movl_seg_T0_vm(s, R_CS);
6647             /* add stack offset */
6648             gen_stack_update(s, val + (2 << dflag));
6649         }
6650         gen_eob(s);
6651         break;
6652     case 0xcb: /* lret */
6653         val = 0;
6654         goto do_lret;
6655     case 0xcf: /* iret */
6656         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6657         if (!PE(s) || VM86(s)) {
6658             /* real mode or vm86 mode */
6659             if (!check_vm86_iopl(s)) {
6660                 break;
6661             }
6662             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6663         } else {
6664             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6665                                       tcg_const_i32(s->pc - s->cs_base));
6666         }
6667         set_cc_op(s, CC_OP_EFLAGS);
6668         gen_eob(s);
6669         break;
6670     case 0xe8: /* call im */
6671         {
6672             if (dflag != MO_16) {
6673                 tval = (int32_t)insn_get(env, s, MO_32);
6674             } else {
6675                 tval = (int16_t)insn_get(env, s, MO_16);
6676             }
6677             next_eip = s->pc - s->cs_base;
6678             tval += next_eip;
6679             if (dflag == MO_16) {
6680                 tval &= 0xffff;
6681             } else if (!CODE64(s)) {
6682                 tval &= 0xffffffff;
6683             }
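             /* Push the return address (next_eip), then jump to the
                masked relative target.  */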
6684             tcg_gen_movi_tl(s->T0, next_eip);
6685             gen_push_v(s, s->T0);
6686             gen_bnd_jmp(s);
6687             gen_jmp(s, tval);
6688         }
6689         break;
6690     case 0x9a: /* lcall im */
6691         {
6692             unsigned int selector, offset;
6693 
6694             if (CODE64(s))
6695                 goto illegal_op;
6696             ot = dflag;
6697             offset = insn_get(env, s, ot);
6698             selector = insn_get(env, s, MO_16);
6699 
6700             tcg_gen_movi_tl(s->T0, selector);
6701             tcg_gen_movi_tl(s->T1, offset);
6702         }
6703         goto do_lcall;
6704     case 0xe9: /* jmp im */
6705         if (dflag != MO_16) {
6706             tval = (int32_t)insn_get(env, s, MO_32);
6707         } else {
6708             tval = (int16_t)insn_get(env, s, MO_16);
6709         }
6710         tval += s->pc - s->cs_base;
6711         if (dflag == MO_16) {
6712             tval &= 0xffff;
6713         } else if (!CODE64(s)) {
6714             tval &= 0xffffffff;
6715         }
6716         gen_bnd_jmp(s);
6717         gen_jmp(s, tval);
6718         break;
6719     case 0xea: /* ljmp im */
6720         {
6721             unsigned int selector, offset;
6722 
6723             if (CODE64(s))
6724                 goto illegal_op;
6725             ot = dflag;
6726             offset = insn_get(env, s, ot);
6727             selector = insn_get(env, s, MO_16);
6728 
6729             tcg_gen_movi_tl(s->T0, selector);
6730             tcg_gen_movi_tl(s->T1, offset);
6731         }
6732         goto do_ljmp;
6733     case 0xeb: /* jmp Jb */
6734         tval = (int8_t)insn_get(env, s, MO_8);
6735         tval += s->pc - s->cs_base;
6736         if (dflag == MO_16) {
6737             tval &= 0xffff;
6738         }
6739         gen_jmp(s, tval);
6740         break;
6741     case 0x70 ... 0x7f: /* jcc Jb */
6742         tval = (int8_t)insn_get(env, s, MO_8);
6743         goto do_jcc;
6744     case 0x180 ... 0x18f: /* jcc Jv */
6745         if (dflag != MO_16) {
6746             tval = (int32_t)insn_get(env, s, MO_32);
6747         } else {
6748             tval = (int16_t)insn_get(env, s, MO_16);
6749         }
6750     do_jcc:
6751         next_eip = s->pc - s->cs_base;
6752         tval += next_eip;
6753         if (dflag == MO_16) {
6754             tval &= 0xffff;
6755         }
6756         gen_bnd_jmp(s);
6757         gen_jcc(s, b, tval, next_eip);
6758         break;
6759 
6760     case 0x190 ... 0x19f: /* setcc Gv */
6761         modrm = x86_ldub_code(env, s);
6762         gen_setcc1(s, b, s->T0);
6763         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6764         break;
6765     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6766         if (!(s->cpuid_features & CPUID_CMOV)) {
6767             goto illegal_op;
6768         }
6769         ot = dflag;
6770         modrm = x86_ldub_code(env, s);
6771         reg = ((modrm >> 3) & 7) | REX_R(s);
6772         gen_cmovcc1(env, s, ot, b, modrm, reg);
6773         break;
6774 
6775         /************************/
6776         /* flags */
6777     case 0x9c: /* pushf */
6778         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6779         if (check_vm86_iopl(s)) {
6780             gen_update_cc_op(s);
6781             gen_helper_read_eflags(s->T0, cpu_env);
6782             gen_push_v(s, s->T0);
6783         }
6784         break;
6785     case 0x9d: /* popf */
6786         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6787         if (check_vm86_iopl(s)) {
6788             ot = gen_pop_T0(s);
6789             if (CPL(s) == 0) {
6790                 if (dflag != MO_16) {
6791                     gen_helper_write_eflags(cpu_env, s->T0,
6792                                             tcg_const_i32((TF_MASK | AC_MASK |
6793                                                            ID_MASK | NT_MASK |
6794                                                            IF_MASK |
6795                                                            IOPL_MASK)));
6796                 } else {
6797                     gen_helper_write_eflags(cpu_env, s->T0,
6798                                             tcg_const_i32((TF_MASK | AC_MASK |
6799                                                            ID_MASK | NT_MASK |
6800                                                            IF_MASK | IOPL_MASK)
6801                                                           & 0xffff));
6802                 }
6803             } else {
6804                 if (CPL(s) <= IOPL(s)) {
6805                     if (dflag != MO_16) {
6806                         gen_helper_write_eflags(cpu_env, s->T0,
6807                                                 tcg_const_i32((TF_MASK |
6808                                                                AC_MASK |
6809                                                                ID_MASK |
6810                                                                NT_MASK |
6811                                                                IF_MASK)));
6812                     } else {
6813                         gen_helper_write_eflags(cpu_env, s->T0,
6814                                                 tcg_const_i32((TF_MASK |
6815                                                                AC_MASK |
6816                                                                ID_MASK |
6817                                                                NT_MASK |
6818                                                                IF_MASK)
6819                                                               & 0xffff));
6820                     }
6821                 } else {
6822                     if (dflag != MO_16) {
6823                         gen_helper_write_eflags(cpu_env, s->T0,
6824                                            tcg_const_i32((TF_MASK | AC_MASK |
6825                                                           ID_MASK | NT_MASK)));
6826                     } else {
6827                         gen_helper_write_eflags(cpu_env, s->T0,
6828                                            tcg_const_i32((TF_MASK | AC_MASK |
6829                                                           ID_MASK | NT_MASK)
6830                                                          & 0xffff));
6831                     }
6832                 }
6833             }
6834             gen_pop_update(s, ot);
6835             set_cc_op(s, CC_OP_EFLAGS);
6836             /* abort translation because TF/AC flag may change */
6837             gen_jmp_im(s, s->pc - s->cs_base);
6838             gen_eob(s);
6839         }
6840         break;
6841     case 0x9e: /* sahf */
6842         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6843             goto illegal_op;
6844         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6845         gen_compute_eflags(s);
6846         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6847         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6848         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6849         break;
6850     case 0x9f: /* lahf */
6851         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6852             goto illegal_op;
6853         gen_compute_eflags(s);
6854         /* Note: gen_compute_eflags() only gives the condition codes */
6855         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6856         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6857         break;
6858     case 0xf5: /* cmc */
6859         gen_compute_eflags(s);
6860         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6861         break;
6862     case 0xf8: /* clc */
6863         gen_compute_eflags(s);
6864         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6865         break;
6866     case 0xf9: /* stc */
6867         gen_compute_eflags(s);
6868         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6869         break;
6870     case 0xfc: /* cld */
6871         tcg_gen_movi_i32(s->tmp2_i32, 1);
6872         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6873         break;
6874     case 0xfd: /* std */
6875         tcg_gen_movi_i32(s->tmp2_i32, -1);
6876         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6877         break;
6878 
6879         /************************/
6880         /* bit operations */
6881     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6882         ot = dflag;
6883         modrm = x86_ldub_code(env, s);
6884         op = (modrm >> 3) & 7;
6885         mod = (modrm >> 6) & 3;
6886         rm = (modrm & 7) | REX_B(s);
6887         if (mod != 3) {
6888             s->rip_offset = 1;
6889             gen_lea_modrm(env, s, modrm);
6890             if (!(s->prefix & PREFIX_LOCK)) {
6891                 gen_op_ld_v(s, ot, s->T0, s->A0);
6892             }
6893         } else {
6894             gen_op_mov_v_reg(s, ot, s->T0, rm);
6895         }
6896         /* load shift */
6897         val = x86_ldub_code(env, s);
6898         tcg_gen_movi_tl(s->T1, val);
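         /* Group 8 defines only /4../7 (bt, bts, btr, btc); remap
            them to 0..3 for bt_op below.  */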
6899         if (op < 4)
6900             goto unknown_op;
6901         op -= 4;
6902         goto bt_op;
6903     case 0x1a3: /* bt Gv, Ev */
6904         op = 0;
6905         goto do_btx;
6906     case 0x1ab: /* bts */
6907         op = 1;
6908         goto do_btx;
6909     case 0x1b3: /* btr */
6910         op = 2;
6911         goto do_btx;
6912     case 0x1bb: /* btc */
6913         op = 3;
6914     do_btx:
6915         ot = dflag;
6916         modrm = x86_ldub_code(env, s);
6917         reg = ((modrm >> 3) & 7) | REX_R(s);
6918         mod = (modrm >> 6) & 3;
6919         rm = (modrm & 7) | REX_B(s);
6920         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6921         if (mod != 3) {
6922             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6923             /* special case: for bt* with a memory operand, the bit
                    offset also displaces the effective address; add the
                    signed word index, scaled to bytes */
6924             gen_exts(ot, s->T1);
6925             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6926             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6927             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6928             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6929             if (!(s->prefix & PREFIX_LOCK)) {
6930                 gen_op_ld_v(s, ot, s->T0, s->A0);
6931             }
6932         } else {
6933             gen_op_mov_v_reg(s, ot, s->T0, rm);
6934         }
6935     bt_op:
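         /* Reduce the bit offset modulo the operand width, then build
            the single-bit mask used by bts/btr/btc.  */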
6936         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6937         tcg_gen_movi_tl(s->tmp0, 1);
6938         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6939         if (s->prefix & PREFIX_LOCK) {
6940             switch (op) {
6941             case 0: /* bt */
6942                 /* Needs no atomic ops; we suppressed the normal
6943                    memory load for LOCK above, so do it now.  */
6944                 gen_op_ld_v(s, ot, s->T0, s->A0);
6945                 break;
6946             case 1: /* bts */
6947                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6948                                            s->mem_index, ot | MO_LE);
6949                 break;
6950             case 2: /* btr */
6951                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6952                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6953                                             s->mem_index, ot | MO_LE);
6954                 break;
6955             default:
6956             case 3: /* btc */
6957                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6958                                             s->mem_index, ot | MO_LE);
6959                 break;
6960             }
6961             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6962         } else {
6963             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6964             switch (op) {
6965             case 0: /* bt */
6966                 /* Data already loaded; nothing to do.  */
6967                 break;
6968             case 1: /* bts */
6969                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6970                 break;
6971             case 2: /* btr */
6972                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6973                 break;
6974             default:
6975             case 3: /* btc */
6976                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6977                 break;
6978             }
6979             if (op != 0) {
6980                 if (mod != 3) {
6981                     gen_op_st_v(s, ot, s->T0, s->A0);
6982                 } else {
6983                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6984                 }
6985             }
6986         }
6987 
6988         /* Delay all CC updates until after the store above.  Note that
6989            C is the result of the test, Z is unchanged, and the others
6990            are all undefined.  */
6991         switch (s->cc_op) {
6992         case CC_OP_MULB ... CC_OP_MULQ:
6993         case CC_OP_ADDB ... CC_OP_ADDQ:
6994         case CC_OP_ADCB ... CC_OP_ADCQ:
6995         case CC_OP_SUBB ... CC_OP_SUBQ:
6996         case CC_OP_SBBB ... CC_OP_SBBQ:
6997         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6998         case CC_OP_INCB ... CC_OP_INCQ:
6999         case CC_OP_DECB ... CC_OP_DECQ:
7000         case CC_OP_SHLB ... CC_OP_SHLQ:
7001         case CC_OP_SARB ... CC_OP_SARQ:
7002         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7003             /* Z was going to be computed from the non-zero status of CC_DST.
7004                We can get that same Z value (and the new C value) by leaving
7005                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7006                same width.  */
7007             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7008             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7009             break;
7010         default:
7011             /* Otherwise, generate EFLAGS and replace the C bit.  */
7012             gen_compute_eflags(s);
7013             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7014                                ctz32(CC_C), 1);
7015             break;
7016         }
7017         break;
7018     case 0x1bc: /* bsf / tzcnt */
7019     case 0x1bd: /* bsr / lzcnt */
7020         ot = dflag;
7021         modrm = x86_ldub_code(env, s);
7022         reg = ((modrm >> 3) & 7) | REX_R(s);
7023         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7024         gen_extu(ot, s->T0);
7025 
7026         /* Note that lzcnt and tzcnt are in different extensions.  */
7027         if ((prefixes & PREFIX_REPZ)
7028             && (b & 1
7029                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7030                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7031             int size = 8 << ot;
7032             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7033             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7034             if (b & 1) {
7035                 /* For lzcnt, reduce the target_ulong result by the
7036                    number of zeros that we expect to find at the top.  */
7037                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7038                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7039             } else {
7040                 /* For tzcnt, a zero input must return the operand size.  */
7041                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7042             }
7043             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7044             gen_op_update1_cc(s);
7045             set_cc_op(s, CC_OP_BMILGB + ot);
7046         } else {
7047             /* For bsr/bsf, only the Z bit is defined and it is related
7048                to the input and not the result.  */
7049             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7050             set_cc_op(s, CC_OP_LOGICB + ot);
7051 
7052             /* ??? The manual says that the output is undefined when the
7053                input is zero, but real hardware leaves it unchanged, and
7054                real programs appear to depend on that.  Accomplish this
7055                by passing the output as the value to return upon zero.  */
7056             if (b & 1) {
7057                 /* For bsr, return the bit index of the first 1 bit,
7058                    not the count of leading zeros.  */
7059                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7060                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7061                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7062             } else {
7063                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7064             }
7065         }
7066         gen_op_mov_reg_v(s, ot, reg, s->T0);
7067         break;
7068         /************************/
7069         /* bcd */
7070     case 0x27: /* daa */
7071         if (CODE64(s))
7072             goto illegal_op;
7073         gen_update_cc_op(s);
7074         gen_helper_daa(cpu_env);
7075         set_cc_op(s, CC_OP_EFLAGS);
7076         break;
7077     case 0x2f: /* das */
7078         if (CODE64(s))
7079             goto illegal_op;
7080         gen_update_cc_op(s);
7081         gen_helper_das(cpu_env);
7082         set_cc_op(s, CC_OP_EFLAGS);
7083         break;
7084     case 0x37: /* aaa */
7085         if (CODE64(s))
7086             goto illegal_op;
7087         gen_update_cc_op(s);
7088         gen_helper_aaa(cpu_env);
7089         set_cc_op(s, CC_OP_EFLAGS);
7090         break;
7091     case 0x3f: /* aas */
7092         if (CODE64(s))
7093             goto illegal_op;
7094         gen_update_cc_op(s);
7095         gen_helper_aas(cpu_env);
7096         set_cc_op(s, CC_OP_EFLAGS);
7097         break;
7098     case 0xd4: /* aam */
7099         if (CODE64(s))
7100             goto illegal_op;
7101         val = x86_ldub_code(env, s);
7102         if (val == 0) {
7103             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7104         } else {
7105             gen_helper_aam(cpu_env, tcg_const_i32(val));
7106             set_cc_op(s, CC_OP_LOGICB);
7107         }
7108         break;
7109     case 0xd5: /* aad */
7110         if (CODE64(s))
7111             goto illegal_op;
7112         val = x86_ldub_code(env, s);
7113         gen_helper_aad(cpu_env, tcg_const_i32(val));
7114         set_cc_op(s, CC_OP_LOGICB);
7115         break;
7116         /************************/
7117         /* misc */
7118     case 0x90: /* nop */
7119         /* XXX: apply the correct LOCK-prefix test to all insns */
7120         if (prefixes & PREFIX_LOCK) {
7121             goto illegal_op;
7122         }
7123         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7124         if (REX_B(s)) {
7125             goto do_xchg_reg_eax;
7126         }
7127         if (prefixes & PREFIX_REPZ) {
7128             gen_update_cc_op(s);
7129             gen_jmp_im(s, pc_start - s->cs_base);
7130             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7131             s->base.is_jmp = DISAS_NORETURN;
7132         }
7133         break;
7134     case 0x9b: /* fwait */
7135         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7136             (HF_MP_MASK | HF_TS_MASK)) {
7137             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7138         } else {
7139             gen_helper_fwait(cpu_env);
7140         }
7141         break;
7142     case 0xcc: /* int3 */
7143         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7144         break;
7145     case 0xcd: /* int N */
7146         val = x86_ldub_code(env, s);
7147         if (check_vm86_iopl(s)) {
7148             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7149         }
7150         break;
7151     case 0xce: /* into */
7152         if (CODE64(s))
7153             goto illegal_op;
7154         gen_update_cc_op(s);
7155         gen_jmp_im(s, pc_start - s->cs_base);
7156         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7157         break;
7158 #ifdef WANT_ICEBP
7159     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7160         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7161         gen_debug(s);
7162         break;
7163 #endif
7164     case 0xfa: /* cli */
7165         if (check_iopl(s)) {
7166             gen_helper_cli(cpu_env);
7167         }
7168         break;
7169     case 0xfb: /* sti */
7170         if (check_iopl(s)) {
7171             gen_helper_sti(cpu_env);
7172             /* interrupts are recognized only after the insn following sti */
7173             gen_jmp_im(s, s->pc - s->cs_base);
7174             gen_eob_inhibit_irq(s, true);
7175         }
7176         break;
7177     case 0x62: /* bound */
7178         if (CODE64(s))
7179             goto illegal_op;
7180         ot = dflag;
7181         modrm = x86_ldub_code(env, s);
7182         reg = (modrm >> 3) & 7;
7183         mod = (modrm >> 6) & 3;
7184         if (mod == 3)
7185             goto illegal_op;
7186         gen_op_mov_v_reg(s, ot, s->T0, reg);
7187         gen_lea_modrm(env, s, modrm);
7188         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7189         if (ot == MO_16) {
7190             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7191         } else {
7192             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7193         }
7194         break;
7195     case 0x1c8 ... 0x1cf: /* bswap reg */
7196         reg = (b & 7) | REX_B(s);
7197 #ifdef TARGET_X86_64
7198         if (dflag == MO_64) {
7199             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7200             break;
7201         }
7202 #endif
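         /* 32-bit bswap; TCG_BSWAP_OZ zero-extends the result, matching
            the x86 rule that 32-bit destinations clear the high half.  */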
7203         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7204         break;
7205     case 0xd6: /* salc */
7206         if (CODE64(s))
7207             goto illegal_op;
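         /* Undocumented SALC: set AL to 0xff if CF is set, else 0x00,
            by negating the computed carry.  */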
7208         gen_compute_eflags_c(s, s->T0);
7209         tcg_gen_neg_tl(s->T0, s->T0);
7210         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7211         break;
7212     case 0xe0: /* loopnz */
7213     case 0xe1: /* loopz */
7214     case 0xe2: /* loop */
7215     case 0xe3: /* jecxz */
7216         {
7217             TCGLabel *l1, *l2, *l3;
7218 
7219             tval = (int8_t)insn_get(env, s, MO_8);
7220             next_eip = s->pc - s->cs_base;
7221             tval += next_eip;
7222             if (dflag == MO_16) {
7223                 tval &= 0xffff;
7224             }
7225 
7226             l1 = gen_new_label();
7227             l2 = gen_new_label();
7228             l3 = gen_new_label();
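             /* l1: taken branch to the loop target; l3: fall through
                to the next insn; l2: common TB exit.  */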
7229             gen_update_cc_op(s);
7230             b &= 3;
7231             switch(b) {
7232             case 0: /* loopnz */
7233             case 1: /* loopz */
7234                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7235                 gen_op_jz_ecx(s, s->aflag, l3);
7236                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7237                 break;
7238             case 2: /* loop */
7239                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7240                 gen_op_jnz_ecx(s, s->aflag, l1);
7241                 break;
7242             default:
7243             case 3: /* jcxz */
7244                 gen_op_jz_ecx(s, s->aflag, l1);
7245                 break;
7246             }
7247 
7248             gen_set_label(l3);
7249             gen_jmp_im(s, next_eip);
7250             tcg_gen_br(l2);
7251 
7252             gen_set_label(l1);
7253             gen_jmp_im(s, tval);
7254             gen_set_label(l2);
7255             gen_eob(s);
7256         }
7257         break;
7258     case 0x130: /* wrmsr */
7259     case 0x132: /* rdmsr */
7260         if (check_cpl0(s)) {
7261             gen_update_cc_op(s);
7262             gen_jmp_im(s, pc_start - s->cs_base);
7263             if (b & 2) {
7264                 gen_helper_rdmsr(cpu_env);
7265             } else {
7266                 gen_helper_wrmsr(cpu_env);
7267                 gen_jmp_im(s, s->pc - s->cs_base);
7268                 gen_eob(s);
7269             }
7270         }
7271         break;
7272     case 0x131: /* rdtsc */
7273         gen_update_cc_op(s);
7274         gen_jmp_im(s, pc_start - s->cs_base);
7275         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7276             gen_io_start();
7277         }
7278         gen_helper_rdtsc(cpu_env);
7279         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7280             gen_jmp(s, s->pc - s->cs_base);
7281         }
7282         break;
7283     case 0x133: /* rdpmc */
7284         gen_update_cc_op(s);
7285         gen_jmp_im(s, pc_start - s->cs_base);
7286         gen_helper_rdpmc(cpu_env);
7287         s->base.is_jmp = DISAS_NORETURN;
7288         break;
7289     case 0x134: /* sysenter */
7290         /* On Intel CPUs, SYSENTER is valid even in 64-bit mode */
7291         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7292             goto illegal_op;
7293         if (!PE(s)) {
7294             gen_exception_gpf(s);
7295         } else {
7296             gen_helper_sysenter(cpu_env);
7297             gen_eob(s);
7298         }
7299         break;
7300     case 0x135: /* sysexit */
7301         /* On Intel CPUs, SYSEXIT is valid even in 64-bit mode */
7302         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7303             goto illegal_op;
7304         if (!PE(s)) {
7305             gen_exception_gpf(s);
7306         } else {
7307             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7308             gen_eob(s);
7309         }
7310         break;
7311 #ifdef TARGET_X86_64
7312     case 0x105: /* syscall */
7313         /* XXX: is it usable in real mode? */
7314         gen_update_cc_op(s);
7315         gen_jmp_im(s, pc_start - s->cs_base);
7316         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7317         /* TF handling for the syscall insn is different: the TF bit is
7318            checked after the syscall insn completes, so that no #DB is
7319            generated after one has entered CPL0 if TF is set in FMASK.  */
7320         gen_eob_worker(s, false, true);
7321         break;
7322     case 0x107: /* sysret */
7323         if (!PE(s)) {
7324             gen_exception_gpf(s);
7325         } else {
7326             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7327             /* condition codes are modified only in long mode */
7328             if (LMA(s)) {
7329                 set_cc_op(s, CC_OP_EFLAGS);
7330             }
7331             /* TF handling for the sysret insn is different.  The TF bit is
7332                checked after the sysret insn completes.  This allows #DB to be
7333                generated "as if" the syscall insn in userspace had just
7334                completed.  */
7335             gen_eob_worker(s, false, true);
7336         }
7337         break;
7338 #endif
7339     case 0x1a2: /* cpuid */
7340         gen_update_cc_op(s);
7341         gen_jmp_im(s, pc_start - s->cs_base);
7342         gen_helper_cpuid(cpu_env);
7343         break;
7344     case 0xf4: /* hlt */
7345         if (check_cpl0(s)) {
7346             gen_update_cc_op(s);
7347             gen_jmp_im(s, pc_start - s->cs_base);
7348             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7349             s->base.is_jmp = DISAS_NORETURN;
7350         }
7351         break;
7352     case 0x100:
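         /* group 6: sldt, str, lldt, ltr, verr, verw (0f 00) */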
7353         modrm = x86_ldub_code(env, s);
7354         mod = (modrm >> 6) & 3;
7355         op = (modrm >> 3) & 7;
7356         switch (op) {
7357         case 0: /* sldt */
7358             if (!PE(s) || VM86(s))
7359                 goto illegal_op;
7360             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7361             tcg_gen_ld32u_tl(s->T0, cpu_env,
7362                              offsetof(CPUX86State, ldt.selector));
7363             ot = mod == 3 ? dflag : MO_16;
7364             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7365             break;
7366         case 2: /* lldt */
7367             if (!PE(s) || VM86(s))
7368                 goto illegal_op;
7369             if (check_cpl0(s)) {
7370                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7371                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7372                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7373                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7374             }
7375             break;
7376         case 1: /* str */
7377             if (!PE(s) || VM86(s))
7378                 goto illegal_op;
7379             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7380             tcg_gen_ld32u_tl(s->T0, cpu_env,
7381                              offsetof(CPUX86State, tr.selector));
7382             ot = mod == 3 ? dflag : MO_16;
7383             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7384             break;
7385         case 3: /* ltr */
7386             if (!PE(s) || VM86(s))
7387                 goto illegal_op;
7388             if (check_cpl0(s)) {
7389                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7390                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7391                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7392                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7393             }
7394             break;
7395         case 4: /* verr */
7396         case 5: /* verw */
7397             if (!PE(s) || VM86(s))
7398                 goto illegal_op;
7399             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7400             gen_update_cc_op(s);
7401             if (op == 4) {
7402                 gen_helper_verr(cpu_env, s->T0);
7403             } else {
7404                 gen_helper_verw(cpu_env, s->T0);
7405             }
7406             set_cc_op(s, CC_OP_EFLAGS);
7407             break;
7408         default:
7409             goto unknown_op;
7410         }
7411         break;
7412 
7413     case 0x101:
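         /* group 7: descriptor tables and system management (0f 01) */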
7414         modrm = x86_ldub_code(env, s);
7415         switch (modrm) {
7416         CASE_MODRM_MEM_OP(0): /* sgdt */
7417             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
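             /*
              * Store the 16-bit limit, then the base: 32 bits, or 64
              * in long mode; with a 16-bit operand size only 24 bits
              * of the base are kept.
              */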
7418             gen_lea_modrm(env, s, modrm);
7419             tcg_gen_ld32u_tl(s->T0,
7420                              cpu_env, offsetof(CPUX86State, gdt.limit));
7421             gen_op_st_v(s, MO_16, s->T0, s->A0);
7422             gen_add_A0_im(s, 2);
7423             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7424             if (dflag == MO_16) {
7425                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7426             }
7427             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7428             break;
7429 
7430         case 0xc8: /* monitor */
7431             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7432                 goto illegal_op;
7433             }
7434             gen_update_cc_op(s);
7435             gen_jmp_im(s, pc_start - s->cs_base);
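             /*
              * The monitored address comes from EAX/RAX, truncated to
              * the address size and subject to DS segmentation.
              */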
7436             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7437             gen_extu(s->aflag, s->A0);
7438             gen_add_A0_ds_seg(s);
7439             gen_helper_monitor(cpu_env, s->A0);
7440             break;
7441 
7442         case 0xc9: /* mwait */
7443             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7444                 goto illegal_op;
7445             }
7446             gen_update_cc_op(s);
7447             gen_jmp_im(s, pc_start - s->cs_base);
7448             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7449             s->base.is_jmp = DISAS_NORETURN;
7450             break;
7451 
7452         case 0xca: /* clac */
7453             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7454                 || CPL(s) != 0) {
7455                 goto illegal_op;
7456             }
7457             gen_helper_clac(cpu_env);
7458             gen_jmp_im(s, s->pc - s->cs_base);
7459             gen_eob(s);
7460             break;
7461 
7462         case 0xcb: /* stac */
7463             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7464                 || CPL(s) != 0) {
7465                 goto illegal_op;
7466             }
7467             gen_helper_stac(cpu_env);
7468             gen_jmp_im(s, s->pc - s->cs_base);
7469             gen_eob(s);
7470             break;
7471 
7472         CASE_MODRM_MEM_OP(1): /* sidt */
7473             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7474             gen_lea_modrm(env, s, modrm);
7475             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7476             gen_op_st_v(s, MO_16, s->T0, s->A0);
7477             gen_add_A0_im(s, 2);
7478             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7479             if (dflag == MO_16) {
7480                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7481             }
7482             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7483             break;
7484 
7485         case 0xd0: /* xgetbv */
7486             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7487                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7488                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7489                 goto illegal_op;
7490             }
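             /* ECX selects the XCR; the 64-bit result is returned in
                EDX:EAX. */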
7491             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7492             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7493             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7494             break;
7495 
7496         case 0xd1: /* xsetbv */
7497             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7498                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7499                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7500                 goto illegal_op;
7501             }
7502             if (!check_cpl0(s)) {
7503                 break;
7504             }
7505             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7506                                   cpu_regs[R_EDX]);
7507             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7508             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7509             /* End TB because translation flags may change.  */
7510             gen_jmp_im(s, s->pc - s->cs_base);
7511             gen_eob(s);
7512             break;
7513 
7514         case 0xd8: /* VMRUN */
7515             if (!SVME(s) || !PE(s)) {
7516                 goto illegal_op;
7517             }
7518             if (!check_cpl0(s)) {
7519                 break;
7520             }
7521             gen_update_cc_op(s);
7522             gen_jmp_im(s, pc_start - s->cs_base);
7523             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7524                              tcg_const_i32(s->pc - pc_start));
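             /* Control continues in the guest, so nothing after VMRUN
                in this TB may execute. */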
7525             tcg_gen_exit_tb(NULL, 0);
7526             s->base.is_jmp = DISAS_NORETURN;
7527             break;
7528 
7529         case 0xd9: /* VMMCALL */
7530             if (!SVME(s)) {
7531                 goto illegal_op;
7532             }
7533             gen_update_cc_op(s);
7534             gen_jmp_im(s, pc_start - s->cs_base);
7535             gen_helper_vmmcall(cpu_env);
7536             break;
7537 
7538         case 0xda: /* VMLOAD */
7539             if (!SVME(s) || !PE(s)) {
7540                 goto illegal_op;
7541             }
7542             if (!check_cpl0(s)) {
7543                 break;
7544             }
7545             gen_update_cc_op(s);
7546             gen_jmp_im(s, pc_start - s->cs_base);
7547             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7548             break;
7549 
7550         case 0xdb: /* VMSAVE */
7551             if (!SVME(s) || !PE(s)) {
7552                 goto illegal_op;
7553             }
7554             if (!check_cpl0(s)) {
7555                 break;
7556             }
7557             gen_update_cc_op(s);
7558             gen_jmp_im(s, pc_start - s->cs_base);
7559             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7560             break;
7561 
7562         case 0xdc: /* STGI */
7563             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7564                 || !PE(s)) {
7565                 goto illegal_op;
7566             }
7567             if (!check_cpl0(s)) {
7568                 break;
7569             }
7570             gen_update_cc_op(s);
7571             gen_helper_stgi(cpu_env);
7572             gen_jmp_im(s, s->pc - s->cs_base);
7573             gen_eob(s);
7574             break;
7575 
7576         case 0xdd: /* CLGI */
7577             if (!SVME(s) || !PE(s)) {
7578                 goto illegal_op;
7579             }
7580             if (!check_cpl0(s)) {
7581                 break;
7582             }
7583             gen_update_cc_op(s);
7584             gen_jmp_im(s, pc_start - s->cs_base);
7585             gen_helper_clgi(cpu_env);
7586             break;
7587 
7588         case 0xde: /* SKINIT */
7589             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7590                 || !PE(s)) {
7591                 goto illegal_op;
7592             }
7593             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7594             /* If not intercepted, not implemented -- raise #UD. */
7595             goto illegal_op;
7596 
7597         case 0xdf: /* INVLPGA */
7598             if (!SVME(s) || !PE(s)) {
7599                 goto illegal_op;
7600             }
7601             if (!check_cpl0(s)) {
7602                 break;
7603             }
7604             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7605             if (s->aflag == MO_64) {
7606                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7607             } else {
7608                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7609             }
7610             gen_helper_flush_page(cpu_env, s->A0);
7611             gen_jmp_im(s, s->pc - s->cs_base);
7612             gen_eob(s);
7613             break;
7614 
7615         CASE_MODRM_MEM_OP(2): /* lgdt */
7616             if (!check_cpl0(s)) {
7617                 break;
7618             }
7619             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
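             /*
              * Load the 16-bit limit, then the base; with a 16-bit
              * operand size only 24 bits of the base are kept.
              */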
7620             gen_lea_modrm(env, s, modrm);
7621             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7622             gen_add_A0_im(s, 2);
7623             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7624             if (dflag == MO_16) {
7625                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7626             }
7627             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7628             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7629             break;
7630 
7631         CASE_MODRM_MEM_OP(3): /* lidt */
7632             if (!check_cpl0(s)) {
7633                 break;
7634             }
7635             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7636             gen_lea_modrm(env, s, modrm);
7637             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7638             gen_add_A0_im(s, 2);
7639             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7640             if (dflag == MO_16) {
7641                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7642             }
7643             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7644             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7645             break;
7646 
7647         CASE_MODRM_OP(4): /* smsw */
7648             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7649             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7650             /*
7651              * In 32-bit mode, the higher 16 bits of the destination
7652              * register are undefined.  In practice CR0[31:0] is stored
7653              * just like in 64-bit mode.
7654              */
7655             mod = (modrm >> 6) & 3;
7656             ot = (mod != 3 ? MO_16 : s->dflag);
7657             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7658             break;
7659         case 0xee: /* rdpkru */
7660             if (prefixes & PREFIX_LOCK) {
7661                 goto illegal_op;
7662             }
7663             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7664             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7665             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7666             break;
7667         case 0xef: /* wrpkru */
7668             if (prefixes & PREFIX_LOCK) {
7669                 goto illegal_op;
7670             }
7671             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7672                                   cpu_regs[R_EDX]);
7673             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7674             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7675             break;
7676 
7677         CASE_MODRM_OP(6): /* lmsw */
7678             if (!check_cpl0(s)) {
7679                 break;
7680             }
7681             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7682             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7683             /*
7684              * Only the 4 lower bits of CR0 are modified.
7685              * PE cannot be set to zero if already set to one.
7686              */
7687             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7688             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7689             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7690             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7691             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7692             gen_jmp_im(s, s->pc - s->cs_base);
7693             gen_eob(s);
7694             break;
7695 
7696         CASE_MODRM_MEM_OP(7): /* invlpg */
7697             if (!check_cpl0(s)) {
7698                 break;
7699             }
7700             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7701             gen_lea_modrm(env, s, modrm);
7702             gen_helper_flush_page(cpu_env, s->A0);
7703             gen_jmp_im(s, s->pc - s->cs_base);
7704             gen_eob(s);
7705             break;
7706 
7707         case 0xf8: /* swapgs */
7708 #ifdef TARGET_X86_64
7709             if (CODE64(s)) {
7710                 if (check_cpl0(s)) {
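                     /* Exchange the GS base with MSR_KERNEL_GS_BASE. */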
7711                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7712                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7713                                   offsetof(CPUX86State, kernelgsbase));
7714                     tcg_gen_st_tl(s->T0, cpu_env,
7715                                   offsetof(CPUX86State, kernelgsbase));
7716                 }
7717                 break;
7718             }
7719 #endif
7720             goto illegal_op;
7721 
7722         case 0xf9: /* rdtscp */
7723             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7724                 goto illegal_op;
7725             }
7726             gen_update_cc_op(s);
7727             gen_jmp_im(s, pc_start - s->cs_base);
7728             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7729                 gen_io_start();
7730             }
7731             gen_helper_rdtscp(cpu_env);
7732             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7733                 gen_jmp(s, s->pc - s->cs_base);
7734             }
7735             break;
7736 
7737         default:
7738             goto unknown_op;
7739         }
7740         break;
7741 
7742     case 0x108: /* invd */
7743     case 0x109: /* wbinvd */
7744         if (check_cpl0(s)) {
7745             gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7746             /* nothing to do */
7747         }
7748         break;
7749     case 0x63: /* arpl or movslS (x86_64) */
7750 #ifdef TARGET_X86_64
7751         if (CODE64(s)) {
7752             int d_ot;
7753             /* d_ot is the size of the destination */
7754             d_ot = dflag;
7755 
7756             modrm = x86_ldub_code(env, s);
7757             reg = ((modrm >> 3) & 7) | REX_R(s);
7758             mod = (modrm >> 6) & 3;
7759             rm = (modrm & 7) | REX_B(s);
7760 
7761             if (mod == 3) {
7762                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7763                 /* sign extend */
7764                 if (d_ot == MO_64) {
7765                     tcg_gen_ext32s_tl(s->T0, s->T0);
7766                 }
7767                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7768             } else {
7769                 gen_lea_modrm(env, s, modrm);
7770                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7771                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7772             }
7773         } else
7774 #endif
7775         {
7776             TCGLabel *label1;
7777             TCGv t0, t1, t2, a0;
7778 
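             /*
              * ARPL: if the destination selector's RPL is below the
              * source's, raise it to match and set ZF, else clear ZF;
              * t2 holds the new CC_Z value.
              */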
7779             if (!PE(s) || VM86(s))
7780                 goto illegal_op;
7781             t0 = tcg_temp_local_new();
7782             t1 = tcg_temp_local_new();
7783             t2 = tcg_temp_local_new();
7784             ot = MO_16;
7785             modrm = x86_ldub_code(env, s);
7786             reg = (modrm >> 3) & 7;
7787             mod = (modrm >> 6) & 3;
7788             rm = modrm & 7;
7789             if (mod != 3) {
7790                 gen_lea_modrm(env, s, modrm);
7791                 gen_op_ld_v(s, ot, t0, s->A0);
7792                 a0 = tcg_temp_local_new();
7793                 tcg_gen_mov_tl(a0, s->A0);
7794             } else {
7795                 gen_op_mov_v_reg(s, ot, t0, rm);
7796                 a0 = NULL;
7797             }
7798             gen_op_mov_v_reg(s, ot, t1, reg);
7799             tcg_gen_andi_tl(s->tmp0, t0, 3);
7800             tcg_gen_andi_tl(t1, t1, 3);
7801             tcg_gen_movi_tl(t2, 0);
7802             label1 = gen_new_label();
7803             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7804             tcg_gen_andi_tl(t0, t0, ~3);
7805             tcg_gen_or_tl(t0, t0, t1);
7806             tcg_gen_movi_tl(t2, CC_Z);
7807             gen_set_label(label1);
7808             if (mod != 3) {
7809                 gen_op_st_v(s, ot, t0, a0);
7810                 tcg_temp_free(a0);
7811             } else {
7812                 gen_op_mov_reg_v(s, ot, rm, t0);
7813             }
7814             gen_compute_eflags(s);
7815             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7816             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7817             tcg_temp_free(t0);
7818             tcg_temp_free(t1);
7819             tcg_temp_free(t2);
7820         }
7821         break;
7822     case 0x102: /* lar */
7823     case 0x103: /* lsl */
7824         {
7825             TCGLabel *label1;
7826             TCGv t0;
7827             if (!PE(s) || VM86(s))
7828                 goto illegal_op;
7829             ot = dflag != MO_16 ? MO_32 : MO_16;
7830             modrm = x86_ldub_code(env, s);
7831             reg = ((modrm >> 3) & 7) | REX_R(s);
7832             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7833             t0 = tcg_temp_local_new();
7834             gen_update_cc_op(s);
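             /* The helper sets ZF on success; write the result back
                only when ZF is set. */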
7835             if (b == 0x102) {
7836                 gen_helper_lar(t0, cpu_env, s->T0);
7837             } else {
7838                 gen_helper_lsl(t0, cpu_env, s->T0);
7839             }
7840             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7841             label1 = gen_new_label();
7842             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7843             gen_op_mov_reg_v(s, ot, reg, t0);
7844             gen_set_label(label1);
7845             set_cc_op(s, CC_OP_EFLAGS);
7846             tcg_temp_free(t0);
7847         }
7848         break;
7849     case 0x118:
7850         modrm = x86_ldub_code(env, s);
7851         mod = (modrm >> 6) & 3;
7852         op = (modrm >> 3) & 7;
7853         switch (op) {
7854         case 0: /* prefetchnta */
7855         case 1: /* prefetcht0 */
7856         case 2: /* prefetcht1 */
7857         case 3: /* prefetcht2 */
7858             if (mod == 3)
7859                 goto illegal_op;
7860             gen_nop_modrm(env, s, modrm);
7861             /* nothing more to do */
7862             break;
7863         default: /* nop (multi byte) */
7864             gen_nop_modrm(env, s, modrm);
7865             break;
7866         }
7867         break;
7868     case 0x11a:
7869         modrm = x86_ldub_code(env, s);
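         /*
          * MPX: 0f 1a encodes bndcl (f3), bndcu (f2), bndmov from
          * reg/mem (66) and bndldx, selected by prefix; with MPX
          * disabled it is treated as a multi-byte NOP.
          */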
7870         if (s->flags & HF_MPX_EN_MASK) {
7871             mod = (modrm >> 6) & 3;
7872             reg = ((modrm >> 3) & 7) | REX_R(s);
7873             if (prefixes & PREFIX_REPZ) {
7874                 /* bndcl */
7875                 if (reg >= 4
7876                     || (prefixes & PREFIX_LOCK)
7877                     || s->aflag == MO_16) {
7878                     goto illegal_op;
7879                 }
7880                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7881             } else if (prefixes & PREFIX_REPNZ) {
7882                 /* bndcu */
7883                 if (reg >= 4
7884                     || (prefixes & PREFIX_LOCK)
7885                     || s->aflag == MO_16) {
7886                     goto illegal_op;
7887                 }
7888                 TCGv_i64 notu = tcg_temp_new_i64();
7889                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7890                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7891                 tcg_temp_free_i64(notu);
7892             } else if (prefixes & PREFIX_DATA) {
7893                 /* bndmov -- from reg/mem */
7894                 if (reg >= 4 || s->aflag == MO_16) {
7895                     goto illegal_op;
7896                 }
7897                 if (mod == 3) {
7898                     int reg2 = (modrm & 7) | REX_B(s);
7899                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7900                         goto illegal_op;
7901                     }
7902                     if (s->flags & HF_MPX_IU_MASK) {
7903                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7904                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7905                     }
7906                 } else {
7907                     gen_lea_modrm(env, s, modrm);
7908                     if (CODE64(s)) {
7909                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7910                                             s->mem_index, MO_LEQ);
7911                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7912                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7913                                             s->mem_index, MO_LEQ);
7914                     } else {
7915                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7916                                             s->mem_index, MO_LEUL);
7917                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7918                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7919                                             s->mem_index, MO_LEUL);
7920                     }
7921                     /* bnd registers are now in use */
7922                     gen_set_hflag(s, HF_MPX_IU_MASK);
7923                 }
7924             } else if (mod != 3) {
7925                 /* bndldx */
7926                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7927                 if (reg >= 4
7928                     || (prefixes & PREFIX_LOCK)
7929                     || s->aflag == MO_16
7930                     || a.base < -1) {
7931                     goto illegal_op;
7932                 }
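                 /* A0 = base + disp (segmented), T0 = index register;
                    the helper walks the bound table with both. */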
7933                 if (a.base >= 0) {
7934                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7935                 } else {
7936                     tcg_gen_movi_tl(s->A0, 0);
7937                 }
7938                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7939                 if (a.index >= 0) {
7940                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7941                 } else {
7942                     tcg_gen_movi_tl(s->T0, 0);
7943                 }
7944                 if (CODE64(s)) {
7945                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7946                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7947                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7948                 } else {
7949                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7950                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7951                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7952                 }
7953                 gen_set_hflag(s, HF_MPX_IU_MASK);
7954             }
7955         }
7956         gen_nop_modrm(env, s, modrm);
7957         break;
7958     case 0x11b:
7959         modrm = x86_ldub_code(env, s);
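         /*
          * MPX: 0f 1b encodes bndmk (f3), bndcn (f2), bndmov to
          * reg/mem (66) and bndstx; again a NOP when MPX is disabled.
          */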
7960         if (s->flags & HF_MPX_EN_MASK) {
7961             mod = (modrm >> 6) & 3;
7962             reg = ((modrm >> 3) & 7) | REX_R(s);
7963             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
7964                 /* bndmk */
7965                 if (reg >= 4
7966                     || (prefixes & PREFIX_LOCK)
7967                     || s->aflag == MO_16) {
7968                     goto illegal_op;
7969                 }
7970                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7971                 if (a.base >= 0) {
7972                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
7973                     if (!CODE64(s)) {
7974                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
7975                     }
7976                 } else if (a.base == -1) {
7977                     /* no base register: the lower bound is 0 */
7978                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
7979                 } else {
7980                     /* rip-relative generates #ud */
7981                     goto illegal_op;
7982                 }
7983                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
7984                 if (!CODE64(s)) {
7985                     tcg_gen_ext32u_tl(s->A0, s->A0);
7986                 }
7987                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
7988                 /* bnd registers are now in-use */
7989                 /* bnd registers are now in use */
7990                 break;
7991             } else if (prefixes & PREFIX_REPNZ) {
7992                 /* bndcn */
7993                 if (reg >= 4
7994                     || (prefixes & PREFIX_LOCK)
7995                     || s->aflag == MO_16) {
7996                     goto illegal_op;
7997                 }
7998                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
7999             } else if (prefixes & PREFIX_DATA) {
8000                 /* bndmov -- to reg/mem */
8001                 if (reg >= 4 || s->aflag == MO_16) {
8002                     goto illegal_op;
8003                 }
8004                 if (mod == 3) {
8005                     int reg2 = (modrm & 7) | REX_B(s);
8006                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8007                         goto illegal_op;
8008                     }
8009                     if (s->flags & HF_MPX_IU_MASK) {
8010                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8011                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8012                     }
8013                 } else {
8014                     gen_lea_modrm(env, s, modrm);
8015                     if (CODE64(s)) {
8016                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8017                                             s->mem_index, MO_LEQ);
8018                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8019                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8020                                             s->mem_index, MO_LEQ);
8021                     } else {
8022                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8023                                             s->mem_index, MO_LEUL);
8024                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8025                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8026                                             s->mem_index, MO_LEUL);
8027                     }
8028                 }
8029             } else if (mod != 3) {
8030                 /* bndstx */
8031                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8032                 if (reg >= 4
8033                     || (prefixes & PREFIX_LOCK)
8034                     || s->aflag == MO_16
8035                     || a.base < -1) {
8036                     goto illegal_op;
8037                 }
8038                 if (a.base >= 0) {
8039                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8040                 } else {
8041                     tcg_gen_movi_tl(s->A0, 0);
8042                 }
8043                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8044                 if (a.index >= 0) {
8045                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8046                 } else {
8047                     tcg_gen_movi_tl(s->T0, 0);
8048                 }
8049                 if (CODE64(s)) {
8050                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8051                                         cpu_bndl[reg], cpu_bndu[reg]);
8052                 } else {
8053                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8054                                         cpu_bndl[reg], cpu_bndu[reg]);
8055                 }
8056             }
8057         }
8058         gen_nop_modrm(env, s, modrm);
8059         break;
8060     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8061         modrm = x86_ldub_code(env, s);
8062         gen_nop_modrm(env, s, modrm);
8063         break;
8064 
8065     case 0x120: /* mov reg, crN */
8066     case 0x122: /* mov crN, reg */
8067         if (!check_cpl0(s)) {
8068             break;
8069         }
8070         modrm = x86_ldub_code(env, s);
8071         /*
8072          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8073          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8074          * processors all show that the mod bits are assumed to be 1's,
8075          * regardless of actual values.
8076          */
8077         rm = (modrm & 7) | REX_B(s);
8078         reg = ((modrm >> 3) & 7) | REX_R(s);
8079         switch (reg) {
8080         case 0:
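             /* With a LOCK prefix, AMD's alternate encoding turns a
                CR0 access into a CR8 access. */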
8081             if ((prefixes & PREFIX_LOCK) &&
8082                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8083                 reg = 8;
8084             }
8085             break;
8086         case 2:
8087         case 3:
8088         case 4:
8089         case 8:
8090             break;
8091         default:
8092             goto unknown_op;
8093         }
8094         ot = (CODE64(s) ? MO_64 : MO_32);
8095 
8096         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8097             gen_io_start();
8098         }
8099         if (b & 2) {
8100             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8101             gen_op_mov_v_reg(s, ot, s->T0, rm);
8102             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8103             gen_jmp_im(s, s->pc - s->cs_base);
8104             gen_eob(s);
8105         } else {
8106             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8107             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8108             gen_op_mov_reg_v(s, ot, rm, s->T0);
8109             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8110                 gen_jmp(s, s->pc - s->cs_base);
8111             }
8112         }
8113         break;
8114 
8115     case 0x121: /* mov reg, drN */
8116     case 0x123: /* mov drN, reg */
8117         if (check_cpl0(s)) {
8118             modrm = x86_ldub_code(env, s);
8119             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8120              * AMD documentation (24594.pdf) and testing of
8121              * Intel 386 and 486 processors all show that the mod bits
8122              * are assumed to be 1's, regardless of actual values.
8123              */
8124             rm = (modrm & 7) | REX_B(s);
8125             reg = ((modrm >> 3) & 7) | REX_R(s);
8126             if (CODE64(s))
8127                 ot = MO_64;
8128             else
8129                 ot = MO_32;
8130             if (reg >= 8) {
8131                 goto illegal_op;
8132             }
8133             if (b & 2) {
8134                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8135                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8136                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8137                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8138                 gen_jmp_im(s, s->pc - s->cs_base);
8139                 gen_eob(s);
8140             } else {
8141                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8142                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8143                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8144                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8145             }
8146         }
8147         break;
8148     case 0x106: /* clts */
8149         if (check_cpl0(s)) {
8150             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8151             gen_helper_clts(cpu_env);
8152             /* abort block because static cpu state changed */
8153             gen_jmp_im(s, s->pc - s->cs_base);
8154             gen_eob(s);
8155         }
8156         break;
8157     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8158     case 0x1c3: /* MOVNTI reg, mem */
8159         if (!(s->cpuid_features & CPUID_SSE2))
8160             goto illegal_op;
8161         ot = mo_64_32(dflag);
8162         modrm = x86_ldub_code(env, s);
8163         mod = (modrm >> 6) & 3;
8164         if (mod == 3)
8165             goto illegal_op;
8166         reg = ((modrm >> 3) & 7) | REX_R(s);
8167         /* generate a generic store */
8168         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8169         break;
8170     case 0x1ae:
8171         modrm = x86_ldub_code(env, s);
8172         switch (modrm) {
8173         CASE_MODRM_MEM_OP(0): /* fxsave */
8174             if (!(s->cpuid_features & CPUID_FXSR)
8175                 || (prefixes & PREFIX_LOCK)) {
8176                 goto illegal_op;
8177             }
8178             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8179                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8180                 break;
8181             }
8182             gen_lea_modrm(env, s, modrm);
8183             gen_helper_fxsave(cpu_env, s->A0);
8184             break;
8185 
8186         CASE_MODRM_MEM_OP(1): /* fxrstor */
8187             if (!(s->cpuid_features & CPUID_FXSR)
8188                 || (prefixes & PREFIX_LOCK)) {
8189                 goto illegal_op;
8190             }
8191             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8192                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8193                 break;
8194             }
8195             gen_lea_modrm(env, s, modrm);
8196             gen_helper_fxrstor(cpu_env, s->A0);
8197             break;
8198 
8199         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8200             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8201                 goto illegal_op;
8202             }
8203             if (s->flags & HF_TS_MASK) {
8204                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8205                 break;
8206             }
8207             gen_lea_modrm(env, s, modrm);
8208             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8209             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8210             break;
8211 
8212         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8213             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8214                 goto illegal_op;
8215             }
8216             if (s->flags & HF_TS_MASK) {
8217                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8218                 break;
8219             }
8220             gen_helper_update_mxcsr(cpu_env);
8221             gen_lea_modrm(env, s, modrm);
8222             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8223             gen_op_st_v(s, MO_32, s->T0, s->A0);
8224             break;
8225 
8226         CASE_MODRM_MEM_OP(4): /* xsave */
8227             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8228                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8229                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8230                 goto illegal_op;
8231             }
8232             gen_lea_modrm(env, s, modrm);
8233             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8234                                   cpu_regs[R_EDX]);
8235             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8236             break;
8237 
8238         CASE_MODRM_MEM_OP(5): /* xrstor */
8239             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8240                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8241                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8242                 goto illegal_op;
8243             }
8244             gen_lea_modrm(env, s, modrm);
8245             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8246                                   cpu_regs[R_EDX]);
8247             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8248             /* XRSTOR is how MPX is enabled, which changes how
8249                we translate.  Thus we need to end the TB.  */
8250             gen_update_cc_op(s);
8251             gen_jmp_im(s, s->pc - s->cs_base);
8252             gen_eob(s);
8253             break;
8254 
8255         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8256             if (prefixes & PREFIX_LOCK) {
8257                 goto illegal_op;
8258             }
8259             if (prefixes & PREFIX_DATA) {
8260                 /* clwb */
8261                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8262                     goto illegal_op;
8263                 }
8264                 gen_nop_modrm(env, s, modrm);
8265             } else {
8266                 /* xsaveopt */
8267                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8268                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8269                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8270                     goto illegal_op;
8271                 }
8272                 gen_lea_modrm(env, s, modrm);
8273                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8274                                       cpu_regs[R_EDX]);
8275                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8276             }
8277             break;
8278 
8279         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8280             if (prefixes & PREFIX_LOCK) {
8281                 goto illegal_op;
8282             }
8283             if (prefixes & PREFIX_DATA) {
8284                 /* clflushopt */
8285                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8286                     goto illegal_op;
8287                 }
8288             } else {
8289                 /* clflush */
8290                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8291                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8292                     goto illegal_op;
8293                 }
8294             }
8295             gen_nop_modrm(env, s, modrm);
8296             break;
8297 
8298         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8299         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8300         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8301         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8302             if (CODE64(s)
8303                 && (prefixes & PREFIX_REPZ)
8304                 && !(prefixes & PREFIX_LOCK)
8305                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8306                 TCGv base, treg, src, dst;
8307 
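                 /* modrm bit 3 selects FS vs GS, bit 4 write vs read;
                    the low bits (with REX.B) pick the GPR. */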
8308                 /* Preserve hflags bits by testing CR4 at runtime.  */
8309                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8310                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8311 
8312                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8313                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8314 
8315                 if (modrm & 0x10) {
8316                     /* wr*base */
8317                     dst = base, src = treg;
8318                 } else {
8319                     /* rd*base */
8320                     dst = treg, src = base;
8321                 }
8322 
8323                 if (s->dflag == MO_32) {
8324                     tcg_gen_ext32u_tl(dst, src);
8325                 } else {
8326                     tcg_gen_mov_tl(dst, src);
8327                 }
8328                 break;
8329             }
8330             goto unknown_op;
8331 
8332         case 0xf8: /* sfence / pcommit */
8333             if (prefixes & PREFIX_DATA) {
8334                 /* pcommit */
8335                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8336                     || (prefixes & PREFIX_LOCK)) {
8337                     goto illegal_op;
8338                 }
8339                 break;
8340             }
8341             /* fallthru */
8342         case 0xf9 ... 0xff: /* sfence */
8343             if (!(s->cpuid_features & CPUID_SSE)
8344                 || (prefixes & PREFIX_LOCK)) {
8345                 goto illegal_op;
8346             }
8347             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8348             break;
8349         case 0xe8 ... 0xef: /* lfence */
8350             if (!(s->cpuid_features & CPUID_SSE)
8351                 || (prefixes & PREFIX_LOCK)) {
8352                 goto illegal_op;
8353             }
8354             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8355             break;
8356         case 0xf0 ... 0xf7: /* mfence */
8357             if (!(s->cpuid_features & CPUID_SSE2)
8358                 || (prefixes & PREFIX_LOCK)) {
8359                 goto illegal_op;
8360             }
8361             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8362             break;
8363 
8364         default:
8365             goto unknown_op;
8366         }
8367         break;
8368 
8369     case 0x10d: /* 3DNow! prefetch(w) */
8370         modrm = x86_ldub_code(env, s);
8371         mod = (modrm >> 6) & 3;
8372         if (mod == 3)
8373             goto illegal_op;
8374         gen_nop_modrm(env, s, modrm);
8375         break;
8376     case 0x1aa: /* rsm */
8377         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8378         if (!(s->flags & HF_SMM_MASK))
8379             goto illegal_op;
8380 #ifdef CONFIG_USER_ONLY
8381         /* we should not be in SMM mode */
8382         g_assert_not_reached();
8383 #else
8384         gen_update_cc_op(s);
8385         gen_jmp_im(s, s->pc - s->cs_base);
8386         gen_helper_rsm(cpu_env);
8387 #endif /* CONFIG_USER_ONLY */
8388         gen_eob(s);
8389         break;
8390     case 0x1b8: /* SSE4.2 popcnt */
8391         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8392              PREFIX_REPZ)
8393             goto illegal_op;
8394         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8395             goto illegal_op;
8396 
8397         modrm = x86_ldub_code(env, s);
8398         reg = ((modrm >> 3) & 7) | REX_R(s);
8399 
8400         if (s->prefix & PREFIX_DATA) {
8401             ot = MO_16;
8402         } else {
8403             ot = mo_64_32(dflag);
8404         }
8405 
8406         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8407         gen_extu(ot, s->T0);
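         /* CC_OP_POPCNT computes ZF from cc_src, so save the
            zero-extended operand there. */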
8408         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8409         tcg_gen_ctpop_tl(s->T0, s->T0);
8410         gen_op_mov_reg_v(s, ot, reg, s->T0);
8411 
8412         set_cc_op(s, CC_OP_POPCNT);
8413         break;
8414     case 0x10e ... 0x10f:
8415         /* 3DNow! instructions, ignore prefixes */
8416         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8417         /* fall through */
8418     case 0x110 ... 0x117:
8419     case 0x128 ... 0x12f:
8420     case 0x138 ... 0x13a:
8421     case 0x150 ... 0x179:
8422     case 0x17c ... 0x17f:
8423     case 0x1c2:
8424     case 0x1c4 ... 0x1c6:
8425     case 0x1d0 ... 0x1fe:
8426         gen_sse(env, s, b, pc_start);
8427         break;
8428     default:
8429         goto unknown_op;
8430     }
8431     return s->pc;
8432  illegal_op:
8433     gen_illegal_opcode(s);
8434     return s->pc;
8435  unknown_op:
8436     gen_unknown_opcode(env, s);
8437     return s->pc;
8438 }
8439 
8440 void tcg_x86_init(void)
8441 {
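     /*
      * Create TCG globals aliasing fields of CPUX86State; generated
      * code then operates on these instead of explicit loads/stores.
      */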
8442     static const char reg_names[CPU_NB_REGS][4] = {
8443 #ifdef TARGET_X86_64
8444         [R_EAX] = "rax",
8445         [R_EBX] = "rbx",
8446         [R_ECX] = "rcx",
8447         [R_EDX] = "rdx",
8448         [R_ESI] = "rsi",
8449         [R_EDI] = "rdi",
8450         [R_EBP] = "rbp",
8451         [R_ESP] = "rsp",
8452         [8]  = "r8",
8453         [9]  = "r9",
8454         [10] = "r10",
8455         [11] = "r11",
8456         [12] = "r12",
8457         [13] = "r13",
8458         [14] = "r14",
8459         [15] = "r15",
8460 #else
8461         [R_EAX] = "eax",
8462         [R_EBX] = "ebx",
8463         [R_ECX] = "ecx",
8464         [R_EDX] = "edx",
8465         [R_ESI] = "esi",
8466         [R_EDI] = "edi",
8467         [R_EBP] = "ebp",
8468         [R_ESP] = "esp",
8469 #endif
8470     };
8471     static const char seg_base_names[6][8] = {
8472         [R_CS] = "cs_base",
8473         [R_DS] = "ds_base",
8474         [R_ES] = "es_base",
8475         [R_FS] = "fs_base",
8476         [R_GS] = "gs_base",
8477         [R_SS] = "ss_base",
8478     };
8479     static const char bnd_regl_names[4][8] = {
8480         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8481     };
8482     static const char bnd_regu_names[4][8] = {
8483         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8484     };
8485     int i;
8486 
8487     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8488                                        offsetof(CPUX86State, cc_op), "cc_op");
8489     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8490                                     "cc_dst");
8491     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8492                                     "cc_src");
8493     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8494                                      "cc_src2");
8495 
8496     for (i = 0; i < CPU_NB_REGS; ++i) {
8497         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8498                                          offsetof(CPUX86State, regs[i]),
8499                                          reg_names[i]);
8500     }
8501 
8502     for (i = 0; i < 6; ++i) {
8503         cpu_seg_base[i]
8504             = tcg_global_mem_new(cpu_env,
8505                                  offsetof(CPUX86State, segs[i].base),
8506                                  seg_base_names[i]);
8507     }
8508 
8509     for (i = 0; i < 4; ++i) {
8510         cpu_bndl[i]
8511             = tcg_global_mem_new_i64(cpu_env,
8512                                      offsetof(CPUX86State, bnd_regs[i].lb),
8513                                      bnd_regl_names[i]);
8514         cpu_bndu[i]
8515             = tcg_global_mem_new_i64(cpu_env,
8516                                      offsetof(CPUX86State, bnd_regs[i].ub),
8517                                      bnd_regu_names[i]);
8518     }
8519 }
8520 
8521 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8522 {
8523     DisasContext *dc = container_of(dcbase, DisasContext, base);
8524     CPUX86State *env = cpu->env_ptr;
8525     uint32_t flags = dc->base.tb->flags;
8526     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8527     int iopl = (flags >> IOPL_SHIFT) & 3;
8528 
8529     dc->cs_base = dc->base.tb->cs_base;
8530     dc->flags = flags;
8531 #ifndef CONFIG_USER_ONLY
8532     dc->cpl = cpl;
8533     dc->iopl = iopl;
8534 #endif
8535 
8536     /* We make some simplifying assumptions; validate they're correct. */
8537     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8538     g_assert(CPL(dc) == cpl);
8539     g_assert(IOPL(dc) == iopl);
8540     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8541     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8542     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8543     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8544     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8545     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8546     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8547     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8548 
8549     dc->cc_op = CC_OP_DYNAMIC;
8550     dc->cc_op_dirty = false;
8551     dc->popl_esp_hack = 0;
8552     /* select memory access functions */
8553     dc->mem_index = 0;
8554 #ifdef CONFIG_SOFTMMU
8555     dc->mem_index = cpu_mmu_index(env, false);
8556 #endif
8557     dc->cpuid_features = env->features[FEAT_1_EDX];
8558     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8559     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8560     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8561     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8562     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8563     dc->jmp_opt = !(dc->base.singlestep_enabled ||
8564                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8565     /*
8566      * If jmp_opt, we want to handle each string instruction individually.
8567      * For icount also disable repz optimization so that each iteration
8568      * is accounted separately.
8569      */
8570     dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);
8571 
8572     dc->T0 = tcg_temp_new();
8573     dc->T1 = tcg_temp_new();
8574     dc->A0 = tcg_temp_new();
8575 
8576     dc->tmp0 = tcg_temp_new();
8577     dc->tmp1_i64 = tcg_temp_new_i64();
8578     dc->tmp2_i32 = tcg_temp_new_i32();
8579     dc->tmp3_i32 = tcg_temp_new_i32();
8580     dc->tmp4 = tcg_temp_new();
8581     dc->ptr0 = tcg_temp_new_ptr();
8582     dc->ptr1 = tcg_temp_new_ptr();
8583     dc->cc_srcT = tcg_temp_local_new();
8584 }
8585 
8586 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8587 {
8588 }
8589 
8590 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8591 {
8592     DisasContext *dc = container_of(dcbase, DisasContext, base);
8593 
8594     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8595 }
8596 
8597 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8598                                      const CPUBreakpoint *bp)
8599 {
8600     DisasContext *dc = container_of(dcbase, DisasContext, base);
8601     /* If RF is set, suppress an internally generated breakpoint.  */
8602     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8603     if (bp->flags & flags) {
8604         gen_debug(dc);
8605         /* The address covered by the breakpoint must be included in
8606            [tb->pc, tb->pc + tb->size) in order for it to be properly
8607            cleared -- thus we increment the PC here so that the generic
8608            logic setting tb->size later does the right thing.  */
8609         dc->base.pc_next += 1;
8610         return true;
8611     } else {
8612         return false;
8613     }
8614 }
8615 
8616 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8617 {
8618     DisasContext *dc = container_of(dcbase, DisasContext, base);
8619     target_ulong pc_next;
8620 
8621 #ifdef TARGET_VSYSCALL_PAGE
8622     /*
8623      * Detect entry into the vsyscall page and invoke the syscall.
8624      */
8625     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8626         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8627         dc->base.pc_next = dc->pc + 1;
8628         return;
8629     }
8630 #endif
8631 
8632     pc_next = disas_insn(dc, cpu);
8633 
8634     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8635         /* In single-step mode we generate only one instruction and
8636            raise an exception. */
8637         /* If IRQs were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8638            the flag and abort the translation to give the IRQs a
8639            chance to be serviced. */
8640         dc->base.is_jmp = DISAS_TOO_MANY;
8641     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8642                && ((pc_next & TARGET_PAGE_MASK)
8643                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8644                        & TARGET_PAGE_MASK)
8645                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8646         /* Do not cross a page boundary in icount mode, since that
8647            can raise an exception mid-block.  Stop only when the
8648            boundary would be crossed by the next instruction; if the
8649            current instruction has already crossed it, that is fine,
8650            because no exception stopped this code.
8651          */
8652         dc->base.is_jmp = DISAS_TOO_MANY;
8653     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8654         dc->base.is_jmp = DISAS_TOO_MANY;
8655     }
8656 
8657     dc->base.pc_next = pc_next;
8658 }
8659 
8660 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8661 {
8662     DisasContext *dc = container_of(dcbase, DisasContext, base);
8663 
8664     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8665         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8666         gen_eob(dc);
8667     }
8668 }
8669 
8670 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8671                               CPUState *cpu)
8672 {
8673     DisasContext *dc = container_of(dcbase, DisasContext, base);
8674 
8675     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8676     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8677 }
8678 
8679 static const TranslatorOps i386_tr_ops = {
8680     .init_disas_context = i386_tr_init_disas_context,
8681     .tb_start           = i386_tr_tb_start,
8682     .insn_start         = i386_tr_insn_start,
8683     .breakpoint_check   = i386_tr_breakpoint_check,
8684     .translate_insn     = i386_tr_translate_insn,
8685     .tb_stop            = i386_tr_tb_stop,
8686     .disas_log          = i386_tr_disas_log,
8687 };
8688 
8689 /* generate intermediate code for basic block 'tb'.  */
8690 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8691 {
8692     DisasContext dc;
8693 
8694     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8695 }
8696 
8697 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8698                           target_ulong *data)
8699 {
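     /* data[] matches tcg_gen_insn_start(): data[0] is the pc
        (eip + cs_base), data[1] the cc_op at that point. */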
8700     int cc_op = data[1];
8701     env->eip = data[0] - tb->cs_base;
8702     if (cc_op != CC_OP_DYNAMIC) {
8703         env->cc_op = cc_op;
8704     }
8705 }
8706