xref: /openbmc/qemu/target/i386/tcg/translate.c (revision 744c72a8)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "trace-tcg.h"
34 #include "exec/log.h"
35 
/* Instruction prefix bits accumulated into DisasContext.prefix. */
#define PREFIX_REPZ   0x01
#define PREFIX_REPNZ  0x02
#define PREFIX_LOCK   0x04
#define PREFIX_DATA   0x08
#define PREFIX_ADR    0x10
#define PREFIX_VEX    0x20
#define PREFIX_REX    0x40

/* Count-trailing/leading-zeros at target_ulong width. */
#ifdef TARGET_X86_64
# define ctztl  ctz64
# define clztl  clz64
#else
# define ctztl  ctz32
# define clztl  clz32
#endif

/* For a switch indexed by MODRM, match all memory operands for a given OP.  */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7

/* As above, but additionally match register operands (mod == 3).  */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | (OP << 3) | 0 ... (0 << 6) | (OP << 3) | 7: \
    case (1 << 6) | (OP << 3) | 0 ... (1 << 6) | (OP << 3) | 7: \
    case (2 << 6) | (OP << 3) | 0 ... (2 << 6) | (OP << 3) | 7: \
    case (3 << 6) | (OP << 3) | 0 ... (3 << 6) | (OP << 3) | 7
63 
64 //#define MACRO_TEST   1
65 
/* global register indexes */
static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;  /* lazy flags operands */
static TCGv_i32 cpu_cc_op;                        /* stored CC_OP value */
static TCGv cpu_regs[CPU_NB_REGS];                /* guest integer registers */
static TCGv cpu_seg_base[6];                      /* segment base addresses */
static TCGv_i64 cpu_bndl[4];                      /* BND lower bounds */
static TCGv_i64 cpu_bndu[4];                      /* BND upper bounds */
73 
74 #include "exec/gen-icount.h"
75 
/* Per-translation-block decode/translate state. */
typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag;           /* effective address size of the current insn */
    MemOp dflag;           /* effective operand size of the current insn */

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;  /* accumulated PREFIX_* bits */

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty; /* cc_op not yet flushed to cpu_cc_op global */

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    /* siglongjmp target; used by code outside this chunk. */
    sigjmp_buf jmpbuf;
} DisasContext;
135 
/* The environment in which user-only runs is constrained. */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
/* For 64-bit user-only these segment properties are compile-time constant. */
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
/* CODE64/LMA are constant false without TARGET_X86_64. */
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif

/* REX prefix fields; compile-time constants when !TARGET_X86_64. */
#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif
185 
/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need not either sprinkle
 * ifdefs through the translator, nor provide the helper function.
 * A call to a stub must be dynamically unreachable in user-only builds;
 * qemu_build_not_reached() enforces that the call is optimized away.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif
219 
/* Forward declarations for generators defined later in this file. */
static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);
226 
/* i386 arith/logic operations */
enum {
    OP_ADDL,
    OP_ORL,
    OP_ADCL,
    OP_SBBL,
    OP_ANDL,
    OP_SUBL,
    OP_XORL,
    OP_CMPL,
};

/* i386 shift ops */
enum {
    OP_ROL,
    OP_ROR,
    OP_RCL,
    OP_RCR,
    OP_SHL,
    OP_SHR,
    OP_SHL1, /* undocumented */
    OP_SAR = 7,
};

/* jcc/setcc condition codes; the opcode's low bit selects the inverted
   sense (see gen_prepare_cc: inv = b & 1, jcc_op = (b >> 1) & 7). */
enum {
    JCC_O,
    JCC_B,
    JCC_Z,
    JCC_BE,
    JCC_S,
    JCC_P,
    JCC_L,
    JCC_LE,
};

enum {
    /* I386 int registers */
    OR_EAX,   /* MUST be even numbered */
    OR_ECX,
    OR_EDX,
    OR_EBX,
    OR_ESP,
    OR_EBP,
    OR_ESI,
    OR_EDI,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1,
    OR_A0, /* temporary register used when doing address evaluation */
};

/* Flag bits for cc_op_live[]: which CC globals a given CC_OP reads. */
enum {
    USES_CC_DST  = 1,
    USES_CC_SRC  = 2,
    USES_CC_SRC2 = 4,
    USES_CC_SRCT = 8,
};
284 
/* Bit set if the global variable is live after setting CC_OP to X.
   Consulted by set_cc_op() to discard values that become dead.  */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
306 
/* Change the tracked CC operation to OP, discarding the contents of any
 * CC globals (cc_dst/cc_src/cc_src2/cc_srcT) that OP no longer uses,
 * and updating the dirty state used by gen_update_cc_op().  */
static void set_cc_op(DisasContext *s, CCOp op)
{
    int dead;

    if (s->cc_op == op) {
        return;
    }

    /* Discard CC computation that will no longer be used.  */
    dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
    if (dead & USES_CC_DST) {
        tcg_gen_discard_tl(cpu_cc_dst);
    }
    if (dead & USES_CC_SRC) {
        tcg_gen_discard_tl(cpu_cc_src);
    }
    if (dead & USES_CC_SRC2) {
        tcg_gen_discard_tl(cpu_cc_src2);
    }
    if (dead & USES_CC_SRCT) {
        tcg_gen_discard_tl(s->cc_srcT);
    }

    if (op == CC_OP_DYNAMIC) {
        /* The DYNAMIC setting is translator only, and should never be
           stored.  Thus we always consider it clean.  */
        s->cc_op_dirty = false;
    } else {
        /* Discard any computed CC_OP value (see shifts).  */
        if (s->cc_op == CC_OP_DYNAMIC) {
            tcg_gen_discard_i32(cpu_cc_op);
        }
        s->cc_op_dirty = true;
    }
    s->cc_op = op;
}
343 
344 static void gen_update_cc_op(DisasContext *s)
345 {
346     if (s->cc_op_dirty) {
347         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
348         s->cc_op_dirty = false;
349     }
350 }
351 
/* Number of operand sizes handled by the translator
   (8/16/32 bit, plus 64 bit on x86_64). */
#ifdef TARGET_X86_64

#define NB_OP_SIZES 4

#else /* !TARGET_X86_64 */

#define NB_OP_SIZES 3

#endif /* !TARGET_X86_64 */

/* Byte offsets of the 8/16/32-bit sub-registers within a target_ulong
   cpu_regs[] slot, for big- and little-endian hosts respectively. */
#if defined(HOST_WORDS_BIGENDIAN)
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#else
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#endif
375 
376 /* In instruction encodings for byte register accesses the
377  * register number usually indicates "low 8 bits of register N";
378  * however there are some special cases where N 4..7 indicates
379  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
380  * true for this special case, false otherwise.
381  */
382 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
383 {
384     /* Any time the REX prefix is present, byte registers are uniform */
385     if (reg < 4 || REX_PREFIX(s)) {
386         return false;
387     }
388     return true;
389 }
390 
391 /* Select the size of a push/pop operation.  */
392 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
393 {
394     if (CODE64(s)) {
395         return ot == MO_16 ? MO_16 : MO_64;
396     } else {
397         return ot;
398     }
399 }
400 
401 /* Select the size of the stack pointer.  */
402 static inline MemOp mo_stacksize(DisasContext *s)
403 {
404     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
405 }
406 
407 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
408 static inline MemOp mo_64_32(MemOp ot)
409 {
410 #ifdef TARGET_X86_64
411     return ot == MO_64 ? MO_64 : MO_32;
412 #else
413     return MO_32;
414 #endif
415 }
416 
417 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
418    byte vs word opcodes.  */
419 static inline MemOp mo_b_d(int b, MemOp ot)
420 {
421     return b & 1 ? ot : MO_8;
422 }
423 
424 /* Select size 8 if lsb of B is clear, else OT capped at 32.
425    Used for decoding operand size of port opcodes.  */
426 static inline MemOp mo_b_d32(int b, MemOp ot)
427 {
428     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
429 }
430 
/* Write T0 into guest register REG at operand size OT, using x86 merge
 * semantics: 8- and 16-bit writes merge into the old register value,
 * 32-bit writes zero-extend (clearing the high half on x86_64).  */
static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
{
    switch(ot) {
    case MO_8:
        if (!byte_reg_is_xH(s, reg)) {
            tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
        } else {
            /* AH/CH/DH/BH: bits 15..8 of register REG - 4.  */
            tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
        }
        break;
    case MO_16:
        tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
        break;
    case MO_32:
        /* For x86_64, this sets the higher half of register to zero.
           For i386, this is equivalent to a mov. */
        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
        break;
#ifdef TARGET_X86_64
    case MO_64:
        tcg_gen_mov_tl(cpu_regs[reg], t0);
        break;
#endif
    default:
        tcg_abort();
    }
}
458 
459 static inline
460 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
461 {
462     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
463         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
464     } else {
465         tcg_gen_mov_tl(t0, cpu_regs[reg]);
466     }
467 }
468 
469 static void gen_add_A0_im(DisasContext *s, int val)
470 {
471     tcg_gen_addi_tl(s->A0, s->A0, val);
472     if (!CODE64(s)) {
473         tcg_gen_ext32u_tl(s->A0, s->A0);
474     }
475 }
476 
/* Set env->eip from DEST; a plain store, no control-flow side effects. */
static inline void gen_op_jmp_v(TCGv dest)
{
    tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
}
481 
482 static inline
483 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
484 {
485     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
486     gen_op_mov_reg_v(s, size, reg, s->tmp0);
487 }
488 
489 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
490 {
491     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
492     gen_op_mov_reg_v(s, size, reg, s->tmp0);
493 }
494 
/* Emit a little-endian guest load of size IDX from address A0 into T0,
   using the TB's mem_index. */
static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
}

/* Emit a little-endian guest store of T0 (size IDX) to address A0. */
static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
{
    tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
}
504 
505 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
506 {
507     if (d == OR_TMP0) {
508         gen_op_st_v(s, idx, s->T0, s->A0);
509     } else {
510         gen_op_mov_reg_v(s, idx, d, s->T0);
511     }
512 }
513 
/* Set env->eip to the compile-time constant PC. */
static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
{
    tcg_gen_movi_tl(s->tmp0, pc);
    gen_op_jmp_v(s->tmp0);
}
519 
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        if (ovr_seg < 0) {
            /* 64-bit addresses are used unmodified unless a base applies. */
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            /* 32-bit address in 64-bit mode: zero-extend first, then add
               the segment base without wrapping back to 32 bits. */
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            /* Legacy modes: add the base, then wrap to 32 bits. */
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
575 
/* A0 = seg:ESI for string ops; default segment DS, override honored. */
static inline void gen_string_movl_A0_ESI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
}

/* A0 = ES:EDI for string ops; the ES segment cannot be overridden. */
static inline void gen_string_movl_A0_EDI(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
}
585 
586 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
587 {
588     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
589     tcg_gen_shli_tl(s->T0, s->T0, ot);
590 };
591 
/* Extend SRC from SIZE to the full target width into DST (sign-extended
 * when SIGN, else zero-extended) and return DST.  When SIZE is already
 * the full width, SRC is returned unmodified and DST is untouched.  */
static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
{
    switch (size) {
    case MO_8:
        if (sign) {
            tcg_gen_ext8s_tl(dst, src);
        } else {
            tcg_gen_ext8u_tl(dst, src);
        }
        return dst;
    case MO_16:
        if (sign) {
            tcg_gen_ext16s_tl(dst, src);
        } else {
            tcg_gen_ext16u_tl(dst, src);
        }
        return dst;
#ifdef TARGET_X86_64
    case MO_32:
        if (sign) {
            tcg_gen_ext32s_tl(dst, src);
        } else {
            tcg_gen_ext32u_tl(dst, src);
        }
        return dst;
#endif
    default:
        /* Already full width: no extension needed.  */
        return src;
    }
}
622 
/* Zero-extend REG in place from size OT. */
static void gen_extu(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, false);
}

/* Sign-extend REG in place from size OT. */
static void gen_exts(MemOp ot, TCGv reg)
{
    gen_ext_tl(reg, reg, ot, true);
}
632 
633 static inline
634 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
635 {
636     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
637     gen_extu(size, s->tmp0);
638     tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
639 }
640 
641 static inline
642 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
643 {
644     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
645     gen_extu(size, s->tmp0);
646     tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
647 }
648 
649 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
650 {
651     switch (ot) {
652     case MO_8:
653         gen_helper_inb(v, cpu_env, n);
654         break;
655     case MO_16:
656         gen_helper_inw(v, cpu_env, n);
657         break;
658     case MO_32:
659         gen_helper_inl(v, cpu_env, n);
660         break;
661     default:
662         tcg_abort();
663     }
664 }
665 
666 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
667 {
668     switch (ot) {
669     case MO_8:
670         gen_helper_outb(cpu_env, v, n);
671         break;
672     case MO_16:
673         gen_helper_outw(cpu_env, v, n);
674         break;
675     case MO_32:
676         gen_helper_outl(cpu_env, v, n);
677         break;
678     default:
679         tcg_abort();
680     }
681 }
682 
/*
 * Validate that access to [port, port + 1<<ot) is allowed.
 * Raise #GP, or VMM exit if not.
 * Returns false only when the access is known at translation time to
 * fault (user-only, where the TSS check always fails); returns true
 * when any required runtime check has been emitted.
 */
static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
                         uint32_t svm_flags)
{
#ifdef CONFIG_USER_ONLY
    /*
     * We do not implement the ioperm(2) syscall, so the TSS check
     * will always fail.
     */
    gen_exception_gpf(s);
    return false;
#else
    if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
        gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
    }
    if (GUEST(s)) {
        /* Under SVM, also emit the IOIO intercept check.  */
        target_ulong cur_eip = s->base.pc_next - s->cs_base;
        target_ulong next_eip = s->pc - s->cs_base;

        gen_update_cc_op(s);
        gen_jmp_im(s, cur_eip);
        if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
            svm_flags |= SVM_IOIO_REP_MASK;
        }
        svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
        gen_helper_svm_check_io(cpu_env, port,
                                tcg_constant_i32(svm_flags),
                                tcg_constant_i32(next_eip - cur_eip));
    }
    return true;
#endif
}
718 
/* Generate one MOVS iteration: load from seg:ESI, store to ES:EDI,
 * then advance ESI and EDI by the direction-flag delta. */
static inline void gen_movs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
729 
/* CC update for one-operand ops: only cc_dst (the result) is recorded. */
static void gen_op_update1_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

/* CC update for two-operand ops: cc_src = T1, cc_dst = T0 (the result). */
static void gen_op_update2_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

/* CC update for three-operand ops: additionally record REG in cc_src2. */
static void gen_op_update3_cc(DisasContext *s, TCGv reg)
{
    tcg_gen_mov_tl(cpu_cc_src2, reg);
    tcg_gen_mov_tl(cpu_cc_src, s->T1);
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
}

/* CC update for TEST: flags derive from T0 & T1, recorded in cc_dst. */
static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
{
    tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
}

/* CC update for NEG: cc_dst = result, cc_src = -T0, cc_srcT = 0. */
static void gen_op_update_neg_cc(DisasContext *s)
{
    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
    tcg_gen_neg_tl(cpu_cc_src, s->T0);
    tcg_gen_movi_tl(s->cc_srcT, 0);
}
759 
/* compute all eflags to cc_src, switching the tracked cc_op to
 * CC_OP_EFLAGS.  Dead CC globals are substituted with a zero temporary
 * so the helper never reads discarded values.  */
static void gen_compute_eflags(DisasContext *s)
{
    TCGv zero, dst, src1, src2;
    int live, dead;

    if (s->cc_op == CC_OP_EFLAGS) {
        return;
    }
    if (s->cc_op == CC_OP_CLR) {
        /* CLR: flags are constants — Z and P set, all others clear.  */
        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
        set_cc_op(s, CC_OP_EFLAGS);
        return;
    }

    zero = NULL;
    dst = cpu_cc_dst;
    src1 = cpu_cc_src;
    src2 = cpu_cc_src2;

    /* Take care to not read values that are not live.  */
    live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
    dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
    if (dead) {
        zero = tcg_const_tl(0);
        if (dead & USES_CC_DST) {
            dst = zero;
        }
        if (dead & USES_CC_SRC) {
            src1 = zero;
        }
        if (dead & USES_CC_SRC2) {
            src2 = zero;
        }
    }

    gen_update_cc_op(s);
    gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
    set_cc_op(s, CC_OP_EFLAGS);

    if (dead) {
        tcg_temp_free(zero);
    }
}
804 
/* Recipe for computing one condition from the current CC state,
 * produced by the gen_prepare_eflags_* functions below.  */
typedef struct CCPrepare {
    TCGCond cond;       /* comparison to apply */
    TCGv reg;           /* first operand */
    TCGv reg2;          /* second operand, valid when use_reg2 is set */
    target_ulong imm;   /* immediate second operand otherwise */
    target_ulong mask;  /* mask applied to reg first; -1 means none */
    bool use_reg2;      /* compare against reg2 rather than imm */
    bool no_setcond;    /* reg already holds the computed condition value */
} CCPrepare;
814 
/* compute eflags.C to reg.  Produces a CCPrepare tailored to the current
 * cc_op, avoiding a full flags computation where possible; REG may be
 * used as scratch.  */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* Carry is known to be clear.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
888 
/* compute eflags.P to reg; parity always requires the full flags
   computation, after which it is bit CC_P of cc_src. */
static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
{
    gen_compute_eflags(s);
    return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                         .mask = CC_P };
}
896 
/* compute eflags.S to reg */
static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_S };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* Sign is known to be clear.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        {
            /* Sign bit of the result (cc_dst) at the operation's size.  */
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
            return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
        }
    }
}
921 
/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        /* Overflow is tracked directly in cc_src2 for these ops.  */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
                             .mask = -1, .no_setcond = true };
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* Overflow is known to be clear.  */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
    default:
        gen_compute_eflags(s);
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_O };
    }
}
939 
/* compute eflags.Z to reg */
static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
{
    switch (s->cc_op) {
    case CC_OP_DYNAMIC:
        gen_compute_eflags(s);
        /* FALLTHRU */
    case CC_OP_EFLAGS:
    case CC_OP_ADCX:
    case CC_OP_ADOX:
    case CC_OP_ADCOX:
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = CC_Z };
    case CC_OP_CLR:
        /* Zero is known to be set.  */
        return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
    case CC_OP_POPCNT:
        /* Z is set iff the popcount input (cc_src) was zero.  */
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
                             .mask = -1 };
    default:
        {
            /* Z is set iff the result (cc_dst) is zero at the op's size.  */
            MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
            TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
            return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
        }
    }
}
966 
/* Prepare a comparison (CCPrepare) that evaluates jump opcode value 'b'
   against the current lazy condition-code state.  'reg' may be clobbered
   as scratch.  In the fast case, T0 is guaranteed not to be used. */
static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
{
    int inv, jcc_op, cond;
    MemOp size;
    CCPrepare cc;
    TCGv t0;

    /* Bit 0 of the opcode inverts the condition (e.g. JZ vs JNZ).  */
    inv = b & 1;
    jcc_op = (b >> 1) & 7;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* We optimize relational operators for the cmp/jcc case.  */
        size = s->cc_op - CC_OP_SUBB;
        switch (jcc_op) {
        case JCC_BE:
            /* Unsigned <=: compare the zero-extended subtraction
               operands (cc_srcT vs cc_src) directly.  */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_extu(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
            cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        case JCC_L:
            cond = TCG_COND_LT;
            goto fast_jcc_l;
        case JCC_LE:
            cond = TCG_COND_LE;
        fast_jcc_l:
            /* Signed < / <=: same idea with sign extension.  */
            tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
            gen_exts(size, s->tmp4);
            t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
            cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
                               .reg2 = t0, .mask = -1, .use_reg2 = true };
            break;

        default:
            goto slow_jcc;
        }
        break;

    default:
    slow_jcc:
        /* This actually generates good code for JC, JZ and JS.  */
        switch (jcc_op) {
        case JCC_O:
            cc = gen_prepare_eflags_o(s, reg);
            break;
        case JCC_B:
            cc = gen_prepare_eflags_c(s, reg);
            break;
        case JCC_Z:
            cc = gen_prepare_eflags_z(s, reg);
            break;
        case JCC_BE:
            /* True when CF or ZF is set in the materialized flags.  */
            gen_compute_eflags(s);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                               .mask = CC_Z | CC_C };
            break;
        case JCC_S:
            cc = gen_prepare_eflags_s(s, reg);
            break;
        case JCC_P:
            cc = gen_prepare_eflags_p(s, reg);
            break;
        case JCC_L:
            /* SF != OF: shift OF into the SF position and xor.  */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S };
            break;
        default:
        case JCC_LE:
            /* (SF != OF) or ZF: same trick with CC_Z added to the mask.  */
            gen_compute_eflags(s);
            if (reg == cpu_cc_src) {
                reg = s->tmp0;
            }
            tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
            tcg_gen_xor_tl(reg, reg, cpu_cc_src);
            cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                               .mask = CC_S | CC_Z };
            break;
        }
        break;
    }

    if (inv) {
        cc.cond = tcg_invert_cond(cc.cond);
    }
    return cc;
}
1064 
/* Set 'reg' to the 0/1 truth value of jump-condition opcode 'b'.  */
static void gen_setcc1(DisasContext *s, int b, TCGv reg)
{
    CCPrepare cc = gen_prepare_cc(s, b, reg);

    if (cc.no_setcond) {
        /* cc.reg already holds the 0/1 result; TCG_COND_EQ means the
           sense is inverted, otherwise copy it through.  */
        if (cc.cond == TCG_COND_EQ) {
            tcg_gen_xori_tl(reg, cc.reg, 1);
        } else {
            tcg_gen_mov_tl(reg, cc.reg);
        }
        return;
    }

    /* Fast path: a "bit set" test against a single-bit mask reduces to
       shift + and, avoiding a setcond.  */
    if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
        cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
        tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
        tcg_gen_andi_tl(reg, reg, 1);
        return;
    }
    /* Apply the mask first, then the comparison.  */
    if (cc.mask != -1) {
        tcg_gen_andi_tl(reg, cc.reg, cc.mask);
        cc.reg = reg;
    }
    if (cc.use_reg2) {
        tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
    } else {
        tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
    }
}
1094 
/* Compute the carry flag into 'reg' as a 0/1 value (JCC_B tests CF).  */
static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
{
    gen_setcc1(s, JCC_B << 1, reg);
}
1099 
/* generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    /* Reduce a masked test to a plain register comparison.  */
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1116 
/* Generate a conditional jump to label 'l1' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used.
   A translation block must end soon.  */
static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
{
    CCPrepare cc = gen_prepare_cc(s, b, s->T0);

    /* Unlike gen_jcc1_noeob, spill the current cc_op to env and mark it
       dynamic, since both branch destinations must see consistent
       condition-code state at the end of the TB.  */
    gen_update_cc_op(s);
    if (cc.mask != -1) {
        tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
        cc.reg = s->T0;
    }
    set_cc_op(s, CC_OP_DYNAMIC);
    if (cc.use_reg2) {
        tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
    } else {
        tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
    }
}
1136 
/* XXX: does not work with gdbstub "ice" single step - not a
   serious problem */
/* Emit "if ECX == 0, jump to the insn at next_eip".  Returns the label
   (l2) placed just before that exit jump, so callers can branch to it
   to terminate a REP loop.  */
static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
{
    TCGLabel *l1 = gen_new_label();
    TCGLabel *l2 = gen_new_label();
    /* Skip the exit when ECX is non-zero.  */
    gen_op_jnz_ecx(s, s->aflag, l1);
    gen_set_label(l2);
    gen_jmp_tb(s, next_eip, 1);
    gen_set_label(l1);
    return l2;
}
1149 
/* STOS: store rAX (low 'ot' bytes) at [EDI], then step EDI.  */
static inline void gen_stos(DisasContext *s, MemOp ot)
{
    /* NOTE(review): MO_32 here presumably just selects a plain register
       read; only 'ot' bytes are stored below — confirm vs gen_op_mov_v_reg. */
    gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
    gen_string_movl_A0_EDI(s);
    gen_op_st_v(s, ot, s->T0, s->A0);
    /* Direction-flag-dependent step (see gen_op_movl_T0_Dshift).  */
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1158 
/* LODS: load an 'ot'-sized element from [ESI] into rAX, then step ESI.  */
static inline void gen_lods(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);
    gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
}
1167 
/* SCAS: compare rAX with the element at [EDI] (flags only), then
   step EDI.  */
static inline void gen_scas(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    /* gen_op's OP_CMPL computes rAX - T1 into the CC variables.  */
    gen_op(s, OP_CMPL, ot, R_EAX);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1176 
/* CMPS: compare [ESI] with [EDI] (flags only), then step both.  */
static inline void gen_cmps(DisasContext *s, MemOp ot)
{
    /* T1 = [EDI]; A0 is then re-pointed at ESI so that gen_op's
       OR_TMP0 path loads T0 = [ESI] before comparing.  */
    gen_string_movl_A0_EDI(s);
    gen_op_ld_v(s, ot, s->T1, s->A0);
    gen_string_movl_A0_ESI(s);
    gen_op(s, OP_CMPL, ot, OR_TMP0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
}
1187 
/* If I/O breakpoints are active (HF_IOBPT), call the bpt_io helper with
   the port, the access size in bytes (1 << ot), and the EIP after the
   current insn so execution can resume there.  */
static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
{
    if (s->flags & HF_IOBPT_MASK) {
#ifdef CONFIG_USER_ONLY
        /* user-mode cpu should not be in IOBPT mode */
        g_assert_not_reached();
#else
        TCGv_i32 t_size = tcg_const_i32(1 << ot);
        TCGv t_next = tcg_const_tl(s->pc - s->cs_base);

        gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
        tcg_temp_free_i32(t_size);
        tcg_temp_free(t_next);
#endif /* CONFIG_USER_ONLY */
    }
}
1204 
/* INS: read an 'ot'-sized value from port DX into [EDI], then step EDI.  */
static inline void gen_ins(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_EDI(s);
    /* Note: we must do this dummy write first to be restartable in
       case of page fault. */
    tcg_gen_movi_tl(s->T0, 0);
    gen_op_st_v(s, ot, s->T0, s->A0);
    /* The port number is the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    gen_helper_in_func(ot, s->T0, s->tmp2_i32);
    gen_op_st_v(s, ot, s->T0, s->A0);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_EDI);
    /* Trigger an I/O breakpoint check for this port if enabled.  */
    gen_bpt_io(s, s->tmp2_i32, ot);
}
1220 
/* OUTS: write the 'ot'-sized value at [ESI] to port DX, then step ESI.  */
static inline void gen_outs(DisasContext *s, MemOp ot)
{
    gen_string_movl_A0_ESI(s);
    gen_op_ld_v(s, ot, s->T0, s->A0);

    /* The port number is the low 16 bits of EDX.  */
    tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
    tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
    tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
    gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
    gen_op_movl_T0_Dshift(s, ot);
    gen_op_add_reg_T0(s, s->aflag, R_ESI);
    /* Trigger an I/O breakpoint check for this port if enabled.  */
    gen_bpt_io(s, s->tmp2_i32, ot);
}
1234 
/* same method as Valgrind : we generate jumps to current or next
   instruction */
/*
 * GEN_REPZ(op): define gen_repz_<op>, one REP iteration of the string
 * insn: exit to next_eip when ECX == 0, run gen_<op>, decrement ECX,
 * optionally exit early when ECX reaches zero (repz_opt), then loop by
 * jumping back to cur_eip.
 */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                 target_ulong cur_eip, target_ulong next_eip) \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

/*
 * GEN_REPZ2(op): as GEN_REPZ, but additionally tests ZF after the
 * iteration (for REPZ/REPNZ scas/cmps); 'nz' selects the REPNZ sense.
 */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,              \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *l2;                                                             \
    gen_update_cc_op(s);                                                      \
    l2 = gen_jz_ecx_string(s, next_eip);                                      \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2);                                 \
    if (s->repz_opt)                                                          \
        gen_op_jz_ecx(s, s->aflag, l2);                                       \
    gen_jmp(s, cur_eip);                                                      \
}

/* Instantiate the REP/REPZ/REPNZ wrappers.  */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1278 
1279 static void gen_helper_fp_arith_ST0_FT0(int op)
1280 {
1281     switch (op) {
1282     case 0:
1283         gen_helper_fadd_ST0_FT0(cpu_env);
1284         break;
1285     case 1:
1286         gen_helper_fmul_ST0_FT0(cpu_env);
1287         break;
1288     case 2:
1289         gen_helper_fcom_ST0_FT0(cpu_env);
1290         break;
1291     case 3:
1292         gen_helper_fcom_ST0_FT0(cpu_env);
1293         break;
1294     case 4:
1295         gen_helper_fsub_ST0_FT0(cpu_env);
1296         break;
1297     case 5:
1298         gen_helper_fsubr_ST0_FT0(cpu_env);
1299         break;
1300     case 6:
1301         gen_helper_fdiv_ST0_FT0(cpu_env);
1302         break;
1303     case 7:
1304         gen_helper_fdivr_ST0_FT0(cpu_env);
1305         break;
1306     }
1307 }
1308 
/* NOTE the exception in "r" op ordering */
/* Emit the x87 helper for op 'op' acting on ST(opreg) and ST0.  The
   sub/div cases are swapped relative to the ST0/FT0 table (the "r"
   ordering exception above); ops 2 and 3 generate nothing here.  */
static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
{
    TCGv_i32 tmp = tcg_const_i32(opreg);
    switch (op) {
    case 0:
        gen_helper_fadd_STN_ST0(cpu_env, tmp);
        break;
    case 1:
        gen_helper_fmul_STN_ST0(cpu_env, tmp);
        break;
    case 4:
        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
        break;
    case 5:
        gen_helper_fsub_STN_ST0(cpu_env, tmp);
        break;
    case 6:
        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
        break;
    case 7:
        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
        break;
    }
}
1334 
/* Raise exception 'trapno' at guest EIP 'cur_eip': spill cc_op and EIP
   to env, call the raise helper, and terminate the TB.  */
static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
    s->base.is_jmp = DISAS_NORETURN;
}
1342 
/* Generate #UD for the current instruction.  The assumption here is that
   the instruction is known, but it isn't allowed in the current cpu mode.
   The faulting EIP is the start of the current insn (pc_start).  */
static void gen_illegal_opcode(DisasContext *s)
{
    gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
}
1349 
/* Generate #GP for the current instruction, faulting at its start.  */
static void gen_exception_gpf(DisasContext *s)
{
    gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
}
1355 
1356 /* Check for cpl == 0; if not, raise #GP and return false. */
1357 static bool check_cpl0(DisasContext *s)
1358 {
1359     if (CPL(s) == 0) {
1360         return true;
1361     }
1362     gen_exception_gpf(s);
1363     return false;
1364 }
1365 
1366 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1367 static bool check_vm86_iopl(DisasContext *s)
1368 {
1369     if (!VM86(s) || IOPL(s) == 3) {
1370         return true;
1371     }
1372     gen_exception_gpf(s);
1373     return false;
1374 }
1375 
1376 /* Check for iopl allowing access; if not, raise #GP and return false. */
1377 static bool check_iopl(DisasContext *s)
1378 {
1379     if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1380         return true;
1381     }
1382     gen_exception_gpf(s);
1383     return false;
1384 }
1385 
/* if d == OR_TMP0, it means memory operand (address in A0) */
/* Generate an arithmetic/logic op of size 'ot' with source in T1.
   With PREFIX_LOCK and a memory destination, the read-modify-write is
   done with a single atomic TCG op; the result is left in T0 and the
   lazy CC state is updated.  */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        /* The atomic paths below do their own load as part of the RMW.  */
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        gen_compute_eflags_c(s1, s1->tmp4); /* tmp4 = carry in (0/1) */
        if (s1->prefix & PREFIX_LOCK) {
            /* Fold the carry into the addend so one atomic add suffices.  */
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        gen_compute_eflags_c(s1, s1->tmp4); /* tmp4 = borrow in (0/1) */
        if (s1->prefix & PREFIX_LOCK) {
            /* dest - (T1 + borrow) == dest + -(T1 + borrow).  */
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            /* Atomically add -T1; the returned old value goes to cc_srcT
               and the result is recomputed from it for T0.  */
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
        /* Unrecognized ops fall through and are treated as AND.  */
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /* Compare only: set CC state for T0 - T1, no writeback.  */
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1496 
/* if d == OR_TMP0, it means memory operand (address in A0) */
/* INC (c > 0) or DEC (c < 0) of size ot.  CF is preserved: the current
   carry is captured into cc_src before switching to CC_OP_INC/DEC.  */
static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
{
    if (s1->prefix & PREFIX_LOCK) {
        if (d != OR_TMP0) {
            /* Lock prefix when destination is not memory */
            gen_illegal_opcode(s1);
            return;
        }
        /* Atomic +1/-1 on the memory operand.  */
        tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
        tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                    s1->mem_index, ot | MO_LE);
    } else {
        if (d != OR_TMP0) {
            gen_op_mov_v_reg(s1, ot, s1->T0, d);
        } else {
            gen_op_ld_v(s1, ot, s1->T0, s1->A0);
        }
        tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
        gen_op_st_rm_T0_A0(s1, ot, d);
    }

    /* Save the unchanged carry flag into cc_src.  */
    gen_compute_eflags_c(s1, cpu_cc_src);
    tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
    set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
}
1523 
/* Update CC_DST/CC_SRC/CC_OP after a shift of size ot: 'result' is the
   shifted value, 'shm1' the value shifted by count-1 (it carries the
   last bit shifted out), and 'count' the masked shift count.  A zero
   count must leave the flags untouched, hence the movconds below.  */
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1568 
/* SHL/SHR/SAR of size ot on operand op1 by the variable count in T1.
   is_arith selects SAR over SHR.  tmp0 receives the value shifted by
   count-1; gen_shift_flags uses it for the carry-out.  */
static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
                            int is_right, int is_arith)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    /* Mask the count to the operand-size modulus, as hardware does.  */
    tcg_gen_andi_tl(s->T1, s->T1, mask);
    tcg_gen_subi_tl(s->tmp0, s->T1, 1);

    if (is_right) {
        if (is_arith) {
            gen_exts(ot, s->T0);
            tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_sar_tl(s->T0, s->T0, s->T1);
        } else {
            gen_extu(ot, s->T0);
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_tl(s->T0, s->T0, s->T1);
        }
    } else {
        tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
        tcg_gen_shl_tl(s->T0, s->T0, s->T1);
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
}
1604 
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                             int is_right, int is_arith)
1607 {
1608     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609 
1610     /* load */
1611     if (op1 == OR_TMP0)
1612         gen_op_ld_v(s, ot, s->T0, s->A0);
1613     else
1614         gen_op_mov_v_reg(s, ot, s->T0, op1);
1615 
1616     op2 &= mask;
1617     if (op2 != 0) {
1618         if (is_right) {
1619             if (is_arith) {
1620                 gen_exts(ot, s->T0);
1621                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623             } else {
1624                 gen_extu(ot, s->T0);
1625                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627             }
1628         } else {
1629             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630             tcg_gen_shli_tl(s->T0, s->T0, op2);
1631         }
1632     }
1633 
1634     /* store */
1635     gen_op_st_rm_T0_A0(s, ot, op1);
1636 
1637     /* update eflags if non zero shift */
1638     if (op2 != 0) {
1639         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642     }
1643 }
1644 
/* ROL/ROR of size ot on operand op1 by the variable count in T1.
   Computes C into CC_DST and O into CC_SRC2, keeping the existing
   CC_OP_EFLAGS state when the count turns out to be zero.  */
static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    /* O is computed as C XOR (top bit of result).  */
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1729 
/* ROL/ROR of size ot on operand op1 by immediate count op2.  8/16-bit
   rotates are synthesized from a shift pair; flags are only updated
   when the masked count is non-zero.  */
static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            mask = 7;
            goto do_shifts;
        case MO_16:
            mask = 15;
        do_shifts:
            /* Build the rotate as (x << shift) | (x >> (width - shift)),
               normalizing ROR to an equivalent left-rotate count.  */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862 
/* XXX: add faster immediate case */
/*
 * Generate SHLD/SHRD (double-precision shift): shift T0 by COUNT_IN,
 * filling the vacated bits from T1.  OP1 selects the destination
 * (register index, or OR_TMP0 for the memory operand at A0);
 * IS_RIGHT selects SHRD vs SHLD.  Flags are updated via
 * gen_shift_flags at the end.
 */
static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
                             bool is_right, TCGv count_in)
{
    /* Count is masked to 5 bits (6 for 64-bit ops), per the ISA.  */
    target_ulong mask = (ot == MO_64 ? 63 : 31);
    TCGv count;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    count = tcg_temp_new();
    tcg_gen_andi_tl(count, count_in, mask);

    switch (ot) {
    case MO_16:
        /* Note: we implement the Intel behaviour for shift count > 16.
           This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
           portion by constructing it as a 32-bit value.  */
        if (is_right) {
            tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
            tcg_gen_mov_tl(s->T1, s->T0);
            tcg_gen_mov_tl(s->T0, s->tmp0);
        } else {
            tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
        }
        /*
         * If TARGET_X86_64 defined then fall through into MO_32 case,
         * otherwise fall through default case.
         */
    case MO_32:
#ifdef TARGET_X86_64
        /* Concatenate the two 32-bit values and use a 64-bit shift.  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
            /* tmp0 keeps the (count-1)-shifted value for CF computation.  */
            tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shr_i64(s->T0, s->T0, count);
        } else {
            tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
            tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
            tcg_gen_shl_i64(s->T0, s->T0, count);
            tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
            tcg_gen_shri_i64(s->T0, s->T0, 32);
        }
        break;
#endif
    default:
        /* Generic path: combine two single shifts in opposite directions.  */
        tcg_gen_subi_tl(s->tmp0, count, 1);
        if (is_right) {
            tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shr_tl(s->T0, s->T0, count);
            tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
        } else {
            tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
            if (ot == MO_16) {
                /* Only needed if count > 16, for Intel behaviour.  */
                tcg_gen_subfi_tl(s->tmp4, 33, count);
                tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
                tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
            }

            tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
            tcg_gen_shl_tl(s->T0, s->T0, count);
            tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
        }
        /* A zero count must leave T0 unchanged: zap the T1 contribution,
           since the (mask+1)-bit shift above would be out of range.  */
        tcg_gen_movi_tl(s->tmp4, 0);
        tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
                           s->tmp4, s->T1);
        tcg_gen_or_tl(s->T0, s->T0, s->T1);
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
    tcg_temp_free(count);
}
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
#define X86_MAX_INSN_LENGTH 15

/*
 * Advance the decode pointer s->pc by NUM_BYTES and return its old
 * value.  If the instruction would exceed the architectural 15-byte
 * limit, bail out of the decoder via siglongjmp(s->jmpbuf) — after
 * probing the following page so that a fault there takes precedence.
 */
static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
{
    uint64_t pc = s->pc;

    s->pc += num_bytes;
    if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
        /* If the instruction's 16th byte is on a different page than the 1st, a
         * page fault on the second page wins over the general protection fault
         * caused by the instruction being too long.
         * This can happen even if the operand is only one byte long!
         */
        if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
            /* The volatile load is only for its fault side effect.  */
            volatile uint8_t unused =
                cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
            (void) unused;
        }
        siglongjmp(s->jmpbuf, 1);
    }

    return pc;
}
2028 
/* Fetch one byte from the instruction stream and advance s->pc.  */
static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
{
    return translator_ldub(env, advance_pc(env, s, 1));
}

/* Fetch a signed 16-bit word from the instruction stream.  */
static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
{
    return translator_ldsw(env, advance_pc(env, s, 2));
}

/* Fetch an unsigned 16-bit word from the instruction stream.  */
static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
{
    return translator_lduw(env, advance_pc(env, s, 2));
}

/* Fetch a 32-bit longword from the instruction stream.  */
static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
{
    return translator_ldl(env, advance_pc(env, s, 4));
}

#ifdef TARGET_X86_64
/* Fetch a 64-bit quadword from the instruction stream (64-bit target only).  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    return translator_ldq(env, advance_pc(env, s, 8));
}
#endif
2055 
/* Decompose an address.  */

typedef struct AddressParts {
    int def_seg;        /* default segment register (R_DS or R_SS) */
    int base;           /* base register; -1 = none, -2 = RIP-relative */
    int index;          /* index register; -1 = none */
    int scale;          /* index scale as a left-shift amount (0..3) */
    target_long disp;   /* constant displacement */
} AddressParts;
2065 
/*
 * Decode the ModRM byte MODRM (plus any SIB and displacement bytes,
 * which are consumed from the instruction stream) into an AddressParts
 * description.  No TCG code is emitted here.
 */
static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
                                    int modrm)
{
    int def_seg, base, index, scale, mod, rm;
    target_long disp;
    bool havesib;

    def_seg = R_DS;
    index = -1;
    scale = 0;
    disp = 0;

    mod = (modrm >> 6) & 3;
    rm = modrm & 7;
    base = rm | REX_B(s);

    if (mod == 3) {
        /* Normally filtered out earlier, but including this path
           simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
        goto done;
    }

    switch (s->aflag) {
    case MO_64:
    case MO_32:
        havesib = 0;
        if (rm == 4) {
            /* rm == 4 means a SIB byte follows.  */
            int code = x86_ldub_code(env, s);
            scale = (code >> 6) & 3;
            index = ((code >> 3) & 7) | REX_X(s);
            if (index == 4) {
                index = -1;  /* no index */
            }
            base = (code & 7) | REX_B(s);
            havesib = 1;
        }

        switch (mod) {
        case 0:
            if ((base & 7) == 5) {
                /* mod=0, base=5: disp32 with no base register;
                   in 64-bit mode without a SIB byte it is RIP-relative.  */
                base = -1;
                disp = (int32_t)x86_ldl_code(env, s);
                if (CODE64(s) && !havesib) {
                    base = -2;
                    disp += s->pc + s->rip_offset;
                }
            }
            break;
        case 1:
            disp = (int8_t)x86_ldub_code(env, s);
            break;
        default:
        case 2:
            disp = (int32_t)x86_ldl_code(env, s);
            break;
        }

        /* For correct popl handling with esp.  */
        if (base == R_ESP && s->popl_esp_hack) {
            disp += s->popl_esp_hack;
        }
        /* EBP/ESP-based addressing defaults to the stack segment.  */
        if (base == R_EBP || base == R_ESP) {
            def_seg = R_SS;
        }
        break;

    case MO_16:
        /* 16-bit addressing: fixed base/index register pairs per rm.  */
        if (mod == 0) {
            if (rm == 6) {
                /* mod=0, rm=6: disp16 with no base register.  */
                base = -1;
                disp = x86_lduw_code(env, s);
                break;
            }
        } else if (mod == 1) {
            disp = (int8_t)x86_ldub_code(env, s);
        } else {
            disp = (int16_t)x86_lduw_code(env, s);
        }

        switch (rm) {
        case 0:
            base = R_EBX;
            index = R_ESI;
            break;
        case 1:
            base = R_EBX;
            index = R_EDI;
            break;
        case 2:
            base = R_EBP;
            index = R_ESI;
            def_seg = R_SS;
            break;
        case 3:
            base = R_EBP;
            index = R_EDI;
            def_seg = R_SS;
            break;
        case 4:
            base = R_ESI;
            break;
        case 5:
            base = R_EDI;
            break;
        case 6:
            base = R_EBP;
            def_seg = R_SS;
            break;
        default:
        case 7:
            base = R_EBX;
            break;
        }
        break;

    default:
        tcg_abort();
    }

 done:
    return (AddressParts){ def_seg, base, index, scale, disp };
}
2188 
/* Compute the address, with a minimum number of TCG ops.  */
/* NOTE(review): the returned TCGv may alias a guest register in
   cpu_regs[] when no arithmetic is needed — callers presumably must
   not modify it in place; confirm at call sites.  */
static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
{
    TCGv ea = NULL;

    if (a.index >= 0) {
        if (a.scale == 0) {
            /* Unscaled index: reuse the register directly.  */
            ea = cpu_regs[a.index];
        } else {
            tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
            ea = s->A0;
        }
        if (a.base >= 0) {
            tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
            ea = s->A0;
        }
    } else if (a.base >= 0) {
        ea = cpu_regs[a.base];
    }
    if (!ea) {
        /* Neither base nor index: pure displacement.  */
        tcg_gen_movi_tl(s->A0, a.disp);
        ea = s->A0;
    } else if (a.disp != 0) {
        tcg_gen_addi_tl(s->A0, ea, a.disp);
        ea = s->A0;
    }

    return ea;
}
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
/* Decode and discard a ModRM memory operand: consumes any SIB and
   displacement bytes without emitting code (used for multi-byte NOPs).  */
static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
{
    (void)gen_lea_modrm_0(env, s, modrm);
}
2230 
/* Used for BNDCL, BNDCU, BNDCN.  */
/* Compute the ModRM effective address, zero-extend it (to 32 bits
   outside 64-bit mode), compare it with bound BNDV using COND, and
   pass the result to the bndck helper.  */
static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
                      TCGCond cond, TCGv_i64 bndv)
{
    TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));

    tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
    if (!CODE64(s)) {
        tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
    }
    tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
    tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
    gen_helper_bndck(cpu_env, s->tmp2_i32);
}
2245 
/* Used for LEA and MOV AX, mem: apply the default DS segment (or the
   active segment override) to the address already in A0.  */
static void gen_add_A0_ds_seg(DisasContext *s)
{
    gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
}
2251 
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253    OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
/* Fetch an immediate operand of size OT from the instruction stream.
   Note that MO_64 still reads only 4 bytes.  */
static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
{
    uint32_t ret;

    switch (ot) {
    case MO_8:
        ret = x86_ldub_code(env, s);
        break;
    case MO_16:
        ret = x86_lduw_code(env, s);
        break;
    case MO_32:
#ifdef TARGET_X86_64
    case MO_64:
#endif
        ret = x86_ldl_code(env, s);
        break;
    default:
        tcg_abort();
    }
    return ret;
}
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
2316 
/* Whether a direct TB-to-TB jump to PC may be used: only when PC is
   on the same page as the TB start or the current instruction.
   Always allowed for user-mode emulation.  */
static inline bool use_goto_tb(DisasContext *s, target_ulong pc)
{
#ifndef CONFIG_USER_ONLY
    return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) ||
           (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK);
#else
    return true;
#endif
}
2326 
/* Emit a jump to EIP as chain slot TB_NUM: a direct goto_tb when
   permitted, otherwise an indirect jump via gen_jr.  */
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
    target_ulong pc = s->cs_base + eip;

    if (use_goto_tb(s, pc))  {
        /* jump to same page: we can use a direct jump */
        tcg_gen_goto_tb(tb_num);
        gen_jmp_im(s, eip);
        tcg_gen_exit_tb(s->base.tb, tb_num);
        s->base.is_jmp = DISAS_NORETURN;
    } else {
        /* jump to another page */
        gen_jmp_im(s, eip);
        gen_jr(s, s->tmp0);
    }
}
2343 
/* Emit a conditional jump on condition code B: taken goes to VAL,
   not-taken falls through to NEXT_EIP.  With jmp_opt enabled both
   edges use TB chaining; otherwise EIP is selected and a generic
   end-of-block is generated.  */
static inline void gen_jcc(DisasContext *s, int b,
                           target_ulong val, target_ulong next_eip)
{
    TCGLabel *l1, *l2;

    if (s->jmp_opt) {
        l1 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_goto_tb(s, 0, next_eip);

        gen_set_label(l1);
        gen_goto_tb(s, 1, val);
    } else {
        l1 = gen_new_label();
        l2 = gen_new_label();
        gen_jcc1(s, b, l1);

        gen_jmp_im(s, next_eip);
        tcg_gen_br(l2);

        gen_set_label(l1);
        gen_jmp_im(s, val);
        gen_set_label(l2);
        gen_eob(s);
    }
}
2371 
/* CMOVcc: load the r/m operand into T0, then move it into register
   REG only when condition code B holds (otherwise REG keeps its
   value).  Implemented branchlessly with movcond.  */
static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
                        int modrm, int reg)
{
    CCPrepare cc;

    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

    cc = gen_prepare_cc(s, b, s->T1);
    if (cc.mask != -1) {
        /* Isolate the relevant flag bits in a fresh temporary.  */
        TCGv t0 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cc.reg, cc.mask);
        cc.reg = t0;
    }
    if (!cc.use_reg2) {
        cc.reg2 = tcg_const_tl(cc.imm);
    }

    tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
                       s->T0, cpu_regs[reg]);
    gen_op_mov_reg_v(s, ot, reg, s->T0);

    /* Release the temporaries allocated above.  */
    if (cc.mask != -1) {
        tcg_temp_free(cc.reg);
    }
    if (!cc.use_reg2) {
        tcg_temp_free(cc.reg2);
    }
}
2400 
/* Load the 16-bit selector of SEG_REG into T0.  */
static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
{
    tcg_gen_ld32u_tl(s->T0, cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
}

/* Store T0 as the selector of SEG_REG without a descriptor load
   (real/VM86-mode semantics): the segment base is selector << 4.  */
static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
{
    tcg_gen_ext16u_tl(s->T0, s->T0);
    tcg_gen_st32_tl(s->T0, cpu_env,
                    offsetof(CPUX86State,segs[seg_reg].selector));
    tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
}
2414 
/* move T0 to seg_reg and compute if the CPU state may change. Never
   call this function with seg_reg == R_CS */
static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
{
    if (PE(s) && !VM86(s)) {
        /* Protected mode: full descriptor load via helper (may fault).  */
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
        /* abort translation because the addseg value may change or
           because ss32 may change. For R_SS, translation must always
           stop as a special handling must be done to disable hardware
           interrupts for the next instruction */
        if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    } else {
        /* Real/VM86 mode: simple selector store, base = selector << 4.  */
        gen_op_movl_seg_T0_vm(s, seg_reg);
        if (seg_reg == R_SS) {
            s->base.is_jmp = DISAS_TOO_MANY;
        }
    }
}
2436 
2437 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2438 {
2439     /* no SVM activated; fast case */
2440     if (likely(!GUEST(s))) {
2441         return;
2442     }
2443     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2444 }
2445 
/* Add ADDEND to ESP/SP at the current stack-pointer width.  */
static inline void gen_stack_update(DisasContext *s, int addend)
{
    gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
}
2450 
/* Generate a push. It depends on ss32, addseg and dflag.  */
static void gen_push_v(DisasContext *s, TCGv val)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);
    MemOp a_ot = mo_stacksize(s);
    int size = 1 << d_ot;
    TCGv new_esp = s->A0;

    tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);

    if (!CODE64(s)) {
        if (ADDSEG(s)) {
            /* A0 will be clobbered by the segment-base addition below,
               so keep the unadjusted new ESP in tmp4.  */
            new_esp = s->tmp4;
            tcg_gen_mov_tl(new_esp, s->A0);
        }
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
    }

    /* Store first, then commit ESP — preserves state on a faulting store.  */
    gen_op_st_v(s, d_ot, val, s->A0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
}
2472 
/* two step pop is necessary for precise exceptions */
/* Load the top of stack into T0 (without adjusting ESP); returns the
   operand size so the caller can do gen_pop_update afterwards.  */
static MemOp gen_pop_T0(DisasContext *s)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);

    gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    return d_ot;
}

/* Second half of a pop: discard the popped bytes from the stack.  */
static inline void gen_pop_update(DisasContext *s, MemOp ot)
{
    gen_stack_update(s, 1 << ot);
}

/* Compute the SS-adjusted address of the top of stack into A0.  */
static inline void gen_stack_A0(DisasContext *s)
{
    gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
}
2493 
/* PUSHA: push all eight general registers (EAX..EDI) and then update
   ESP once at the end.  */
static void gen_pusha(DisasContext *s)
{
    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
    MemOp d_ot = s->dflag;
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
        gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
    }

    gen_stack_update(s, -8 * size);
}

/* POPA: pop all general registers (skipping ESP) and then update ESP
   once at the end.  */
static void gen_popa(DisasContext *s)
{
    MemOp s_ot = SS32(s) ? MO_32 : MO_16;
    MemOp d_ot = s->dflag;
    int size = 1 << d_ot;
    int i;

    for (i = 0; i < 8; i++) {
        /* ESP is not reloaded */
        if (7 - i == R_ESP) {
            continue;
        }
        tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
        gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
        gen_op_ld_v(s, d_ot, s->T0, s->A0);
        gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
    }

    gen_stack_update(s, 8 * size);
}
2530 
/* ENTER: push EBP, optionally copy LEVEL-1 frame pointers from the
   old frame, set EBP to the new frame and reserve ESP_ADDEND bytes.  */
static void gen_enter(DisasContext *s, int esp_addend, int level)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);
    MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
    int size = 1 << d_ot;

    /* Push BP; compute FrameTemp into T1.  */
    tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
    gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
    gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);

    /* The architectural nesting level is taken modulo 32.  */
    level &= 31;
    if (level != 0) {
        int i;

        /* Copy level-1 pointers from the previous frame.  */
        for (i = 1; i < level; ++i) {
            tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_ld_v(s, d_ot, s->tmp0, s->A0);

            tcg_gen_subi_tl(s->A0, s->T1, size * i);
            gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
            gen_op_st_v(s, d_ot, s->tmp0, s->A0);
        }

        /* Push the current FrameTemp as the last level.  */
        tcg_gen_subi_tl(s->A0, s->T1, size * level);
        gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
        gen_op_st_v(s, d_ot, s->T1, s->A0);
    }

    /* Copy the FrameTemp value to EBP.  */
    gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);

    /* Compute the final value of ESP.  */
    tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2570 
/* LEAVE: reload EBP from the frame at [EBP], then point ESP just past
   the popped slot.  */
static void gen_leave(DisasContext *s)
{
    MemOp d_ot = mo_pushpop(s, s->dflag);
    MemOp a_ot = mo_stacksize(s);

    gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
    gen_op_ld_v(s, d_ot, s->T0, s->A0);

    tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);

    gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
    gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
}
2584 
/* Similarly, except that the assumption here is that we don't decode
   the instruction at all -- either a missing opcode, an unimplemented
   feature, or just a bogus instruction stream.  */
static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
{
    gen_illegal_opcode(s);

    /* With -d unimp, also dump the raw instruction bytes to the log.  */
    if (qemu_loglevel_mask(LOG_UNIMP)) {
        FILE *logfile = qemu_log_lock();
        target_ulong pc = s->pc_start, end = s->pc;

        qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
        for (; pc < end; ++pc) {
            qemu_log(" %02x", cpu_ldub_code(env, pc));
        }
        qemu_log("\n");
        qemu_log_unlock(logfile);
    }
}
2604 
/* an interrupt is different from an exception because of the
   privilege checks */
/* Raise INTNO at CUR_EIP; the helper also receives the instruction
   length (NEXT_EIP - CUR_EIP).  Ends the block.  */
static void gen_interrupt(DisasContext *s, int intno,
                          target_ulong cur_eip, target_ulong next_eip)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, cur_eip);
    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                               tcg_const_i32(next_eip - cur_eip));
    s->base.is_jmp = DISAS_NORETURN;
}
2616 
/* Sync EIP and cc_op, then invoke the debug helper; ends the block.  */
static void gen_debug(DisasContext *s)
{
    gen_update_cc_op(s);
    gen_jmp_im(s, s->base.pc_next - s->cs_base);
    gen_helper_debug(cpu_env);
    s->base.is_jmp = DISAS_NORETURN;
}
2624 
2625 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2626 {
2627     if ((s->flags & mask) == 0) {
2628         TCGv_i32 t = tcg_temp_new_i32();
2629         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2630         tcg_gen_ori_i32(t, t, mask);
2631         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2632         tcg_temp_free_i32(t);
2633         s->flags |= mask;
2634     }
2635 }
2636 
2637 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2638 {
2639     if (s->flags & mask) {
2640         TCGv_i32 t = tcg_temp_new_i32();
2641         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2642         tcg_gen_andi_i32(t, t, ~mask);
2643         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2644         tcg_temp_free_i32(t);
2645         s->flags &= ~mask;
2646     }
2647 }
2648 
/* Clear BND registers during legacy branches.  */
static void gen_bnd_jmp(DisasContext *s)
{
    /* Clear the registers only if BND prefix is missing, MPX is enabled,
       and if the BNDREGs are known to be in use (non-zero) already.
       The helper itself will check BNDPRESERVE at runtime.  */
    if ((s->prefix & PREFIX_REPNZ) == 0
        && (s->flags & HF_MPX_EN_MASK) != 0
        && (s->flags & HF_MPX_IU_MASK) != 0) {
        gen_helper_bnd_jmp(cpu_env);
    }
}
2661 
/* Generate an end of block. Trace exception is also generated if needed.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
   If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
   S->TF.  This is used by the syscall/sysret insns.
   If JR, the jump goes through lookup_and_goto_ptr (indirect jump).  */
static void
do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
{
    gen_update_cc_op(s);

    /* If several instructions disable interrupts, only the first does it.  */
    if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
        gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
    } else {
        gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
    }

    if (s->base.tb->flags & HF_RF_MASK) {
        gen_helper_reset_rf(cpu_env);
    }
    if (s->base.singlestep_enabled) {
        /* gdbstub single-step takes priority over everything else.  */
        gen_helper_debug(cpu_env);
    } else if (recheck_tf) {
        gen_helper_rechecking_single_step(cpu_env);
        tcg_gen_exit_tb(NULL, 0);
    } else if (s->flags & HF_TF_MASK) {
        /* Guest TF set: raise the single-step trap.  */
        gen_helper_single_step(cpu_env);
    } else if (jr) {
        tcg_gen_lookup_and_goto_ptr();
    } else {
        tcg_gen_exit_tb(NULL, 0);
    }
    s->base.is_jmp = DISAS_NORETURN;
}
2695 
/* Convenience wrapper: end of block without the indirect-jump variant.  */
static inline void
gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
{
    do_gen_eob_worker(s, inhibit, recheck_tf, false);
}

/* End of block.
   If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
{
    gen_eob_worker(s, inhibit, false);
}

/* End of block, resetting the inhibit irq flag.  */
static void gen_eob(DisasContext *s)
{
    gen_eob_worker(s, false, false);
}

/* Jump to register */
/* NOTE(review): DEST is unused here; the target EIP is expected to be
   stored before the call (see gen_goto_tb) and the jump goes through
   lookup_and_goto_ptr — confirm at call sites.  */
static void gen_jr(DisasContext *s, TCGv dest)
{
    do_gen_eob_worker(s, false, false, true);
}
2720 
/* generate a jump to eip. No segment change must happen before as a
   direct call to the next block may occur */
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
{
    /* Spill the flag computation; cc_op becomes dynamic across the jump.  */
    gen_update_cc_op(s);
    set_cc_op(s, CC_OP_DYNAMIC);
    if (s->jmp_opt) {
        gen_goto_tb(s, tb_num, eip);
    } else {
        gen_jmp_im(s, eip);
        gen_eob(s);
    }
}

/* Unconditional jump to EIP via chain slot 0.  */
static void gen_jmp(DisasContext *s, target_ulong eip)
{
    gen_jmp_tb(s, eip, 0);
}
2739 
/* Load 64 bits from guest address A0 into the env field at OFFSET.  */
static inline void gen_ldq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
}

/* Store 64 bits from the env field at OFFSET to guest address A0.  */
static inline void gen_stq_env_A0(DisasContext *s, int offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
}

/* Load a 128-bit XMM value from guest address A0 into the ZMMReg at
   OFFSET, as two little-endian 64-bit halves.  */
static inline void gen_ldo_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
}

/* Store a 128-bit XMM value from the ZMMReg at OFFSET to guest
   address A0, as two little-endian 64-bit halves.  */
static inline void gen_sto_env_A0(DisasContext *s, int offset)
{
    int mem_index = s->mem_index;
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ);
    tcg_gen_addi_tl(s->tmp0, s->A0, 8);
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ);
}

/* Copy a 128-bit value between two ZMMReg env offsets.  */
static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
}

/* Copy a 64-bit value between two env offsets.  */
static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}

/* Copy a 32-bit value between two env offsets.  */
static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
{
    tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
    tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
}

/* Zero the 64-bit env field at D_OFFSET.  */
static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
{
    tcg_gen_movi_i64(s->tmp1_i64, 0);
    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
}
2797 
/*
 * Helper-function signature shapes used by the SSE dispatch tables.
 * Suffix letters name the return/argument kinds visible below:
 * e = env pointer, p = register pointer, i = TCGv_i32, l = TCGv_i64,
 * t = target-width TCGv, 0 = void return.
 */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);

/* Sentinel table entries: SSE_SPECIAL marks opcodes decoded by hand,
   SSE_DUMMY marks entries with no helper (e.g. femms).  */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                     gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2815 
/*
 * Dispatch table for two-byte 0x0f SSE/MMX opcodes, indexed first by the
 * second opcode byte and then by the mandatory prefix:
 *   [0] = no prefix, [1] = 0x66 (PREFIX_DATA),
 *   [2] = 0xf3 (PREFIX_REPZ), [3] = 0xf2 (PREFIX_REPNZ)
 * -- matching the computation of b1 in gen_sse().  A NULL entry is an
 * invalid encoding for that prefix.
 */
static const SSEFunc_0_epp sse_op_table1[256][4] = {
    /* 3DNow! extensions */
    [0x0e] = { SSE_DUMMY }, /* femms */
    [0x0f] = { SSE_DUMMY }, /* pf... */
    /* pure SSE operations */
    [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
    [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
    [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
    [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
    [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
    [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
    [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */

    [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
    [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
    [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
    [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
    [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
    [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
    [0x2f] = { gen_helper_comiss, gen_helper_comisd },
    [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
    [0x51] = SSE_FOP(sqrt),
    [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
    [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
    [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
    [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
    [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
    [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
    [0x58] = SSE_FOP(add),
    [0x59] = SSE_FOP(mul),
    [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
               gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
    [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
    [0x5c] = SSE_FOP(sub),
    [0x5d] = SSE_FOP(min),
    [0x5e] = SSE_FOP(div),
    [0x5f] = SSE_FOP(max),

    /* Placeholder: the actual cmp* helper is picked from sse_op_table4
       using the immediate byte. */
    [0xc2] = SSE_FOP(cmpeq),
    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */

    /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
    [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },

    /* MMX ops and their SSE extensions */
    [0x60] = MMX_OP2(punpcklbw),
    [0x61] = MMX_OP2(punpcklwd),
    [0x62] = MMX_OP2(punpckldq),
    [0x63] = MMX_OP2(packsswb),
    [0x64] = MMX_OP2(pcmpgtb),
    [0x65] = MMX_OP2(pcmpgtw),
    [0x66] = MMX_OP2(pcmpgtl),
    [0x67] = MMX_OP2(packuswb),
    [0x68] = MMX_OP2(punpckhbw),
    [0x69] = MMX_OP2(punpckhwd),
    [0x6a] = MMX_OP2(punpckhdq),
    [0x6b] = MMX_OP2(packssdw),
    [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
    [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
    [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
    [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movdqu */
    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
    [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
    [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
    [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
    [0x74] = MMX_OP2(pcmpeqb),
    [0x75] = MMX_OP2(pcmpeqw),
    [0x76] = MMX_OP2(pcmpeql),
    [0x77] = { SSE_DUMMY }, /* emms */
    [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
    [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
    [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
    [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
    [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
    [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
    [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
    [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
    [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
    [0xd1] = MMX_OP2(psrlw),
    [0xd2] = MMX_OP2(psrld),
    [0xd3] = MMX_OP2(psrlq),
    [0xd4] = MMX_OP2(paddq),
    [0xd5] = MMX_OP2(pmullw),
    [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
    [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
    [0xd8] = MMX_OP2(psubusb),
    [0xd9] = MMX_OP2(psubusw),
    [0xda] = MMX_OP2(pminub),
    [0xdb] = MMX_OP2(pand),
    [0xdc] = MMX_OP2(paddusb),
    [0xdd] = MMX_OP2(paddusw),
    [0xde] = MMX_OP2(pmaxub),
    [0xdf] = MMX_OP2(pandn),
    [0xe0] = MMX_OP2(pavgb),
    [0xe1] = MMX_OP2(psraw),
    [0xe2] = MMX_OP2(psrad),
    [0xe3] = MMX_OP2(pavgw),
    [0xe4] = MMX_OP2(pmulhuw),
    [0xe5] = MMX_OP2(pmulhw),
    [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
    [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntdq */
    [0xe8] = MMX_OP2(psubsb),
    [0xe9] = MMX_OP2(psubsw),
    [0xea] = MMX_OP2(pminsw),
    [0xeb] = MMX_OP2(por),
    [0xec] = MMX_OP2(paddsb),
    [0xed] = MMX_OP2(paddsw),
    [0xee] = MMX_OP2(pmaxsw),
    [0xef] = MMX_OP2(pxor),
    [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
    [0xf1] = MMX_OP2(psllw),
    [0xf2] = MMX_OP2(pslld),
    [0xf3] = MMX_OP2(psllq),
    [0xf4] = MMX_OP2(pmuludq),
    [0xf5] = MMX_OP2(pmaddwd),
    [0xf6] = MMX_OP2(psadbw),
    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
    [0xf8] = MMX_OP2(psubb),
    [0xf9] = MMX_OP2(psubw),
    [0xfa] = MMX_OP2(psubl),
    [0xfb] = MMX_OP2(psubq),
    [0xfc] = MMX_OP2(paddb),
    [0xfd] = MMX_OP2(paddw),
    [0xfe] = MMX_OP2(paddl),
};
2949 
/*
 * Shift-by-immediate groups (opcodes 0f 71 / 0f 72 / 0f 73), indexed by
 * 8 * (opcode - 0x71) + the modrm /reg field, then [0] = MMX form,
 * [1] = XMM (0x66-prefixed) form; see the index computation in gen_sse().
 */
static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
    [0 + 2] = MMX_OP2(psrlw),
    [0 + 4] = MMX_OP2(psraw),
    [0 + 6] = MMX_OP2(psllw),
    [8 + 2] = MMX_OP2(psrld),
    [8 + 4] = MMX_OP2(psrad),
    [8 + 6] = MMX_OP2(pslld),
    [16 + 2] = MMX_OP2(psrlq),
    [16 + 3] = { NULL, gen_helper_psrldq_xmm }, /* byte shifts: XMM only */
    [16 + 6] = MMX_OP2(psllq),
    [16 + 7] = { NULL, gen_helper_pslldq_xmm },
};
2962 
/* cvtsi2ss / cvtsi2sd with a 32-bit integer source. */
static const SSEFunc_0_epi sse_op_table3ai[] = {
    gen_helper_cvtsi2ss,
    gen_helper_cvtsi2sd
};
2967 
/* cvtsq2ss / cvtsq2sd: 64-bit integer source (REX.W forms). */
#ifdef TARGET_X86_64
static const SSEFunc_0_epl sse_op_table3aq[] = {
    gen_helper_cvtsq2ss,
    gen_helper_cvtsq2sd
};
#endif
2974 
/* Scalar float to 32-bit integer: truncating (cvtt*) and rounding
   (cvt*) variants for ss and sd sources. */
static const SSEFunc_i_ep sse_op_table3bi[] = {
    gen_helper_cvttss2si,
    gen_helper_cvtss2si,
    gen_helper_cvttsd2si,
    gen_helper_cvtsd2si
};
2981 
/* Scalar float to 64-bit integer (REX.W forms). */
#ifdef TARGET_X86_64
static const SSEFunc_l_ep sse_op_table3bq[] = {
    gen_helper_cvttss2sq,
    gen_helper_cvtss2sq,
    gen_helper_cvttsd2sq,
    gen_helper_cvtsd2sq
};
#endif
2990 
/* cmp{ps,pd,ss,sd} helpers (opcode 0f c2), indexed by the 3-bit
   comparison predicate from the immediate byte, then by prefix column. */
static const SSEFunc_0_epp sse_op_table4[8][4] = {
    SSE_FOP(cmpeq),
    SSE_FOP(cmplt),
    SSE_FOP(cmple),
    SSE_FOP(cmpunord),
    SSE_FOP(cmpneq),
    SSE_FOP(cmpnlt),
    SSE_FOP(cmpnle),
    SSE_FOP(cmpord),
};
3001 
/* 3DNow! operations (opcode 0f 0f), indexed by the trailing opcode
   suffix byte.  A NULL entry is an invalid suffix. */
static const SSEFunc_0_epp sse_op_table5[256] = {
    [0x0c] = gen_helper_pi2fw,
    [0x0d] = gen_helper_pi2fd,
    [0x1c] = gen_helper_pf2iw,
    [0x1d] = gen_helper_pf2id,
    [0x8a] = gen_helper_pfnacc,
    [0x8e] = gen_helper_pfpnacc,
    [0x90] = gen_helper_pfcmpge,
    [0x94] = gen_helper_pfmin,
    [0x96] = gen_helper_pfrcp,
    [0x97] = gen_helper_pfrsqrt,
    [0x9a] = gen_helper_pfsub,
    [0x9e] = gen_helper_pfadd,
    [0xa0] = gen_helper_pfcmpgt,
    [0xa4] = gen_helper_pfmax,
    [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
    [0xa7] = gen_helper_movq, /* pfrsqit1 */
    [0xaa] = gen_helper_pfsubr,
    [0xae] = gen_helper_pfacc,
    [0xb0] = gen_helper_pfcmpeq,
    [0xb4] = gen_helper_pfmul,
    [0xb6] = gen_helper_movq, /* pfrcpit2 */
    [0xb7] = gen_helper_pmulhrw_mmx,
    [0xbb] = gen_helper_pswapd,
    [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
};
3028 
/* Table entry pairing a helper (op[0] = MMX, op[1] = XMM form) with the
   CPUID feature bit(s) required for the instruction to be valid. */
struct SSEOpHelper_epp {
    SSEFunc_0_epp op[2];
    uint32_t ext_mask;
};
3033 
/* As SSEOpHelper_epp, but for helpers that also take an immediate. */
struct SSEOpHelper_eppi {
    SSEFunc_0_eppi op[2];
    uint32_t ext_mask;
};
3038 
/* Build SSEOpHelper entries gated on the matching CPUID feature flag.
   Except for SSSE3 (which has MMX forms), only the XMM column is set. */
#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
        CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3046 
/* Three-byte opcodes 0f 38 xx (no immediate), indexed by the third
   opcode byte; each entry carries the CPUID feature gate. */
static const struct SSEOpHelper_epp sse_op_table6[256] = {
    [0x00] = SSSE3_OP(pshufb),
    [0x01] = SSSE3_OP(phaddw),
    [0x02] = SSSE3_OP(phaddd),
    [0x03] = SSSE3_OP(phaddsw),
    [0x04] = SSSE3_OP(pmaddubsw),
    [0x05] = SSSE3_OP(phsubw),
    [0x06] = SSSE3_OP(phsubd),
    [0x07] = SSSE3_OP(phsubsw),
    [0x08] = SSSE3_OP(psignb),
    [0x09] = SSSE3_OP(psignw),
    [0x0a] = SSSE3_OP(psignd),
    [0x0b] = SSSE3_OP(pmulhrsw),
    [0x10] = SSE41_OP(pblendvb),
    [0x14] = SSE41_OP(blendvps),
    [0x15] = SSE41_OP(blendvpd),
    [0x17] = SSE41_OP(ptest),
    [0x1c] = SSSE3_OP(pabsb),
    [0x1d] = SSSE3_OP(pabsw),
    [0x1e] = SSSE3_OP(pabsd),
    [0x20] = SSE41_OP(pmovsxbw),
    [0x21] = SSE41_OP(pmovsxbd),
    [0x22] = SSE41_OP(pmovsxbq),
    [0x23] = SSE41_OP(pmovsxwd),
    [0x24] = SSE41_OP(pmovsxwq),
    [0x25] = SSE41_OP(pmovsxdq),
    [0x28] = SSE41_OP(pmuldq),
    [0x29] = SSE41_OP(pcmpeqq),
    [0x2a] = SSE41_SPECIAL, /* movntdqa */
    [0x2b] = SSE41_OP(packusdw),
    [0x30] = SSE41_OP(pmovzxbw),
    [0x31] = SSE41_OP(pmovzxbd),
    [0x32] = SSE41_OP(pmovzxbq),
    [0x33] = SSE41_OP(pmovzxwd),
    [0x34] = SSE41_OP(pmovzxwq),
    [0x35] = SSE41_OP(pmovzxdq),
    [0x37] = SSE42_OP(pcmpgtq),
    [0x38] = SSE41_OP(pminsb),
    [0x39] = SSE41_OP(pminsd),
    [0x3a] = SSE41_OP(pminuw),
    [0x3b] = SSE41_OP(pminud),
    [0x3c] = SSE41_OP(pmaxsb),
    [0x3d] = SSE41_OP(pmaxsd),
    [0x3e] = SSE41_OP(pmaxuw),
    [0x3f] = SSE41_OP(pmaxud),
    [0x40] = SSE41_OP(pmulld),
    [0x41] = SSE41_OP(phminposuw),
    [0xdb] = AESNI_OP(aesimc),
    [0xdc] = AESNI_OP(aesenc),
    [0xdd] = AESNI_OP(aesenclast),
    [0xde] = AESNI_OP(aesdec),
    [0xdf] = AESNI_OP(aesdeclast),
};
3100 
/* Three-byte opcodes 0f 3a xx (with an imm8 operand), indexed by the
   third opcode byte; each entry carries the CPUID feature gate. */
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
    [0x08] = SSE41_OP(roundps),
    [0x09] = SSE41_OP(roundpd),
    [0x0a] = SSE41_OP(roundss),
    [0x0b] = SSE41_OP(roundsd),
    [0x0c] = SSE41_OP(blendps),
    [0x0d] = SSE41_OP(blendpd),
    [0x0e] = SSE41_OP(pblendw),
    [0x0f] = SSSE3_OP(palignr),
    [0x14] = SSE41_SPECIAL, /* pextrb */
    [0x15] = SSE41_SPECIAL, /* pextrw */
    [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
    [0x17] = SSE41_SPECIAL, /* extractps */
    [0x20] = SSE41_SPECIAL, /* pinsrb */
    [0x21] = SSE41_SPECIAL, /* insertps */
    [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
    [0x40] = SSE41_OP(dpps),
    [0x41] = SSE41_OP(dppd),
    [0x42] = SSE41_OP(mpsadbw),
    [0x44] = PCLMULQDQ_OP(pclmulqdq),
    [0x60] = SSE42_OP(pcmpestrm),
    [0x61] = SSE42_OP(pcmpestri),
    [0x62] = SSE42_OP(pcmpistrm),
    [0x63] = SSE42_OP(pcmpistri),
    [0xdf] = AESNI_OP(aeskeygenassist),
};
3127 
3128 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3129                     target_ulong pc_start)
3130 {
3131     int b1, op1_offset, op2_offset, is_xmm, val;
3132     int modrm, mod, rm, reg;
3133     SSEFunc_0_epp sse_fn_epp;
3134     SSEFunc_0_eppi sse_fn_eppi;
3135     SSEFunc_0_ppi sse_fn_ppi;
3136     SSEFunc_0_eppt sse_fn_eppt;
3137     MemOp ot;
3138 
3139     b &= 0xff;
3140     if (s->prefix & PREFIX_DATA)
3141         b1 = 1;
3142     else if (s->prefix & PREFIX_REPZ)
3143         b1 = 2;
3144     else if (s->prefix & PREFIX_REPNZ)
3145         b1 = 3;
3146     else
3147         b1 = 0;
3148     sse_fn_epp = sse_op_table1[b][b1];
3149     if (!sse_fn_epp) {
3150         goto unknown_op;
3151     }
3152     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3153         is_xmm = 1;
3154     } else {
3155         if (b1 == 0) {
3156             /* MMX case */
3157             is_xmm = 0;
3158         } else {
3159             is_xmm = 1;
3160         }
3161     }
3162     /* simple MMX/SSE operation */
3163     if (s->flags & HF_TS_MASK) {
3164         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3165         return;
3166     }
3167     if (s->flags & HF_EM_MASK) {
3168     illegal_op:
3169         gen_illegal_opcode(s);
3170         return;
3171     }
3172     if (is_xmm
3173         && !(s->flags & HF_OSFXSR_MASK)
3174         && (b != 0x38 && b != 0x3a)) {
3175         goto unknown_op;
3176     }
3177     if (b == 0x0e) {
3178         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3179             /* If we were fully decoding this we might use illegal_op.  */
3180             goto unknown_op;
3181         }
3182         /* femms */
3183         gen_helper_emms(cpu_env);
3184         return;
3185     }
3186     if (b == 0x77) {
3187         /* emms */
3188         gen_helper_emms(cpu_env);
3189         return;
3190     }
3191     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3192        the static cpu state) */
3193     if (!is_xmm) {
3194         gen_helper_enter_mmx(cpu_env);
3195     }
3196 
3197     modrm = x86_ldub_code(env, s);
3198     reg = ((modrm >> 3) & 7);
3199     if (is_xmm) {
3200         reg |= REX_R(s);
3201     }
3202     mod = (modrm >> 6) & 3;
3203     if (sse_fn_epp == SSE_SPECIAL) {
3204         b |= (b1 << 8);
3205         switch(b) {
3206         case 0x0e7: /* movntq */
3207             if (mod == 3) {
3208                 goto illegal_op;
3209             }
3210             gen_lea_modrm(env, s, modrm);
3211             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3212             break;
3213         case 0x1e7: /* movntdq */
3214         case 0x02b: /* movntps */
3215         case 0x12b: /* movntps */
3216             if (mod == 3)
3217                 goto illegal_op;
3218             gen_lea_modrm(env, s, modrm);
3219             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3220             break;
3221         case 0x3f0: /* lddqu */
3222             if (mod == 3)
3223                 goto illegal_op;
3224             gen_lea_modrm(env, s, modrm);
3225             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3226             break;
3227         case 0x22b: /* movntss */
3228         case 0x32b: /* movntsd */
3229             if (mod == 3)
3230                 goto illegal_op;
3231             gen_lea_modrm(env, s, modrm);
3232             if (b1 & 1) {
3233                 gen_stq_env_A0(s, offsetof(CPUX86State,
3234                                            xmm_regs[reg].ZMM_Q(0)));
3235             } else {
3236                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3237                     xmm_regs[reg].ZMM_L(0)));
3238                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3239             }
3240             break;
3241         case 0x6e: /* movd mm, ea */
3242 #ifdef TARGET_X86_64
3243             if (s->dflag == MO_64) {
3244                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3245                 tcg_gen_st_tl(s->T0, cpu_env,
3246                               offsetof(CPUX86State, fpregs[reg].mmx));
3247             } else
3248 #endif
3249             {
3250                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3251                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3252                                  offsetof(CPUX86State,fpregs[reg].mmx));
3253                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3254                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3255             }
3256             break;
3257         case 0x16e: /* movd xmm, ea */
3258 #ifdef TARGET_X86_64
3259             if (s->dflag == MO_64) {
3260                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3261                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3262                                  offsetof(CPUX86State,xmm_regs[reg]));
3263                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3264             } else
3265 #endif
3266             {
3267                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3268                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3269                                  offsetof(CPUX86State,xmm_regs[reg]));
3270                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3271                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3272             }
3273             break;
3274         case 0x6f: /* movq mm, ea */
3275             if (mod != 3) {
3276                 gen_lea_modrm(env, s, modrm);
3277                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3278             } else {
3279                 rm = (modrm & 7);
3280                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3281                                offsetof(CPUX86State,fpregs[rm].mmx));
3282                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3283                                offsetof(CPUX86State,fpregs[reg].mmx));
3284             }
3285             break;
3286         case 0x010: /* movups */
3287         case 0x110: /* movupd */
3288         case 0x028: /* movaps */
3289         case 0x128: /* movapd */
3290         case 0x16f: /* movdqa xmm, ea */
3291         case 0x26f: /* movdqu xmm, ea */
3292             if (mod != 3) {
3293                 gen_lea_modrm(env, s, modrm);
3294                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3295             } else {
3296                 rm = (modrm & 7) | REX_B(s);
3297                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3298                             offsetof(CPUX86State,xmm_regs[rm]));
3299             }
3300             break;
3301         case 0x210: /* movss xmm, ea */
3302             if (mod != 3) {
3303                 gen_lea_modrm(env, s, modrm);
3304                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3305                 tcg_gen_st32_tl(s->T0, cpu_env,
3306                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3307                 tcg_gen_movi_tl(s->T0, 0);
3308                 tcg_gen_st32_tl(s->T0, cpu_env,
3309                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3310                 tcg_gen_st32_tl(s->T0, cpu_env,
3311                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3312                 tcg_gen_st32_tl(s->T0, cpu_env,
3313                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3314             } else {
3315                 rm = (modrm & 7) | REX_B(s);
3316                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3317                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3318             }
3319             break;
3320         case 0x310: /* movsd xmm, ea */
3321             if (mod != 3) {
3322                 gen_lea_modrm(env, s, modrm);
3323                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3324                                            xmm_regs[reg].ZMM_Q(0)));
3325                 tcg_gen_movi_tl(s->T0, 0);
3326                 tcg_gen_st32_tl(s->T0, cpu_env,
3327                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3328                 tcg_gen_st32_tl(s->T0, cpu_env,
3329                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3330             } else {
3331                 rm = (modrm & 7) | REX_B(s);
3332                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3333                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3334             }
3335             break;
3336         case 0x012: /* movlps */
3337         case 0x112: /* movlpd */
3338             if (mod != 3) {
3339                 gen_lea_modrm(env, s, modrm);
3340                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3341                                            xmm_regs[reg].ZMM_Q(0)));
3342             } else {
3343                 /* movhlps */
3344                 rm = (modrm & 7) | REX_B(s);
3345                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3346                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3347             }
3348             break;
3349         case 0x212: /* movsldup */
3350             if (mod != 3) {
3351                 gen_lea_modrm(env, s, modrm);
3352                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3353             } else {
3354                 rm = (modrm & 7) | REX_B(s);
3355                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3356                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3357                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3358                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3359             }
3360             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3361                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3362             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3363                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3364             break;
3365         case 0x312: /* movddup */
3366             if (mod != 3) {
3367                 gen_lea_modrm(env, s, modrm);
3368                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3369                                            xmm_regs[reg].ZMM_Q(0)));
3370             } else {
3371                 rm = (modrm & 7) | REX_B(s);
3372                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3373                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3374             }
3375             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3376                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3377             break;
3378         case 0x016: /* movhps */
3379         case 0x116: /* movhpd */
3380             if (mod != 3) {
3381                 gen_lea_modrm(env, s, modrm);
3382                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3383                                            xmm_regs[reg].ZMM_Q(1)));
3384             } else {
3385                 /* movlhps */
3386                 rm = (modrm & 7) | REX_B(s);
3387                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3388                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3389             }
3390             break;
3391         case 0x216: /* movshdup */
3392             if (mod != 3) {
3393                 gen_lea_modrm(env, s, modrm);
3394                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3395             } else {
3396                 rm = (modrm & 7) | REX_B(s);
3397                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3398                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3399                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3400                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3401             }
3402             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3403                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3404             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3405                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3406             break;
3407         case 0x178:
3408         case 0x378:
3409             {
3410                 int bit_index, field_length;
3411 
3412                 if (b1 == 1 && reg != 0)
3413                     goto illegal_op;
3414                 field_length = x86_ldub_code(env, s) & 0x3F;
3415                 bit_index = x86_ldub_code(env, s) & 0x3F;
3416                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3417                     offsetof(CPUX86State,xmm_regs[reg]));
3418                 if (b1 == 1)
3419                     gen_helper_extrq_i(cpu_env, s->ptr0,
3420                                        tcg_const_i32(bit_index),
3421                                        tcg_const_i32(field_length));
3422                 else
3423                     gen_helper_insertq_i(cpu_env, s->ptr0,
3424                                          tcg_const_i32(bit_index),
3425                                          tcg_const_i32(field_length));
3426             }
3427             break;
3428         case 0x7e: /* movd ea, mm */
3429 #ifdef TARGET_X86_64
3430             if (s->dflag == MO_64) {
3431                 tcg_gen_ld_i64(s->T0, cpu_env,
3432                                offsetof(CPUX86State,fpregs[reg].mmx));
3433                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3434             } else
3435 #endif
3436             {
3437                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3438                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3439                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3440             }
3441             break;
3442         case 0x17e: /* movd ea, xmm */
3443 #ifdef TARGET_X86_64
3444             if (s->dflag == MO_64) {
3445                 tcg_gen_ld_i64(s->T0, cpu_env,
3446                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3447                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3448             } else
3449 #endif
3450             {
3451                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3452                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3453                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3454             }
3455             break;
3456         case 0x27e: /* movq xmm, ea */
3457             if (mod != 3) {
3458                 gen_lea_modrm(env, s, modrm);
3459                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3460                                            xmm_regs[reg].ZMM_Q(0)));
3461             } else {
3462                 rm = (modrm & 7) | REX_B(s);
3463                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3464                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3465             }
3466             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3467             break;
3468         case 0x7f: /* movq ea, mm */
3469             if (mod != 3) {
3470                 gen_lea_modrm(env, s, modrm);
3471                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3472             } else {
3473                 rm = (modrm & 7);
3474                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3475                             offsetof(CPUX86State,fpregs[reg].mmx));
3476             }
3477             break;
3478         case 0x011: /* movups */
3479         case 0x111: /* movupd */
3480         case 0x029: /* movaps */
3481         case 0x129: /* movapd */
3482         case 0x17f: /* movdqa ea, xmm */
3483         case 0x27f: /* movdqu ea, xmm */
3484             if (mod != 3) {
3485                 gen_lea_modrm(env, s, modrm);
3486                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3487             } else {
3488                 rm = (modrm & 7) | REX_B(s);
3489                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3490                             offsetof(CPUX86State,xmm_regs[reg]));
3491             }
3492             break;
3493         case 0x211: /* movss ea, xmm */
3494             if (mod != 3) {
3495                 gen_lea_modrm(env, s, modrm);
3496                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3497                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3498                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3499             } else {
3500                 rm = (modrm & 7) | REX_B(s);
3501                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3502                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3503             }
3504             break;
3505         case 0x311: /* movsd ea, xmm */
3506             if (mod != 3) {
3507                 gen_lea_modrm(env, s, modrm);
3508                 gen_stq_env_A0(s, offsetof(CPUX86State,
3509                                            xmm_regs[reg].ZMM_Q(0)));
3510             } else {
3511                 rm = (modrm & 7) | REX_B(s);
3512                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3513                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3514             }
3515             break;
3516         case 0x013: /* movlps */
3517         case 0x113: /* movlpd */
3518             if (mod != 3) {
3519                 gen_lea_modrm(env, s, modrm);
3520                 gen_stq_env_A0(s, offsetof(CPUX86State,
3521                                            xmm_regs[reg].ZMM_Q(0)));
3522             } else {
3523                 goto illegal_op;
3524             }
3525             break;
3526         case 0x017: /* movhps */
3527         case 0x117: /* movhpd */
3528             if (mod != 3) {
3529                 gen_lea_modrm(env, s, modrm);
3530                 gen_stq_env_A0(s, offsetof(CPUX86State,
3531                                            xmm_regs[reg].ZMM_Q(1)));
3532             } else {
3533                 goto illegal_op;
3534             }
3535             break;
        case 0x71: /* shift mm, im */
        case 0x72:
        case 0x73:
        case 0x171: /* shift xmm, im */
        case 0x172:
        case 0x173:
            /* Packed shift group with an immediate count (psrlw/psraw/
               psllw etc., selected by the modrm reg field below).  Only
               the no-prefix and 66 forms are valid. */
            if (b1 >= 2) {
                goto unknown_op;
            }
            val = x86_ldub_code(env, s);
            if (is_xmm) {
                /* Materialize the count in xmm_t0: immediate in the low
                   32 bits, zero in the next 32. */
                tcg_gen_movi_tl(s->T0, val);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
                op1_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                /* Same for the MMX form, using mmx_t0. */
                tcg_gen_movi_tl(s->T0, val);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, mmx_t0.MMX_L(0)));
                tcg_gen_movi_tl(s->T0, 0);
                tcg_gen_st32_tl(s->T0, cpu_env,
                                offsetof(CPUX86State, mmx_t0.MMX_L(1)));
                op1_offset = offsetof(CPUX86State,mmx_t0);
            }
            /* Index sse_op_table2 by opcode row (0x71..0x73 -> 0..2)
               and the modrm /r field. */
            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
                                       (((modrm >> 3)) & 7)][b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (is_xmm) {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            /* Note the operand order: the register being shifted is the
               helper's destination (ptr0), the count block is ptr1. */
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
            break;
        case 0x050: /* movmskps */
            /* Collect the sign bits of the packed floats into a GPR
               (zero-extended). */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
        case 0x150: /* movmskpd */
            /* Same for the packed doubles. */
            rm = (modrm & 7) | REX_B(s);
            tcg_gen_addi_ptr(s->ptr0, cpu_env,
                             offsetof(CPUX86State,xmm_regs[rm]));
            gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
        case 0x02a: /* cvtpi2ps */
        case 0x12a: /* cvtpi2pd */
            /* MMX source operand: switch the FPU into MMX mode first. */
            gen_helper_enter_mmx(cpu_env);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,mmx_t0);
                gen_ldq_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7);
                op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
            }
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            /* High opcode byte selects the ps (no prefix) vs pd (66)
               variant. */
            switch(b >> 8) {
            case 0x0:
                gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
                break;
            default:
            case 0x1:
                gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
                break;
            }
            break;
        case 0x22a: /* cvtsi2ss */
        case 0x32a: /* cvtsi2sd */
            /* Convert a 32/64-bit integer operand to a scalar float or
               double; the 64-bit source form exists only on x86-64. */
            ot = mo_64_32(s->dflag);
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
            op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            if (ot == MO_32) {
                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
                sse_fn_epl(cpu_env, s->ptr0, s->T0);
#else
                goto illegal_op;
#endif
            }
            break;
        case 0x02c: /* cvttps2pi */
        case 0x12c: /* cvttpd2pi */
        case 0x02d: /* cvtps2pi */
        case 0x12d: /* cvtpd2pi */
            /* Packed float/double -> MMX integer conversions, with (2c)
               or without (2d) truncation. */
            gen_helper_enter_mmx(cpu_env);
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                op2_offset = offsetof(CPUX86State,xmm_t0);
                gen_ldo_env_A0(s, op2_offset);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            switch(b) {
            case 0x02c:
                gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12c:
                gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x02d:
                gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            case 0x12d:
                gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
                break;
            }
            break;
        case 0x22c: /* cvttss2si */
        case 0x32c: /* cvttsd2si */
        case 0x22d: /* cvtss2si */
        case 0x32d: /* cvtsd2si */
            /* Scalar float/double -> GPR conversions. */
            ot = mo_64_32(s->dflag);
            if (mod != 3) {
                /* Memory source: stage the scalar in xmm_t0 (a 64-bit
                   load for the sd forms, 32-bit for ss). */
                gen_lea_modrm(env, s, modrm);
                if ((b >> 8) & 1) {
                    gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
                } else {
                    gen_op_ld_v(s, MO_32, s->T0, s->A0);
                    tcg_gen_st32_tl(s->T0, cpu_env,
                                    offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
                }
                op2_offset = offsetof(CPUX86State,xmm_t0);
            } else {
                rm = (modrm & 7) | REX_B(s);
                op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
            }
            tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
            /* Table index: bit 1 = ss vs sd, bit 0 = truncate vs round. */
            if (ot == MO_32) {
                SSEFunc_i_ep sse_fn_i_ep =
                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
                sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
                tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            } else {
#ifdef TARGET_X86_64
                SSEFunc_l_ep sse_fn_l_ep =
                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
                sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
#else
                goto illegal_op;
#endif
            }
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0xc4: /* pinsrw */
        case 0x1c4:
            /* Insert a 16-bit value into an xmm (b1 set) or MMX register
               at the word index given by the trailing immediate. */
            s->rip_offset = 1;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
            } else {
                val &= 3;
                tcg_gen_st16_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
            }
            break;
        case 0xc5: /* pextrw */
        case 0x1c5:
            /* Extract a 16-bit word, zero-extended, from an xmm/MMX
               register into a GPR; register source only. */
            if (mod != 3)
                goto illegal_op;
            ot = mo_64_32(s->dflag);
            val = x86_ldub_code(env, s);
            if (b1) {
                val &= 7;
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                 offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
            } else {
                val &= 3;
                rm = (modrm & 7);
                tcg_gen_ld16u_tl(s->T0, cpu_env,
                                offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
            }
            reg = ((modrm >> 3) & 7) | REX_R(s);
            gen_op_mov_reg_v(s, ot, reg, s->T0);
            break;
        case 0x1d6: /* movq ea, xmm */
            /* Store the low quadword; the register-destination form also
               zeroes the destination's high quadword. */
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_stq_env_A0(s, offsetof(CPUX86State,
                                           xmm_regs[reg].ZMM_Q(0)));
            } else {
                rm = (modrm & 7) | REX_B(s);
                gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
                            offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
                gen_op_movq_env_0(s,
                                  offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
            }
            break;
        case 0x2d6: /* movq2dq */
            /* MMX -> low quadword of xmm, zeroing the high quadword. */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7);
            gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
                        offsetof(CPUX86State,fpregs[rm].mmx));
            gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
            break;
        case 0x3d6: /* movdq2q */
            /* Low quadword of xmm -> MMX register. */
            gen_helper_enter_mmx(cpu_env);
            rm = (modrm & 7) | REX_B(s);
            gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
                        offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
            break;
        case 0xd7: /* pmovmskb */
        case 0x1d7:
            /* Gather the per-byte sign bits of an xmm/MMX register into
               a GPR (zero-extended); register source only. */
            if (mod != 3)
                goto illegal_op;
            if (b1) {
                rm = (modrm & 7) | REX_B(s);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, xmm_regs[rm]));
                gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
            } else {
                rm = (modrm & 7);
                tcg_gen_addi_ptr(s->ptr0, cpu_env,
                                 offsetof(CPUX86State, fpregs[rm].mmx));
                gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
            }
            reg = ((modrm >> 3) & 7) | REX_R(s);
            tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
            break;
3782 
        case 0x138:
        case 0x038:
            /* Three-byte opcodes 0f 38 xx: two-operand ops dispatched
               through sse_op_table6.  The 0f 38 f[0-f] sub-range is the
               integer-extension group handled further below. */
            b = modrm;
            if ((b & 0xf0) == 0xf0) {
                goto do_0f_38_fx;
            }
            modrm = x86_ldub_code(env, s);
            rm = modrm & 7;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            mod = (modrm >> 6) & 3;
            if (b1 >= 2) {
                goto unknown_op;
            }

            sse_fn_epp = sse_op_table6[b].op[b1];
            if (!sse_fn_epp) {
                goto unknown_op;
            }
            if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                goto illegal_op;

            if (b1) {
                op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                } else {
                    op2_offset = offsetof(CPUX86State,xmm_t0);
                    gen_lea_modrm(env, s, modrm);
                    /* Memory operands of the pmovsx/pmovzx family are
                       narrower than 128 bits; load only what is needed. */
                    switch (b) {
                    case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                    case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
                    case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
                        gen_ldq_env_A0(s, op2_offset +
                                        offsetof(ZMMReg, ZMM_Q(0)));
                        break;
                    case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
                    case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_L(0)));
                        break;
                    case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
                        tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
                                           s->mem_index, MO_LEUW);
                        tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
                                        offsetof(ZMMReg, ZMM_W(0)));
                        break;
                    case 0x2a:            /* movntdqa */
                        /* Pure load straight into the destination
                           register; no helper runs, so we are done. */
                        gen_ldo_env_A0(s, op1_offset);
                        return;
                    default:
                        gen_ldo_env_A0(s, op2_offset);
                    }
                }
            } else {
                op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
                if (mod == 3) {
                    op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                } else {
                    op2_offset = offsetof(CPUX86State,mmx_t0);
                    gen_lea_modrm(env, s, modrm);
                    gen_ldq_env_A0(s, op2_offset);
                }
            }
            if (sse_fn_epp == SSE_SPECIAL) {
                goto unknown_op;
            }

            tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
            tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
            sse_fn_epp(cpu_env, s->ptr0, s->ptr1);

            /* 0f 38 17 is ptest, which writes EFLAGS. */
            if (b == 0x17) {
                set_cc_op(s, CC_OP_EFLAGS);
            }
            break;
3860 
        case 0x238:
        case 0x338:
        do_0f_38_fx:
            /* Various integer extensions at 0f 38 f[0-f].  */
            b = modrm | (b1 << 8);
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | REX_R(s);

            switch (b) {
            case 0x3f0: /* crc32 Gd,Eb */
            case 0x3f1: /* crc32 Gd,Ey */
            do_crc32:
                if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
                    goto illegal_op;
                }
                /* Source operand size: always a byte for f0, otherwise
                   16/32/64 bits per the prefixes. */
                if ((b & 0xff) == 0xf0) {
                    ot = MO_8;
                } else if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* Accumulate over the 32-bit CRC in Gd; the helper takes
                   the source width in bits. */
                tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                gen_helper_crc32(s->T0, s->tmp2_i32,
                                 s->T0, tcg_const_i32(8 << ot));

                ot = mo_64_32(s->dflag);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;
3892 
            case 0x1f0: /* crc32 or movbe */
            case 0x1f1:
                /* For these insns, the f3 prefix is supposed to have priority
                   over the 66 prefix, but that's not what we implement above
                   setting b1.  */
                if (s->prefix & PREFIX_REPNZ) {
                    goto do_crc32;
                }
                /* FALLTHRU */
            case 0x0f0: /* movbe Gy,My */
            case 0x0f1: /* movbe My,Gy */
                if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
                    goto illegal_op;
                }
                if (s->dflag != MO_64) {
                    ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
                } else {
                    ot = MO_64;
                }

                /* movbe is a byte-swapping load (even opcodes) or store
                   (odd opcodes); OR-ing MO_BE into the memop makes the
                   guest access big-endian, which performs the swap. */
                gen_lea_modrm(env, s, modrm);
                if ((b & 1) == 0) {
                    tcg_gen_qemu_ld_tl(s->T0, s->A0,
                                       s->mem_index, ot | MO_BE);
                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                } else {
                    tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
                                       s->mem_index, ot | MO_BE);
                }
                break;
3923 
            case 0x0f2: /* andn Gy, By, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* dest = Ey & ~vvvv, with SF/ZF set from the result. */
                tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_LOGICB + ot);
                break;
3937 
            case 0x0f7: /* bextr Gy, Ey, By */
                /* Bit-field extract: START is vvvv[7:0], LEN is
                   vvvv[15:8]. */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                {
                    TCGv bound, zero;

                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                    /* Extract START, and shift the operand.
                       Shifts larger than operand size get zeros.  */
                    tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
                    tcg_gen_shr_tl(s->T0, s->T0, s->A0);

                    bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    zero = tcg_const_tl(0);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
                                       s->T0, zero);
                    tcg_temp_free(zero);

                    /* Extract the LEN into a mask.  Lengths larger than
                       operand size get all ones.  */
                    tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
                    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
                                       s->A0, bound);
                    tcg_temp_free(bound);
                    /* mask = (1 << len) - 1, then apply it. */
                    tcg_gen_movi_tl(s->T1, 1);
                    tcg_gen_shl_tl(s->T1, s->T1, s->A0);
                    tcg_gen_subi_tl(s->T1, s->T1, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);

                    gen_op_mov_reg_v(s, ot, reg, s->T0);
                    gen_op_update1_cc(s);
                    set_cc_op(s, CC_OP_LOGICB + ot);
                }
                break;
3976 
            case 0x0f5: /* bzhi Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Start index is the low byte of vvvv. */
                tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
                {
                    TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
                    /* Note that since we're using BMILG (in order to get O
                       cleared) we need to store the inverse into C.  */
                    tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
                                       s->T1, bound);
                    /* Clamp the index to the operand width. */
                    tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
                                       bound, bound, s->T1);
                    tcg_temp_free(bound);
                }
                /* Clear bits at positions >= index: dest = Ey & ~(-1 << n). */
                tcg_gen_movi_tl(s->A0, -1);
                tcg_gen_shl_tl(s->A0, s->A0, s->T1);
                tcg_gen_andc_tl(s->T0, s->T0, s->A0);
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                gen_op_update1_cc(s);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;
4003 
            case 0x3f6: /* mulx By, Gy, rdx, Ey */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Unsigned widening multiply of EDX/RDX by Ey: low half
                   to vvvv, high half to Gy; flags are left untouched. */
                switch (ot) {
                default:
                    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                    tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
                    tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
                                      s->tmp2_i32, s->tmp3_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
                    tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
                    break;
#ifdef TARGET_X86_64
                case MO_64:
                    tcg_gen_mulu2_i64(s->T0, s->T1,
                                      s->T0, cpu_regs[R_EDX]);
                    tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
                    tcg_gen_mov_i64(cpu_regs[reg], s->T1);
                    break;
#endif
                }
                break;
4031 
            case 0x3f5: /* pdep Gy, By, Ey */
                /* BMI2 parallel bit deposit, done entirely in a helper. */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
                break;

            case 0x2f5: /* pext Gy, By, Ey */
                /* BMI2 parallel bit extract, mirror of pdep above. */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* Note that by zero-extending the source operand, we
                   automatically handle zero-extending the result.  */
                if (ot == MO_64) {
                    tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
                } else {
                    tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
                }
                gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
                break;
4067 
            case 0x1f6: /* adcx Gy, Ey */
            case 0x2f6: /* adox Gy, Ey */
                /* Add-with-carry through CF (adcx, tracked in cc_dst) or
                   OF (adox, tracked in cc_src2).  The CC_OP_ADCX/ADOX/
                   ADCOX states let the two carry chains interleave
                   without materializing EFLAGS in between. */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
                    goto illegal_op;
                } else {
                    TCGv carry_in, carry_out, zero;
                    int end_op;

                    ot = mo_64_32(s->dflag);
                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                    /* Re-use the carry-out from a previous round.  */
                    carry_in = NULL;
                    carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
                    switch (s->cc_op) {
                    case CC_OP_ADCX:
                        if (b == 0x1f6) {
                            carry_in = cpu_cc_dst;
                            end_op = CC_OP_ADCX;
                        } else {
                            end_op = CC_OP_ADCOX;
                        }
                        break;
                    case CC_OP_ADOX:
                        if (b == 0x1f6) {
                            end_op = CC_OP_ADCOX;
                        } else {
                            carry_in = cpu_cc_src2;
                            end_op = CC_OP_ADOX;
                        }
                        break;
                    case CC_OP_ADCOX:
                        end_op = CC_OP_ADCOX;
                        carry_in = carry_out;
                        break;
                    default:
                        end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
                        break;
                    }
                    /* If we can't reuse carry-out, get it out of EFLAGS.  */
                    if (!carry_in) {
                        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
                            gen_compute_eflags(s);
                        }
                        carry_in = s->tmp0;
                        /* Pull the single CF or OF bit out of cc_src. */
                        tcg_gen_extract_tl(carry_in, cpu_cc_src,
                                           ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
                    }

                    switch (ot) {
#ifdef TARGET_X86_64
                    case MO_32:
                        /* If we know TL is 64-bit, and we want a 32-bit
                           result, just do everything in 64-bit arithmetic.  */
                        tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
                        tcg_gen_ext32u_i64(s->T0, s->T0);
                        tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
                        tcg_gen_add_i64(s->T0, s->T0, carry_in);
                        tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
                        tcg_gen_shri_i64(carry_out, s->T0, 32);
                        break;
#endif
                    default:
                        /* Otherwise compute the carry-out in two steps.  */
                        zero = tcg_const_tl(0);
                        tcg_gen_add2_tl(s->T0, carry_out,
                                        s->T0, zero,
                                        carry_in, zero);
                        tcg_gen_add2_tl(cpu_regs[reg], carry_out,
                                        cpu_regs[reg], carry_out,
                                        s->T0, zero);
                        tcg_temp_free(zero);
                        break;
                    }
                    set_cc_op(s, end_op);
                }
                break;
4145 
            case 0x1f7: /* shlx Gy, Ey, By */
            case 0x2f7: /* sarx Gy, Ey, By */
            case 0x3f7: /* shrx Gy, Ey, By */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
                /* The count in vvvv is masked to the operand width; no
                   flags are generated for these shifts. */
                if (ot == MO_64) {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
                } else {
                    tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
                }
                if (b == 0x1f7) {
                    tcg_gen_shl_tl(s->T0, s->T0, s->T1);
                } else if (b == 0x2f7) {
                    /* Sign-extend a 32-bit operand before the arithmetic
                       shift so the upper bits are correct. */
                    if (ot != MO_64) {
                        tcg_gen_ext32s_tl(s->T0, s->T0);
                    }
                    tcg_gen_sar_tl(s->T0, s->T0, s->T1);
                } else {
                    /* Zero-extend likewise for the logical shift. */
                    if (ot != MO_64) {
                        tcg_gen_ext32u_tl(s->T0, s->T0);
                    }
                    tcg_gen_shr_tl(s->T0, s->T0, s->T1);
                }
                gen_op_mov_reg_v(s, ot, reg, s->T0);
                break;
4176 
            case 0x0f3:
            case 0x1f3:
            case 0x2f3:
            case 0x3f3: /* Group 17 */
                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
                    || !(s->prefix & PREFIX_VEX)
                    || s->vex_l != 0) {
                    goto illegal_op;
                }
                ot = mo_64_32(s->dflag);
                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);

                /* blsr/blsmsk/blsi are selected by the modrm reg field;
                   the result goes to vvvv with flags from src/result. */
                tcg_gen_mov_tl(cpu_cc_src, s->T0);
                switch (reg & 7) {
                case 1: /* blsr By,Ey */
                    /* Clear the lowest set bit: x & (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                case 2: /* blsmsk By,Ey */
                    /* Mask up to the lowest set bit: x ^ (x - 1).  */
                    tcg_gen_subi_tl(s->T1, s->T0, 1);
                    tcg_gen_xor_tl(s->T0, s->T0, s->T1);
                    break;
                case 3: /* blsi By, Ey */
                    /* Isolate the lowest set bit: x & -x.  */
                    tcg_gen_neg_tl(s->T1, s->T0);
                    tcg_gen_and_tl(s->T0, s->T0, s->T1);
                    break;
                default:
                    goto unknown_op;
                }
                tcg_gen_mov_tl(cpu_cc_dst, s->T0);
                gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
                set_cc_op(s, CC_OP_BMILGB + ot);
                break;

            default:
                goto unknown_op;
            }
            break;
4215 
4216         case 0x03a:
4217         case 0x13a:
4218             b = modrm;
4219             modrm = x86_ldub_code(env, s);
4220             rm = modrm & 7;
4221             reg = ((modrm >> 3) & 7) | REX_R(s);
4222             mod = (modrm >> 6) & 3;
4223             if (b1 >= 2) {
4224                 goto unknown_op;
4225             }
4226 
4227             sse_fn_eppi = sse_op_table7[b].op[b1];
4228             if (!sse_fn_eppi) {
4229                 goto unknown_op;
4230             }
4231             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4232                 goto illegal_op;
4233 
4234             s->rip_offset = 1;
4235 
4236             if (sse_fn_eppi == SSE_SPECIAL) {
4237                 ot = mo_64_32(s->dflag);
4238                 rm = (modrm & 7) | REX_B(s);
4239                 if (mod != 3)
4240                     gen_lea_modrm(env, s, modrm);
4241                 reg = ((modrm >> 3) & 7) | REX_R(s);
4242                 val = x86_ldub_code(env, s);
4243                 switch (b) {
4244                 case 0x14: /* pextrb */
4245                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4246                                             xmm_regs[reg].ZMM_B(val & 15)));
4247                     if (mod == 3) {
4248                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4249                     } else {
4250                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4251                                            s->mem_index, MO_UB);
4252                     }
4253                     break;
4254                 case 0x15: /* pextrw */
4255                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4256                                             xmm_regs[reg].ZMM_W(val & 7)));
4257                     if (mod == 3) {
4258                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4259                     } else {
4260                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4261                                            s->mem_index, MO_LEUW);
4262                     }
4263                     break;
4264                 case 0x16:
4265                     if (ot == MO_32) { /* pextrd */
4266                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4267                                         offsetof(CPUX86State,
4268                                                 xmm_regs[reg].ZMM_L(val & 3)));
4269                         if (mod == 3) {
4270                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4271                         } else {
4272                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4273                                                 s->mem_index, MO_LEUL);
4274                         }
4275                     } else { /* pextrq */
4276 #ifdef TARGET_X86_64
4277                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4278                                         offsetof(CPUX86State,
4279                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4280                         if (mod == 3) {
4281                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4282                         } else {
4283                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4284                                                 s->mem_index, MO_LEQ);
4285                         }
4286 #else
4287                         goto illegal_op;
4288 #endif
4289                     }
4290                     break;
4291                 case 0x17: /* extractps */
4292                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4293                                             xmm_regs[reg].ZMM_L(val & 3)));
4294                     if (mod == 3) {
4295                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4296                     } else {
4297                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4298                                            s->mem_index, MO_LEUL);
4299                     }
4300                     break;
4301                 case 0x20: /* pinsrb */
4302                     if (mod == 3) {
4303                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4304                     } else {
4305                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4306                                            s->mem_index, MO_UB);
4307                     }
4308                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4309                                             xmm_regs[reg].ZMM_B(val & 15)));
4310                     break;
4311                 case 0x21: /* insertps */
4312                     if (mod == 3) {
4313                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4314                                         offsetof(CPUX86State,xmm_regs[rm]
4315                                                 .ZMM_L((val >> 6) & 3)));
4316                     } else {
4317                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4318                                             s->mem_index, MO_LEUL);
4319                     }
4320                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4321                                     offsetof(CPUX86State,xmm_regs[reg]
4322                                             .ZMM_L((val >> 4) & 3)));
4323                     if ((val >> 0) & 1)
4324                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4325                                         cpu_env, offsetof(CPUX86State,
4326                                                 xmm_regs[reg].ZMM_L(0)));
4327                     if ((val >> 1) & 1)
4328                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4329                                         cpu_env, offsetof(CPUX86State,
4330                                                 xmm_regs[reg].ZMM_L(1)));
4331                     if ((val >> 2) & 1)
4332                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4333                                         cpu_env, offsetof(CPUX86State,
4334                                                 xmm_regs[reg].ZMM_L(2)));
4335                     if ((val >> 3) & 1)
4336                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4337                                         cpu_env, offsetof(CPUX86State,
4338                                                 xmm_regs[reg].ZMM_L(3)));
4339                     break;
4340                 case 0x22:
4341                     if (ot == MO_32) { /* pinsrd */
4342                         if (mod == 3) {
4343                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4344                         } else {
4345                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4346                                                 s->mem_index, MO_LEUL);
4347                         }
4348                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4349                                         offsetof(CPUX86State,
4350                                                 xmm_regs[reg].ZMM_L(val & 3)));
4351                     } else { /* pinsrq */
4352 #ifdef TARGET_X86_64
4353                         if (mod == 3) {
4354                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4355                         } else {
4356                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4357                                                 s->mem_index, MO_LEQ);
4358                         }
4359                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4360                                         offsetof(CPUX86State,
4361                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4362 #else
4363                         goto illegal_op;
4364 #endif
4365                     }
4366                     break;
4367                 }
4368                 return;
4369             }
4370 
4371             if (b1) {
4372                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4373                 if (mod == 3) {
4374                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4375                 } else {
4376                     op2_offset = offsetof(CPUX86State,xmm_t0);
4377                     gen_lea_modrm(env, s, modrm);
4378                     gen_ldo_env_A0(s, op2_offset);
4379                 }
4380             } else {
4381                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4382                 if (mod == 3) {
4383                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4384                 } else {
4385                     op2_offset = offsetof(CPUX86State,mmx_t0);
4386                     gen_lea_modrm(env, s, modrm);
4387                     gen_ldq_env_A0(s, op2_offset);
4388                 }
4389             }
4390             val = x86_ldub_code(env, s);
4391 
4392             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4393                 set_cc_op(s, CC_OP_EFLAGS);
4394 
4395                 if (s->dflag == MO_64) {
4396                     /* The helper must use entire 64-bit gp registers */
4397                     val |= 1 << 8;
4398                 }
4399             }
4400 
4401             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4402             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4403             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4404             break;
4405 
4406         case 0x33a:
4407             /* Various integer extensions at 0f 3a f[0-f].  */
4408             b = modrm | (b1 << 8);
4409             modrm = x86_ldub_code(env, s);
4410             reg = ((modrm >> 3) & 7) | REX_R(s);
4411 
4412             switch (b) {
4413             case 0x3f0: /* rorx Gy,Ey, Ib */
4414                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4415                     || !(s->prefix & PREFIX_VEX)
4416                     || s->vex_l != 0) {
4417                     goto illegal_op;
4418                 }
4419                 ot = mo_64_32(s->dflag);
4420                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4421                 b = x86_ldub_code(env, s);
4422                 if (ot == MO_64) {
4423                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4424                 } else {
4425                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4426                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4427                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4428                 }
4429                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4430                 break;
4431 
4432             default:
4433                 goto unknown_op;
4434             }
4435             break;
4436 
4437         default:
4438         unknown_op:
4439             gen_unknown_opcode(env, s);
4440             return;
4441         }
4442     } else {
4443         /* generic MMX or SSE operation */
4444         switch(b) {
4445         case 0x70: /* pshufx insn */
4446         case 0xc6: /* pshufx insn */
4447         case 0xc2: /* compare insns */
4448             s->rip_offset = 1;
4449             break;
4450         default:
4451             break;
4452         }
4453         if (is_xmm) {
4454             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4455             if (mod != 3) {
4456                 int sz = 4;
4457 
4458                 gen_lea_modrm(env, s, modrm);
4459                 op2_offset = offsetof(CPUX86State,xmm_t0);
4460 
4461                 switch (b) {
4462                 case 0x50 ... 0x5a:
4463                 case 0x5c ... 0x5f:
4464                 case 0xc2:
4465                     /* Most sse scalar operations.  */
4466                     if (b1 == 2) {
4467                         sz = 2;
4468                     } else if (b1 == 3) {
4469                         sz = 3;
4470                     }
4471                     break;
4472 
4473                 case 0x2e:  /* ucomis[sd] */
4474                 case 0x2f:  /* comis[sd] */
4475                     if (b1 == 0) {
4476                         sz = 2;
4477                     } else {
4478                         sz = 3;
4479                     }
4480                     break;
4481                 }
4482 
4483                 switch (sz) {
4484                 case 2:
4485                     /* 32 bit access */
4486                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4487                     tcg_gen_st32_tl(s->T0, cpu_env,
4488                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4489                     break;
4490                 case 3:
4491                     /* 64 bit access */
4492                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4493                     break;
4494                 default:
4495                     /* 128 bit access */
4496                     gen_ldo_env_A0(s, op2_offset);
4497                     break;
4498                 }
4499             } else {
4500                 rm = (modrm & 7) | REX_B(s);
4501                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4502             }
4503         } else {
4504             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4505             if (mod != 3) {
4506                 gen_lea_modrm(env, s, modrm);
4507                 op2_offset = offsetof(CPUX86State,mmx_t0);
4508                 gen_ldq_env_A0(s, op2_offset);
4509             } else {
4510                 rm = (modrm & 7);
4511                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4512             }
4513         }
4514         switch(b) {
4515         case 0x0f: /* 3DNow! data insns */
4516             val = x86_ldub_code(env, s);
4517             sse_fn_epp = sse_op_table5[val];
4518             if (!sse_fn_epp) {
4519                 goto unknown_op;
4520             }
4521             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4522                 goto illegal_op;
4523             }
4524             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4525             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4526             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4527             break;
4528         case 0x70: /* pshufx insn */
4529         case 0xc6: /* pshufx insn */
4530             val = x86_ldub_code(env, s);
4531             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4532             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4533             /* XXX: introduce a new table? */
4534             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4535             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4536             break;
4537         case 0xc2:
4538             /* compare insns */
4539             val = x86_ldub_code(env, s);
4540             if (val >= 8)
4541                 goto unknown_op;
4542             sse_fn_epp = sse_op_table4[val][b1];
4543 
4544             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4545             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4546             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4547             break;
4548         case 0xf7:
4549             /* maskmov : we must prepare A0 */
4550             if (mod != 3)
4551                 goto illegal_op;
4552             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4553             gen_extu(s->aflag, s->A0);
4554             gen_add_A0_ds_seg(s);
4555 
4556             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4557             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4558             /* XXX: introduce a new table? */
4559             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4560             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4561             break;
4562         default:
4563             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4564             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4565             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4566             break;
4567         }
4568         if (b == 0x2e || b == 0x2f) {
4569             set_cc_op(s, CC_OP_EFLAGS);
4570         }
4571     }
4572 }
4573 
/* Convert one guest instruction into TCG ops.  s->base.is_jmp is set if
   translation must stop afterwards.  Returns the address of the next
   instruction (the updated pc value). */
4576 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4577 {
4578     CPUX86State *env = cpu->env_ptr;
4579     int b, prefixes;
4580     int shift;
4581     MemOp ot, aflag, dflag;
4582     int modrm, reg, rm, mod, op, opreg, val;
4583     target_ulong next_eip, tval;
4584     target_ulong pc_start = s->base.pc_next;
4585 
4586     s->pc_start = s->pc = pc_start;
4587     s->override = -1;
4588 #ifdef TARGET_X86_64
4589     s->rex_w = false;
4590     s->rex_r = 0;
4591     s->rex_x = 0;
4592     s->rex_b = 0;
4593 #endif
4594     s->rip_offset = 0; /* for relative ip address */
4595     s->vex_l = 0;
4596     s->vex_v = 0;
4597     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4598         gen_exception_gpf(s);
4599         return s->pc;
4600     }
4601 
4602     prefixes = 0;
4603 
4604  next_byte:
4605     b = x86_ldub_code(env, s);
4606     /* Collect prefixes.  */
4607     switch (b) {
4608     case 0xf3:
4609         prefixes |= PREFIX_REPZ;
4610         goto next_byte;
4611     case 0xf2:
4612         prefixes |= PREFIX_REPNZ;
4613         goto next_byte;
4614     case 0xf0:
4615         prefixes |= PREFIX_LOCK;
4616         goto next_byte;
4617     case 0x2e:
4618         s->override = R_CS;
4619         goto next_byte;
4620     case 0x36:
4621         s->override = R_SS;
4622         goto next_byte;
4623     case 0x3e:
4624         s->override = R_DS;
4625         goto next_byte;
4626     case 0x26:
4627         s->override = R_ES;
4628         goto next_byte;
4629     case 0x64:
4630         s->override = R_FS;
4631         goto next_byte;
4632     case 0x65:
4633         s->override = R_GS;
4634         goto next_byte;
4635     case 0x66:
4636         prefixes |= PREFIX_DATA;
4637         goto next_byte;
4638     case 0x67:
4639         prefixes |= PREFIX_ADR;
4640         goto next_byte;
4641 #ifdef TARGET_X86_64
4642     case 0x40 ... 0x4f:
4643         if (CODE64(s)) {
4644             /* REX prefix */
4645             prefixes |= PREFIX_REX;
4646             s->rex_w = (b >> 3) & 1;
4647             s->rex_r = (b & 0x4) << 1;
4648             s->rex_x = (b & 0x2) << 2;
4649             s->rex_b = (b & 0x1) << 3;
4650             goto next_byte;
4651         }
4652         break;
4653 #endif
4654     case 0xc5: /* 2-byte VEX */
4655     case 0xc4: /* 3-byte VEX */
4656         /* VEX prefixes cannot be used except in 32-bit mode.
4657            Otherwise the instruction is LES or LDS.  */
4658         if (CODE32(s) && !VM86(s)) {
4659             static const int pp_prefix[4] = {
4660                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4661             };
4662             int vex3, vex2 = x86_ldub_code(env, s);
4663 
4664             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4665                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4666                    otherwise the instruction is LES or LDS.  */
4667                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4668                 break;
4669             }
4670 
4671             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4672             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4673                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4674                 goto illegal_op;
4675             }
4676 #ifdef TARGET_X86_64
4677             s->rex_r = (~vex2 >> 4) & 8;
4678 #endif
4679             if (b == 0xc5) {
4680                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4681                 vex3 = vex2;
4682                 b = x86_ldub_code(env, s) | 0x100;
4683             } else {
4684                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4685                 vex3 = x86_ldub_code(env, s);
4686 #ifdef TARGET_X86_64
4687                 s->rex_x = (~vex2 >> 3) & 8;
4688                 s->rex_b = (~vex2 >> 2) & 8;
4689                 s->rex_w = (vex3 >> 7) & 1;
4690 #endif
4691                 switch (vex2 & 0x1f) {
4692                 case 0x01: /* Implied 0f leading opcode bytes.  */
4693                     b = x86_ldub_code(env, s) | 0x100;
4694                     break;
4695                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4696                     b = 0x138;
4697                     break;
4698                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4699                     b = 0x13a;
4700                     break;
4701                 default:   /* Reserved for future use.  */
4702                     goto unknown_op;
4703                 }
4704             }
4705             s->vex_v = (~vex3 >> 3) & 0xf;
4706             s->vex_l = (vex3 >> 2) & 1;
4707             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4708         }
4709         break;
4710     }
4711 
4712     /* Post-process prefixes.  */
4713     if (CODE64(s)) {
4714         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4715            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4716            over 0x66 if both are present.  */
4717         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4718         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4719         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4720     } else {
4721         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4722         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4723             dflag = MO_32;
4724         } else {
4725             dflag = MO_16;
4726         }
4727         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4728         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4729             aflag = MO_32;
4730         }  else {
4731             aflag = MO_16;
4732         }
4733     }
4734 
4735     s->prefix = prefixes;
4736     s->aflag = aflag;
4737     s->dflag = dflag;
4738 
4739     /* now check op code */
4740  reswitch:
4741     switch(b) {
4742     case 0x0f:
4743         /**************************/
4744         /* extended op code */
4745         b = x86_ldub_code(env, s) | 0x100;
4746         goto reswitch;
4747 
4748         /**************************/
4749         /* arith & logic */
4750     case 0x00 ... 0x05:
4751     case 0x08 ... 0x0d:
4752     case 0x10 ... 0x15:
4753     case 0x18 ... 0x1d:
4754     case 0x20 ... 0x25:
4755     case 0x28 ... 0x2d:
4756     case 0x30 ... 0x35:
4757     case 0x38 ... 0x3d:
4758         {
4759             int op, f, val;
4760             op = (b >> 3) & 7;
4761             f = (b >> 1) & 3;
4762 
4763             ot = mo_b_d(b, dflag);
4764 
4765             switch(f) {
4766             case 0: /* OP Ev, Gv */
4767                 modrm = x86_ldub_code(env, s);
4768                 reg = ((modrm >> 3) & 7) | REX_R(s);
4769                 mod = (modrm >> 6) & 3;
4770                 rm = (modrm & 7) | REX_B(s);
4771                 if (mod != 3) {
4772                     gen_lea_modrm(env, s, modrm);
4773                     opreg = OR_TMP0;
4774                 } else if (op == OP_XORL && rm == reg) {
4775                 xor_zero:
4776                     /* xor reg, reg optimisation */
4777                     set_cc_op(s, CC_OP_CLR);
4778                     tcg_gen_movi_tl(s->T0, 0);
4779                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4780                     break;
4781                 } else {
4782                     opreg = rm;
4783                 }
4784                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4785                 gen_op(s, op, ot, opreg);
4786                 break;
4787             case 1: /* OP Gv, Ev */
4788                 modrm = x86_ldub_code(env, s);
4789                 mod = (modrm >> 6) & 3;
4790                 reg = ((modrm >> 3) & 7) | REX_R(s);
4791                 rm = (modrm & 7) | REX_B(s);
4792                 if (mod != 3) {
4793                     gen_lea_modrm(env, s, modrm);
4794                     gen_op_ld_v(s, ot, s->T1, s->A0);
4795                 } else if (op == OP_XORL && rm == reg) {
4796                     goto xor_zero;
4797                 } else {
4798                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4799                 }
4800                 gen_op(s, op, ot, reg);
4801                 break;
4802             case 2: /* OP A, Iv */
4803                 val = insn_get(env, s, ot);
4804                 tcg_gen_movi_tl(s->T1, val);
4805                 gen_op(s, op, ot, OR_EAX);
4806                 break;
4807             }
4808         }
4809         break;
4810 
4811     case 0x82:
4812         if (CODE64(s))
4813             goto illegal_op;
4814         /* fall through */
4815     case 0x80: /* GRP1 */
4816     case 0x81:
4817     case 0x83:
4818         {
4819             int val;
4820 
4821             ot = mo_b_d(b, dflag);
4822 
4823             modrm = x86_ldub_code(env, s);
4824             mod = (modrm >> 6) & 3;
4825             rm = (modrm & 7) | REX_B(s);
4826             op = (modrm >> 3) & 7;
4827 
4828             if (mod != 3) {
4829                 if (b == 0x83)
4830                     s->rip_offset = 1;
4831                 else
4832                     s->rip_offset = insn_const_size(ot);
4833                 gen_lea_modrm(env, s, modrm);
4834                 opreg = OR_TMP0;
4835             } else {
4836                 opreg = rm;
4837             }
4838 
4839             switch(b) {
4840             default:
4841             case 0x80:
4842             case 0x81:
4843             case 0x82:
4844                 val = insn_get(env, s, ot);
4845                 break;
4846             case 0x83:
4847                 val = (int8_t)insn_get(env, s, MO_8);
4848                 break;
4849             }
4850             tcg_gen_movi_tl(s->T1, val);
4851             gen_op(s, op, ot, opreg);
4852         }
4853         break;
4854 
4855         /**************************/
4856         /* inc, dec, and other misc arith */
4857     case 0x40 ... 0x47: /* inc Gv */
4858         ot = dflag;
4859         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4860         break;
4861     case 0x48 ... 0x4f: /* dec Gv */
4862         ot = dflag;
4863         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4864         break;
4865     case 0xf6: /* GRP3 */
4866     case 0xf7:
4867         ot = mo_b_d(b, dflag);
4868 
4869         modrm = x86_ldub_code(env, s);
4870         mod = (modrm >> 6) & 3;
4871         rm = (modrm & 7) | REX_B(s);
4872         op = (modrm >> 3) & 7;
4873         if (mod != 3) {
4874             if (op == 0) {
4875                 s->rip_offset = insn_const_size(ot);
4876             }
4877             gen_lea_modrm(env, s, modrm);
4878             /* For those below that handle locked memory, don't load here.  */
4879             if (!(s->prefix & PREFIX_LOCK)
4880                 || op != 2) {
4881                 gen_op_ld_v(s, ot, s->T0, s->A0);
4882             }
4883         } else {
4884             gen_op_mov_v_reg(s, ot, s->T0, rm);
4885         }
4886 
4887         switch(op) {
4888         case 0: /* test */
4889             val = insn_get(env, s, ot);
4890             tcg_gen_movi_tl(s->T1, val);
4891             gen_op_testl_T0_T1_cc(s);
4892             set_cc_op(s, CC_OP_LOGICB + ot);
4893             break;
4894         case 2: /* not */
4895             if (s->prefix & PREFIX_LOCK) {
4896                 if (mod == 3) {
4897                     goto illegal_op;
4898                 }
4899                 tcg_gen_movi_tl(s->T0, ~0);
4900                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4901                                             s->mem_index, ot | MO_LE);
4902             } else {
4903                 tcg_gen_not_tl(s->T0, s->T0);
4904                 if (mod != 3) {
4905                     gen_op_st_v(s, ot, s->T0, s->A0);
4906                 } else {
4907                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4908                 }
4909             }
4910             break;
4911         case 3: /* neg */
4912             if (s->prefix & PREFIX_LOCK) {
4913                 TCGLabel *label1;
4914                 TCGv a0, t0, t1, t2;
4915 
4916                 if (mod == 3) {
4917                     goto illegal_op;
4918                 }
4919                 a0 = tcg_temp_local_new();
4920                 t0 = tcg_temp_local_new();
4921                 label1 = gen_new_label();
4922 
4923                 tcg_gen_mov_tl(a0, s->A0);
4924                 tcg_gen_mov_tl(t0, s->T0);
4925 
4926                 gen_set_label(label1);
4927                 t1 = tcg_temp_new();
4928                 t2 = tcg_temp_new();
4929                 tcg_gen_mov_tl(t2, t0);
4930                 tcg_gen_neg_tl(t1, t0);
4931                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4932                                           s->mem_index, ot | MO_LE);
4933                 tcg_temp_free(t1);
4934                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4935 
4936                 tcg_temp_free(t2);
4937                 tcg_temp_free(a0);
4938                 tcg_gen_mov_tl(s->T0, t0);
4939                 tcg_temp_free(t0);
4940             } else {
4941                 tcg_gen_neg_tl(s->T0, s->T0);
4942                 if (mod != 3) {
4943                     gen_op_st_v(s, ot, s->T0, s->A0);
4944                 } else {
4945                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4946                 }
4947             }
4948             gen_op_update_neg_cc(s);
4949             set_cc_op(s, CC_OP_SUBB + ot);
4950             break;
4951         case 4: /* mul */
4952             switch(ot) {
4953             case MO_8:
4954                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4955                 tcg_gen_ext8u_tl(s->T0, s->T0);
4956                 tcg_gen_ext8u_tl(s->T1, s->T1);
4957                 /* XXX: use 32 bit mul which could be faster */
4958                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4959                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4960                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4961                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4962                 set_cc_op(s, CC_OP_MULB);
4963                 break;
4964             case MO_16:
4965                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4966                 tcg_gen_ext16u_tl(s->T0, s->T0);
4967                 tcg_gen_ext16u_tl(s->T1, s->T1);
4968                 /* XXX: use 32 bit mul which could be faster */
4969                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4970                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4971                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4972                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4973                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4974                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4975                 set_cc_op(s, CC_OP_MULW);
4976                 break;
4977             default:
4978             case MO_32:
4979                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4980                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4981                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4982                                   s->tmp2_i32, s->tmp3_i32);
4983                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4984                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4985                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4986                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4987                 set_cc_op(s, CC_OP_MULL);
4988                 break;
4989 #ifdef TARGET_X86_64
4990             case MO_64:
4991                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4992                                   s->T0, cpu_regs[R_EAX]);
4993                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4994                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4995                 set_cc_op(s, CC_OP_MULQ);
4996                 break;
4997 #endif
4998             }
4999             break;
5000         case 5: /* imul */
5001             switch(ot) {
5002             case MO_8:
5003                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
5004                 tcg_gen_ext8s_tl(s->T0, s->T0);
5005                 tcg_gen_ext8s_tl(s->T1, s->T1);
5006                 /* XXX: use 32 bit mul which could be faster */
5007                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5008                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5009                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5010                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
5011                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5012                 set_cc_op(s, CC_OP_MULB);
5013                 break;
5014             case MO_16:
5015                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
5016                 tcg_gen_ext16s_tl(s->T0, s->T0);
5017                 tcg_gen_ext16s_tl(s->T1, s->T1);
5018                 /* XXX: use 32 bit mul which could be faster */
5019                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5020                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5021                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5022                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
5023                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5024                 tcg_gen_shri_tl(s->T0, s->T0, 16);
5025                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5026                 set_cc_op(s, CC_OP_MULW);
5027                 break;
5028             default:
5029             case MO_32:
5030                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5031                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5032                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5033                                   s->tmp2_i32, s->tmp3_i32);
5034                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5035                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5036                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5037                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5038                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5039                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5040                 set_cc_op(s, CC_OP_MULL);
5041                 break;
5042 #ifdef TARGET_X86_64
5043             case MO_64:
5044                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5045                                   s->T0, cpu_regs[R_EAX]);
5046                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5047                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5048                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5049                 set_cc_op(s, CC_OP_MULQ);
5050                 break;
5051 #endif
5052             }
5053             break;
5054         case 6: /* div */
5055             switch(ot) {
5056             case MO_8:
5057                 gen_helper_divb_AL(cpu_env, s->T0);
5058                 break;
5059             case MO_16:
5060                 gen_helper_divw_AX(cpu_env, s->T0);
5061                 break;
5062             default:
5063             case MO_32:
5064                 gen_helper_divl_EAX(cpu_env, s->T0);
5065                 break;
5066 #ifdef TARGET_X86_64
5067             case MO_64:
5068                 gen_helper_divq_EAX(cpu_env, s->T0);
5069                 break;
5070 #endif
5071             }
5072             break;
5073         case 7: /* idiv */
5074             switch(ot) {
5075             case MO_8:
5076                 gen_helper_idivb_AL(cpu_env, s->T0);
5077                 break;
5078             case MO_16:
5079                 gen_helper_idivw_AX(cpu_env, s->T0);
5080                 break;
5081             default:
5082             case MO_32:
5083                 gen_helper_idivl_EAX(cpu_env, s->T0);
5084                 break;
5085 #ifdef TARGET_X86_64
5086             case MO_64:
5087                 gen_helper_idivq_EAX(cpu_env, s->T0);
5088                 break;
5089 #endif
5090             }
5091             break;
5092         default:
5093             goto unknown_op;
5094         }
5095         break;
5096 
5097     case 0xfe: /* GRP4 */
5098     case 0xff: /* GRP5 */
5099         ot = mo_b_d(b, dflag);
5100 
5101         modrm = x86_ldub_code(env, s);
5102         mod = (modrm >> 6) & 3;
5103         rm = (modrm & 7) | REX_B(s);
5104         op = (modrm >> 3) & 7;
5105         if (op >= 2 && b == 0xfe) {
5106             goto unknown_op;
5107         }
5108         if (CODE64(s)) {
5109             if (op == 2 || op == 4) {
5110                 /* operand size for jumps is 64 bit */
5111                 ot = MO_64;
5112             } else if (op == 3 || op == 5) {
5113                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5114             } else if (op == 6) {
5115                 /* default push size is 64 bit */
5116                 ot = mo_pushpop(s, dflag);
5117             }
5118         }
5119         if (mod != 3) {
5120             gen_lea_modrm(env, s, modrm);
5121             if (op >= 2 && op != 3 && op != 5)
5122                 gen_op_ld_v(s, ot, s->T0, s->A0);
5123         } else {
5124             gen_op_mov_v_reg(s, ot, s->T0, rm);
5125         }
5126 
5127         switch(op) {
5128         case 0: /* inc Ev */
5129             if (mod != 3)
5130                 opreg = OR_TMP0;
5131             else
5132                 opreg = rm;
5133             gen_inc(s, ot, opreg, 1);
5134             break;
5135         case 1: /* dec Ev */
5136             if (mod != 3)
5137                 opreg = OR_TMP0;
5138             else
5139                 opreg = rm;
5140             gen_inc(s, ot, opreg, -1);
5141             break;
5142         case 2: /* call Ev */
5143             /* XXX: optimize if memory (no 'and' is necessary) */
5144             if (dflag == MO_16) {
5145                 tcg_gen_ext16u_tl(s->T0, s->T0);
5146             }
5147             next_eip = s->pc - s->cs_base;
5148             tcg_gen_movi_tl(s->T1, next_eip);
5149             gen_push_v(s, s->T1);
5150             gen_op_jmp_v(s->T0);
5151             gen_bnd_jmp(s);
5152             gen_jr(s, s->T0);
5153             break;
5154         case 3: /* lcall Ev */
5155             if (mod == 3) {
5156                 goto illegal_op;
5157             }
5158             gen_op_ld_v(s, ot, s->T1, s->A0);
5159             gen_add_A0_im(s, 1 << ot);
5160             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5161         do_lcall:
5162             if (PE(s) && !VM86(s)) {
5163                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5164                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5165                                            tcg_const_i32(dflag - 1),
5166                                            tcg_const_tl(s->pc - s->cs_base));
5167             } else {
5168                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5169                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5170                                       tcg_const_i32(dflag - 1),
5171                                       tcg_const_i32(s->pc - s->cs_base));
5172             }
5173             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5174             gen_jr(s, s->tmp4);
5175             break;
5176         case 4: /* jmp Ev */
5177             if (dflag == MO_16) {
5178                 tcg_gen_ext16u_tl(s->T0, s->T0);
5179             }
5180             gen_op_jmp_v(s->T0);
5181             gen_bnd_jmp(s);
5182             gen_jr(s, s->T0);
5183             break;
5184         case 5: /* ljmp Ev */
5185             if (mod == 3) {
5186                 goto illegal_op;
5187             }
5188             gen_op_ld_v(s, ot, s->T1, s->A0);
5189             gen_add_A0_im(s, 1 << ot);
5190             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5191         do_ljmp:
5192             if (PE(s) && !VM86(s)) {
5193                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5194                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5195                                           tcg_const_tl(s->pc - s->cs_base));
5196             } else {
5197                 gen_op_movl_seg_T0_vm(s, R_CS);
5198                 gen_op_jmp_v(s->T1);
5199             }
5200             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5201             gen_jr(s, s->tmp4);
5202             break;
5203         case 6: /* push Ev */
5204             gen_push_v(s, s->T0);
5205             break;
5206         default:
5207             goto unknown_op;
5208         }
5209         break;
5210 
5211     case 0x84: /* test Ev, Gv */
5212     case 0x85:
5213         ot = mo_b_d(b, dflag);
5214 
5215         modrm = x86_ldub_code(env, s);
5216         reg = ((modrm >> 3) & 7) | REX_R(s);
5217 
5218         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5219         gen_op_mov_v_reg(s, ot, s->T1, reg);
5220         gen_op_testl_T0_T1_cc(s);
5221         set_cc_op(s, CC_OP_LOGICB + ot);
5222         break;
5223 
5224     case 0xa8: /* test eAX, Iv */
5225     case 0xa9:
5226         ot = mo_b_d(b, dflag);
5227         val = insn_get(env, s, ot);
5228 
5229         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5230         tcg_gen_movi_tl(s->T1, val);
5231         gen_op_testl_T0_T1_cc(s);
5232         set_cc_op(s, CC_OP_LOGICB + ot);
5233         break;
5234 
5235     case 0x98: /* CWDE/CBW */
5236         switch (dflag) {
5237 #ifdef TARGET_X86_64
5238         case MO_64:
5239             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5240             tcg_gen_ext32s_tl(s->T0, s->T0);
5241             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5242             break;
5243 #endif
5244         case MO_32:
5245             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5246             tcg_gen_ext16s_tl(s->T0, s->T0);
5247             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5248             break;
5249         case MO_16:
5250             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5251             tcg_gen_ext8s_tl(s->T0, s->T0);
5252             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5253             break;
5254         default:
5255             tcg_abort();
5256         }
5257         break;
5258     case 0x99: /* CDQ/CWD */
5259         switch (dflag) {
5260 #ifdef TARGET_X86_64
5261         case MO_64:
5262             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5263             tcg_gen_sari_tl(s->T0, s->T0, 63);
5264             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5265             break;
5266 #endif
5267         case MO_32:
5268             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5269             tcg_gen_ext32s_tl(s->T0, s->T0);
5270             tcg_gen_sari_tl(s->T0, s->T0, 31);
5271             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5272             break;
5273         case MO_16:
5274             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5275             tcg_gen_ext16s_tl(s->T0, s->T0);
5276             tcg_gen_sari_tl(s->T0, s->T0, 15);
5277             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5278             break;
5279         default:
5280             tcg_abort();
5281         }
5282         break;
5283     case 0x1af: /* imul Gv, Ev */
5284     case 0x69: /* imul Gv, Ev, I */
5285     case 0x6b:
5286         ot = dflag;
5287         modrm = x86_ldub_code(env, s);
5288         reg = ((modrm >> 3) & 7) | REX_R(s);
5289         if (b == 0x69)
5290             s->rip_offset = insn_const_size(ot);
5291         else if (b == 0x6b)
5292             s->rip_offset = 1;
5293         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5294         if (b == 0x69) {
5295             val = insn_get(env, s, ot);
5296             tcg_gen_movi_tl(s->T1, val);
5297         } else if (b == 0x6b) {
5298             val = (int8_t)insn_get(env, s, MO_8);
5299             tcg_gen_movi_tl(s->T1, val);
5300         } else {
5301             gen_op_mov_v_reg(s, ot, s->T1, reg);
5302         }
5303         switch (ot) {
5304 #ifdef TARGET_X86_64
5305         case MO_64:
5306             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5307             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5308             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5309             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5310             break;
5311 #endif
5312         case MO_32:
5313             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5314             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5315             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5316                               s->tmp2_i32, s->tmp3_i32);
5317             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5318             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5319             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5320             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5321             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5322             break;
5323         default:
5324             tcg_gen_ext16s_tl(s->T0, s->T0);
5325             tcg_gen_ext16s_tl(s->T1, s->T1);
5326             /* XXX: use 32 bit mul which could be faster */
5327             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5328             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5329             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5330             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5331             gen_op_mov_reg_v(s, ot, reg, s->T0);
5332             break;
5333         }
5334         set_cc_op(s, CC_OP_MULB + ot);
5335         break;
5336     case 0x1c0:
5337     case 0x1c1: /* xadd Ev, Gv */
5338         ot = mo_b_d(b, dflag);
5339         modrm = x86_ldub_code(env, s);
5340         reg = ((modrm >> 3) & 7) | REX_R(s);
5341         mod = (modrm >> 6) & 3;
5342         gen_op_mov_v_reg(s, ot, s->T0, reg);
5343         if (mod == 3) {
5344             rm = (modrm & 7) | REX_B(s);
5345             gen_op_mov_v_reg(s, ot, s->T1, rm);
5346             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5347             gen_op_mov_reg_v(s, ot, reg, s->T1);
5348             gen_op_mov_reg_v(s, ot, rm, s->T0);
5349         } else {
5350             gen_lea_modrm(env, s, modrm);
5351             if (s->prefix & PREFIX_LOCK) {
5352                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5353                                             s->mem_index, ot | MO_LE);
5354                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5355             } else {
5356                 gen_op_ld_v(s, ot, s->T1, s->A0);
5357                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5358                 gen_op_st_v(s, ot, s->T0, s->A0);
5359             }
5360             gen_op_mov_reg_v(s, ot, reg, s->T1);
5361         }
5362         gen_op_update2_cc(s);
5363         set_cc_op(s, CC_OP_ADDB + ot);
5364         break;
5365     case 0x1b0:
5366     case 0x1b1: /* cmpxchg Ev, Gv */
5367         {
5368             TCGv oldv, newv, cmpv;
5369 
5370             ot = mo_b_d(b, dflag);
5371             modrm = x86_ldub_code(env, s);
5372             reg = ((modrm >> 3) & 7) | REX_R(s);
5373             mod = (modrm >> 6) & 3;
5374             oldv = tcg_temp_new();
5375             newv = tcg_temp_new();
5376             cmpv = tcg_temp_new();
5377             gen_op_mov_v_reg(s, ot, newv, reg);
5378             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5379 
5380             if (s->prefix & PREFIX_LOCK) {
5381                 if (mod == 3) {
5382                     goto illegal_op;
5383                 }
5384                 gen_lea_modrm(env, s, modrm);
5385                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5386                                           s->mem_index, ot | MO_LE);
5387                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5388             } else {
5389                 if (mod == 3) {
5390                     rm = (modrm & 7) | REX_B(s);
5391                     gen_op_mov_v_reg(s, ot, oldv, rm);
5392                 } else {
5393                     gen_lea_modrm(env, s, modrm);
5394                     gen_op_ld_v(s, ot, oldv, s->A0);
5395                     rm = 0; /* avoid warning */
5396                 }
5397                 gen_extu(ot, oldv);
5398                 gen_extu(ot, cmpv);
5399                 /* store value = (old == cmp ? new : old);  */
5400                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5401                 if (mod == 3) {
5402                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5403                     gen_op_mov_reg_v(s, ot, rm, newv);
5404                 } else {
5405                     /* Perform an unconditional store cycle like physical cpu;
5406                        must be before changing accumulator to ensure
5407                        idempotency if the store faults and the instruction
5408                        is restarted */
5409                     gen_op_st_v(s, ot, newv, s->A0);
5410                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5411                 }
5412             }
5413             tcg_gen_mov_tl(cpu_cc_src, oldv);
5414             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5415             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5416             set_cc_op(s, CC_OP_SUBB + ot);
5417             tcg_temp_free(oldv);
5418             tcg_temp_free(newv);
5419             tcg_temp_free(cmpv);
5420         }
5421         break;
5422     case 0x1c7: /* cmpxchg8b */
5423         modrm = x86_ldub_code(env, s);
5424         mod = (modrm >> 6) & 3;
5425         switch ((modrm >> 3) & 7) {
5426         case 1: /* CMPXCHG8, CMPXCHG16 */
5427             if (mod == 3) {
5428                 goto illegal_op;
5429             }
5430 #ifdef TARGET_X86_64
5431             if (dflag == MO_64) {
5432                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5433                     goto illegal_op;
5434                 }
5435                 gen_lea_modrm(env, s, modrm);
5436                 if ((s->prefix & PREFIX_LOCK) &&
5437                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5438                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5439                 } else {
5440                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5441                 }
5442                 set_cc_op(s, CC_OP_EFLAGS);
5443                 break;
5444             }
5445 #endif
5446             if (!(s->cpuid_features & CPUID_CX8)) {
5447                 goto illegal_op;
5448             }
5449             gen_lea_modrm(env, s, modrm);
5450             if ((s->prefix & PREFIX_LOCK) &&
5451                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5452                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5453             } else {
5454                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5455             }
5456             set_cc_op(s, CC_OP_EFLAGS);
5457             break;
5458 
5459         case 7: /* RDSEED */
5460         case 6: /* RDRAND */
5461             if (mod != 3 ||
5462                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5463                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5464                 goto illegal_op;
5465             }
5466             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5467                 gen_io_start();
5468             }
5469             gen_helper_rdrand(s->T0, cpu_env);
5470             rm = (modrm & 7) | REX_B(s);
5471             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5472             set_cc_op(s, CC_OP_EFLAGS);
5473             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5474                 gen_jmp(s, s->pc - s->cs_base);
5475             }
5476             break;
5477 
5478         default:
5479             goto illegal_op;
5480         }
5481         break;
5482 
5483         /**************************/
5484         /* push/pop */
5485     case 0x50 ... 0x57: /* push */
5486         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5487         gen_push_v(s, s->T0);
5488         break;
5489     case 0x58 ... 0x5f: /* pop */
5490         ot = gen_pop_T0(s);
5491         /* NOTE: order is important for pop %sp */
5492         gen_pop_update(s, ot);
5493         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5494         break;
5495     case 0x60: /* pusha */
5496         if (CODE64(s))
5497             goto illegal_op;
5498         gen_pusha(s);
5499         break;
5500     case 0x61: /* popa */
5501         if (CODE64(s))
5502             goto illegal_op;
5503         gen_popa(s);
5504         break;
5505     case 0x68: /* push Iv */
5506     case 0x6a:
5507         ot = mo_pushpop(s, dflag);
5508         if (b == 0x68)
5509             val = insn_get(env, s, ot);
5510         else
5511             val = (int8_t)insn_get(env, s, MO_8);
5512         tcg_gen_movi_tl(s->T0, val);
5513         gen_push_v(s, s->T0);
5514         break;
5515     case 0x8f: /* pop Ev */
5516         modrm = x86_ldub_code(env, s);
5517         mod = (modrm >> 6) & 3;
5518         ot = gen_pop_T0(s);
5519         if (mod == 3) {
5520             /* NOTE: order is important for pop %sp */
5521             gen_pop_update(s, ot);
5522             rm = (modrm & 7) | REX_B(s);
5523             gen_op_mov_reg_v(s, ot, rm, s->T0);
5524         } else {
5525             /* NOTE: order is important too for MMU exceptions */
5526             s->popl_esp_hack = 1 << ot;
5527             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5528             s->popl_esp_hack = 0;
5529             gen_pop_update(s, ot);
5530         }
5531         break;
5532     case 0xc8: /* enter */
5533         {
5534             int level;
5535             val = x86_lduw_code(env, s);
5536             level = x86_ldub_code(env, s);
5537             gen_enter(s, val, level);
5538         }
5539         break;
5540     case 0xc9: /* leave */
5541         gen_leave(s);
5542         break;
5543     case 0x06: /* push es */
5544     case 0x0e: /* push cs */
5545     case 0x16: /* push ss */
5546     case 0x1e: /* push ds */
5547         if (CODE64(s))
5548             goto illegal_op;
5549         gen_op_movl_T0_seg(s, b >> 3);
5550         gen_push_v(s, s->T0);
5551         break;
5552     case 0x1a0: /* push fs */
5553     case 0x1a8: /* push gs */
5554         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5555         gen_push_v(s, s->T0);
5556         break;
5557     case 0x07: /* pop es */
5558     case 0x17: /* pop ss */
5559     case 0x1f: /* pop ds */
5560         if (CODE64(s))
5561             goto illegal_op;
5562         reg = b >> 3;
5563         ot = gen_pop_T0(s);
5564         gen_movl_seg_T0(s, reg);
5565         gen_pop_update(s, ot);
5566         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5567         if (s->base.is_jmp) {
5568             gen_jmp_im(s, s->pc - s->cs_base);
5569             if (reg == R_SS) {
5570                 s->flags &= ~HF_TF_MASK;
5571                 gen_eob_inhibit_irq(s, true);
5572             } else {
5573                 gen_eob(s);
5574             }
5575         }
5576         break;
5577     case 0x1a1: /* pop fs */
5578     case 0x1a9: /* pop gs */
5579         ot = gen_pop_T0(s);
5580         gen_movl_seg_T0(s, (b >> 3) & 7);
5581         gen_pop_update(s, ot);
5582         if (s->base.is_jmp) {
5583             gen_jmp_im(s, s->pc - s->cs_base);
5584             gen_eob(s);
5585         }
5586         break;
5587 
5588         /**************************/
5589         /* mov */
5590     case 0x88:
5591     case 0x89: /* mov Gv, Ev */
5592         ot = mo_b_d(b, dflag);
5593         modrm = x86_ldub_code(env, s);
5594         reg = ((modrm >> 3) & 7) | REX_R(s);
5595 
5596         /* generate a generic store */
5597         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5598         break;
5599     case 0xc6:
5600     case 0xc7: /* mov Ev, Iv */
5601         ot = mo_b_d(b, dflag);
5602         modrm = x86_ldub_code(env, s);
5603         mod = (modrm >> 6) & 3;
5604         if (mod != 3) {
5605             s->rip_offset = insn_const_size(ot);
5606             gen_lea_modrm(env, s, modrm);
5607         }
5608         val = insn_get(env, s, ot);
5609         tcg_gen_movi_tl(s->T0, val);
5610         if (mod != 3) {
5611             gen_op_st_v(s, ot, s->T0, s->A0);
5612         } else {
5613             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5614         }
5615         break;
5616     case 0x8a:
5617     case 0x8b: /* mov Ev, Gv */
5618         ot = mo_b_d(b, dflag);
5619         modrm = x86_ldub_code(env, s);
5620         reg = ((modrm >> 3) & 7) | REX_R(s);
5621 
5622         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5623         gen_op_mov_reg_v(s, ot, reg, s->T0);
5624         break;
5625     case 0x8e: /* mov seg, Gv */
5626         modrm = x86_ldub_code(env, s);
5627         reg = (modrm >> 3) & 7;
5628         if (reg >= 6 || reg == R_CS)
5629             goto illegal_op;
5630         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5631         gen_movl_seg_T0(s, reg);
5632         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5633         if (s->base.is_jmp) {
5634             gen_jmp_im(s, s->pc - s->cs_base);
5635             if (reg == R_SS) {
5636                 s->flags &= ~HF_TF_MASK;
5637                 gen_eob_inhibit_irq(s, true);
5638             } else {
5639                 gen_eob(s);
5640             }
5641         }
5642         break;
5643     case 0x8c: /* mov Gv, seg */
5644         modrm = x86_ldub_code(env, s);
5645         reg = (modrm >> 3) & 7;
5646         mod = (modrm >> 6) & 3;
5647         if (reg >= 6)
5648             goto illegal_op;
5649         gen_op_movl_T0_seg(s, reg);
5650         ot = mod == 3 ? dflag : MO_16;
5651         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5652         break;
5653 
5654     case 0x1b6: /* movzbS Gv, Eb */
5655     case 0x1b7: /* movzwS Gv, Eb */
5656     case 0x1be: /* movsbS Gv, Eb */
5657     case 0x1bf: /* movswS Gv, Eb */
5658         {
5659             MemOp d_ot;
5660             MemOp s_ot;
5661 
5662             /* d_ot is the size of destination */
5663             d_ot = dflag;
5664             /* ot is the size of source */
5665             ot = (b & 1) + MO_8;
5666             /* s_ot is the sign+size of source */
5667             s_ot = b & 8 ? MO_SIGN | ot : ot;
5668 
5669             modrm = x86_ldub_code(env, s);
5670             reg = ((modrm >> 3) & 7) | REX_R(s);
5671             mod = (modrm >> 6) & 3;
5672             rm = (modrm & 7) | REX_B(s);
5673 
5674             if (mod == 3) {
5675                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5676                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5677                 } else {
5678                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5679                     switch (s_ot) {
5680                     case MO_UB:
5681                         tcg_gen_ext8u_tl(s->T0, s->T0);
5682                         break;
5683                     case MO_SB:
5684                         tcg_gen_ext8s_tl(s->T0, s->T0);
5685                         break;
5686                     case MO_UW:
5687                         tcg_gen_ext16u_tl(s->T0, s->T0);
5688                         break;
5689                     default:
5690                     case MO_SW:
5691                         tcg_gen_ext16s_tl(s->T0, s->T0);
5692                         break;
5693                     }
5694                 }
5695                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5696             } else {
5697                 gen_lea_modrm(env, s, modrm);
5698                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5699                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5700             }
5701         }
5702         break;
5703 
5704     case 0x8d: /* lea */
5705         modrm = x86_ldub_code(env, s);
5706         mod = (modrm >> 6) & 3;
5707         if (mod == 3)
5708             goto illegal_op;
5709         reg = ((modrm >> 3) & 7) | REX_R(s);
5710         {
5711             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5712             TCGv ea = gen_lea_modrm_1(s, a);
5713             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5714             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5715         }
5716         break;
5717 
5718     case 0xa0: /* mov EAX, Ov */
5719     case 0xa1:
5720     case 0xa2: /* mov Ov, EAX */
5721     case 0xa3:
5722         {
5723             target_ulong offset_addr;
5724 
5725             ot = mo_b_d(b, dflag);
5726             switch (s->aflag) {
5727 #ifdef TARGET_X86_64
5728             case MO_64:
5729                 offset_addr = x86_ldq_code(env, s);
5730                 break;
5731 #endif
5732             default:
5733                 offset_addr = insn_get(env, s, s->aflag);
5734                 break;
5735             }
5736             tcg_gen_movi_tl(s->A0, offset_addr);
5737             gen_add_A0_ds_seg(s);
5738             if ((b & 2) == 0) {
5739                 gen_op_ld_v(s, ot, s->T0, s->A0);
5740                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5741             } else {
5742                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5743                 gen_op_st_v(s, ot, s->T0, s->A0);
5744             }
5745         }
5746         break;
5747     case 0xd7: /* xlat */
5748         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5749         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5750         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5751         gen_extu(s->aflag, s->A0);
5752         gen_add_A0_ds_seg(s);
5753         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5754         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5755         break;
5756     case 0xb0 ... 0xb7: /* mov R, Ib */
5757         val = insn_get(env, s, MO_8);
5758         tcg_gen_movi_tl(s->T0, val);
5759         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5760         break;
5761     case 0xb8 ... 0xbf: /* mov R, Iv */
5762 #ifdef TARGET_X86_64
5763         if (dflag == MO_64) {
5764             uint64_t tmp;
5765             /* 64 bit case */
5766             tmp = x86_ldq_code(env, s);
5767             reg = (b & 7) | REX_B(s);
5768             tcg_gen_movi_tl(s->T0, tmp);
5769             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5770         } else
5771 #endif
5772         {
5773             ot = dflag;
5774             val = insn_get(env, s, ot);
5775             reg = (b & 7) | REX_B(s);
5776             tcg_gen_movi_tl(s->T0, val);
5777             gen_op_mov_reg_v(s, ot, reg, s->T0);
5778         }
5779         break;
5780 
5781     case 0x91 ... 0x97: /* xchg R, EAX */
5782     do_xchg_reg_eax:
5783         ot = dflag;
5784         reg = (b & 7) | REX_B(s);
5785         rm = R_EAX;
5786         goto do_xchg_reg;
5787     case 0x86:
5788     case 0x87: /* xchg Ev, Gv */
5789         ot = mo_b_d(b, dflag);
5790         modrm = x86_ldub_code(env, s);
5791         reg = ((modrm >> 3) & 7) | REX_R(s);
5792         mod = (modrm >> 6) & 3;
5793         if (mod == 3) {
5794             rm = (modrm & 7) | REX_B(s);
5795         do_xchg_reg:
5796             gen_op_mov_v_reg(s, ot, s->T0, reg);
5797             gen_op_mov_v_reg(s, ot, s->T1, rm);
5798             gen_op_mov_reg_v(s, ot, rm, s->T0);
5799             gen_op_mov_reg_v(s, ot, reg, s->T1);
5800         } else {
5801             gen_lea_modrm(env, s, modrm);
5802             gen_op_mov_v_reg(s, ot, s->T0, reg);
5803             /* for xchg, lock is implicit */
5804             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5805                                    s->mem_index, ot | MO_LE);
5806             gen_op_mov_reg_v(s, ot, reg, s->T1);
5807         }
5808         break;
5809     case 0xc4: /* les Gv */
5810         /* In CODE64 this is VEX3; see above.  */
5811         op = R_ES;
5812         goto do_lxx;
5813     case 0xc5: /* lds Gv */
5814         /* In CODE64 this is VEX2; see above.  */
5815         op = R_DS;
5816         goto do_lxx;
5817     case 0x1b2: /* lss Gv */
5818         op = R_SS;
5819         goto do_lxx;
5820     case 0x1b4: /* lfs Gv */
5821         op = R_FS;
5822         goto do_lxx;
5823     case 0x1b5: /* lgs Gv */
5824         op = R_GS;
5825     do_lxx:
5826         ot = dflag != MO_16 ? MO_32 : MO_16;
5827         modrm = x86_ldub_code(env, s);
5828         reg = ((modrm >> 3) & 7) | REX_R(s);
5829         mod = (modrm >> 6) & 3;
5830         if (mod == 3)
5831             goto illegal_op;
5832         gen_lea_modrm(env, s, modrm);
5833         gen_op_ld_v(s, ot, s->T1, s->A0);
5834         gen_add_A0_im(s, 1 << ot);
5835         /* load the segment first to handle exceptions properly */
5836         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5837         gen_movl_seg_T0(s, op);
5838         /* then put the data */
5839         gen_op_mov_reg_v(s, ot, reg, s->T1);
5840         if (s->base.is_jmp) {
5841             gen_jmp_im(s, s->pc - s->cs_base);
5842             gen_eob(s);
5843         }
5844         break;
5845 
5846         /************************/
5847         /* shifts */
    case 0xc0:
    case 0xc1:
        /* shift Ev,Ib */
        /* shift encodes the count source: 2 = imm8 follows the
           operand bytes, 1 = constant 1, 0 = count in CL */
        shift = 2;
    grp2:
        {
            ot = mo_b_d(b, dflag);
            modrm = x86_ldub_code(env, s);
            mod = (modrm >> 6) & 3;
            op = (modrm >> 3) & 7;

            if (mod != 3) {
                if (shift == 2) {
                    /* an immediate byte still follows; RIP-relative
                       addressing must account for it */
                    s->rip_offset = 1;
                }
                gen_lea_modrm(env, s, modrm);
                opreg = OR_TMP0;
            } else {
                opreg = (modrm & 7) | REX_B(s);
            }

            /* simpler op */
            if (shift == 0) {
                /* variable count taken from CL */
                gen_shift(s, op, ot, opreg, OR_ECX);
            } else {
                if (shift == 2) {
                    /* fetch the imm8 count */
                    shift = x86_ldub_code(env, s);
                }
                gen_shifti(s, op, ot, opreg, shift);
            }
        }
        break;
    case 0xd0:
    case 0xd1:
        /* shift Ev,1 */
        shift = 1;
        goto grp2;
    case 0xd2:
    case 0xd3:
        /* shift Ev,cl */
        shift = 0;
        goto grp2;
5890 
    case 0x1a4: /* shld imm */
        op = 0;
        shift = 1;
        goto do_shiftd;
    case 0x1a5: /* shld cl */
        op = 0;
        shift = 0;
        goto do_shiftd;
    case 0x1ac: /* shrd imm */
        op = 1;
        shift = 1;
        goto do_shiftd;
    case 0x1ad: /* shrd cl */
        op = 1;
        shift = 0;
    do_shiftd:
        /* double-precision shift: op selects SHLD (0) / SHRD (1),
           shift selects imm8 count (1) or count in CL (0) */
        ot = dflag;
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | REX_R(s);
        if (mod != 3) {
            gen_lea_modrm(env, s, modrm);
            opreg = OR_TMP0;
        } else {
            opreg = rm;
        }
        /* T1 supplies the bits shifted into the destination */
        gen_op_mov_v_reg(s, ot, s->T1, reg);

        if (shift) {
            TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
            tcg_temp_free(imm);
        } else {
            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
        }
        break;
5928 
5929         /************************/
5930         /* floats */
    case 0xd8 ... 0xdf:
        if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
            /* if CR0.EM or CR0.TS are set, generate an FPU exception */
            /* XXX: what to do if illegal op ? */
            gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
            break;
        }
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        rm = modrm & 7;
        /* 6-bit x87 opcode index: low 3 bits of the escape byte
           (0xd8..0xdf) combined with the ModRM reg field */
        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
        if (mod != 3) {
            /* memory op */
            gen_lea_modrm(env, s, modrm);
            switch(op) {
            case 0x00 ... 0x07: /* fxxxs */
            case 0x10 ... 0x17: /* fixxxl */
            case 0x20 ... 0x27: /* fxxxl */
            case 0x30 ... 0x37: /* fixxx */
                {
                    int op1;
                    op1 = op & 7;

                    /* op >> 4 selects the memory operand type:
                       0 = float32, 1 = int32, 2 = float64, 3 = int16 */
                    switch(op >> 4) {
                    case 0:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
                        break;
                    case 1:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                        break;
                    case 2:
                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                            s->mem_index, MO_LEQ);
                        gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
                        break;
                    case 3:
                    default:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LESW);
                        gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
                        break;
                    }

                    gen_helper_fp_arith_ST0_FT0(op1);
                    if (op1 == 3) {
                        /* fcomp needs pop */
                        gen_helper_fpop(cpu_env);
                    }
                }
                break;
            case 0x08: /* flds */
            case 0x0a: /* fsts */
            case 0x0b: /* fstps */
            case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
            case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
            case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
                /* op & 7 selects the action: 0 = load, 1 = fisttp
                   (truncating store + pop), default = store, with
                   (op & 7) == 3 additionally popping */
                switch(op & 7) {
                case 0:
                    switch(op >> 4) {
                    case 0:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
                        break;
                    case 1:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                        break;
                    case 2:
                        tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
                                            s->mem_index, MO_LEQ);
                        gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
                        break;
                    case 3:
                    default:
                        tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LESW);
                        gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
                        break;
                    }
                    break;
                case 1:
                    /* XXX: the corresponding CPUID bit must be tested ! */
                    switch(op >> 4) {
                    case 1:
                        gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        break;
                    case 2:
                        gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                            s->mem_index, MO_LEQ);
                        break;
                    case 3:
                    default:
                        gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUW);
                        break;
                    }
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    switch(op >> 4) {
                    case 0:
                        gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        break;
                    case 1:
                        gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUL);
                        break;
                    case 2:
                        gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
                        tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
                                            s->mem_index, MO_LEQ);
                        break;
                    case 3:
                    default:
                        gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
                        tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                            s->mem_index, MO_LEUW);
                        break;
                    }
                    if ((op & 7) == 3)
                        gen_helper_fpop(cpu_env);
                    break;
                }
                break;
            case 0x0c: /* fldenv mem */
                gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1))
;
                break;
            case 0x0d: /* fldcw mem */
                tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                gen_helper_fldcw(cpu_env, s->tmp2_i32);
                break;
            case 0x0e: /* fnstenv mem */
                gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x0f: /* fnstcw mem */
                gen_helper_fnstcw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x1d: /* fldt mem */
                gen_helper_fldt_ST0(cpu_env, s->A0);
                break;
            case 0x1f: /* fstpt mem */
                gen_helper_fstt_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* frstor mem */
                gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2e: /* fnsave mem */
                gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1));
                break;
            case 0x2f: /* fnstsw mem */
                gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
                                    s->mem_index, MO_LEUW);
                break;
            case 0x3c: /* fbld */
                gen_helper_fbld_ST0(cpu_env, s->A0);
                break;
            case 0x3e: /* fbstp */
                gen_helper_fbst_ST0(cpu_env, s->A0);
                gen_helper_fpop(cpu_env);
                break;
            case 0x3d: /* fildll */
                tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
                break;
            case 0x3f: /* fistpll */
                gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
                tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ);
                gen_helper_fpop(cpu_env);
                break;
            default:
                goto unknown_op;
            }
        } else {
            /* register float ops */
            opreg = rm;

            switch(op) {
            case 0x08: /* fld sti */
                gen_helper_fpush(cpu_env);
                /* fpush shifted the stack; the original ST(i) is now
                   one slot further down, hence opreg + 1 */
                gen_helper_fmov_ST0_STN(cpu_env,
                                        tcg_const_i32((opreg + 1) & 7));
                break;
            case 0x09: /* fxchg sti */
            case 0x29: /* fxchg4 sti, undocumented op */
            case 0x39: /* fxchg7 sti, undocumented op */
                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x0a: /* grp d9/2 */
                switch(rm) {
                case 0: /* fnop */
                    /* check exceptions (FreeBSD FPU probe) */
                    gen_helper_fwait(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0c: /* grp d9/4 */
                switch(rm) {
                case 0: /* fchs */
                    gen_helper_fchs_ST0(cpu_env);
                    break;
                case 1: /* fabs */
                    gen_helper_fabs_ST0(cpu_env);
                    break;
                case 4: /* ftst */
                    gen_helper_fldz_FT0(cpu_env);
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    break;
                case 5: /* fxam */
                    gen_helper_fxam_ST0(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x0d: /* grp d9/5 */
                {
                    /* constant loads: push, then load the constant
                       selected by rm into the new ST0 */
                    switch(rm) {
                    case 0:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fld1_ST0(cpu_env);
                        break;
                    case 1:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2t_ST0(cpu_env);
                        break;
                    case 2:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldl2e_ST0(cpu_env);
                        break;
                    case 3:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldpi_ST0(cpu_env);
                        break;
                    case 4:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldlg2_ST0(cpu_env);
                        break;
                    case 5:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldln2_ST0(cpu_env);
                        break;
                    case 6:
                        gen_helper_fpush(cpu_env);
                        gen_helper_fldz_ST0(cpu_env);
                        break;
                    default:
                        goto unknown_op;
                    }
                }
                break;
            case 0x0e: /* grp d9/6 */
                switch(rm) {
                case 0: /* f2xm1 */
                    gen_helper_f2xm1(cpu_env);
                    break;
                case 1: /* fyl2x */
                    gen_helper_fyl2x(cpu_env);
                    break;
                case 2: /* fptan */
                    gen_helper_fptan(cpu_env);
                    break;
                case 3: /* fpatan */
                    gen_helper_fpatan(cpu_env);
                    break;
                case 4: /* fxtract */
                    gen_helper_fxtract(cpu_env);
                    break;
                case 5: /* fprem1 */
                    gen_helper_fprem1(cpu_env);
                    break;
                case 6: /* fdecstp */
                    gen_helper_fdecstp(cpu_env);
                    break;
                default:
                case 7: /* fincstp */
                    gen_helper_fincstp(cpu_env);
                    break;
                }
                break;
            case 0x0f: /* grp d9/7 */
                switch(rm) {
                case 0: /* fprem */
                    gen_helper_fprem(cpu_env);
                    break;
                case 1: /* fyl2xp1 */
                    gen_helper_fyl2xp1(cpu_env);
                    break;
                case 2: /* fsqrt */
                    gen_helper_fsqrt(cpu_env);
                    break;
                case 3: /* fsincos */
                    gen_helper_fsincos(cpu_env);
                    break;
                case 5: /* fscale */
                    gen_helper_fscale(cpu_env);
                    break;
                case 4: /* frndint */
                    gen_helper_frndint(cpu_env);
                    break;
                case 6: /* fsin */
                    gen_helper_fsin(cpu_env);
                    break;
                default:
                case 7: /* fcos */
                    gen_helper_fcos(cpu_env);
                    break;
                }
                break;
            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
                {
                    int op1;

                    op1 = op & 7;
                    if (op >= 0x20) {
                        gen_helper_fp_arith_STN_ST0(op1, opreg);
                        if (op >= 0x30)
                            gen_helper_fpop(cpu_env);
                    } else {
                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                        gen_helper_fp_arith_ST0_FT0(op1);
                    }
                }
                break;
            case 0x02: /* fcom */
            case 0x22: /* fcom2, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                break;
            case 0x03: /* fcomp */
            case 0x23: /* fcomp3, undocumented op */
            case 0x32: /* fcomp5, undocumented op */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x15: /* da/5 */
                switch(rm) {
                case 1: /* fucompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fucom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1c:
                switch(rm) {
                case 0: /* feni (287 only, just do nop here) */
                    break;
                case 1: /* fdisi (287 only, just do nop here) */
                    break;
                case 2: /* fclex */
                    gen_helper_fclex(cpu_env);
                    break;
                case 3: /* fninit */
                    gen_helper_fninit(cpu_env);
                    break;
                case 4: /* fsetpm (287 only, just do nop here) */
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x1d: /* fucomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x1e: /* fcomi */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x28: /* ffree sti */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2a: /* fst sti */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                break;
            case 0x2b: /* fstp sti */
            case 0x0b: /* fstp1 sti, undocumented op */
            case 0x3a: /* fstp8 sti, undocumented op */
            case 0x3b: /* fstp9 sti, undocumented op */
                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x2c: /* fucom st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                break;
            case 0x2d: /* fucomp st(i) */
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucom_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                break;
            case 0x33: /* de/3 */
                switch(rm) {
                case 1: /* fcompp */
                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
                    gen_helper_fcom_ST0_FT0(cpu_env);
                    gen_helper_fpop(cpu_env);
                    gen_helper_fpop(cpu_env);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x38: /* ffreep sti, undocumented op */
                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fpop(cpu_env);
                break;
            case 0x3c: /* df/4 */
                switch(rm) {
                case 0:
                    /* fnstsw ax: status word into AX */
                    gen_helper_fnstsw(s->tmp2_i32, cpu_env);
                    tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
                    gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
                    break;
                default:
                    goto unknown_op;
                }
                break;
            case 0x3d: /* fucomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fucomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x3e: /* fcomip */
                if (!(s->cpuid_features & CPUID_CMOV)) {
                    goto illegal_op;
                }
                gen_update_cc_op(s);
                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                gen_helper_fcomi_ST0_FT0(cpu_env);
                gen_helper_fpop(cpu_env);
                set_cc_op(s, CC_OP_EFLAGS);
                break;
            case 0x10 ... 0x13: /* fcmovxx */
            case 0x18 ... 0x1b:
                {
                    int op1;
                    TCGLabel *l1;
                    static const uint8_t fcmov_cc[8] = {
                        (JCC_B << 1),
                        (JCC_Z << 1),
                        (JCC_BE << 1),
                        (JCC_P << 1),
                    };

                    if (!(s->cpuid_features & CPUID_CMOV)) {
                        goto illegal_op;
                    }
                    /* the generated branch skips the fmov, so bit 3 of
                       op (the "not" forms) is inverted here */
                    op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                    l1 = gen_new_label();
                    gen_jcc1_noeob(s, op1, l1);
                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                    gen_set_label(l1);
                }
                break;
            default:
                goto unknown_op;
            }
        }
        break;
6432         /************************/
6433         /* string ops */
6434 
    case 0xa4: /* movsS */
    case 0xa5:
        ot = mo_b_d(b, dflag);
        /* either REP prefix selects the repeated form */
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_movs(s, ot);
        }
        break;

    case 0xaa: /* stosS */
    case 0xab:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_stos(s, ot);
        }
        break;
    case 0xac: /* lodsS */
    case 0xad:
        ot = mo_b_d(b, dflag);
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
        } else {
            gen_lods(s, ot);
        }
        break;
    case 0xae: /* scasS */
    case 0xaf:
        ot = mo_b_d(b, dflag);
        /* last argument distinguishes REPNZ (1) from REPZ (0) */
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_scas(s, ot);
        }
        break;

    case 0xa6: /* cmpsS */
    case 0xa7:
        ot = mo_b_d(b, dflag);
        /* last argument distinguishes REPNZ (1) from REPZ (0) */
        if (prefixes & PREFIX_REPNZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
        } else if (prefixes & PREFIX_REPZ) {
            gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
        } else {
            gen_cmps(s, ot);
        }
        break;
    case 0x6c: /* insS */
    case 0x6d:
        ot = mo_b_d32(b, dflag);
        /* port number: DX, zero-extended to 16 bits */
        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
        /* bail out if the I/O permission / SVM intercept check fails */
        if (!gen_check_io(s, ot, s->tmp2_i32,
                          SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_ins */
        } else {
            gen_ins(s, ot);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                /* icount: end the TB after the I/O access */
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
    case 0x6e: /* outsS */
    case 0x6f:
        ot = mo_b_d32(b, dflag);
        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
            gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
            /* jump generated by gen_repz_outs */
        } else {
            gen_outs(s, ot);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
6529 
6530         /************************/
6531         /* port I/O */
6532 
    case 0xe4:
    case 0xe5:
        /* in AL/eAX, imm8 */
        ot = mo_b_d32(b, dflag);
        val = x86_ldub_code(env, s);
        tcg_gen_movi_i32(s->tmp2_i32, val);
        /* bail out if the I/O permission / SVM intercept check fails */
        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            /* icount: end the TB after the I/O access */
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xe6:
    case 0xe7:
        /* out imm8, AL/eAX */
        ot = mo_b_d32(b, dflag);
        val = x86_ldub_code(env, s);
        tcg_gen_movi_i32(s->tmp2_i32, val);
        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xec:
    case 0xed:
        /* in AL/eAX, DX */
        ot = mo_b_d32(b, dflag);
        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
        if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_helper_in_func(ot, s->T1, s->tmp2_i32);
        gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
    case 0xee:
    case 0xef:
        /* out DX, AL/eAX */
        ot = mo_b_d32(b, dflag);
        tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
        tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
        if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
            break;
        }
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
        gen_bpt_io(s, s->tmp2_i32, ot);
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_jmp(s, s->pc - s->cs_base);
        }
        break;
6607 
6608         /************************/
6609         /* control */
    case 0xc2: /* ret im */
        val = x86_ldsw_code(env, s);
        ot = gen_pop_T0(s);
        /* release the return address plus imm16 extra stack bytes */
        gen_stack_update(s, val + (1 << ot));
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(s->T0);
        gen_bnd_jmp(s);
        gen_jr(s, s->T0);
        break;
    case 0xc3: /* ret */
        ot = gen_pop_T0(s);
        gen_pop_update(s, ot);
        /* Note that gen_pop_T0 uses a zero-extending load.  */
        gen_op_jmp_v(s->T0);
        gen_bnd_jmp(s);
        gen_jr(s, s->T0);
        break;
6627     case 0xca: /* lret im */
6628         val = x86_ldsw_code(env, s);
6629     do_lret:
6630         if (PE(s) && !VM86(s)) {
6631             gen_update_cc_op(s);
6632             gen_jmp_im(s, pc_start - s->cs_base);
6633             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6634                                       tcg_const_i32(val));
6635         } else {
6636             gen_stack_A0(s);
6637             /* pop offset */
6638             gen_op_ld_v(s, dflag, s->T0, s->A0);
6639             /* NOTE: keeping EIP updated is not a problem in case of
6640                exception */
6641             gen_op_jmp_v(s->T0);
6642             /* pop selector */
6643             gen_add_A0_im(s, 1 << dflag);
6644             gen_op_ld_v(s, dflag, s->T0, s->A0);
6645             gen_op_movl_seg_T0_vm(s, R_CS);
6646             /* add stack offset */
6647             gen_stack_update(s, val + (2 << dflag));
6648         }
6649         gen_eob(s);
6650         break;
6651     case 0xcb: /* lret */
6652         val = 0;
6653         goto do_lret;
6654     case 0xcf: /* iret */
6655         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6656         if (!PE(s) || VM86(s)) {
6657             /* real mode or vm86 mode */
6658             if (!check_vm86_iopl(s)) {
6659                 break;
6660             }
6661             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6662         } else {
6663             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6664                                       tcg_const_i32(s->pc - s->cs_base));
6665         }
6666         set_cc_op(s, CC_OP_EFLAGS);
6667         gen_eob(s);
6668         break;
6669     case 0xe8: /* call im */
6670         {
6671             if (dflag != MO_16) {
6672                 tval = (int32_t)insn_get(env, s, MO_32);
6673             } else {
6674                 tval = (int16_t)insn_get(env, s, MO_16);
6675             }
6676             next_eip = s->pc - s->cs_base;
6677             tval += next_eip;
6678             if (dflag == MO_16) {
6679                 tval &= 0xffff;
6680             } else if (!CODE64(s)) {
6681                 tval &= 0xffffffff;
6682             }
6683             tcg_gen_movi_tl(s->T0, next_eip);
6684             gen_push_v(s, s->T0);
6685             gen_bnd_jmp(s);
6686             gen_jmp(s, tval);
6687         }
6688         break;
6689     case 0x9a: /* lcall im */
6690         {
6691             unsigned int selector, offset;
6692 
6693             if (CODE64(s))
6694                 goto illegal_op;
6695             ot = dflag;
6696             offset = insn_get(env, s, ot);
6697             selector = insn_get(env, s, MO_16);
6698 
6699             tcg_gen_movi_tl(s->T0, selector);
6700             tcg_gen_movi_tl(s->T1, offset);
6701         }
6702         goto do_lcall;
6703     case 0xe9: /* jmp im */
6704         if (dflag != MO_16) {
6705             tval = (int32_t)insn_get(env, s, MO_32);
6706         } else {
6707             tval = (int16_t)insn_get(env, s, MO_16);
6708         }
6709         tval += s->pc - s->cs_base;
6710         if (dflag == MO_16) {
6711             tval &= 0xffff;
6712         } else if (!CODE64(s)) {
6713             tval &= 0xffffffff;
6714         }
6715         gen_bnd_jmp(s);
6716         gen_jmp(s, tval);
6717         break;
6718     case 0xea: /* ljmp im */
6719         {
6720             unsigned int selector, offset;
6721 
6722             if (CODE64(s))
6723                 goto illegal_op;
6724             ot = dflag;
6725             offset = insn_get(env, s, ot);
6726             selector = insn_get(env, s, MO_16);
6727 
6728             tcg_gen_movi_tl(s->T0, selector);
6729             tcg_gen_movi_tl(s->T1, offset);
6730         }
6731         goto do_ljmp;
6732     case 0xeb: /* jmp Jb */
6733         tval = (int8_t)insn_get(env, s, MO_8);
6734         tval += s->pc - s->cs_base;
6735         if (dflag == MO_16) {
6736             tval &= 0xffff;
6737         }
6738         gen_jmp(s, tval);
6739         break;
6740     case 0x70 ... 0x7f: /* jcc Jb */
6741         tval = (int8_t)insn_get(env, s, MO_8);
6742         goto do_jcc;
6743     case 0x180 ... 0x18f: /* jcc Jv */
6744         if (dflag != MO_16) {
6745             tval = (int32_t)insn_get(env, s, MO_32);
6746         } else {
6747             tval = (int16_t)insn_get(env, s, MO_16);
6748         }
6749     do_jcc:
6750         next_eip = s->pc - s->cs_base;
6751         tval += next_eip;
6752         if (dflag == MO_16) {
6753             tval &= 0xffff;
6754         }
6755         gen_bnd_jmp(s);
6756         gen_jcc(s, b, tval, next_eip);
6757         break;
6758 
6759     case 0x190 ... 0x19f: /* setcc Gv */
6760         modrm = x86_ldub_code(env, s);
6761         gen_setcc1(s, b, s->T0);
6762         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6763         break;
6764     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6765         if (!(s->cpuid_features & CPUID_CMOV)) {
6766             goto illegal_op;
6767         }
6768         ot = dflag;
6769         modrm = x86_ldub_code(env, s);
6770         reg = ((modrm >> 3) & 7) | REX_R(s);
6771         gen_cmovcc1(env, s, ot, b, modrm, reg);
6772         break;
6773 
6774         /************************/
6775         /* flags */
6776     case 0x9c: /* pushf */
6777         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6778         if (check_vm86_iopl(s)) {
6779             gen_update_cc_op(s);
6780             gen_helper_read_eflags(s->T0, cpu_env);
6781             gen_push_v(s, s->T0);
6782         }
6783         break;
6784     case 0x9d: /* popf */
6785         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6786         if (check_vm86_iopl(s)) {
6787             ot = gen_pop_T0(s);
6788             if (CPL(s) == 0) {
6789                 if (dflag != MO_16) {
6790                     gen_helper_write_eflags(cpu_env, s->T0,
6791                                             tcg_const_i32((TF_MASK | AC_MASK |
6792                                                            ID_MASK | NT_MASK |
6793                                                            IF_MASK |
6794                                                            IOPL_MASK)));
6795                 } else {
6796                     gen_helper_write_eflags(cpu_env, s->T0,
6797                                             tcg_const_i32((TF_MASK | AC_MASK |
6798                                                            ID_MASK | NT_MASK |
6799                                                            IF_MASK | IOPL_MASK)
6800                                                           & 0xffff));
6801                 }
6802             } else {
6803                 if (CPL(s) <= IOPL(s)) {
6804                     if (dflag != MO_16) {
6805                         gen_helper_write_eflags(cpu_env, s->T0,
6806                                                 tcg_const_i32((TF_MASK |
6807                                                                AC_MASK |
6808                                                                ID_MASK |
6809                                                                NT_MASK |
6810                                                                IF_MASK)));
6811                     } else {
6812                         gen_helper_write_eflags(cpu_env, s->T0,
6813                                                 tcg_const_i32((TF_MASK |
6814                                                                AC_MASK |
6815                                                                ID_MASK |
6816                                                                NT_MASK |
6817                                                                IF_MASK)
6818                                                               & 0xffff));
6819                     }
6820                 } else {
6821                     if (dflag != MO_16) {
6822                         gen_helper_write_eflags(cpu_env, s->T0,
6823                                            tcg_const_i32((TF_MASK | AC_MASK |
6824                                                           ID_MASK | NT_MASK)));
6825                     } else {
6826                         gen_helper_write_eflags(cpu_env, s->T0,
6827                                            tcg_const_i32((TF_MASK | AC_MASK |
6828                                                           ID_MASK | NT_MASK)
6829                                                          & 0xffff));
6830                     }
6831                 }
6832             }
6833             gen_pop_update(s, ot);
6834             set_cc_op(s, CC_OP_EFLAGS);
6835             /* abort translation because TF/AC flag may change */
6836             gen_jmp_im(s, s->pc - s->cs_base);
6837             gen_eob(s);
6838         }
6839         break;
6840     case 0x9e: /* sahf */
6841         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6842             goto illegal_op;
6843         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6844         gen_compute_eflags(s);
6845         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6846         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6847         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6848         break;
6849     case 0x9f: /* lahf */
6850         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6851             goto illegal_op;
6852         gen_compute_eflags(s);
6853         /* Note: gen_compute_eflags() only gives the condition codes */
6854         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6855         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6856         break;
6857     case 0xf5: /* cmc */
6858         gen_compute_eflags(s);
6859         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6860         break;
6861     case 0xf8: /* clc */
6862         gen_compute_eflags(s);
6863         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6864         break;
6865     case 0xf9: /* stc */
6866         gen_compute_eflags(s);
6867         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6868         break;
6869     case 0xfc: /* cld */
6870         tcg_gen_movi_i32(s->tmp2_i32, 1);
6871         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6872         break;
6873     case 0xfd: /* std */
6874         tcg_gen_movi_i32(s->tmp2_i32, -1);
6875         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6876         break;
6877 
6878         /************************/
6879         /* bit operations */
6880     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6881         ot = dflag;
6882         modrm = x86_ldub_code(env, s);
6883         op = (modrm >> 3) & 7;
6884         mod = (modrm >> 6) & 3;
6885         rm = (modrm & 7) | REX_B(s);
6886         if (mod != 3) {
6887             s->rip_offset = 1;
6888             gen_lea_modrm(env, s, modrm);
6889             if (!(s->prefix & PREFIX_LOCK)) {
6890                 gen_op_ld_v(s, ot, s->T0, s->A0);
6891             }
6892         } else {
6893             gen_op_mov_v_reg(s, ot, s->T0, rm);
6894         }
6895         /* load shift */
6896         val = x86_ldub_code(env, s);
6897         tcg_gen_movi_tl(s->T1, val);
6898         if (op < 4)
6899             goto unknown_op;
6900         op -= 4;
6901         goto bt_op;
6902     case 0x1a3: /* bt Gv, Ev */
6903         op = 0;
6904         goto do_btx;
6905     case 0x1ab: /* bts */
6906         op = 1;
6907         goto do_btx;
6908     case 0x1b3: /* btr */
6909         op = 2;
6910         goto do_btx;
6911     case 0x1bb: /* btc */
6912         op = 3;
6913     do_btx:
6914         ot = dflag;
6915         modrm = x86_ldub_code(env, s);
6916         reg = ((modrm >> 3) & 7) | REX_R(s);
6917         mod = (modrm >> 6) & 3;
6918         rm = (modrm & 7) | REX_B(s);
6919         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6920         if (mod != 3) {
6921             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6922             /* specific case: we need to add a displacement */
6923             gen_exts(ot, s->T1);
6924             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6925             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6926             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6927             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6928             if (!(s->prefix & PREFIX_LOCK)) {
6929                 gen_op_ld_v(s, ot, s->T0, s->A0);
6930             }
6931         } else {
6932             gen_op_mov_v_reg(s, ot, s->T0, rm);
6933         }
6934     bt_op:
6935         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6936         tcg_gen_movi_tl(s->tmp0, 1);
6937         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6938         if (s->prefix & PREFIX_LOCK) {
6939             switch (op) {
6940             case 0: /* bt */
6941         /* Needs no atomic ops; we suppressed the normal
6942                    memory load for LOCK above so do it now.  */
6943                 gen_op_ld_v(s, ot, s->T0, s->A0);
6944                 break;
6945             case 1: /* bts */
6946                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6947                                            s->mem_index, ot | MO_LE);
6948                 break;
6949             case 2: /* btr */
6950                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6951                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6952                                             s->mem_index, ot | MO_LE);
6953                 break;
6954             default:
6955             case 3: /* btc */
6956                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6957                                             s->mem_index, ot | MO_LE);
6958                 break;
6959             }
6960             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6961         } else {
6962             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6963             switch (op) {
6964             case 0: /* bt */
6965                 /* Data already loaded; nothing to do.  */
6966                 break;
6967             case 1: /* bts */
6968                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6969                 break;
6970             case 2: /* btr */
6971                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6972                 break;
6973             default:
6974             case 3: /* btc */
6975                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
6976                 break;
6977             }
6978             if (op != 0) {
6979                 if (mod != 3) {
6980                     gen_op_st_v(s, ot, s->T0, s->A0);
6981                 } else {
6982                     gen_op_mov_reg_v(s, ot, rm, s->T0);
6983                 }
6984             }
6985         }
6986 
6987         /* Delay all CC updates until after the store above.  Note that
6988            C is the result of the test, Z is unchanged, and the others
6989            are all undefined.  */
6990         switch (s->cc_op) {
6991         case CC_OP_MULB ... CC_OP_MULQ:
6992         case CC_OP_ADDB ... CC_OP_ADDQ:
6993         case CC_OP_ADCB ... CC_OP_ADCQ:
6994         case CC_OP_SUBB ... CC_OP_SUBQ:
6995         case CC_OP_SBBB ... CC_OP_SBBQ:
6996         case CC_OP_LOGICB ... CC_OP_LOGICQ:
6997         case CC_OP_INCB ... CC_OP_INCQ:
6998         case CC_OP_DECB ... CC_OP_DECQ:
6999         case CC_OP_SHLB ... CC_OP_SHLQ:
7000         case CC_OP_SARB ... CC_OP_SARQ:
7001         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7002             /* Z was going to be computed from the non-zero status of CC_DST.
7003                We can get that same Z value (and the new C value) by leaving
7004                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7005                same width.  */
7006             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7007             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7008             break;
7009         default:
7010             /* Otherwise, generate EFLAGS and replace the C bit.  */
7011             gen_compute_eflags(s);
7012             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7013                                ctz32(CC_C), 1);
7014             break;
7015         }
7016         break;
7017     case 0x1bc: /* bsf / tzcnt */
7018     case 0x1bd: /* bsr / lzcnt */
7019         ot = dflag;
7020         modrm = x86_ldub_code(env, s);
7021         reg = ((modrm >> 3) & 7) | REX_R(s);
7022         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7023         gen_extu(ot, s->T0);
7024 
7025         /* Note that lzcnt and tzcnt are in different extensions.  */
7026         if ((prefixes & PREFIX_REPZ)
7027             && (b & 1
7028                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7029                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7030             int size = 8 << ot;
7031             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7032             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7033             if (b & 1) {
7034                 /* For lzcnt, reduce the target_ulong result by the
7035                    number of zeros that we expect to find at the top.  */
7036                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7037                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7038             } else {
7039                 /* For tzcnt, a zero input must return the operand size.  */
7040                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7041             }
7042             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7043             gen_op_update1_cc(s);
7044             set_cc_op(s, CC_OP_BMILGB + ot);
7045         } else {
7046             /* For bsr/bsf, only the Z bit is defined and it is related
7047                to the input and not the result.  */
7048             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7049             set_cc_op(s, CC_OP_LOGICB + ot);
7050 
7051             /* ??? The manual says that the output is undefined when the
7052                input is zero, but real hardware leaves it unchanged, and
7053                real programs appear to depend on that.  Accomplish this
7054                by passing the output as the value to return upon zero.  */
7055             if (b & 1) {
7056                 /* For bsr, return the bit index of the first 1 bit,
7057                    not the count of leading zeros.  */
7058                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7059                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7060                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7061             } else {
7062                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7063             }
7064         }
7065         gen_op_mov_reg_v(s, ot, reg, s->T0);
7066         break;
7067         /************************/
7068         /* bcd */
7069     case 0x27: /* daa */
7070         if (CODE64(s))
7071             goto illegal_op;
7072         gen_update_cc_op(s);
7073         gen_helper_daa(cpu_env);
7074         set_cc_op(s, CC_OP_EFLAGS);
7075         break;
7076     case 0x2f: /* das */
7077         if (CODE64(s))
7078             goto illegal_op;
7079         gen_update_cc_op(s);
7080         gen_helper_das(cpu_env);
7081         set_cc_op(s, CC_OP_EFLAGS);
7082         break;
7083     case 0x37: /* aaa */
7084         if (CODE64(s))
7085             goto illegal_op;
7086         gen_update_cc_op(s);
7087         gen_helper_aaa(cpu_env);
7088         set_cc_op(s, CC_OP_EFLAGS);
7089         break;
7090     case 0x3f: /* aas */
7091         if (CODE64(s))
7092             goto illegal_op;
7093         gen_update_cc_op(s);
7094         gen_helper_aas(cpu_env);
7095         set_cc_op(s, CC_OP_EFLAGS);
7096         break;
7097     case 0xd4: /* aam */
7098         if (CODE64(s))
7099             goto illegal_op;
7100         val = x86_ldub_code(env, s);
7101         if (val == 0) {
7102             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7103         } else {
7104             gen_helper_aam(cpu_env, tcg_const_i32(val));
7105             set_cc_op(s, CC_OP_LOGICB);
7106         }
7107         break;
7108     case 0xd5: /* aad */
7109         if (CODE64(s))
7110             goto illegal_op;
7111         val = x86_ldub_code(env, s);
7112         gen_helper_aad(cpu_env, tcg_const_i32(val));
7113         set_cc_op(s, CC_OP_LOGICB);
7114         break;
7115         /************************/
7116         /* misc */
7117     case 0x90: /* nop */
7118         /* XXX: correct lock test for all insn */
7119         if (prefixes & PREFIX_LOCK) {
7120             goto illegal_op;
7121         }
7122         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7123         if (REX_B(s)) {
7124             goto do_xchg_reg_eax;
7125         }
7126         if (prefixes & PREFIX_REPZ) {
7127             gen_update_cc_op(s);
7128             gen_jmp_im(s, pc_start - s->cs_base);
7129             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7130             s->base.is_jmp = DISAS_NORETURN;
7131         }
7132         break;
7133     case 0x9b: /* fwait */
7134         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7135             (HF_MP_MASK | HF_TS_MASK)) {
7136             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7137         } else {
7138             gen_helper_fwait(cpu_env);
7139         }
7140         break;
7141     case 0xcc: /* int3 */
7142         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7143         break;
7144     case 0xcd: /* int N */
7145         val = x86_ldub_code(env, s);
7146         if (check_vm86_iopl(s)) {
7147             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7148         }
7149         break;
7150     case 0xce: /* into */
7151         if (CODE64(s))
7152             goto illegal_op;
7153         gen_update_cc_op(s);
7154         gen_jmp_im(s, pc_start - s->cs_base);
7155         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7156         break;
7157 #ifdef WANT_ICEBP
7158     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7159         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7160         gen_debug(s);
7161         break;
7162 #endif
7163     case 0xfa: /* cli */
7164         if (check_iopl(s)) {
7165             gen_helper_cli(cpu_env);
7166         }
7167         break;
7168     case 0xfb: /* sti */
7169         if (check_iopl(s)) {
7170             gen_helper_sti(cpu_env);
7171             /* interruptions are enabled only the first insn after sti */
7172             gen_jmp_im(s, s->pc - s->cs_base);
7173             gen_eob_inhibit_irq(s, true);
7174         }
7175         break;
7176     case 0x62: /* bound */
7177         if (CODE64(s))
7178             goto illegal_op;
7179         ot = dflag;
7180         modrm = x86_ldub_code(env, s);
7181         reg = (modrm >> 3) & 7;
7182         mod = (modrm >> 6) & 3;
7183         if (mod == 3)
7184             goto illegal_op;
7185         gen_op_mov_v_reg(s, ot, s->T0, reg);
7186         gen_lea_modrm(env, s, modrm);
7187         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7188         if (ot == MO_16) {
7189             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7190         } else {
7191             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7192         }
7193         break;
7194     case 0x1c8 ... 0x1cf: /* bswap reg */
7195         reg = (b & 7) | REX_B(s);
7196 #ifdef TARGET_X86_64
7197         if (dflag == MO_64) {
7198             gen_op_mov_v_reg(s, MO_64, s->T0, reg);
7199             tcg_gen_bswap64_i64(s->T0, s->T0);
7200             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
7201         } else
7202 #endif
7203         {
7204             gen_op_mov_v_reg(s, MO_32, s->T0, reg);
7205             tcg_gen_ext32u_tl(s->T0, s->T0);
7206             tcg_gen_bswap32_tl(s->T0, s->T0);
7207             gen_op_mov_reg_v(s, MO_32, reg, s->T0);
7208         }
7209         break;
7210     case 0xd6: /* salc */
7211         if (CODE64(s))
7212             goto illegal_op;
7213         gen_compute_eflags_c(s, s->T0);
7214         tcg_gen_neg_tl(s->T0, s->T0);
7215         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7216         break;
7217     case 0xe0: /* loopnz */
7218     case 0xe1: /* loopz */
7219     case 0xe2: /* loop */
7220     case 0xe3: /* jecxz */
7221         {
7222             TCGLabel *l1, *l2, *l3;
7223 
7224             tval = (int8_t)insn_get(env, s, MO_8);
7225             next_eip = s->pc - s->cs_base;
7226             tval += next_eip;
7227             if (dflag == MO_16) {
7228                 tval &= 0xffff;
7229             }
7230 
7231             l1 = gen_new_label();
7232             l2 = gen_new_label();
7233             l3 = gen_new_label();
7234             gen_update_cc_op(s);
7235             b &= 3;
7236             switch(b) {
7237             case 0: /* loopnz */
7238             case 1: /* loopz */
7239                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7240                 gen_op_jz_ecx(s, s->aflag, l3);
7241                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7242                 break;
7243             case 2: /* loop */
7244                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7245                 gen_op_jnz_ecx(s, s->aflag, l1);
7246                 break;
7247             default:
7248             case 3: /* jcxz */
7249                 gen_op_jz_ecx(s, s->aflag, l1);
7250                 break;
7251             }
7252 
7253             gen_set_label(l3);
7254             gen_jmp_im(s, next_eip);
7255             tcg_gen_br(l2);
7256 
7257             gen_set_label(l1);
7258             gen_jmp_im(s, tval);
7259             gen_set_label(l2);
7260             gen_eob(s);
7261         }
7262         break;
7263     case 0x130: /* wrmsr */
7264     case 0x132: /* rdmsr */
7265         if (check_cpl0(s)) {
7266             gen_update_cc_op(s);
7267             gen_jmp_im(s, pc_start - s->cs_base);
7268             if (b & 2) {
7269                 gen_helper_rdmsr(cpu_env);
7270             } else {
7271                 gen_helper_wrmsr(cpu_env);
7272                 gen_jmp_im(s, s->pc - s->cs_base);
7273                 gen_eob(s);
7274             }
7275         }
7276         break;
7277     case 0x131: /* rdtsc */
7278         gen_update_cc_op(s);
7279         gen_jmp_im(s, pc_start - s->cs_base);
7280         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7281             gen_io_start();
7282         }
7283         gen_helper_rdtsc(cpu_env);
7284         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7285             gen_jmp(s, s->pc - s->cs_base);
7286         }
7287         break;
7288     case 0x133: /* rdpmc */
7289         gen_update_cc_op(s);
7290         gen_jmp_im(s, pc_start - s->cs_base);
7291         gen_helper_rdpmc(cpu_env);
7292         s->base.is_jmp = DISAS_NORETURN;
7293         break;
7294     case 0x134: /* sysenter */
7295         /* For Intel SYSENTER is valid on 64-bit */
7296         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7297             goto illegal_op;
7298         if (!PE(s)) {
7299             gen_exception_gpf(s);
7300         } else {
7301             gen_helper_sysenter(cpu_env);
7302             gen_eob(s);
7303         }
7304         break;
7305     case 0x135: /* sysexit */
7306         /* For Intel SYSEXIT is valid on 64-bit */
7307         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7308             goto illegal_op;
7309         if (!PE(s)) {
7310             gen_exception_gpf(s);
7311         } else {
7312             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7313             gen_eob(s);
7314         }
7315         break;
7316 #ifdef TARGET_X86_64
7317     case 0x105: /* syscall */
7318         /* XXX: is it usable in real mode ? */
7319         gen_update_cc_op(s);
7320         gen_jmp_im(s, pc_start - s->cs_base);
7321         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7322         /* TF handling for the syscall insn is different. The TF bit is checked
7323            after the syscall insn completes. This allows #DB to not be
7324            generated after one has entered CPL0 if TF is set in FMASK.  */
7325         gen_eob_worker(s, false, true);
7326         break;
7327     case 0x107: /* sysret */
7328         if (!PE(s)) {
7329             gen_exception_gpf(s);
7330         } else {
7331             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7332             /* condition codes are modified only in long mode */
7333             if (LMA(s)) {
7334                 set_cc_op(s, CC_OP_EFLAGS);
7335             }
7336             /* TF handling for the sysret insn is different. The TF bit is
7337                checked after the sysret insn completes. This allows #DB to be
7338                generated "as if" the syscall insn in userspace has just
7339                completed.  */
7340             gen_eob_worker(s, false, true);
7341         }
7342         break;
7343 #endif
7344     case 0x1a2: /* cpuid */
7345         gen_update_cc_op(s);
7346         gen_jmp_im(s, pc_start - s->cs_base);
7347         gen_helper_cpuid(cpu_env);
7348         break;
7349     case 0xf4: /* hlt */
7350         if (check_cpl0(s)) {
7351             gen_update_cc_op(s);
7352             gen_jmp_im(s, pc_start - s->cs_base);
7353             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7354             s->base.is_jmp = DISAS_NORETURN;
7355         }
7356         break;
    case 0x100:
        /*
         * Group 6 (0F 00): system segment register instructions,
         * decoded by the reg field of the ModRM byte.
         */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch(op) {
        case 0: /* sldt */
            /* Store LDTR selector; only valid in protected mode.  */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, ldt.selector));
            /* Memory destinations are always 16-bit; register
               destinations use the full operand size.  */
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 2: /* lldt */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            /* Loading LDTR is privileged: #GP unless CPL 0.  */
            if (check_cpl0(s)) {
                gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_lldt(cpu_env, s->tmp2_i32);
            }
            break;
        case 1: /* str */
            /* Store TR selector; same rules as sldt above.  */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
            tcg_gen_ld32u_tl(s->T0, cpu_env,
                             offsetof(CPUX86State, tr.selector));
            ot = mod == 3 ? dflag : MO_16;
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 3: /* ltr */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            /* Loading TR is privileged: #GP unless CPL 0.  */
            if (check_cpl0(s)) {
                gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
                tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
                gen_helper_ltr(cpu_env, s->tmp2_i32);
            }
            break;
        case 4: /* verr */
        case 5: /* verw */
            if (!PE(s) || VM86(s))
                goto illegal_op;
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            /* The helpers update ZF with the access-check result.  */
            gen_update_cc_op(s);
            if (op == 4) {
                gen_helper_verr(cpu_env, s->T0);
            } else {
                gen_helper_verw(cpu_env, s->T0);
            }
            set_cc_op(s, CC_OP_EFLAGS);
            break;
        default:
            goto unknown_op;
        }
        break;
7417 
    case 0x101:
        /*
         * Group 7 (0F 01).  Decoded on the whole ModRM byte: register
         * forms (mod == 3) encode individual instructions, memory
         * forms are selected by the reg field via CASE_MODRM_MEM_OP.
         */
        modrm = x86_ldub_code(env, s);
        switch (modrm) {
        CASE_MODRM_MEM_OP(0): /* sgdt */
            gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
            gen_lea_modrm(env, s, modrm);
            /* Store the 16-bit limit followed by the base.  */
            tcg_gen_ld32u_tl(s->T0,
                             cpu_env, offsetof(CPUX86State, gdt.limit));
            gen_op_st_v(s, MO_16, s->T0, s->A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
            if (dflag == MO_16) {
                /* 16-bit operand size stores only 24 bits of the base.  */
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            /* CODE64(s) + MO_32 yields MO_64 in long mode.  */
            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            break;
7434 
        case 0xc8: /* monitor */
            /* Requires CPUID.MONITOR and CPL 0.  */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            /* Monitored address comes from EAX, truncated to the
               current address size and subject to DS segmentation.  */
            tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
            gen_extu(s->aflag, s->A0);
            gen_add_A0_ds_seg(s);
            gen_helper_monitor(cpu_env, s->A0);
            break;

        case 0xc9: /* mwait */
            if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            /* The helper may halt the vCPU; end the TB here.  */
            gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
            s->base.is_jmp = DISAS_NORETURN;
            break;

        case 0xca: /* clac */
            /* SMAP-only; privileged.  */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || CPL(s) != 0) {
                goto illegal_op;
            }
            gen_helper_clac(cpu_env);
            /* End the TB: translation depends on the AC flag state.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xcb: /* stac */
            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
                || CPL(s) != 0) {
                goto illegal_op;
            }
            gen_helper_stac(cpu_env);
            /* End the TB: translation depends on the AC flag state.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;
7476 
        CASE_MODRM_MEM_OP(1): /* sidt */
            gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
            gen_lea_modrm(env, s, modrm);
            /* Store the 16-bit limit followed by the base, exactly
               like sgdt above.  */
            tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
            gen_op_st_v(s, MO_16, s->T0, s->A0);
            gen_add_A0_im(s, 2);
            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
            if (dflag == MO_16) {
                /* 16-bit operand size stores only 24 bits of the base.  */
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            break;

        case 0xd0: /* xgetbv */
            /* No LOCK/66/F3/F2 prefixes allowed; requires XSAVE.  */
            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
                goto illegal_op;
            }
            /* XCR index in ECX; 64-bit result split into EDX:EAX.  */
            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
            break;

        case 0xd1: /* xsetbv */
            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
                || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
                goto illegal_op;
            }
            /* Privileged; #GP if CPL != 0.  */
            if (!check_cpl0(s)) {
                break;
            }
            /* 64-bit value assembled from EDX:EAX, index from ECX.  */
            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                  cpu_regs[R_EDX]);
            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
            /* End TB because translation flags may change.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;
7518 
        case 0xd8: /* VMRUN */
            /* SVM instructions require EFER.SVME and protected mode.  */
            if (!SVME(s) || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            /* VMCB address size (aflag) and the instruction length are
               passed so the helper can set up the guest return state.  */
            gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
                             tcg_const_i32(s->pc - pc_start));
            /* Control resumes in the guest; this TB cannot continue.  */
            tcg_gen_exit_tb(NULL, 0);
            s->base.is_jmp = DISAS_NORETURN;
            break;

        case 0xd9: /* VMMCALL */
            /* No CPL0 check: the intercept decides what happens.  */
            if (!SVME(s)) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_vmmcall(cpu_env);
            break;

        case 0xda: /* VMLOAD */
            if (!SVME(s) || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
            break;

        case 0xdb: /* VMSAVE */
            if (!SVME(s) || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
            break;

        case 0xdc: /* STGI */
            /* Also available with only CPUID SKINIT support.  */
            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
                || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_update_cc_op(s);
            gen_helper_stgi(cpu_env);
            /* End the TB: the interrupt shadow state changed.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xdd: /* CLGI */
            if (!SVME(s) || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            gen_helper_clgi(cpu_env);
            break;

        case 0xde: /* SKINIT */
            if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
                || !PE(s)) {
                goto illegal_op;
            }
            gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
            /* If not intercepted, not implemented -- raise #UD. */
            goto illegal_op;

        case 0xdf: /* INVLPGA */
            if (!SVME(s) || !PE(s)) {
                goto illegal_op;
            }
            if (!check_cpl0(s)) {
                break;
            }
            gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
            /* Linear address to invalidate comes from (R/E)AX,
               zero-extended unless the address size is 64-bit.  */
            if (s->aflag == MO_64) {
                tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
            } else {
                tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
            }
            gen_helper_flush_page(cpu_env, s->A0);
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;
7619 
        CASE_MODRM_MEM_OP(2): /* lgdt */
            /* Privileged: #GP unless CPL 0.  */
            if (!check_cpl0(s)) {
                break;
            }
            gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
            gen_lea_modrm(env, s, modrm);
            /* Load the 16-bit limit, then the base.  */
            gen_op_ld_v(s, MO_16, s->T1, s->A0);
            gen_add_A0_im(s, 2);
            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            if (dflag == MO_16) {
                /* 16-bit operand size uses only 24 bits of the base.  */
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
            break;

        CASE_MODRM_MEM_OP(3): /* lidt */
            if (!check_cpl0(s)) {
                break;
            }
            gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
            gen_lea_modrm(env, s, modrm);
            gen_op_ld_v(s, MO_16, s->T1, s->A0);
            gen_add_A0_im(s, 2);
            gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
            if (dflag == MO_16) {
                tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
            }
            tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
            tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
            break;

        CASE_MODRM_OP(4): /* smsw */
            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
            tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
            /*
             * In 32-bit mode, the higher 16 bits of the destination
             * register are undefined.  In practice CR0[31:0] is stored
             * just like in 64-bit mode.
             */
            mod = (modrm >> 6) & 3;
            ot = (mod != 3 ? MO_16 : s->dflag);
            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
            break;
        case 0xee: /* rdpkru */
            if (prefixes & PREFIX_LOCK) {
                goto illegal_op;
            }
            /* Index from ECX; 64-bit result split into EDX:EAX.  */
            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
            tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
            break;
        case 0xef: /* wrpkru */
            if (prefixes & PREFIX_LOCK) {
                goto illegal_op;
            }
            /* 64-bit value assembled from EDX:EAX, index from ECX.  */
            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
                                  cpu_regs[R_EDX]);
            tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
            gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
            break;

        CASE_MODRM_OP(6): /* lmsw */
            if (!check_cpl0(s)) {
                break;
            }
            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            /*
             * Only the 4 lower bits of CR0 are modified.
             * PE cannot be set to zero if already set to one.
             */
            tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
            tcg_gen_andi_tl(s->T0, s->T0, 0xf);
            tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
            tcg_gen_or_tl(s->T0, s->T0, s->T1);
            gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
            /* End the TB: CR0 changes can affect translation.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;
7700 
        CASE_MODRM_MEM_OP(7): /* invlpg */
            if (!check_cpl0(s)) {
                break;
            }
            gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
            gen_lea_modrm(env, s, modrm);
            gen_helper_flush_page(cpu_env, s->A0);
            /* End the TB after flushing the translation for the page.  */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
            break;

        case 0xf8: /* swapgs */
#ifdef TARGET_X86_64
            if (CODE64(s)) {
                if (check_cpl0(s)) {
                    /* Exchange GS base with MSR_KERNELGSBASE.  */
                    tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
                    tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
                                  offsetof(CPUX86State, kernelgsbase));
                    tcg_gen_st_tl(s->T0, cpu_env,
                                  offsetof(CPUX86State, kernelgsbase));
                }
                break;
            }
#endif
            /* Only valid in 64-bit mode.  */
            goto illegal_op;

        case 0xf9: /* rdtscp */
            if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
                goto illegal_op;
            }
            gen_update_cc_op(s);
            gen_jmp_im(s, pc_start - s->cs_base);
            /* Timer reads must be bracketed for icount mode.  */
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_io_start();
            }
            gen_helper_rdtscp(cpu_env);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
            break;

        default:
            goto unknown_op;
        }
        break;
7746 
    case 0x108: /* invd */
    case 0x109: /* wbinvd */
        /* No caches are emulated: only the privilege check and the
           SVM intercept are performed.  */
        if (check_cpl0(s)) {
            gen_svm_check_intercept(s, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
            /* nothing to do */
        }
        break;
    case 0x63: /* arpl or movslS (x86_64) */
#ifdef TARGET_X86_64
        if (CODE64(s)) {
            /* In 64-bit mode 0x63 is MOVSXD: move a 32-bit source,
               sign-extended, into the destination register.  */
            int d_ot;
            /* d_ot is the size of destination */
            d_ot = dflag;

            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | REX_R(s);
            mod = (modrm >> 6) & 3;
            rm = (modrm & 7) | REX_B(s);

            if (mod == 3) {
                gen_op_mov_v_reg(s, MO_32, s->T0, rm);
                /* sign extend */
                if (d_ot == MO_64) {
                    tcg_gen_ext32s_tl(s->T0, s->T0);
                }
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            } else {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
                gen_op_mov_reg_v(s, d_ot, reg, s->T0);
            }
        } else
#endif
        {
            /* ARPL: if the destination selector's RPL (bits 1:0) is
               below the source's, raise it to match and set ZF,
               otherwise clear ZF.  Protected mode only.  */
            TCGLabel *label1;
            TCGv t0, t1, t2, a0;

            if (!PE(s) || VM86(s))
                goto illegal_op;
            /* Local temps: they must survive the brcond below.  */
            t0 = tcg_temp_local_new();
            t1 = tcg_temp_local_new();
            t2 = tcg_temp_local_new();
            ot = MO_16;
            modrm = x86_ldub_code(env, s);
            reg = (modrm >> 3) & 7;
            mod = (modrm >> 6) & 3;
            rm = modrm & 7;
            if (mod != 3) {
                gen_lea_modrm(env, s, modrm);
                gen_op_ld_v(s, ot, t0, s->A0);
                /* Keep the computed address for the store-back.  */
                a0 = tcg_temp_local_new();
                tcg_gen_mov_tl(a0, s->A0);
            } else {
                gen_op_mov_v_reg(s, ot, t0, rm);
                a0 = NULL;
            }
            gen_op_mov_v_reg(s, ot, t1, reg);
            /* Compare the two RPL fields; t2 holds the ZF to merge.  */
            tcg_gen_andi_tl(s->tmp0, t0, 3);
            tcg_gen_andi_tl(t1, t1, 3);
            tcg_gen_movi_tl(t2, 0);
            label1 = gen_new_label();
            tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
            tcg_gen_andi_tl(t0, t0, ~3);
            tcg_gen_or_tl(t0, t0, t1);
            tcg_gen_movi_tl(t2, CC_Z);
            gen_set_label(label1);
            if (mod != 3) {
                gen_op_st_v(s, ot, t0, a0);
                tcg_temp_free(a0);
           } else {
                gen_op_mov_reg_v(s, ot, rm, t0);
            }
            /* Merge the computed ZF into the materialized flags.  */
            gen_compute_eflags(s);
            tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
            tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
        }
        break;
    case 0x102: /* lar */
    case 0x103: /* lsl */
        {
            TCGLabel *label1;
            TCGv t0;
            if (!PE(s) || VM86(s))
                goto illegal_op;
            ot = dflag != MO_16 ? MO_32 : MO_16;
            modrm = x86_ldub_code(env, s);
            reg = ((modrm >> 3) & 7) | REX_R(s);
            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
            /* Local temp: it must survive the brcondi below.  */
            t0 = tcg_temp_local_new();
            gen_update_cc_op(s);
            if (b == 0x102) {
                gen_helper_lar(t0, cpu_env, s->T0);
            } else {
                gen_helper_lsl(t0, cpu_env, s->T0);
            }
            /* The helpers set ZF on success; only then is the
               destination register written.  */
            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
            label1 = gen_new_label();
            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
            gen_op_mov_reg_v(s, ot, reg, t0);
            gen_set_label(label1);
            set_cc_op(s, CC_OP_EFLAGS);
            tcg_temp_free(t0);
        }
        break;
    case 0x118:
        /* Group 16 (0F 18): prefetch hints; treated as nops.  */
        modrm = x86_ldub_code(env, s);
        mod = (modrm >> 6) & 3;
        op = (modrm >> 3) & 7;
        switch(op) {
        case 0: /* prefetchnta */
        case 1: /* prefetcht0 */
        case 2: /* prefetcht1 */
        case 3: /* prefetcht2 */
            /* Register forms of the prefetch encodings are #UD.  */
            if (mod == 3)
                goto illegal_op;
            gen_nop_modrm(env, s, modrm);
            /* nothing more to do */
            break;
        default: /* nop (multi byte) */
            gen_nop_modrm(env, s, modrm);
            break;
        }
        break;
    case 0x11a:
        /*
         * MPX (0F 1A): the mandatory prefix selects the instruction
         * (F3 bndcl, F2 bndcu, 66 bndmov from reg/mem, none bndldx).
         * When MPX is not enabled the whole encoding is a nop.
         */
        modrm = x86_ldub_code(env, s);
        if (s->flags & HF_MPX_EN_MASK) {
            mod = (modrm >> 6) & 3;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            if (prefixes & PREFIX_REPZ) {
                /* bndcl */
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
                gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
            } else if (prefixes & PREFIX_REPNZ) {
                /* bndcu */
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
                /* The upper bound is stored inverted; undo that for
                   the comparison.  */
                TCGv_i64 notu = tcg_temp_new_i64();
                tcg_gen_not_i64(notu, cpu_bndu[reg]);
                gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
                tcg_temp_free_i64(notu);
            } else if (prefixes & PREFIX_DATA) {
                /* bndmov -- from reg/mem */
                if (reg >= 4 || s->aflag == MO_16) {
                    goto illegal_op;
                }
                if (mod == 3) {
                    int reg2 = (modrm & 7) | REX_B(s);
                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
                        goto illegal_op;
                    }
                    /* Register moves take effect only while the bnd
                       registers are in use.  */
                    if (s->flags & HF_MPX_IU_MASK) {
                        tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
                        tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
                    }
                } else {
                    gen_lea_modrm(env, s, modrm);
                    /* Bound pairs are 2x64-bit in 64-bit mode,
                       2x32-bit otherwise.  */
                    if (CODE64(s)) {
                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEQ);
                        tcg_gen_addi_tl(s->A0, s->A0, 8);
                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                            s->mem_index, MO_LEQ);
                    } else {
                        tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_addi_tl(s->A0, s->A0, 4);
                        tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
                                            s->mem_index, MO_LEUL);
                    }
                    /* bnd registers are now in-use */
                    gen_set_hflag(s, HF_MPX_IU_MASK);
                }
            } else if (mod != 3) {
                /* bndldx */
                AddressParts a = gen_lea_modrm_0(env, s, modrm);
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16
                    || a.base < -1) {
                    goto illegal_op;
                }
                /* A0 = base + disp (or 0), T0 = index (or 0): the
                   helpers use them to look up the bound table.  */
                if (a.base >= 0) {
                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                } else {
                    tcg_gen_movi_tl(s->A0, 0);
                }
                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                if (a.index >= 0) {
                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                } else {
                    tcg_gen_movi_tl(s->T0, 0);
                }
                if (CODE64(s)) {
                    gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
                    /* The 64-bit helper leaves the upper bound in
                       mmx_t0 for us to pick up.  */
                    tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
                                   offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
                } else {
                    /* The 32-bit helper packs both bounds into one
                       64-bit value; split it here.  */
                    gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
                    tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
                    tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
                }
                gen_set_hflag(s, HF_MPX_IU_MASK);
            }
        }
        gen_nop_modrm(env, s, modrm);
        break;
    case 0x11b:
        /*
         * MPX (0F 1B): the mandatory prefix selects the instruction
         * (F3 bndmk, F2 bndcn, 66 bndmov to reg/mem, none bndstx).
         * When MPX is not enabled the whole encoding is a nop.
         */
        modrm = x86_ldub_code(env, s);
        if (s->flags & HF_MPX_EN_MASK) {
            mod = (modrm >> 6) & 3;
            reg = ((modrm >> 3) & 7) | REX_R(s);
            if (mod != 3 && (prefixes & PREFIX_REPZ)) {
                /* bndmk */
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
                AddressParts a = gen_lea_modrm_0(env, s, modrm);
                /* Lower bound comes from the base register alone.  */
                if (a.base >= 0) {
                    tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
                    if (!CODE64(s)) {
                        tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
                    }
                } else if (a.base == -1) {
                    /* no base register has lower bound of 0 */
                    tcg_gen_movi_i64(cpu_bndl[reg], 0);
                } else {
                    /* rip-relative generates #ud */
                    goto illegal_op;
                }
                /* Upper bound is the full effective address, stored
                   in inverted (one's complement) form.  */
                tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
                if (!CODE64(s)) {
                    tcg_gen_ext32u_tl(s->A0, s->A0);
                }
                tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
                /* bnd registers are now in-use */
                gen_set_hflag(s, HF_MPX_IU_MASK);
                break;
            } else if (prefixes & PREFIX_REPNZ) {
                /* bndcn */
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16) {
                    goto illegal_op;
                }
                gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
            } else if (prefixes & PREFIX_DATA) {
                /* bndmov -- to reg/mem */
                if (reg >= 4 || s->aflag == MO_16) {
                    goto illegal_op;
                }
                if (mod == 3) {
                    int reg2 = (modrm & 7) | REX_B(s);
                    if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
                        goto illegal_op;
                    }
                    /* Register moves take effect only while the bnd
                       registers are in use.  */
                    if (s->flags & HF_MPX_IU_MASK) {
                        tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
                        tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
                    }
                } else {
                    gen_lea_modrm(env, s, modrm);
                    /* Bound pairs are 2x64-bit in 64-bit mode,
                       2x32-bit otherwise.  */
                    if (CODE64(s)) {
                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEQ);
                        tcg_gen_addi_tl(s->A0, s->A0, 8);
                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                            s->mem_index, MO_LEQ);
                    } else {
                        tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
                                            s->mem_index, MO_LEUL);
                        tcg_gen_addi_tl(s->A0, s->A0, 4);
                        tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
                                            s->mem_index, MO_LEUL);
                    }
                }
            } else if (mod != 3) {
                /* bndstx */
                AddressParts a = gen_lea_modrm_0(env, s, modrm);
                if (reg >= 4
                    || (prefixes & PREFIX_LOCK)
                    || s->aflag == MO_16
                    || a.base < -1) {
                    goto illegal_op;
                }
                /* A0 = base + disp (or 0), T0 = index (or 0): the
                   helpers use them to look up the bound table.  */
                if (a.base >= 0) {
                    tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
                } else {
                    tcg_gen_movi_tl(s->A0, 0);
                }
                gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
                if (a.index >= 0) {
                    tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
                } else {
                    tcg_gen_movi_tl(s->T0, 0);
                }
                if (CODE64(s)) {
                    gen_helper_bndstx64(cpu_env, s->A0, s->T0,
                                        cpu_bndl[reg], cpu_bndu[reg]);
                } else {
                    gen_helper_bndstx32(cpu_env, s->A0, s->T0,
                                        cpu_bndl[reg], cpu_bndu[reg]);
                }
            }
        }
        gen_nop_modrm(env, s, modrm);
        break;
    case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
        /* Consume the ModRM byte (and any displacement) but emit nothing.  */
        modrm = x86_ldub_code(env, s);
        gen_nop_modrm(env, s, modrm);
        break;
8069 
    case 0x120: /* mov reg, crN */
    case 0x122: /* mov crN, reg */
        /* Privileged: #GP unless CPL 0.  */
        if (!check_cpl0(s)) {
            break;
        }
        modrm = x86_ldub_code(env, s);
        /*
         * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
         * AMD documentation (24594.pdf) and testing of Intel 386 and 486
         * processors all show that the mod bits are assumed to be 1's,
         * regardless of actual values.
         */
        rm = (modrm & 7) | REX_B(s);
        reg = ((modrm >> 3) & 7) | REX_R(s);
        switch (reg) {
        case 0:
            /* LOCK CR0 is the AMD alternate encoding of CR8.  */
            if ((prefixes & PREFIX_LOCK) &&
                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
                reg = 8;
            }
            break;
        case 2:
        case 3:
        case 4:
            break;
        default:
            /* CR1, CR5-CR7 do not exist.  */
            goto unknown_op;
        }
        ot  = (CODE64(s) ? MO_64 : MO_32);

        /* CR accesses may be timing-sensitive under icount.  */
        if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
            gen_io_start();
        }
        if (b & 2) {
            /* Write: may change translation flags, so end the TB.  */
            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
            gen_op_mov_v_reg(s, ot, s->T0, rm);
            gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        } else {
            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
            gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
            gen_op_mov_reg_v(s, ot, rm, s->T0);
            if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
                gen_jmp(s, s->pc - s->cs_base);
            }
        }
        break;
8118 
    case 0x121: /* mov reg, drN */
    case 0x123: /* mov drN, reg */
        /* Privileged: #GP unless CPL 0.  */
        if (check_cpl0(s)) {
            modrm = x86_ldub_code(env, s);
            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
             * AMD documentation (24594.pdf) and testing of
             * intel 386 and 486 processors all show that the mod bits
             * are assumed to be 1's, regardless of actual values.
             */
            rm = (modrm & 7) | REX_B(s);
            reg = ((modrm >> 3) & 7) | REX_R(s);
            if (CODE64(s))
                ot = MO_64;
            else
                ot = MO_32;
            /* Only DR0-DR7 exist.  */
            if (reg >= 8) {
                goto illegal_op;
            }
            if (b & 2) {
                /* Write: may arm breakpoints, so end the TB.  */
                gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
                gen_op_mov_v_reg(s, ot, s->T0, rm);
                tcg_gen_movi_i32(s->tmp2_i32, reg);
                gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
                gen_jmp_im(s, s->pc - s->cs_base);
                gen_eob(s);
            } else {
                gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
                tcg_gen_movi_i32(s->tmp2_i32, reg);
                gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
                gen_op_mov_reg_v(s, ot, rm, s->T0);
            }
        }
        break;
    case 0x106: /* clts */
        /* Privileged: clears CR0.TS.  */
        if (check_cpl0(s)) {
            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
            gen_helper_clts(cpu_env);
            /* abort block because static cpu state changed */
            gen_jmp_im(s, s->pc - s->cs_base);
            gen_eob(s);
        }
        break;
8161     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8162     case 0x1c3: /* MOVNTI reg, mem */
8163         if (!(s->cpuid_features & CPUID_SSE2))
8164             goto illegal_op;
8165         ot = mo_64_32(dflag);
8166         modrm = x86_ldub_code(env, s);
8167         mod = (modrm >> 6) & 3;
8168         if (mod == 3)
8169             goto illegal_op;
8170         reg = ((modrm >> 3) & 7) | REX_R(s);
8171         /* generate a generic store */
8172         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8173         break;
8174     case 0x1ae:
8175         modrm = x86_ldub_code(env, s);
8176         switch (modrm) {
8177         CASE_MODRM_MEM_OP(0): /* fxsave */
8178             if (!(s->cpuid_features & CPUID_FXSR)
8179                 || (prefixes & PREFIX_LOCK)) {
8180                 goto illegal_op;
8181             }
8182             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8183                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8184                 break;
8185             }
8186             gen_lea_modrm(env, s, modrm);
8187             gen_helper_fxsave(cpu_env, s->A0);
8188             break;
8189 
8190         CASE_MODRM_MEM_OP(1): /* fxrstor */
8191             if (!(s->cpuid_features & CPUID_FXSR)
8192                 || (prefixes & PREFIX_LOCK)) {
8193                 goto illegal_op;
8194             }
8195             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8196                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8197                 break;
8198             }
8199             gen_lea_modrm(env, s, modrm);
8200             gen_helper_fxrstor(cpu_env, s->A0);
8201             break;
8202 
8203         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8204             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8205                 goto illegal_op;
8206             }
8207             if (s->flags & HF_TS_MASK) {
8208                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8209                 break;
8210             }
8211             gen_lea_modrm(env, s, modrm);
8212             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8213             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8214             break;
8215 
8216         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8217             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8218                 goto illegal_op;
8219             }
8220             if (s->flags & HF_TS_MASK) {
8221                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8222                 break;
8223             }
8224             gen_helper_update_mxcsr(cpu_env);
8225             gen_lea_modrm(env, s, modrm);
8226             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8227             gen_op_st_v(s, MO_32, s->T0, s->A0);
8228             break;
8229 
8230         CASE_MODRM_MEM_OP(4): /* xsave */
8231             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8232                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8233                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8234                 goto illegal_op;
8235             }
8236             gen_lea_modrm(env, s, modrm);
8237             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8238                                   cpu_regs[R_EDX]);
8239             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8240             break;
8241 
8242         CASE_MODRM_MEM_OP(5): /* xrstor */
8243             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8244                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8245                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8246                 goto illegal_op;
8247             }
8248             gen_lea_modrm(env, s, modrm);
8249             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8250                                   cpu_regs[R_EDX]);
8251             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8252             /* XRSTOR is how MPX is enabled, which changes how
8253                we translate.  Thus we need to end the TB.  */
8254             gen_update_cc_op(s);
8255             gen_jmp_im(s, s->pc - s->cs_base);
8256             gen_eob(s);
8257             break;
8258 
8259         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8260             if (prefixes & PREFIX_LOCK) {
8261                 goto illegal_op;
8262             }
8263             if (prefixes & PREFIX_DATA) {
8264                 /* clwb */
8265                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8266                     goto illegal_op;
8267                 }
8268                 gen_nop_modrm(env, s, modrm);
8269             } else {
8270                 /* xsaveopt */
8271                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8272                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8273                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8274                     goto illegal_op;
8275                 }
8276                 gen_lea_modrm(env, s, modrm);
8277                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8278                                       cpu_regs[R_EDX]);
8279                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8280             }
8281             break;
8282 
8283         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8284             if (prefixes & PREFIX_LOCK) {
8285                 goto illegal_op;
8286             }
8287             if (prefixes & PREFIX_DATA) {
8288                 /* clflushopt */
8289                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8290                     goto illegal_op;
8291                 }
8292             } else {
8293                 /* clflush */
8294                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8295                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8296                     goto illegal_op;
8297                 }
8298             }
8299             gen_nop_modrm(env, s, modrm);
8300             break;
8301 
8302         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8303         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8304         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8305         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8306             if (CODE64(s)
8307                 && (prefixes & PREFIX_REPZ)
8308                 && !(prefixes & PREFIX_LOCK)
8309                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8310                 TCGv base, treg, src, dst;
8311 
8312                 /* Preserve hflags bits by testing CR4 at runtime.  */
8313                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8314                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8315 
8316                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8317                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8318 
8319                 if (modrm & 0x10) {
8320                     /* wr*base */
8321                     dst = base, src = treg;
8322                 } else {
8323                     /* rd*base */
8324                     dst = treg, src = base;
8325                 }
8326 
8327                 if (s->dflag == MO_32) {
8328                     tcg_gen_ext32u_tl(dst, src);
8329                 } else {
8330                     tcg_gen_mov_tl(dst, src);
8331                 }
8332                 break;
8333             }
8334             goto unknown_op;
8335 
8336         case 0xf8: /* sfence / pcommit */
8337             if (prefixes & PREFIX_DATA) {
8338                 /* pcommit */
8339                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8340                     || (prefixes & PREFIX_LOCK)) {
8341                     goto illegal_op;
8342                 }
8343                 break;
8344             }
8345             /* fallthru */
8346         case 0xf9 ... 0xff: /* sfence */
8347             if (!(s->cpuid_features & CPUID_SSE)
8348                 || (prefixes & PREFIX_LOCK)) {
8349                 goto illegal_op;
8350             }
8351             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8352             break;
8353         case 0xe8 ... 0xef: /* lfence */
8354             if (!(s->cpuid_features & CPUID_SSE)
8355                 || (prefixes & PREFIX_LOCK)) {
8356                 goto illegal_op;
8357             }
8358             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8359             break;
8360         case 0xf0 ... 0xf7: /* mfence */
8361             if (!(s->cpuid_features & CPUID_SSE2)
8362                 || (prefixes & PREFIX_LOCK)) {
8363                 goto illegal_op;
8364             }
8365             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8366             break;
8367 
8368         default:
8369             goto unknown_op;
8370         }
8371         break;
8372 
8373     case 0x10d: /* 3DNow! prefetch(w) */
8374         modrm = x86_ldub_code(env, s);
8375         mod = (modrm >> 6) & 3;
8376         if (mod == 3)
8377             goto illegal_op;
8378         gen_nop_modrm(env, s, modrm);
8379         break;
8380     case 0x1aa: /* rsm */
8381         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8382         if (!(s->flags & HF_SMM_MASK))
8383             goto illegal_op;
8384 #ifdef CONFIG_USER_ONLY
8385         /* we should not be in SMM mode */
8386         g_assert_not_reached();
8387 #else
8388         gen_update_cc_op(s);
8389         gen_jmp_im(s, s->pc - s->cs_base);
8390         gen_helper_rsm(cpu_env);
8391 #endif /* CONFIG_USER_ONLY */
8392         gen_eob(s);
8393         break;
8394     case 0x1b8: /* SSE4.2 popcnt */
8395         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8396              PREFIX_REPZ)
8397             goto illegal_op;
8398         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8399             goto illegal_op;
8400 
8401         modrm = x86_ldub_code(env, s);
8402         reg = ((modrm >> 3) & 7) | REX_R(s);
8403 
8404         if (s->prefix & PREFIX_DATA) {
8405             ot = MO_16;
8406         } else {
8407             ot = mo_64_32(dflag);
8408         }
8409 
8410         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8411         gen_extu(ot, s->T0);
8412         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8413         tcg_gen_ctpop_tl(s->T0, s->T0);
8414         gen_op_mov_reg_v(s, ot, reg, s->T0);
8415 
8416         set_cc_op(s, CC_OP_POPCNT);
8417         break;
8418     case 0x10e ... 0x10f:
8419         /* 3DNow! instructions, ignore prefixes */
8420         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8421         /* fall through */
8422     case 0x110 ... 0x117:
8423     case 0x128 ... 0x12f:
8424     case 0x138 ... 0x13a:
8425     case 0x150 ... 0x179:
8426     case 0x17c ... 0x17f:
8427     case 0x1c2:
8428     case 0x1c4 ... 0x1c6:
8429     case 0x1d0 ... 0x1fe:
8430         gen_sse(env, s, b, pc_start);
8431         break;
8432     default:
8433         goto unknown_op;
8434     }
8435     return s->pc;
8436  illegal_op:
8437     gen_illegal_opcode(s);
8438     return s->pc;
8439  unknown_op:
8440     gen_unknown_opcode(env, s);
8441     return s->pc;
8442 }
8443 
8444 void tcg_x86_init(void)
8445 {
8446     static const char reg_names[CPU_NB_REGS][4] = {
8447 #ifdef TARGET_X86_64
8448         [R_EAX] = "rax",
8449         [R_EBX] = "rbx",
8450         [R_ECX] = "rcx",
8451         [R_EDX] = "rdx",
8452         [R_ESI] = "rsi",
8453         [R_EDI] = "rdi",
8454         [R_EBP] = "rbp",
8455         [R_ESP] = "rsp",
8456         [8]  = "r8",
8457         [9]  = "r9",
8458         [10] = "r10",
8459         [11] = "r11",
8460         [12] = "r12",
8461         [13] = "r13",
8462         [14] = "r14",
8463         [15] = "r15",
8464 #else
8465         [R_EAX] = "eax",
8466         [R_EBX] = "ebx",
8467         [R_ECX] = "ecx",
8468         [R_EDX] = "edx",
8469         [R_ESI] = "esi",
8470         [R_EDI] = "edi",
8471         [R_EBP] = "ebp",
8472         [R_ESP] = "esp",
8473 #endif
8474     };
8475     static const char seg_base_names[6][8] = {
8476         [R_CS] = "cs_base",
8477         [R_DS] = "ds_base",
8478         [R_ES] = "es_base",
8479         [R_FS] = "fs_base",
8480         [R_GS] = "gs_base",
8481         [R_SS] = "ss_base",
8482     };
8483     static const char bnd_regl_names[4][8] = {
8484         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8485     };
8486     static const char bnd_regu_names[4][8] = {
8487         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8488     };
8489     int i;
8490 
8491     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8492                                        offsetof(CPUX86State, cc_op), "cc_op");
8493     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8494                                     "cc_dst");
8495     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8496                                     "cc_src");
8497     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8498                                      "cc_src2");
8499 
8500     for (i = 0; i < CPU_NB_REGS; ++i) {
8501         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8502                                          offsetof(CPUX86State, regs[i]),
8503                                          reg_names[i]);
8504     }
8505 
8506     for (i = 0; i < 6; ++i) {
8507         cpu_seg_base[i]
8508             = tcg_global_mem_new(cpu_env,
8509                                  offsetof(CPUX86State, segs[i].base),
8510                                  seg_base_names[i]);
8511     }
8512 
8513     for (i = 0; i < 4; ++i) {
8514         cpu_bndl[i]
8515             = tcg_global_mem_new_i64(cpu_env,
8516                                      offsetof(CPUX86State, bnd_regs[i].lb),
8517                                      bnd_regl_names[i]);
8518         cpu_bndu[i]
8519             = tcg_global_mem_new_i64(cpu_env,
8520                                      offsetof(CPUX86State, bnd_regs[i].ub),
8521                                      bnd_regu_names[i]);
8522     }
8523 }
8524 
/*
 * Per-TB initialization of the x86 DisasContext: cache the translation
 * flags, CPUID feature words and the TCG temporaries that disas_insn()
 * uses while decoding.
 */
static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUX86State *env = cpu->env_ptr;
    uint32_t flags = dc->base.tb->flags;
    /* CPL and IOPL are packed into the TB flags word. */
    int cpl = (flags >> HF_CPL_SHIFT) & 3;
    int iopl = (flags >> IOPL_SHIFT) & 3;

    dc->cs_base = dc->base.tb->cs_base;
    dc->flags = flags;
#ifndef CONFIG_USER_ONLY
    /* NOTE(review): cpl/iopl fields appear to exist only for system
       emulation; user-mode presumably uses fixed values — confirm.  */
    dc->cpl = cpl;
    dc->iopl = iopl;
#endif

    /* We make some simplifying assumptions; validate they're correct. */
    g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
    g_assert(CPL(dc) == cpl);
    g_assert(IOPL(dc) == iopl);
    g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
    g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
    g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
    g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
    g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
    g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
    g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
    g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));

    /* Flags state is unknown until the first instruction computes it. */
    dc->cc_op = CC_OP_DYNAMIC;
    dc->cc_op_dirty = false;
    dc->popl_esp_hack = 0;
    /* select memory access functions */
    dc->mem_index = 0;
#ifdef CONFIG_SOFTMMU
    dc->mem_index = cpu_mmu_index(env, false);
#endif
    /* Cache the CPUID feature words consulted during decode. */
    dc->cpuid_features = env->features[FEAT_1_EDX];
    dc->cpuid_ext_features = env->features[FEAT_1_ECX];
    dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
    dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
    dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
    dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
    /* Direct jumps are disabled when single-stepping or when IRQs are
       inhibited, since each instruction must end the TB then. */
    dc->jmp_opt = !(dc->base.singlestep_enabled ||
                    (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
    /*
     * If jmp_opt, we want to handle each string instruction individually.
     * For icount also disable repz optimization so that each iteration
     * is accounted separately.
     */
    dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT);

    /* Scratch temporaries shared by the decode helpers. */
    dc->T0 = tcg_temp_new();
    dc->T1 = tcg_temp_new();
    dc->A0 = tcg_temp_new();

    dc->tmp0 = tcg_temp_new();
    dc->tmp1_i64 = tcg_temp_new_i64();
    dc->tmp2_i32 = tcg_temp_new_i32();
    dc->tmp3_i32 = tcg_temp_new_i32();
    dc->tmp4 = tcg_temp_new();
    dc->ptr0 = tcg_temp_new_ptr();
    dc->ptr1 = tcg_temp_new_ptr();
    /* Local temp: must survive across branches emitted by the decoder. */
    dc->cc_srcT = tcg_temp_local_new();
}
8589 
static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    /* Nothing to do at the start of a translation block.  */
}
8593 
static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    /* Record pc and cc_op for this instruction; restore_state_to_opc()
       reads them back as data[0] and data[1] when unwinding.  */
    tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
}
8600 
8601 static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
8602                                      const CPUBreakpoint *bp)
8603 {
8604     DisasContext *dc = container_of(dcbase, DisasContext, base);
8605     /* If RF is set, suppress an internally generated breakpoint.  */
8606     int flags = dc->base.tb->flags & HF_RF_MASK ? BP_GDB : BP_ANY;
8607     if (bp->flags & flags) {
8608         gen_debug(dc);
8609         /* The address covered by the breakpoint must be included in
8610            [tb->pc, tb->pc + tb->size) in order to for it to be
8611            properly cleared -- thus we increment the PC here so that
8612            the generic logic setting tb->size later does the right thing.  */
8613         dc->base.pc_next += 1;
8614         return true;
8615     } else {
8616         return false;
8617     }
8618 }
8619 
/*
 * Translate one guest instruction, then decide whether the translation
 * block must end here (single-step, IRQ inhibit, icount page-crossing,
 * or nearing the end of the code page).
 */
static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    target_ulong pc_next;

#ifdef TARGET_VSYSCALL_PAGE
    /*
     * Detect entry into the vsyscall page and invoke the syscall.
     */
    if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
        gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
        dc->base.pc_next = dc->pc + 1;
        return;
    }
#endif

    pc_next = disas_insn(dc, cpu);

    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
        /* if single step mode, we generate only one instruction and
           generate an exception */
        /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
           the flag and abort the translation to give the irqs a
           chance to happen */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
               && ((pc_next & TARGET_PAGE_MASK)
                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
                       & TARGET_PAGE_MASK)
                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
        /* Do not cross the boundary of the pages in icount mode,
           it can cause an exception. Do it only when boundary is
           crossed by the first instruction in the block.
           If current instruction already crossed the bound - it's ok,
           because an exception hasn't stopped this code.
         */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
        /* Stop before the end of the page; 32 bytes of slack is
           presumably >= the longest x86 instruction — TODO confirm.  */
        dc->base.is_jmp = DISAS_TOO_MANY;
    }

    dc->base.pc_next = pc_next;
}
8663 
8664 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8665 {
8666     DisasContext *dc = container_of(dcbase, DisasContext, base);
8667 
8668     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8669         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8670         gen_eob(dc);
8671     }
8672 }
8673 
8674 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8675                               CPUState *cpu)
8676 {
8677     DisasContext *dc = container_of(dcbase, DisasContext, base);
8678 
8679     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8680     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8681 }
8682 
/* Hooks wiring the x86 front end into the generic translator loop.  */
static const TranslatorOps i386_tr_ops = {
    .init_disas_context = i386_tr_init_disas_context,
    .tb_start           = i386_tr_tb_start,
    .insn_start         = i386_tr_insn_start,
    .breakpoint_check   = i386_tr_breakpoint_check,
    .translate_insn     = i386_tr_translate_insn,
    .tb_stop            = i386_tr_tb_stop,
    .disas_log          = i386_tr_disas_log,
};
8692 
/* Generate intermediate code for basic block 'tb', translating at most
   'max_insns' guest instructions via the generic translator loop.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
    DisasContext dc;

    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
}
8700 
8701 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8702                           target_ulong *data)
8703 {
8704     int cc_op = data[1];
8705     env->eip = data[0] - tb->cs_base;
8706     if (cc_op != CC_OP_DYNAMIC) {
8707         env->cc_op = cc_op;
8708     }
8709 }
8710