xref: /openbmc/qemu/target/i386/tcg/translate.c (revision 89fc45d5)
1 /*
2  *  i386 translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "qemu/osdep.h"
20 
21 #include "qemu/host-utils.h"
22 #include "cpu.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg-op.h"
26 #include "exec/cpu_ldst.h"
27 #include "exec/translator.h"
28 
29 #include "exec/helper-proto.h"
30 #include "exec/helper-gen.h"
31 #include "helper-tcg.h"
32 
33 #include "exec/log.h"
34 
/* Instruction prefix flags; each one is a distinct bit tested in DisasContext.prefix. */
#define PREFIX_REPZ   (1 << 0)
#define PREFIX_REPNZ  (1 << 1)
#define PREFIX_LOCK   (1 << 2)
#define PREFIX_DATA   (1 << 3)
#define PREFIX_ADR    (1 << 4)
#define PREFIX_VEX    (1 << 5)
#define PREFIX_REX    (1 << 6)
42 
/*
 * ctztl/clztl alias the 64-bit bit-scan routines on TARGET_X86_64 builds
 * and the 32-bit ones on every other build.
 */
#ifndef TARGET_X86_64
# define ctztl  ctz32
# define clztl  clz32
#else
# define ctztl  ctz64
# define clztl  clz64
#endif
50 
/*
 * For a switch indexed by MODRM, match all memory operands for a given OP.
 * The MODRM byte is laid out as mod (bits 7..6), reg/op (bits 5..3) and
 * r/m (bits 2..0); mod values 0..2 are covered here, and every r/m value.
 * OP is parenthesized so that an expression argument (e.g. "a | b") is
 * shifted as a whole.  Expands to GNU case ranges; the caller supplies the
 * trailing colon.
 */
#define CASE_MODRM_MEM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7

/*
 * As CASE_MODRM_MEM_OP, but additionally matches mod == 3, i.e. every
 * MODRM byte whose reg/op field equals OP.
 */
#define CASE_MODRM_OP(OP) \
    case (0 << 6) | ((OP) << 3) | 0 ... (0 << 6) | ((OP) << 3) | 7: \
    case (1 << 6) | ((OP) << 3) | 0 ... (1 << 6) | ((OP) << 3) | 7: \
    case (2 << 6) | ((OP) << 3) | 0 ... (2 << 6) | ((OP) << 3) | 7: \
    case (3 << 6) | ((OP) << 3) | 0 ... (3 << 6) | ((OP) << 3) | 7
62 
63 //#define MACRO_TEST   1
64 
65 /* global register indexes */
66 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
67 static TCGv_i32 cpu_cc_op;
68 static TCGv cpu_regs[CPU_NB_REGS];
69 static TCGv cpu_seg_base[6];
70 static TCGv_i64 cpu_bndl[4];
71 static TCGv_i64 cpu_bndu[4];
72 
73 #include "exec/gen-icount.h"
74 
/*
 * Per-translation state of the i386 front end.  The generic translator
 * state is embedded as the first member (base); everything else is
 * specific to decoding and code generation for x86 instructions.
 */
typedef struct DisasContext {
    DisasContextBase base;

    target_ulong pc;       /* pc = eip + cs_base */
    target_ulong pc_start; /* pc at TB entry */
    target_ulong cs_base;  /* base of CS segment */

    MemOp aflag; /* address size; passed to gen_lea_v_seg() by the string ops */
    MemOp dflag; /* presumably the operand (data) size -- TODO confirm */

    int8_t override; /* -1 if no override, else R_CS, R_DS, etc */
    uint8_t prefix;  /* mask of PREFIX_* bits */

#ifndef CONFIG_USER_ONLY
    uint8_t cpl;   /* code priv level */
    uint8_t iopl;  /* i/o priv level */
#endif
    uint8_t vex_l;  /* vex vector length */
    uint8_t vex_v;  /* vex vvvv register, without 1's complement.  */
    uint8_t popl_esp_hack; /* for correct popl with esp base handling */
    uint8_t rip_offset; /* only used in x86_64, but left for simplicity */

#ifdef TARGET_X86_64
    /* REX prefix fields; read through the REX_R/REX_X/REX_B/REX_W macros. */
    uint8_t rex_r;
    uint8_t rex_x;
    uint8_t rex_b;
    bool rex_w;
#endif
    bool jmp_opt; /* use direct block chaining for direct jumps */
    bool repz_opt; /* optimize jumps within repz instructions */
    bool cc_op_dirty; /* cc_op below has not yet been stored to cpu_cc_op */

    CCOp cc_op;  /* current CC operation */
    int mem_index; /* select memory access functions */
    uint32_t flags; /* all execution flags */
    /* NOTE(review): presumably cached CPUID feature words -- confirm at init. */
    int cpuid_features;
    int cpuid_ext_features;
    int cpuid_ext2_features;
    int cpuid_ext3_features;
    int cpuid_7_0_ebx_features;
    int cpuid_xsave_features;

    /* TCG local temps */
    TCGv cc_srcT;
    TCGv A0;
    TCGv T0;
    TCGv T1;

    /* TCG local register indexes (only used inside old micro ops) */
    TCGv tmp0;
    TCGv tmp4;
    TCGv_ptr ptr0;
    TCGv_ptr ptr1;
    TCGv_i32 tmp2_i32;
    TCGv_i32 tmp3_i32;
    TCGv_i64 tmp1_i64;

    /* NOTE(review): presumably the non-local exit used to abort decoding. */
    sigjmp_buf jmpbuf;
} DisasContext;
134 
/*
 * The environment in which user-only runs is constrained.
 *
 * Each macro below takes a DisasContext pointer S.  Where the build
 * configuration fixes the answer, the macro expands to a constant and S is
 * not evaluated; otherwise it tests a bit in S->flags or reads a field.
 */
#ifdef CONFIG_USER_ONLY
#define PE(S)     true
#define CPL(S)    3
#define IOPL(S)   0
#define SVME(S)   false
#define GUEST(S)  false
#else
#define PE(S)     (((S)->flags & HF_PE_MASK) != 0)
#define CPL(S)    ((S)->cpl)
#define IOPL(S)   ((S)->iopl)
#define SVME(S)   (((S)->flags & HF_SVME_MASK) != 0)
#define GUEST(S)  (((S)->flags & HF_GUEST_MASK) != 0)
#endif
/* These are constant only for the 64-bit user-only build. */
#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64)
#define VM86(S)   false
#define CODE32(S) true
#define SS32(S)   true
#define ADDSEG(S) false
#else
#define VM86(S)   (((S)->flags & HF_VM_MASK) != 0)
#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0)
#define SS32(S)   (((S)->flags & HF_SS32_MASK) != 0)
#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0)
#endif
/* Always false on 32-bit targets, always true for 64-bit user-only. */
#if !defined(TARGET_X86_64)
#define CODE64(S) false
#define LMA(S)    false
#elif defined(CONFIG_USER_ONLY)
#define CODE64(S) true
#define LMA(S)    true
#else
#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0)
#define LMA(S)    (((S)->flags & HF_LMA_MASK) != 0)
#endif
170 
/*
 * Accessors for the REX prefix state of DisasContext S.  On non-64-bit
 * targets the rex_* fields do not exist, so the macros collapse to
 * constants.  NOTE(review): the "+ 0" presumably turns the field access
 * into a non-assignable int expression -- confirm intent.
 */
#ifdef TARGET_X86_64
#define REX_PREFIX(S)  (((S)->prefix & PREFIX_REX) != 0)
#define REX_W(S)       ((S)->rex_w)
#define REX_R(S)       ((S)->rex_r + 0)
#define REX_X(S)       ((S)->rex_x + 0)
#define REX_B(S)       ((S)->rex_b + 0)
#else
#define REX_PREFIX(S)  false
#define REX_W(S)       false
#define REX_R(S)       0
#define REX_X(S)       0
#define REX_B(S)       0
#endif
184 
/*
 * Many sysemu-only helpers are not reachable for user-only.
 * Define stub generators here, so that we need not either sprinkle
 * ifdefs through the translator, nor provide the helper function.
 *
 * STUB_HELPER(NAME, args...) defines a static inline gen_helper_NAME()
 * with the given parameter list whose body is qemu_build_not_reached().
 * NOTE(review): that presumably fails the build if a call to the stub is
 * not eliminated as dead code -- confirm against its definition.
 */
#define STUB_HELPER(NAME, ...) \
    static inline void gen_helper_##NAME(__VA_ARGS__) \
    { qemu_build_not_reached(); }

/* Stubs are only instantiated for the user-only build. */
#ifdef CONFIG_USER_ONLY
STUB_HELPER(clgi, TCGv_env env)
STUB_HELPER(flush_page, TCGv_env env, TCGv addr)
STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port)
STUB_HELPER(monitor, TCGv_env env, TCGv addr)
STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs)
STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
STUB_HELPER(rdmsr, TCGv_env env)
STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(stgi, TCGv_env env)
STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(vmmcall, TCGv_env env)
STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
STUB_HELPER(wrmsr, TCGv_env env)
#endif
219 
/* Forward declarations of static functions used before their definition. */
static void gen_eob(DisasContext *s);
static void gen_jr(DisasContext *s, TCGv dest);
static void gen_jmp(DisasContext *s, target_ulong eip);
static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num);
static void gen_op(DisasContext *s1, int op, MemOp ot, int d);
static void gen_exception_gpf(DisasContext *s);
226 
/* i386 arith/logic operations, numbered consecutively from zero */
enum {
    OP_ADDL = 0,
    OP_ORL  = 1,
    OP_ADCL = 2,
    OP_SBBL = 3,
    OP_ANDL = 4,
    OP_SUBL = 5,
    OP_XORL = 6,
    OP_CMPL = 7,
};
238 
/* i386 shift and rotate operations, numbered consecutively from zero */
enum {
    OP_ROL  = 0,
    OP_ROR  = 1,
    OP_RCL  = 2,
    OP_RCR  = 3,
    OP_SHL  = 4,
    OP_SHR  = 5,
    OP_SHL1 = 6, /* undocumented */
    OP_SAR  = 7,
};
250 
/*
 * Condition selectors, numbered 0..7.  gen_prepare_cc() extracts one of
 * these from bits 3..1 of the jump opcode value.
 */
enum {
    JCC_O  = 0,
    JCC_B  = 1,
    JCC_Z  = 2,
    JCC_BE = 3,
    JCC_S  = 4,
    JCC_P  = 5,
    JCC_L  = 6,
    JCC_LE = 7,
};
261 
/* Operand selectors: the eight integer registers, then pseudo-operands. */
enum {
    /* I386 int registers */
    OR_EAX = 0,   /* MUST be even numbered */
    OR_ECX = 1,
    OR_EDX = 2,
    OR_EBX = 3,
    OR_ESP = 4,
    OR_EBP = 5,
    OR_ESI = 6,
    OR_EDI = 7,

    OR_TMP0 = 16,    /* temporary operand register */
    OR_TMP1 = 17,
    OR_A0   = 18, /* temporary register used when doing address evaluation */
};
277 
/* Bit flags naming which condition-code inputs a CC operation reads. */
enum {
    USES_CC_DST  = 1 << 0,
    USES_CC_SRC  = 1 << 1,
    USES_CC_SRC2 = 1 << 2,
    USES_CC_SRCT = 1 << 3,
};
284 
/*
 * Bit set if the global variable is live after setting CC_OP to X.
 *
 * Indexed by CCOp; each entry is a mask of USES_CC_* bits.  set_cc_op()
 * uses it to discard values that the new operation no longer reads, and
 * gen_compute_eflags() uses it to avoid passing dead values to the helper.
 * The "B ... Q" ranges give every operand size of an operation the same mask.
 */
static const uint8_t cc_op_live[CC_OP_NB] = {
    [CC_OP_DYNAMIC] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_EFLAGS] = USES_CC_SRC,
    [CC_OP_MULB ... CC_OP_MULQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADDB ... CC_OP_ADDQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCB ... CC_OP_ADCQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    /* SUB is the only operation that keeps cc_srcT live. */
    [CC_OP_SUBB ... CC_OP_SUBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRCT,
    [CC_OP_SBBB ... CC_OP_SBBQ] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_LOGICB ... CC_OP_LOGICQ] = USES_CC_DST,
    [CC_OP_INCB ... CC_OP_INCQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_DECB ... CC_OP_DECQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
    [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
    [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
    /* CLR reads nothing: gen_compute_eflags() materializes a constant. */
    [CC_OP_CLR] = 0,
    [CC_OP_POPCNT] = USES_CC_SRC,
};
306 
307 static void set_cc_op(DisasContext *s, CCOp op)
308 {
309     int dead;
310 
311     if (s->cc_op == op) {
312         return;
313     }
314 
315     /* Discard CC computation that will no longer be used.  */
316     dead = cc_op_live[s->cc_op] & ~cc_op_live[op];
317     if (dead & USES_CC_DST) {
318         tcg_gen_discard_tl(cpu_cc_dst);
319     }
320     if (dead & USES_CC_SRC) {
321         tcg_gen_discard_tl(cpu_cc_src);
322     }
323     if (dead & USES_CC_SRC2) {
324         tcg_gen_discard_tl(cpu_cc_src2);
325     }
326     if (dead & USES_CC_SRCT) {
327         tcg_gen_discard_tl(s->cc_srcT);
328     }
329 
330     if (op == CC_OP_DYNAMIC) {
331         /* The DYNAMIC setting is translator only, and should never be
332            stored.  Thus we always consider it clean.  */
333         s->cc_op_dirty = false;
334     } else {
335         /* Discard any computed CC_OP value (see shifts).  */
336         if (s->cc_op == CC_OP_DYNAMIC) {
337             tcg_gen_discard_i32(cpu_cc_op);
338         }
339         s->cc_op_dirty = true;
340     }
341     s->cc_op = op;
342 }
343 
344 static void gen_update_cc_op(DisasContext *s)
345 {
346     if (s->cc_op_dirty) {
347         tcg_gen_movi_i32(cpu_cc_op, s->cc_op);
348         s->cc_op_dirty = false;
349     }
350 }
351 
/* NB_OP_SIZES is 4 on TARGET_X86_64 builds and 3 otherwise
   (presumably the number of supported operand sizes). */
#ifndef TARGET_X86_64
#define NB_OP_SIZES 3
#else
#define NB_OP_SIZES 4
#endif
361 
/*
 * Byte offsets of the sub-register views inside a target_ulong-sized
 * register, selected by host byte order.  Presumably B is the low byte,
 * H the second byte, W the low 16 bits, L the low 32 bits and LH the
 * upper 32 bits -- confirm against the users of these macros.
 */
#if !HOST_BIG_ENDIAN
#define REG_B_OFFSET 0
#define REG_H_OFFSET 1
#define REG_W_OFFSET 0
#define REG_L_OFFSET 0
#define REG_LH_OFFSET 4
#else
#define REG_B_OFFSET (sizeof(target_ulong) - 1)
#define REG_H_OFFSET (sizeof(target_ulong) - 2)
#define REG_W_OFFSET (sizeof(target_ulong) - 2)
#define REG_L_OFFSET (sizeof(target_ulong) - 4)
#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
#endif
375 
376 /* In instruction encodings for byte register accesses the
377  * register number usually indicates "low 8 bits of register N";
378  * however there are some special cases where N 4..7 indicates
379  * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
380  * true for this special case, false otherwise.
381  */
382 static inline bool byte_reg_is_xH(DisasContext *s, int reg)
383 {
384     /* Any time the REX prefix is present, byte registers are uniform */
385     if (reg < 4 || REX_PREFIX(s)) {
386         return false;
387     }
388     return true;
389 }
390 
391 /* Select the size of a push/pop operation.  */
392 static inline MemOp mo_pushpop(DisasContext *s, MemOp ot)
393 {
394     if (CODE64(s)) {
395         return ot == MO_16 ? MO_16 : MO_64;
396     } else {
397         return ot;
398     }
399 }
400 
401 /* Select the size of the stack pointer.  */
402 static inline MemOp mo_stacksize(DisasContext *s)
403 {
404     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
405 }
406 
407 /* Select only size 64 else 32.  Used for SSE operand sizes.  */
408 static inline MemOp mo_64_32(MemOp ot)
409 {
410 #ifdef TARGET_X86_64
411     return ot == MO_64 ? MO_64 : MO_32;
412 #else
413     return MO_32;
414 #endif
415 }
416 
417 /* Select size 8 if lsb of B is clear, else OT.  Used for decoding
418    byte vs word opcodes.  */
419 static inline MemOp mo_b_d(int b, MemOp ot)
420 {
421     return b & 1 ? ot : MO_8;
422 }
423 
424 /* Select size 8 if lsb of B is clear, else OT capped at 32.
425    Used for decoding operand size of port opcodes.  */
426 static inline MemOp mo_b_d32(int b, MemOp ot)
427 {
428     return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8;
429 }
430 
431 static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0)
432 {
433     switch(ot) {
434     case MO_8:
435         if (!byte_reg_is_xH(s, reg)) {
436             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
437         } else {
438             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
439         }
440         break;
441     case MO_16:
442         tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16);
443         break;
444     case MO_32:
445         /* For x86_64, this sets the higher half of register to zero.
446            For i386, this is equivalent to a mov. */
447         tcg_gen_ext32u_tl(cpu_regs[reg], t0);
448         break;
449 #ifdef TARGET_X86_64
450     case MO_64:
451         tcg_gen_mov_tl(cpu_regs[reg], t0);
452         break;
453 #endif
454     default:
455         tcg_abort();
456     }
457 }
458 
459 static inline
460 void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg)
461 {
462     if (ot == MO_8 && byte_reg_is_xH(s, reg)) {
463         tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8);
464     } else {
465         tcg_gen_mov_tl(t0, cpu_regs[reg]);
466     }
467 }
468 
469 static void gen_add_A0_im(DisasContext *s, int val)
470 {
471     tcg_gen_addi_tl(s->A0, s->A0, val);
472     if (!CODE64(s)) {
473         tcg_gen_ext32u_tl(s->A0, s->A0);
474     }
475 }
476 
477 static inline void gen_op_jmp_v(TCGv dest)
478 {
479     tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip));
480 }
481 
482 static inline
483 void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val)
484 {
485     tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val);
486     gen_op_mov_reg_v(s, size, reg, s->tmp0);
487 }
488 
489 static inline void gen_op_add_reg_T0(DisasContext *s, MemOp size, int reg)
490 {
491     tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0);
492     gen_op_mov_reg_v(s, size, reg, s->tmp0);
493 }
494 
495 static inline void gen_op_ld_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
496 {
497     tcg_gen_qemu_ld_tl(t0, a0, s->mem_index, idx | MO_LE);
498 }
499 
500 static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0)
501 {
502     tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
503 }
504 
505 static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
506 {
507     if (d == OR_TMP0) {
508         gen_op_st_v(s, idx, s->T0, s->A0);
509     } else {
510         gen_op_mov_reg_v(s, idx, d, s->T0);
511     }
512 }
513 
514 static inline void gen_jmp_im(DisasContext *s, target_ulong pc)
515 {
516     tcg_gen_movi_tl(s->tmp0, pc);
517     gen_op_jmp_v(s->tmp0);
518 }
519 
/* Compute SEG:REG into A0.  SEG is selected from the override segment
   (OVR_SEG) and the default segment (DEF_SEG).  OVR_SEG may be -1 to
   indicate no override.

   AFLAG is the address size and A0 the offset value.  The switch either
   returns early, when no segment base has to be added, or falls out to the
   common tail with OVR_SEG naming the segment whose base is added.  */
static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0,
                          int def_seg, int ovr_seg)
{
    switch (aflag) {
#ifdef TARGET_X86_64
    case MO_64:
        /* 64 bit address: the default segment is never applied */
        if (ovr_seg < 0) {
            tcg_gen_mov_tl(s->A0, a0);
            return;
        }
        break;
#endif
    case MO_32:
        /* 32 bit address */
        if (ovr_seg < 0 && ADDSEG(s)) {
            ovr_seg = def_seg;
        }
        if (ovr_seg < 0) {
            /* No base to add: just truncate the offset to 32 bits. */
            tcg_gen_ext32u_tl(s->A0, a0);
            return;
        }
        break;
    case MO_16:
        /* 16 bit address */
        tcg_gen_ext16u_tl(s->A0, a0);
        /* From here on the truncated offset in s->A0 is the input. */
        a0 = s->A0;
        if (ovr_seg < 0) {
            if (ADDSEG(s)) {
                ovr_seg = def_seg;
            } else {
                return;
            }
        }
        break;
    default:
        tcg_abort();
    }

    if (ovr_seg >= 0) {
        TCGv seg = cpu_seg_base[ovr_seg];

        if (aflag == MO_64) {
            /* Full-width add, no truncation. */
            tcg_gen_add_tl(s->A0, a0, seg);
        } else if (CODE64(s)) {
            /* Truncate the offset first; the sum keeps its full width. */
            tcg_gen_ext32u_tl(s->A0, a0);
            tcg_gen_add_tl(s->A0, s->A0, seg);
        } else {
            /* Outside 64-bit code the sum itself is truncated to 32 bits. */
            tcg_gen_add_tl(s->A0, a0, seg);
            tcg_gen_ext32u_tl(s->A0, s->A0);
        }
    }
}
575 
576 static inline void gen_string_movl_A0_ESI(DisasContext *s)
577 {
578     gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override);
579 }
580 
581 static inline void gen_string_movl_A0_EDI(DisasContext *s)
582 {
583     gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1);
584 }
585 
586 static inline void gen_op_movl_T0_Dshift(DisasContext *s, MemOp ot)
587 {
588     tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df));
589     tcg_gen_shli_tl(s->T0, s->T0, ot);
590 };
591 
592 static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign)
593 {
594     switch (size) {
595     case MO_8:
596         if (sign) {
597             tcg_gen_ext8s_tl(dst, src);
598         } else {
599             tcg_gen_ext8u_tl(dst, src);
600         }
601         return dst;
602     case MO_16:
603         if (sign) {
604             tcg_gen_ext16s_tl(dst, src);
605         } else {
606             tcg_gen_ext16u_tl(dst, src);
607         }
608         return dst;
609 #ifdef TARGET_X86_64
610     case MO_32:
611         if (sign) {
612             tcg_gen_ext32s_tl(dst, src);
613         } else {
614             tcg_gen_ext32u_tl(dst, src);
615         }
616         return dst;
617 #endif
618     default:
619         return src;
620     }
621 }
622 
623 static void gen_extu(MemOp ot, TCGv reg)
624 {
625     gen_ext_tl(reg, reg, ot, false);
626 }
627 
628 static void gen_exts(MemOp ot, TCGv reg)
629 {
630     gen_ext_tl(reg, reg, ot, true);
631 }
632 
633 static inline
634 void gen_op_jnz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
635 {
636     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
637     gen_extu(size, s->tmp0);
638     tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1);
639 }
640 
641 static inline
642 void gen_op_jz_ecx(DisasContext *s, MemOp size, TCGLabel *label1)
643 {
644     tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]);
645     gen_extu(size, s->tmp0);
646     tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
647 }
648 
649 static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n)
650 {
651     switch (ot) {
652     case MO_8:
653         gen_helper_inb(v, cpu_env, n);
654         break;
655     case MO_16:
656         gen_helper_inw(v, cpu_env, n);
657         break;
658     case MO_32:
659         gen_helper_inl(v, cpu_env, n);
660         break;
661     default:
662         tcg_abort();
663     }
664 }
665 
666 static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n)
667 {
668     switch (ot) {
669     case MO_8:
670         gen_helper_outb(cpu_env, v, n);
671         break;
672     case MO_16:
673         gen_helper_outw(cpu_env, v, n);
674         break;
675     case MO_32:
676         gen_helper_outl(cpu_env, v, n);
677         break;
678     default:
679         tcg_abort();
680     }
681 }
682 
683 /*
684  * Validate that access to [port, port + 1<<ot) is allowed.
685  * Raise #GP, or VMM exit if not.
686  */
687 static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port,
688                          uint32_t svm_flags)
689 {
690 #ifdef CONFIG_USER_ONLY
691     /*
692      * We do not implement the ioperm(2) syscall, so the TSS check
693      * will always fail.
694      */
695     gen_exception_gpf(s);
696     return false;
697 #else
698     if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) {
699         gen_helper_check_io(cpu_env, port, tcg_constant_i32(1 << ot));
700     }
701     if (GUEST(s)) {
702         target_ulong cur_eip = s->base.pc_next - s->cs_base;
703         target_ulong next_eip = s->pc - s->cs_base;
704 
705         gen_update_cc_op(s);
706         gen_jmp_im(s, cur_eip);
707         if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
708             svm_flags |= SVM_IOIO_REP_MASK;
709         }
710         svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot);
711         gen_helper_svm_check_io(cpu_env, port,
712                                 tcg_constant_i32(svm_flags),
713                                 tcg_constant_i32(next_eip - cur_eip));
714     }
715     return true;
716 #endif
717 }
718 
719 static inline void gen_movs(DisasContext *s, MemOp ot)
720 {
721     gen_string_movl_A0_ESI(s);
722     gen_op_ld_v(s, ot, s->T0, s->A0);
723     gen_string_movl_A0_EDI(s);
724     gen_op_st_v(s, ot, s->T0, s->A0);
725     gen_op_movl_T0_Dshift(s, ot);
726     gen_op_add_reg_T0(s, s->aflag, R_ESI);
727     gen_op_add_reg_T0(s, s->aflag, R_EDI);
728 }
729 
730 static void gen_op_update1_cc(DisasContext *s)
731 {
732     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
733 }
734 
735 static void gen_op_update2_cc(DisasContext *s)
736 {
737     tcg_gen_mov_tl(cpu_cc_src, s->T1);
738     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
739 }
740 
741 static void gen_op_update3_cc(DisasContext *s, TCGv reg)
742 {
743     tcg_gen_mov_tl(cpu_cc_src2, reg);
744     tcg_gen_mov_tl(cpu_cc_src, s->T1);
745     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
746 }
747 
748 static inline void gen_op_testl_T0_T1_cc(DisasContext *s)
749 {
750     tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1);
751 }
752 
753 static void gen_op_update_neg_cc(DisasContext *s)
754 {
755     tcg_gen_mov_tl(cpu_cc_dst, s->T0);
756     tcg_gen_neg_tl(cpu_cc_src, s->T0);
757     tcg_gen_movi_tl(s->cc_srcT, 0);
758 }
759 
760 /* compute all eflags to cc_src */
761 static void gen_compute_eflags(DisasContext *s)
762 {
763     TCGv zero, dst, src1, src2;
764     int live, dead;
765 
766     if (s->cc_op == CC_OP_EFLAGS) {
767         return;
768     }
769     if (s->cc_op == CC_OP_CLR) {
770         tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
771         set_cc_op(s, CC_OP_EFLAGS);
772         return;
773     }
774 
775     zero = NULL;
776     dst = cpu_cc_dst;
777     src1 = cpu_cc_src;
778     src2 = cpu_cc_src2;
779 
780     /* Take care to not read values that are not live.  */
781     live = cc_op_live[s->cc_op] & ~USES_CC_SRCT;
782     dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2);
783     if (dead) {
784         zero = tcg_const_tl(0);
785         if (dead & USES_CC_DST) {
786             dst = zero;
787         }
788         if (dead & USES_CC_SRC) {
789             src1 = zero;
790         }
791         if (dead & USES_CC_SRC2) {
792             src2 = zero;
793         }
794     }
795 
796     gen_update_cc_op(s);
797     gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op);
798     set_cc_op(s, CC_OP_EFLAGS);
799 
800     if (dead) {
801         tcg_temp_free(zero);
802     }
803 }
804 
/*
 * Description of how to test a condition, as returned by the
 * gen_prepare_* functions.  NOTE(review): the consumers are outside this
 * view; the per-field notes below are inferred from how the producers
 * fill the structure -- confirm against the code that uses it.
 */
typedef struct CCPrepare {
    TCGCond cond;      /* comparison to perform */
    TCGv reg;          /* first operand */
    TCGv reg2;         /* second operand, meaningful when use_reg2 is set */
    target_ulong imm;  /* presumably the immediate operand otherwise */
    target_ulong mask; /* bits of reg that matter; -1 for the whole value */
    bool use_reg2;     /* compare reg against reg2 */
    bool no_setcond;   /* presumably: reg already holds the 0/1 result */
} CCPrepare;
814 
/*
 * compute eflags.C to reg
 *
 * Returns a CCPrepare describing how to test the carry flag for the
 * current s->cc_op.  REG is a scratch value that some cases use to hold an
 * intermediate result; s->tmp0 may be clobbered as well.
 */
static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
{
    TCGv t0, t1;
    int size, shift;

    switch (s->cc_op) {
    case CC_OP_SUBB ... CC_OP_SUBQ:
        /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_SUBB;
        /* t1 is s->tmp0 if an extension was emitted, else cpu_cc_src. */
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        /* If no temporary was used, be careful not to alias t1 and t0.  */
        t0 = t1 == cpu_cc_src ? s->tmp0 : reg;
        tcg_gen_mov_tl(t0, s->cc_srcT);
        gen_extu(size, t0);
        goto add_sub;

    case CC_OP_ADDB ... CC_OP_ADDQ:
        /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */
        size = s->cc_op - CC_OP_ADDB;
        t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
        t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
    add_sub:
        /* Unsigned comparison t0 < t1. */
        return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0,
                             .reg2 = t1, .mask = -1, .use_reg2 = true };

    case CC_OP_LOGICB ... CC_OP_LOGICQ:
    case CC_OP_CLR:
    case CC_OP_POPCNT:
        /* The condition is never true for these operations. */
        return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };

    case CC_OP_INCB ... CC_OP_INCQ:
    case CC_OP_DECB ... CC_OP_DECQ:
        /* cc_src is used directly, without a setcond. */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = -1, .no_setcond = true };

    case CC_OP_SHLB ... CC_OP_SHLQ:
        /* (CC_SRC >> (DATA_BITS - 1)) & 1 */
        size = s->cc_op - CC_OP_SHLB;
        shift = (8 << size) - 1;
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
                             .mask = (target_ulong)1 << shift };

    case CC_OP_MULB ... CC_OP_MULQ:
        /* cc_src != 0 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = -1 };

    case CC_OP_BMILGB ... CC_OP_BMILGQ:
        /* (DATA_TYPE)CC_SRC == 0 */
        size = s->cc_op - CC_OP_BMILGB;
        t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };

    case CC_OP_ADCX:
    case CC_OP_ADCOX:
        /* cc_dst is used directly, without a setcond. */
        return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
                             .mask = -1, .no_setcond = true };

    case CC_OP_EFLAGS:
    case CC_OP_SARB ... CC_OP_SARQ:
        /* CC_SRC & 1 */
        return (CCPrepare) { .cond = TCG_COND_NE,
                             .reg = cpu_cc_src, .mask = CC_C };

    default:
       /* The need to compute only C from CC_OP_DYNAMIC is important
          in efficiently implementing e.g. INC at the start of a TB.  */
       gen_update_cc_op(s);
       gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src,
                               cpu_cc_src2, cpu_cc_op);
       return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
                            .mask = -1, .no_setcond = true };
    }
}
888 
889 /* compute eflags.P to reg */
890 static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg)
891 {
892     gen_compute_eflags(s);
893     return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
894                          .mask = CC_P };
895 }
896 
897 /* compute eflags.S to reg */
898 static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
899 {
900     switch (s->cc_op) {
901     case CC_OP_DYNAMIC:
902         gen_compute_eflags(s);
903         /* FALLTHRU */
904     case CC_OP_EFLAGS:
905     case CC_OP_ADCX:
906     case CC_OP_ADOX:
907     case CC_OP_ADCOX:
908         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
909                              .mask = CC_S };
910     case CC_OP_CLR:
911     case CC_OP_POPCNT:
912         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
913     default:
914         {
915             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
916             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true);
917             return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 };
918         }
919     }
920 }
921 
922 /* compute eflags.O to reg */
923 static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
924 {
925     switch (s->cc_op) {
926     case CC_OP_ADOX:
927     case CC_OP_ADCOX:
928         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
929                              .mask = -1, .no_setcond = true };
930     case CC_OP_CLR:
931     case CC_OP_POPCNT:
932         return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
933     default:
934         gen_compute_eflags(s);
935         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
936                              .mask = CC_O };
937     }
938 }
939 
940 /* compute eflags.Z to reg */
941 static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
942 {
943     switch (s->cc_op) {
944     case CC_OP_DYNAMIC:
945         gen_compute_eflags(s);
946         /* FALLTHRU */
947     case CC_OP_EFLAGS:
948     case CC_OP_ADCX:
949     case CC_OP_ADOX:
950     case CC_OP_ADCOX:
951         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
952                              .mask = CC_Z };
953     case CC_OP_CLR:
954         return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
955     case CC_OP_POPCNT:
956         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src,
957                              .mask = -1 };
958     default:
959         {
960             MemOp size = (s->cc_op - CC_OP_ADDB) & 3;
961             TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false);
962             return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
963         }
964     }
965 }
966 
/* perform a conditional store into register 'reg' according to jump opcode
   value 'b'. In the fast case, T0 is guaranteed not to be used. */
969 static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg)
970 {
971     int inv, jcc_op, cond;
972     MemOp size;
973     CCPrepare cc;
974     TCGv t0;
975 
976     inv = b & 1;
977     jcc_op = (b >> 1) & 7;
978 
979     switch (s->cc_op) {
980     case CC_OP_SUBB ... CC_OP_SUBQ:
981         /* We optimize relational operators for the cmp/jcc case.  */
982         size = s->cc_op - CC_OP_SUBB;
983         switch (jcc_op) {
984         case JCC_BE:
985             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
986             gen_extu(size, s->tmp4);
987             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false);
988             cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4,
989                                .reg2 = t0, .mask = -1, .use_reg2 = true };
990             break;
991 
992         case JCC_L:
993             cond = TCG_COND_LT;
994             goto fast_jcc_l;
995         case JCC_LE:
996             cond = TCG_COND_LE;
997         fast_jcc_l:
998             tcg_gen_mov_tl(s->tmp4, s->cc_srcT);
999             gen_exts(size, s->tmp4);
1000             t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true);
1001             cc = (CCPrepare) { .cond = cond, .reg = s->tmp4,
1002                                .reg2 = t0, .mask = -1, .use_reg2 = true };
1003             break;
1004 
1005         default:
1006             goto slow_jcc;
1007         }
1008         break;
1009 
1010     default:
1011     slow_jcc:
1012         /* This actually generates good code for JC, JZ and JS.  */
1013         switch (jcc_op) {
1014         case JCC_O:
1015             cc = gen_prepare_eflags_o(s, reg);
1016             break;
1017         case JCC_B:
1018             cc = gen_prepare_eflags_c(s, reg);
1019             break;
1020         case JCC_Z:
1021             cc = gen_prepare_eflags_z(s, reg);
1022             break;
1023         case JCC_BE:
1024             gen_compute_eflags(s);
1025             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
1026                                .mask = CC_Z | CC_C };
1027             break;
1028         case JCC_S:
1029             cc = gen_prepare_eflags_s(s, reg);
1030             break;
1031         case JCC_P:
1032             cc = gen_prepare_eflags_p(s, reg);
1033             break;
1034         case JCC_L:
1035             gen_compute_eflags(s);
1036             if (reg == cpu_cc_src) {
1037                 reg = s->tmp0;
1038             }
1039             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1040             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1041             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1042                                .mask = CC_S };
1043             break;
1044         default:
1045         case JCC_LE:
1046             gen_compute_eflags(s);
1047             if (reg == cpu_cc_src) {
1048                 reg = s->tmp0;
1049             }
1050             tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */
1051             tcg_gen_xor_tl(reg, reg, cpu_cc_src);
1052             cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg,
1053                                .mask = CC_S | CC_Z };
1054             break;
1055         }
1056         break;
1057     }
1058 
1059     if (inv) {
1060         cc.cond = tcg_invert_cond(cc.cond);
1061     }
1062     return cc;
1063 }
1064 
1065 static void gen_setcc1(DisasContext *s, int b, TCGv reg)
1066 {
1067     CCPrepare cc = gen_prepare_cc(s, b, reg);
1068 
1069     if (cc.no_setcond) {
1070         if (cc.cond == TCG_COND_EQ) {
1071             tcg_gen_xori_tl(reg, cc.reg, 1);
1072         } else {
1073             tcg_gen_mov_tl(reg, cc.reg);
1074         }
1075         return;
1076     }
1077 
1078     if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 &&
1079         cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) {
1080         tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask));
1081         tcg_gen_andi_tl(reg, reg, 1);
1082         return;
1083     }
1084     if (cc.mask != -1) {
1085         tcg_gen_andi_tl(reg, cc.reg, cc.mask);
1086         cc.reg = reg;
1087     }
1088     if (cc.use_reg2) {
1089         tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2);
1090     } else {
1091         tcg_gen_setcondi_tl(cc.cond, reg, cc.reg, cc.imm);
1092     }
1093 }
1094 
1095 static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg)
1096 {
1097     gen_setcc1(s, JCC_B << 1, reg);
1098 }
1099 
1100 /* generate a conditional jump to label 'l1' according to jump opcode
1101    value 'b'. In the fast case, T0 is guaranted not to be used. */
1102 static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1)
1103 {
1104     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1105 
1106     if (cc.mask != -1) {
1107         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1108         cc.reg = s->T0;
1109     }
1110     if (cc.use_reg2) {
1111         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1112     } else {
1113         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1114     }
1115 }
1116 
1117 /* Generate a conditional jump to label 'l1' according to jump opcode
1118    value 'b'. In the fast case, T0 is guaranted not to be used.
1119    A translation block must end soon.  */
1120 static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1)
1121 {
1122     CCPrepare cc = gen_prepare_cc(s, b, s->T0);
1123 
1124     gen_update_cc_op(s);
1125     if (cc.mask != -1) {
1126         tcg_gen_andi_tl(s->T0, cc.reg, cc.mask);
1127         cc.reg = s->T0;
1128     }
1129     set_cc_op(s, CC_OP_DYNAMIC);
1130     if (cc.use_reg2) {
1131         tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1);
1132     } else {
1133         tcg_gen_brcondi_tl(cc.cond, cc.reg, cc.imm, l1);
1134     }
1135 }
1136 
1137 /* XXX: does not work with gdbstub "ice" single step - not a
1138    serious problem */
1139 static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip)
1140 {
1141     TCGLabel *l1 = gen_new_label();
1142     TCGLabel *l2 = gen_new_label();
1143     gen_op_jnz_ecx(s, s->aflag, l1);
1144     gen_set_label(l2);
1145     gen_jmp_tb(s, next_eip, 1);
1146     gen_set_label(l1);
1147     return l2;
1148 }
1149 
1150 static inline void gen_stos(DisasContext *s, MemOp ot)
1151 {
1152     gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
1153     gen_string_movl_A0_EDI(s);
1154     gen_op_st_v(s, ot, s->T0, s->A0);
1155     gen_op_movl_T0_Dshift(s, ot);
1156     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1157 }
1158 
1159 static inline void gen_lods(DisasContext *s, MemOp ot)
1160 {
1161     gen_string_movl_A0_ESI(s);
1162     gen_op_ld_v(s, ot, s->T0, s->A0);
1163     gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
1164     gen_op_movl_T0_Dshift(s, ot);
1165     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1166 }
1167 
1168 static inline void gen_scas(DisasContext *s, MemOp ot)
1169 {
1170     gen_string_movl_A0_EDI(s);
1171     gen_op_ld_v(s, ot, s->T1, s->A0);
1172     gen_op(s, OP_CMPL, ot, R_EAX);
1173     gen_op_movl_T0_Dshift(s, ot);
1174     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1175 }
1176 
1177 static inline void gen_cmps(DisasContext *s, MemOp ot)
1178 {
1179     gen_string_movl_A0_EDI(s);
1180     gen_op_ld_v(s, ot, s->T1, s->A0);
1181     gen_string_movl_A0_ESI(s);
1182     gen_op(s, OP_CMPL, ot, OR_TMP0);
1183     gen_op_movl_T0_Dshift(s, ot);
1184     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1185     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1186 }
1187 
1188 static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot)
1189 {
1190     if (s->flags & HF_IOBPT_MASK) {
1191 #ifdef CONFIG_USER_ONLY
1192         /* user-mode cpu should not be in IOBPT mode */
1193         g_assert_not_reached();
1194 #else
1195         TCGv_i32 t_size = tcg_const_i32(1 << ot);
1196         TCGv t_next = tcg_const_tl(s->pc - s->cs_base);
1197 
1198         gen_helper_bpt_io(cpu_env, t_port, t_size, t_next);
1199         tcg_temp_free_i32(t_size);
1200         tcg_temp_free(t_next);
1201 #endif /* CONFIG_USER_ONLY */
1202     }
1203 }
1204 
1205 static inline void gen_ins(DisasContext *s, MemOp ot)
1206 {
1207     gen_string_movl_A0_EDI(s);
1208     /* Note: we must do this dummy write first to be restartable in
1209        case of page fault. */
1210     tcg_gen_movi_tl(s->T0, 0);
1211     gen_op_st_v(s, ot, s->T0, s->A0);
1212     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1213     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1214     gen_helper_in_func(ot, s->T0, s->tmp2_i32);
1215     gen_op_st_v(s, ot, s->T0, s->A0);
1216     gen_op_movl_T0_Dshift(s, ot);
1217     gen_op_add_reg_T0(s, s->aflag, R_EDI);
1218     gen_bpt_io(s, s->tmp2_i32, ot);
1219 }
1220 
1221 static inline void gen_outs(DisasContext *s, MemOp ot)
1222 {
1223     gen_string_movl_A0_ESI(s);
1224     gen_op_ld_v(s, ot, s->T0, s->A0);
1225 
1226     tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
1227     tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
1228     tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
1229     gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
1230     gen_op_movl_T0_Dshift(s, ot);
1231     gen_op_add_reg_T0(s, s->aflag, R_ESI);
1232     gen_bpt_io(s, s->tmp2_i32, ot);
1233 }
1234 
/*
 * Define gen_repz_<op>(): one iteration of a REP-prefixed string
 * instruction built around gen_<op>().
 *
 * Same method as Valgrind: we generate jumps to the current or the next
 * instruction.
 */
#define GEN_REPZ(op)                                                          \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip)                     \
{                                                                             \
    TCGLabel *exit_label;                                                     \
                                                                              \
    gen_update_cc_op(s);                                                      \
    exit_label = gen_jz_ecx_string(s, next_eip);                              \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    /* a loop would cause two single step exceptions if ECX = 1               \
       before rep string_insn */                                              \
    if (s->repz_opt) {                                                        \
        gen_op_jz_ecx(s, s->aflag, exit_label);                               \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1252 
/*
 * Define gen_repz_<op>() for string instructions whose repetition also
 * depends on the flags: after each iteration a JCC_Z-based branch, with
 * its invert bit set to (nz ^ 1), may leave the loop.
 */
#define GEN_REPZ2(op)                                                         \
static inline void gen_repz_ ## op(DisasContext *s, MemOp ot,                 \
                                   target_ulong cur_eip,                      \
                                   target_ulong next_eip,                     \
                                   int nz)                                    \
{                                                                             \
    TCGLabel *exit_label;                                                     \
                                                                              \
    gen_update_cc_op(s);                                                      \
    exit_label = gen_jz_ecx_string(s, next_eip);                              \
    gen_ ## op(s, ot);                                                        \
    gen_op_add_reg_im(s, s->aflag, R_ECX, -1);                                \
    gen_update_cc_op(s);                                                      \
    gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), exit_label);                         \
    if (s->repz_opt) {                                                        \
        gen_op_jz_ecx(s, s->aflag, exit_label);                               \
    }                                                                         \
    gen_jmp(s, cur_eip);                                                      \
}
1270 
/*
 * Instantiate the gen_repz_<op>() wrappers.  The GEN_REPZ forms repeat on
 * ECX only; the GEN_REPZ2 forms (scas, cmps) take an extra 'nz' argument
 * and also test the Z condition after each iteration.
 */
GEN_REPZ(movs)
GEN_REPZ(stos)
GEN_REPZ(lods)
GEN_REPZ(ins)
GEN_REPZ(outs)
GEN_REPZ2(scas)
GEN_REPZ2(cmps)
1278 
1279 static void gen_helper_fp_arith_ST0_FT0(int op)
1280 {
1281     switch (op) {
1282     case 0:
1283         gen_helper_fadd_ST0_FT0(cpu_env);
1284         break;
1285     case 1:
1286         gen_helper_fmul_ST0_FT0(cpu_env);
1287         break;
1288     case 2:
1289         gen_helper_fcom_ST0_FT0(cpu_env);
1290         break;
1291     case 3:
1292         gen_helper_fcom_ST0_FT0(cpu_env);
1293         break;
1294     case 4:
1295         gen_helper_fsub_ST0_FT0(cpu_env);
1296         break;
1297     case 5:
1298         gen_helper_fsubr_ST0_FT0(cpu_env);
1299         break;
1300     case 6:
1301         gen_helper_fdiv_ST0_FT0(cpu_env);
1302         break;
1303     case 7:
1304         gen_helper_fdivr_ST0_FT0(cpu_env);
1305         break;
1306     }
1307 }
1308 
1309 /* NOTE the exception in "r" op ordering */
1310 static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
1311 {
1312     TCGv_i32 tmp = tcg_const_i32(opreg);
1313     switch (op) {
1314     case 0:
1315         gen_helper_fadd_STN_ST0(cpu_env, tmp);
1316         break;
1317     case 1:
1318         gen_helper_fmul_STN_ST0(cpu_env, tmp);
1319         break;
1320     case 4:
1321         gen_helper_fsubr_STN_ST0(cpu_env, tmp);
1322         break;
1323     case 5:
1324         gen_helper_fsub_STN_ST0(cpu_env, tmp);
1325         break;
1326     case 6:
1327         gen_helper_fdivr_STN_ST0(cpu_env, tmp);
1328         break;
1329     case 7:
1330         gen_helper_fdiv_STN_ST0(cpu_env, tmp);
1331         break;
1332     }
1333 }
1334 
1335 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
1336 {
1337     gen_update_cc_op(s);
1338     gen_jmp_im(s, cur_eip);
1339     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
1340     s->base.is_jmp = DISAS_NORETURN;
1341 }
1342 
1343 /* Generate #UD for the current instruction.  The assumption here is that
1344    the instruction is known, but it isn't allowed in the current cpu mode.  */
1345 static void gen_illegal_opcode(DisasContext *s)
1346 {
1347     gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base);
1348 }
1349 
1350 /* Generate #GP for the current instruction. */
1351 static void gen_exception_gpf(DisasContext *s)
1352 {
1353     gen_exception(s, EXCP0D_GPF, s->pc_start - s->cs_base);
1354 }
1355 
1356 /* Check for cpl == 0; if not, raise #GP and return false. */
1357 static bool check_cpl0(DisasContext *s)
1358 {
1359     if (CPL(s) == 0) {
1360         return true;
1361     }
1362     gen_exception_gpf(s);
1363     return false;
1364 }
1365 
1366 /* If vm86, check for iopl == 3; if not, raise #GP and return false. */
1367 static bool check_vm86_iopl(DisasContext *s)
1368 {
1369     if (!VM86(s) || IOPL(s) == 3) {
1370         return true;
1371     }
1372     gen_exception_gpf(s);
1373     return false;
1374 }
1375 
1376 /* Check for iopl allowing access; if not, raise #GP and return false. */
1377 static bool check_iopl(DisasContext *s)
1378 {
1379     if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) {
1380         return true;
1381     }
1382     gen_exception_gpf(s);
1383     return false;
1384 }
1385 
/*
 * Emit a two-operand ALU operation 'op' (OP_ADDL, OP_SUBL, ...) of size
 * 'ot'.  The second operand is expected in T1.  The first operand, which is
 * also the destination, is register 'd', or the memory location addressed
 * by A0 if d == OR_TMP0.
 *
 * With a LOCK prefix the destination must be memory (otherwise #UD is
 * generated) and the read-modify-write is done with an atomic TCG op.
 * Afterwards the lazy condition-code state is updated for the operation.
 */
static void gen_op(DisasContext *s1, int op, MemOp ot, int d)
{
    if (d != OR_TMP0) {
        if (s1->prefix & PREFIX_LOCK) {
            /* Lock prefix when destination is not memory.  */
            gen_illegal_opcode(s1);
            return;
        }
        gen_op_mov_v_reg(s1, ot, s1->T0, d);
    } else if (!(s1->prefix & PREFIX_LOCK)) {
        /* Unlocked memory operand: plain load (locked forms skip this). */
        gen_op_ld_v(s1, ot, s1->T0, s1->A0);
    }
    switch(op) {
    case OP_ADCL:
        /* tmp4 <- incoming carry, evaluated before the operation. */
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* Fold carry into the addend, then one atomic add. */
            tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_ADCB + ot);
        break;
    case OP_SBBL:
        /* tmp4 <- incoming carry (borrow), evaluated before the operation. */
        gen_compute_eflags_c(s1, s1->tmp4);
        if (s1->prefix & PREFIX_LOCK) {
            /* Subtract by atomically adding -(T1 + carry). */
            tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4);
            tcg_gen_neg_tl(s1->T0, s1->T0);
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update3_cc(s1, s1->tmp4);
        set_cc_op(s1, CC_OP_SBBB + ot);
        break;
    case OP_ADDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_add_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_ADDB + ot);
        break;
    case OP_SUBL:
        if (s1->prefix & PREFIX_LOCK) {
            /*
             * Atomically add -T1; the fetched value goes to cc_srcT and the
             * result is recomputed from it into T0.
             */
            tcg_gen_neg_tl(s1->T0, s1->T1);
            tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0,
                                        s1->mem_index, ot | MO_LE);
            tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1);
        } else {
            /* Keep the original first operand in cc_srcT. */
            tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
            tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update2_cc(s1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    default:
        /* Any 'op' value not listed in this switch is handled as OP_ANDL. */
    case OP_ANDL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_and_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_ORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1,
                                       s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_or_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_XORL:
        if (s1->prefix & PREFIX_LOCK) {
            tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1,
                                        s1->mem_index, ot | MO_LE);
        } else {
            tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1);
            gen_op_st_rm_T0_A0(s1, ot, d);
        }
        gen_op_update1_cc(s1);
        set_cc_op(s1, CC_OP_LOGICB + ot);
        break;
    case OP_CMPL:
        /*
         * Compare stores nothing back: only the condition-code inputs are
         * written (second operand, first operand, and their difference).
         */
        tcg_gen_mov_tl(cpu_cc_src, s1->T1);
        tcg_gen_mov_tl(s1->cc_srcT, s1->T0);
        tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1);
        set_cc_op(s1, CC_OP_SUBB + ot);
        break;
    }
}
1496 
1497 /* if d == OR_TMP0, it means memory operand (address in A0) */
1498 static void gen_inc(DisasContext *s1, MemOp ot, int d, int c)
1499 {
1500     if (s1->prefix & PREFIX_LOCK) {
1501         if (d != OR_TMP0) {
1502             /* Lock prefix when destination is not memory */
1503             gen_illegal_opcode(s1);
1504             return;
1505         }
1506         tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1);
1507         tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0,
1508                                     s1->mem_index, ot | MO_LE);
1509     } else {
1510         if (d != OR_TMP0) {
1511             gen_op_mov_v_reg(s1, ot, s1->T0, d);
1512         } else {
1513             gen_op_ld_v(s1, ot, s1->T0, s1->A0);
1514         }
1515         tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1));
1516         gen_op_st_rm_T0_A0(s1, ot, d);
1517     }
1518 
1519     gen_compute_eflags_c(s1, cpu_cc_src);
1520     tcg_gen_mov_tl(cpu_cc_dst, s1->T0);
1521     set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot);
1522 }
1523 
/*
 * Update the lazy condition-code state after a shift whose count is only
 * known at run time.
 *
 * 'result' is stored to CC_DST and 'shm1' to CC_SRC (gen_shift_rm_T1 passes
 * the operand shifted by count - 1 as 'shm1'), and CC_OP becomes
 * CC_OP_SARB + ot or CC_OP_SHLB + ot depending on 'is_right'.  All of this
 * is conditional on 'count' being non-zero at run time: for a zero count
 * the previous CC_OP is kept, and so are the previous CC_DST/CC_SRC values
 * when the previous cc_op still uses them.
 */
static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
                            TCGv shm1, TCGv count, bool is_right)
{
    TCGv_i32 z32, s32, oldop;
    TCGv z_tl;

    /* Store the results into the CC variables.  If we know that the
       variable must be dead, store unconditionally.  Otherwise we'll
       need to not disrupt the current contents.  */
    z_tl = tcg_const_tl(0);
    if (cc_op_live[s->cc_op] & USES_CC_DST) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
                           result, cpu_cc_dst);
    } else {
        tcg_gen_mov_tl(cpu_cc_dst, result);
    }
    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
                           shm1, cpu_cc_src);
    } else {
        tcg_gen_mov_tl(cpu_cc_src, shm1);
    }
    tcg_temp_free(z_tl);

    /* Get the two potential CC_OP values into temporaries.  */
    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
    if (s->cc_op == CC_OP_DYNAMIC) {
        /* The old value is already held in the cpu_cc_op global. */
        oldop = cpu_cc_op;
    } else {
        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
        oldop = s->tmp3_i32;
    }

    /* Conditionally store the CC_OP value.  */
    z32 = tcg_const_i32(0);
    s32 = tcg_temp_new_i32();
    /* movcond_i32 needs a 32-bit copy of the count for its comparison. */
    tcg_gen_trunc_tl_i32(s32, count);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
    tcg_temp_free_i32(z32);
    tcg_temp_free_i32(s32);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1568 
1569 static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1,
1570                             int is_right, int is_arith)
1571 {
1572     target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
1573 
1574     /* load */
1575     if (op1 == OR_TMP0) {
1576         gen_op_ld_v(s, ot, s->T0, s->A0);
1577     } else {
1578         gen_op_mov_v_reg(s, ot, s->T0, op1);
1579     }
1580 
1581     tcg_gen_andi_tl(s->T1, s->T1, mask);
1582     tcg_gen_subi_tl(s->tmp0, s->T1, 1);
1583 
1584     if (is_right) {
1585         if (is_arith) {
1586             gen_exts(ot, s->T0);
1587             tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0);
1588             tcg_gen_sar_tl(s->T0, s->T0, s->T1);
1589         } else {
1590             gen_extu(ot, s->T0);
1591             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1592             tcg_gen_shr_tl(s->T0, s->T0, s->T1);
1593         }
1594     } else {
1595         tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1596         tcg_gen_shl_tl(s->T0, s->T0, s->T1);
1597     }
1598 
1599     /* store */
1600     gen_op_st_rm_T0_A0(s, ot, op1);
1601 
1602     gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right);
1603 }
1604 
1605 static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
1606                             int is_right, int is_arith)
1607 {
1608     int mask = (ot == MO_64 ? 0x3f : 0x1f);
1609 
1610     /* load */
1611     if (op1 == OR_TMP0)
1612         gen_op_ld_v(s, ot, s->T0, s->A0);
1613     else
1614         gen_op_mov_v_reg(s, ot, s->T0, op1);
1615 
1616     op2 &= mask;
1617     if (op2 != 0) {
1618         if (is_right) {
1619             if (is_arith) {
1620                 gen_exts(ot, s->T0);
1621                 tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1);
1622                 tcg_gen_sari_tl(s->T0, s->T0, op2);
1623             } else {
1624                 gen_extu(ot, s->T0);
1625                 tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1);
1626                 tcg_gen_shri_tl(s->T0, s->T0, op2);
1627             }
1628         } else {
1629             tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1);
1630             tcg_gen_shli_tl(s->T0, s->T0, op2);
1631         }
1632     }
1633 
1634     /* store */
1635     gen_op_st_rm_T0_A0(s, ot, op1);
1636 
1637     /* update eflags if non zero shift */
1638     if (op2 != 0) {
1639         tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
1640         tcg_gen_mov_tl(cpu_cc_dst, s->T0);
1641         set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
1642     }
1643 }
1644 
/*
 * Emit a rotate of size 'ot' by the run-time count held in T1.  The operand
 * is register 'op1', or the memory location addressed by A0 when
 * op1 == OR_TMP0; 'is_right' selects the direction.
 *
 * The flags are first computed into CC_SRC, then the new C and O bits are
 * derived from the rotated value.  CC_OP is chosen at run time:
 * CC_OP_ADCOX for a non-zero count, CC_OP_EFLAGS for a zero count.
 */
static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right)
{
    target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f);
    TCGv_i32 t0, t1;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    tcg_gen_andi_tl(s->T1, s->T1, mask);

    switch (ot) {
    case MO_8:
        /* Replicate the 8-bit input so that a 32-bit rotate works.  */
        tcg_gen_ext8u_tl(s->T0, s->T0);
        tcg_gen_muli_tl(s->T0, s->T0, 0x01010101);
        goto do_long;
    case MO_16:
        /* Replicate the 16-bit input so that a 32-bit rotate works.  */
        tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16);
        goto do_long;
    do_long:
        /*
         * With TARGET_X86_64 the label lands on the explicit 32-bit rotate
         * below; otherwise it lands on the default case.
         */
#ifdef TARGET_X86_64
    case MO_32:
        tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
        tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
        if (is_right) {
            tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        } else {
            tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
        }
        tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
        break;
#endif
    default:
        if (is_right) {
            tcg_gen_rotr_tl(s->T0, s->T0, s->T1);
        } else {
            tcg_gen_rotl_tl(s->T0, s->T0, s->T1);
        }
        break;
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    /* We'll need the flags computed into CC_SRC.  */
    gen_compute_eflags(s);

    /* The value that was "rotated out" is now present at the other end
       of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
       since we've computed the flags into CC_SRC, these variables are
       currently dead.  */
    /* 'mask' (31 or 63) doubles as the index of the top bit here. */
    if (is_right) {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
        tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
    } else {
        tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
        tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
    }
    tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
    tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);

    /* Now conditionally store the new CC_OP value.  If the shift count
       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
       Otherwise reuse CC_OP_ADCOX which have the C and O flags split out
       exactly as we computed above.  */
    t0 = tcg_const_i32(0);
    t1 = tcg_temp_new_i32();
    tcg_gen_trunc_tl_i32(t1, s->T1);
    tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX);
    tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS);
    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                        s->tmp2_i32, s->tmp3_i32);
    tcg_temp_free_i32(t0);
    tcg_temp_free_i32(t1);

    /* The CC_OP value is no longer predictable.  */
    set_cc_op(s, CC_OP_DYNAMIC);
}
1729 
/*
 * Emit a rotate of size 'ot' by the immediate count 'op2'.  The operand is
 * register 'op1', or the memory location addressed by A0 when
 * op1 == OR_TMP0; 'is_right' selects the direction.
 *
 * The operand is loaded and stored back even when the masked count is zero;
 * the flags (C and O, via CC_OP_ADCOX) are only updated for a non-zero
 * masked count.
 */
static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2,
                          int is_right)
{
    int mask = (ot == MO_64 ? 0x3f : 0x1f);
    int shift;

    /* load */
    if (op1 == OR_TMP0) {
        gen_op_ld_v(s, ot, s->T0, s->A0);
    } else {
        gen_op_mov_v_reg(s, ot, s->T0, op1);
    }

    op2 &= mask;
    if (op2 != 0) {
        switch (ot) {
#ifdef TARGET_X86_64
        case MO_32:
            /* Rotate in a 32-bit temporary, then zero-extend back. */
            tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
            if (is_right) {
                tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2);
            } else {
                tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2);
            }
            tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
            break;
#endif
        default:
            /* Full target-width rotate. */
            if (is_right) {
                tcg_gen_rotri_tl(s->T0, s->T0, op2);
            } else {
                tcg_gen_rotli_tl(s->T0, s->T0, op2);
            }
            break;
        case MO_8:
            /* From here on 'mask' is the operand's top bit index (7). */
            mask = 7;
            goto do_shifts;
        case MO_16:
            /* From here on 'mask' is the operand's top bit index (15). */
            mask = 15;
        do_shifts:
            /*
             * Narrow rotate built from two shifts and an OR on the
             * zero-extended operand.  A right rotate is expressed as a left
             * rotate by (width - shift).
             */
            shift = op2 & mask;
            if (is_right) {
                shift = mask + 1 - shift;
            }
            gen_extu(ot, s->T0);
            tcg_gen_shli_tl(s->tmp0, s->T0, shift);
            tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift);
            tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
            break;
        }
    }

    /* store */
    gen_op_st_rm_T0_A0(s, ot, op1);

    if (op2 != 0) {
        /* Compute the flags into CC_SRC.  */
        gen_compute_eflags(s);

        /* The value that was "rotated out" is now present at the other end
           of the word.  Compute C into CC_DST and O into CC_SRC2.  Note that
           since we've computed the flags into CC_SRC, these variables are
           currently dead.  */
        /* 'mask' is used as the top bit index (7, 15, 31 or 63). */
        if (is_right) {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1);
            tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
        } else {
            tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask);
            tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1);
        }
        tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
        tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
        set_cc_op(s, CC_OP_ADCOX);
    }
}
1806 
1807 /* XXX: add faster immediate = 1 case */
1808 static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1,
1809                            int is_right)
1810 {
1811     gen_compute_eflags(s);
1812     assert(s->cc_op == CC_OP_EFLAGS);
1813 
1814     /* load */
1815     if (op1 == OR_TMP0)
1816         gen_op_ld_v(s, ot, s->T0, s->A0);
1817     else
1818         gen_op_mov_v_reg(s, ot, s->T0, op1);
1819 
1820     if (is_right) {
1821         switch (ot) {
1822         case MO_8:
1823             gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1);
1824             break;
1825         case MO_16:
1826             gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1);
1827             break;
1828         case MO_32:
1829             gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1);
1830             break;
1831 #ifdef TARGET_X86_64
1832         case MO_64:
1833             gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1);
1834             break;
1835 #endif
1836         default:
1837             tcg_abort();
1838         }
1839     } else {
1840         switch (ot) {
1841         case MO_8:
1842             gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1);
1843             break;
1844         case MO_16:
1845             gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1);
1846             break;
1847         case MO_32:
1848             gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1);
1849             break;
1850 #ifdef TARGET_X86_64
1851         case MO_64:
1852             gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1);
1853             break;
1854 #endif
1855         default:
1856             tcg_abort();
1857         }
1858     }
1859     /* store */
1860     gen_op_st_rm_T0_A0(s, ot, op1);
1861 }
1862 
1863 /* XXX: add faster immediate case */
1864 static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
1865                              bool is_right, TCGv count_in)
1866 {
1867     target_ulong mask = (ot == MO_64 ? 63 : 31);
1868     TCGv count;
1869 
1870     /* load */
1871     if (op1 == OR_TMP0) {
1872         gen_op_ld_v(s, ot, s->T0, s->A0);
1873     } else {
1874         gen_op_mov_v_reg(s, ot, s->T0, op1);
1875     }
1876 
1877     count = tcg_temp_new();
1878     tcg_gen_andi_tl(count, count_in, mask);
1879 
1880     switch (ot) {
1881     case MO_16:
1882         /* Note: we implement the Intel behaviour for shift count > 16.
1883            This means "shrdw C, B, A" shifts A:B:A >> C.  Build the B:A
1884            portion by constructing it as a 32-bit value.  */
1885         if (is_right) {
1886             tcg_gen_deposit_tl(s->tmp0, s->T0, s->T1, 16, 16);
1887             tcg_gen_mov_tl(s->T1, s->T0);
1888             tcg_gen_mov_tl(s->T0, s->tmp0);
1889         } else {
1890             tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16);
1891         }
1892         /*
1893          * If TARGET_X86_64 defined then fall through into MO_32 case,
1894          * otherwise fall through default case.
1895          */
1896     case MO_32:
1897 #ifdef TARGET_X86_64
1898         /* Concatenate the two 32-bit values and use a 64-bit shift.  */
1899         tcg_gen_subi_tl(s->tmp0, count, 1);
1900         if (is_right) {
1901             tcg_gen_concat_tl_i64(s->T0, s->T0, s->T1);
1902             tcg_gen_shr_i64(s->tmp0, s->T0, s->tmp0);
1903             tcg_gen_shr_i64(s->T0, s->T0, count);
1904         } else {
1905             tcg_gen_concat_tl_i64(s->T0, s->T1, s->T0);
1906             tcg_gen_shl_i64(s->tmp0, s->T0, s->tmp0);
1907             tcg_gen_shl_i64(s->T0, s->T0, count);
1908             tcg_gen_shri_i64(s->tmp0, s->tmp0, 32);
1909             tcg_gen_shri_i64(s->T0, s->T0, 32);
1910         }
1911         break;
1912 #endif
1913     default:
1914         tcg_gen_subi_tl(s->tmp0, count, 1);
1915         if (is_right) {
1916             tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0);
1917 
1918             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1919             tcg_gen_shr_tl(s->T0, s->T0, count);
1920             tcg_gen_shl_tl(s->T1, s->T1, s->tmp4);
1921         } else {
1922             tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0);
1923             if (ot == MO_16) {
1924                 /* Only needed if count > 16, for Intel behaviour.  */
1925                 tcg_gen_subfi_tl(s->tmp4, 33, count);
1926                 tcg_gen_shr_tl(s->tmp4, s->T1, s->tmp4);
1927                 tcg_gen_or_tl(s->tmp0, s->tmp0, s->tmp4);
1928             }
1929 
1930             tcg_gen_subfi_tl(s->tmp4, mask + 1, count);
1931             tcg_gen_shl_tl(s->T0, s->T0, count);
1932             tcg_gen_shr_tl(s->T1, s->T1, s->tmp4);
1933         }
1934         tcg_gen_movi_tl(s->tmp4, 0);
1935         tcg_gen_movcond_tl(TCG_COND_EQ, s->T1, count, s->tmp4,
1936                            s->tmp4, s->T1);
1937         tcg_gen_or_tl(s->T0, s->T0, s->T1);
1938         break;
1939     }
1940 
1941     /* store */
1942     gen_op_st_rm_T0_A0(s, ot, op1);
1943 
1944     gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
1945     tcg_temp_free(count);
1946 }
1947 
1948 static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s)
1949 {
1950     if (s != OR_TMP1)
1951         gen_op_mov_v_reg(s1, ot, s1->T1, s);
1952     switch(op) {
1953     case OP_ROL:
1954         gen_rot_rm_T1(s1, ot, d, 0);
1955         break;
1956     case OP_ROR:
1957         gen_rot_rm_T1(s1, ot, d, 1);
1958         break;
1959     case OP_SHL:
1960     case OP_SHL1:
1961         gen_shift_rm_T1(s1, ot, d, 0, 0);
1962         break;
1963     case OP_SHR:
1964         gen_shift_rm_T1(s1, ot, d, 1, 0);
1965         break;
1966     case OP_SAR:
1967         gen_shift_rm_T1(s1, ot, d, 1, 1);
1968         break;
1969     case OP_RCL:
1970         gen_rotc_rm_T1(s1, ot, d, 0);
1971         break;
1972     case OP_RCR:
1973         gen_rotc_rm_T1(s1, ot, d, 1);
1974         break;
1975     }
1976 }
1977 
1978 static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c)
1979 {
1980     switch(op) {
1981     case OP_ROL:
1982         gen_rot_rm_im(s1, ot, d, c, 0);
1983         break;
1984     case OP_ROR:
1985         gen_rot_rm_im(s1, ot, d, c, 1);
1986         break;
1987     case OP_SHL:
1988     case OP_SHL1:
1989         gen_shift_rm_im(s1, ot, d, c, 0, 0);
1990         break;
1991     case OP_SHR:
1992         gen_shift_rm_im(s1, ot, d, c, 1, 0);
1993         break;
1994     case OP_SAR:
1995         gen_shift_rm_im(s1, ot, d, c, 1, 1);
1996         break;
1997     default:
1998         /* currently not optimized */
1999         tcg_gen_movi_tl(s1->T1, c);
2000         gen_shift(s1, op, ot, d, OR_TMP1);
2001         break;
2002     }
2003 }
2004 
2005 #define X86_MAX_INSN_LENGTH 15
2006 
2007 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
2008 {
2009     uint64_t pc = s->pc;
2010 
2011     s->pc += num_bytes;
2012     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
2013         /* If the instruction's 16th byte is on a different page than the 1st, a
2014          * page fault on the second page wins over the general protection fault
2015          * caused by the instruction being too long.
2016          * This can happen even if the operand is only one byte long!
2017          */
2018         if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) {
2019             volatile uint8_t unused =
2020                 cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK);
2021             (void) unused;
2022         }
2023         siglongjmp(s->jmpbuf, 1);
2024     }
2025 
2026     return pc;
2027 }
2028 
2029 static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
2030 {
2031     return translator_ldub(env, &s->base, advance_pc(env, s, 1));
2032 }
2033 
2034 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
2035 {
2036     return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
2037 }
2038 
2039 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
2040 {
2041     return translator_lduw(env, &s->base, advance_pc(env, s, 2));
2042 }
2043 
2044 static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s)
2045 {
2046     return translator_ldl(env, &s->base, advance_pc(env, s, 4));
2047 }
2048 
#ifdef TARGET_X86_64
/* Fetch a 64-bit code value at s->pc and advance past it.  */
static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s)
{
    uint64_t addr = advance_pc(env, s, 8);

    return translator_ldq(env, &s->base, addr);
}
#endif
2055 
2056 /* Decompose an address.  */
2057 
2058 typedef struct AddressParts {
2059     int def_seg;
2060     int base;
2061     int index;
2062     int scale;
2063     target_long disp;
2064 } AddressParts;
2065 
2066 static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s,
2067                                     int modrm)
2068 {
2069     int def_seg, base, index, scale, mod, rm;
2070     target_long disp;
2071     bool havesib;
2072 
2073     def_seg = R_DS;
2074     index = -1;
2075     scale = 0;
2076     disp = 0;
2077 
2078     mod = (modrm >> 6) & 3;
2079     rm = modrm & 7;
2080     base = rm | REX_B(s);
2081 
2082     if (mod == 3) {
2083         /* Normally filtered out earlier, but including this path
2084            simplifies multi-byte nop, as well as bndcl, bndcu, bndcn.  */
2085         goto done;
2086     }
2087 
2088     switch (s->aflag) {
2089     case MO_64:
2090     case MO_32:
2091         havesib = 0;
2092         if (rm == 4) {
2093             int code = x86_ldub_code(env, s);
2094             scale = (code >> 6) & 3;
2095             index = ((code >> 3) & 7) | REX_X(s);
2096             if (index == 4) {
2097                 index = -1;  /* no index */
2098             }
2099             base = (code & 7) | REX_B(s);
2100             havesib = 1;
2101         }
2102 
2103         switch (mod) {
2104         case 0:
2105             if ((base & 7) == 5) {
2106                 base = -1;
2107                 disp = (int32_t)x86_ldl_code(env, s);
2108                 if (CODE64(s) && !havesib) {
2109                     base = -2;
2110                     disp += s->pc + s->rip_offset;
2111                 }
2112             }
2113             break;
2114         case 1:
2115             disp = (int8_t)x86_ldub_code(env, s);
2116             break;
2117         default:
2118         case 2:
2119             disp = (int32_t)x86_ldl_code(env, s);
2120             break;
2121         }
2122 
2123         /* For correct popl handling with esp.  */
2124         if (base == R_ESP && s->popl_esp_hack) {
2125             disp += s->popl_esp_hack;
2126         }
2127         if (base == R_EBP || base == R_ESP) {
2128             def_seg = R_SS;
2129         }
2130         break;
2131 
2132     case MO_16:
2133         if (mod == 0) {
2134             if (rm == 6) {
2135                 base = -1;
2136                 disp = x86_lduw_code(env, s);
2137                 break;
2138             }
2139         } else if (mod == 1) {
2140             disp = (int8_t)x86_ldub_code(env, s);
2141         } else {
2142             disp = (int16_t)x86_lduw_code(env, s);
2143         }
2144 
2145         switch (rm) {
2146         case 0:
2147             base = R_EBX;
2148             index = R_ESI;
2149             break;
2150         case 1:
2151             base = R_EBX;
2152             index = R_EDI;
2153             break;
2154         case 2:
2155             base = R_EBP;
2156             index = R_ESI;
2157             def_seg = R_SS;
2158             break;
2159         case 3:
2160             base = R_EBP;
2161             index = R_EDI;
2162             def_seg = R_SS;
2163             break;
2164         case 4:
2165             base = R_ESI;
2166             break;
2167         case 5:
2168             base = R_EDI;
2169             break;
2170         case 6:
2171             base = R_EBP;
2172             def_seg = R_SS;
2173             break;
2174         default:
2175         case 7:
2176             base = R_EBX;
2177             break;
2178         }
2179         break;
2180 
2181     default:
2182         tcg_abort();
2183     }
2184 
2185  done:
2186     return (AddressParts){ def_seg, base, index, scale, disp };
2187 }
2188 
2189 /* Compute the address, with a minimum number of TCG ops.  */
2190 static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a)
2191 {
2192     TCGv ea = NULL;
2193 
2194     if (a.index >= 0) {
2195         if (a.scale == 0) {
2196             ea = cpu_regs[a.index];
2197         } else {
2198             tcg_gen_shli_tl(s->A0, cpu_regs[a.index], a.scale);
2199             ea = s->A0;
2200         }
2201         if (a.base >= 0) {
2202             tcg_gen_add_tl(s->A0, ea, cpu_regs[a.base]);
2203             ea = s->A0;
2204         }
2205     } else if (a.base >= 0) {
2206         ea = cpu_regs[a.base];
2207     }
2208     if (!ea) {
2209         tcg_gen_movi_tl(s->A0, a.disp);
2210         ea = s->A0;
2211     } else if (a.disp != 0) {
2212         tcg_gen_addi_tl(s->A0, ea, a.disp);
2213         ea = s->A0;
2214     }
2215 
2216     return ea;
2217 }
2218 
2219 static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
2220 {
2221     AddressParts a = gen_lea_modrm_0(env, s, modrm);
2222     TCGv ea = gen_lea_modrm_1(s, a);
2223     gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
2224 }
2225 
2226 static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
2227 {
2228     (void)gen_lea_modrm_0(env, s, modrm);
2229 }
2230 
2231 /* Used for BNDCL, BNDCU, BNDCN.  */
2232 static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm,
2233                       TCGCond cond, TCGv_i64 bndv)
2234 {
2235     TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm));
2236 
2237     tcg_gen_extu_tl_i64(s->tmp1_i64, ea);
2238     if (!CODE64(s)) {
2239         tcg_gen_ext32u_i64(s->tmp1_i64, s->tmp1_i64);
2240     }
2241     tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv);
2242     tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64);
2243     gen_helper_bndck(cpu_env, s->tmp2_i32);
2244 }
2245 
2246 /* used for LEA and MOV AX, mem */
2247 static void gen_add_A0_ds_seg(DisasContext *s)
2248 {
2249     gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override);
2250 }
2251 
2252 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
2253    OR_TMP0 */
2254 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
2255                            MemOp ot, int reg, int is_store)
2256 {
2257     int mod, rm;
2258 
2259     mod = (modrm >> 6) & 3;
2260     rm = (modrm & 7) | REX_B(s);
2261     if (mod == 3) {
2262         if (is_store) {
2263             if (reg != OR_TMP0)
2264                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2265             gen_op_mov_reg_v(s, ot, rm, s->T0);
2266         } else {
2267             gen_op_mov_v_reg(s, ot, s->T0, rm);
2268             if (reg != OR_TMP0)
2269                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2270         }
2271     } else {
2272         gen_lea_modrm(env, s, modrm);
2273         if (is_store) {
2274             if (reg != OR_TMP0)
2275                 gen_op_mov_v_reg(s, ot, s->T0, reg);
2276             gen_op_st_v(s, ot, s->T0, s->A0);
2277         } else {
2278             gen_op_ld_v(s, ot, s->T0, s->A0);
2279             if (reg != OR_TMP0)
2280                 gen_op_mov_reg_v(s, ot, reg, s->T0);
2281         }
2282     }
2283 }
2284 
2285 static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot)
2286 {
2287     uint32_t ret;
2288 
2289     switch (ot) {
2290     case MO_8:
2291         ret = x86_ldub_code(env, s);
2292         break;
2293     case MO_16:
2294         ret = x86_lduw_code(env, s);
2295         break;
2296     case MO_32:
2297 #ifdef TARGET_X86_64
2298     case MO_64:
2299 #endif
2300         ret = x86_ldl_code(env, s);
2301         break;
2302     default:
2303         tcg_abort();
2304     }
2305     return ret;
2306 }
2307 
2308 static inline int insn_const_size(MemOp ot)
2309 {
2310     if (ot <= MO_32) {
2311         return 1 << ot;
2312     } else {
2313         return 4;
2314     }
2315 }
2316 
2317 static void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
2318 {
2319     target_ulong pc = s->cs_base + eip;
2320 
2321     if (translator_use_goto_tb(&s->base, pc))  {
2322         /* jump to same page: we can use a direct jump */
2323         tcg_gen_goto_tb(tb_num);
2324         gen_jmp_im(s, eip);
2325         tcg_gen_exit_tb(s->base.tb, tb_num);
2326         s->base.is_jmp = DISAS_NORETURN;
2327     } else {
2328         /* jump to another page */
2329         gen_jmp_im(s, eip);
2330         gen_jr(s, s->tmp0);
2331     }
2332 }
2333 
2334 static inline void gen_jcc(DisasContext *s, int b,
2335                            target_ulong val, target_ulong next_eip)
2336 {
2337     TCGLabel *l1, *l2;
2338 
2339     if (s->jmp_opt) {
2340         l1 = gen_new_label();
2341         gen_jcc1(s, b, l1);
2342 
2343         gen_goto_tb(s, 0, next_eip);
2344 
2345         gen_set_label(l1);
2346         gen_goto_tb(s, 1, val);
2347     } else {
2348         l1 = gen_new_label();
2349         l2 = gen_new_label();
2350         gen_jcc1(s, b, l1);
2351 
2352         gen_jmp_im(s, next_eip);
2353         tcg_gen_br(l2);
2354 
2355         gen_set_label(l1);
2356         gen_jmp_im(s, val);
2357         gen_set_label(l2);
2358         gen_eob(s);
2359     }
2360 }
2361 
2362 static void gen_cmovcc1(CPUX86State *env, DisasContext *s, MemOp ot, int b,
2363                         int modrm, int reg)
2364 {
2365     CCPrepare cc;
2366 
2367     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
2368 
2369     cc = gen_prepare_cc(s, b, s->T1);
2370     if (cc.mask != -1) {
2371         TCGv t0 = tcg_temp_new();
2372         tcg_gen_andi_tl(t0, cc.reg, cc.mask);
2373         cc.reg = t0;
2374     }
2375     if (!cc.use_reg2) {
2376         cc.reg2 = tcg_const_tl(cc.imm);
2377     }
2378 
2379     tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2,
2380                        s->T0, cpu_regs[reg]);
2381     gen_op_mov_reg_v(s, ot, reg, s->T0);
2382 
2383     if (cc.mask != -1) {
2384         tcg_temp_free(cc.reg);
2385     }
2386     if (!cc.use_reg2) {
2387         tcg_temp_free(cc.reg2);
2388     }
2389 }
2390 
2391 static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg)
2392 {
2393     tcg_gen_ld32u_tl(s->T0, cpu_env,
2394                      offsetof(CPUX86State,segs[seg_reg].selector));
2395 }
2396 
2397 static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg)
2398 {
2399     tcg_gen_ext16u_tl(s->T0, s->T0);
2400     tcg_gen_st32_tl(s->T0, cpu_env,
2401                     offsetof(CPUX86State,segs[seg_reg].selector));
2402     tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4);
2403 }
2404 
2405 /* move T0 to seg_reg and compute if the CPU state may change. Never
2406    call this function with seg_reg == R_CS */
2407 static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg)
2408 {
2409     if (PE(s) && !VM86(s)) {
2410         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
2411         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32);
2412         /* abort translation because the addseg value may change or
2413            because ss32 may change. For R_SS, translation must always
2414            stop as a special handling must be done to disable hardware
2415            interrupts for the next instruction */
2416         if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) {
2417             s->base.is_jmp = DISAS_TOO_MANY;
2418         }
2419     } else {
2420         gen_op_movl_seg_T0_vm(s, seg_reg);
2421         if (seg_reg == R_SS) {
2422             s->base.is_jmp = DISAS_TOO_MANY;
2423         }
2424     }
2425 }
2426 
2427 static void gen_svm_check_intercept(DisasContext *s, uint32_t type)
2428 {
2429     /* no SVM activated; fast case */
2430     if (likely(!GUEST(s))) {
2431         return;
2432     }
2433     gen_helper_svm_check_intercept(cpu_env, tcg_constant_i32(type));
2434 }
2435 
2436 static inline void gen_stack_update(DisasContext *s, int addend)
2437 {
2438     gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend);
2439 }
2440 
2441 /* Generate a push. It depends on ss32, addseg and dflag.  */
2442 static void gen_push_v(DisasContext *s, TCGv val)
2443 {
2444     MemOp d_ot = mo_pushpop(s, s->dflag);
2445     MemOp a_ot = mo_stacksize(s);
2446     int size = 1 << d_ot;
2447     TCGv new_esp = s->A0;
2448 
2449     tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size);
2450 
2451     if (!CODE64(s)) {
2452         if (ADDSEG(s)) {
2453             new_esp = s->tmp4;
2454             tcg_gen_mov_tl(new_esp, s->A0);
2455         }
2456         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2457     }
2458 
2459     gen_op_st_v(s, d_ot, val, s->A0);
2460     gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp);
2461 }
2462 
2463 /* two step pop is necessary for precise exceptions */
2464 static MemOp gen_pop_T0(DisasContext *s)
2465 {
2466     MemOp d_ot = mo_pushpop(s, s->dflag);
2467 
2468     gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1);
2469     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2470 
2471     return d_ot;
2472 }
2473 
2474 static inline void gen_pop_update(DisasContext *s, MemOp ot)
2475 {
2476     gen_stack_update(s, 1 << ot);
2477 }
2478 
2479 static inline void gen_stack_A0(DisasContext *s)
2480 {
2481     gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1);
2482 }
2483 
2484 static void gen_pusha(DisasContext *s)
2485 {
2486     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2487     MemOp d_ot = s->dflag;
2488     int size = 1 << d_ot;
2489     int i;
2490 
2491     for (i = 0; i < 8; i++) {
2492         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size);
2493         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2494         gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0);
2495     }
2496 
2497     gen_stack_update(s, -8 * size);
2498 }
2499 
2500 static void gen_popa(DisasContext *s)
2501 {
2502     MemOp s_ot = SS32(s) ? MO_32 : MO_16;
2503     MemOp d_ot = s->dflag;
2504     int size = 1 << d_ot;
2505     int i;
2506 
2507     for (i = 0; i < 8; i++) {
2508         /* ESP is not reloaded */
2509         if (7 - i == R_ESP) {
2510             continue;
2511         }
2512         tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size);
2513         gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1);
2514         gen_op_ld_v(s, d_ot, s->T0, s->A0);
2515         gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0);
2516     }
2517 
2518     gen_stack_update(s, 8 * size);
2519 }
2520 
2521 static void gen_enter(DisasContext *s, int esp_addend, int level)
2522 {
2523     MemOp d_ot = mo_pushpop(s, s->dflag);
2524     MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
2525     int size = 1 << d_ot;
2526 
2527     /* Push BP; compute FrameTemp into T1.  */
2528     tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size);
2529     gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1);
2530     gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0);
2531 
2532     level &= 31;
2533     if (level != 0) {
2534         int i;
2535 
2536         /* Copy level-1 pointers from the previous frame.  */
2537         for (i = 1; i < level; ++i) {
2538             tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i);
2539             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2540             gen_op_ld_v(s, d_ot, s->tmp0, s->A0);
2541 
2542             tcg_gen_subi_tl(s->A0, s->T1, size * i);
2543             gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2544             gen_op_st_v(s, d_ot, s->tmp0, s->A0);
2545         }
2546 
2547         /* Push the current FrameTemp as the last level.  */
2548         tcg_gen_subi_tl(s->A0, s->T1, size * level);
2549         gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1);
2550         gen_op_st_v(s, d_ot, s->T1, s->A0);
2551     }
2552 
2553     /* Copy the FrameTemp value to EBP.  */
2554     gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1);
2555 
2556     /* Compute the final value of ESP.  */
2557     tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level);
2558     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2559 }
2560 
2561 static void gen_leave(DisasContext *s)
2562 {
2563     MemOp d_ot = mo_pushpop(s, s->dflag);
2564     MemOp a_ot = mo_stacksize(s);
2565 
2566     gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1);
2567     gen_op_ld_v(s, d_ot, s->T0, s->A0);
2568 
2569     tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot);
2570 
2571     gen_op_mov_reg_v(s, d_ot, R_EBP, s->T0);
2572     gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1);
2573 }
2574 
2575 /* Similarly, except that the assumption here is that we don't decode
2576    the instruction at all -- either a missing opcode, an unimplemented
2577    feature, or just a bogus instruction stream.  */
2578 static void gen_unknown_opcode(CPUX86State *env, DisasContext *s)
2579 {
2580     gen_illegal_opcode(s);
2581 
2582     if (qemu_loglevel_mask(LOG_UNIMP)) {
2583         FILE *logfile = qemu_log_lock();
2584         target_ulong pc = s->pc_start, end = s->pc;
2585 
2586         qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc);
2587         for (; pc < end; ++pc) {
2588             qemu_log(" %02x", cpu_ldub_code(env, pc));
2589         }
2590         qemu_log("\n");
2591         qemu_log_unlock(logfile);
2592     }
2593 }
2594 
2595 /* an interrupt is different from an exception because of the
2596    privilege checks */
2597 static void gen_interrupt(DisasContext *s, int intno,
2598                           target_ulong cur_eip, target_ulong next_eip)
2599 {
2600     gen_update_cc_op(s);
2601     gen_jmp_im(s, cur_eip);
2602     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
2603                                tcg_const_i32(next_eip - cur_eip));
2604     s->base.is_jmp = DISAS_NORETURN;
2605 }
2606 
2607 static void gen_set_hflag(DisasContext *s, uint32_t mask)
2608 {
2609     if ((s->flags & mask) == 0) {
2610         TCGv_i32 t = tcg_temp_new_i32();
2611         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2612         tcg_gen_ori_i32(t, t, mask);
2613         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2614         tcg_temp_free_i32(t);
2615         s->flags |= mask;
2616     }
2617 }
2618 
2619 static void gen_reset_hflag(DisasContext *s, uint32_t mask)
2620 {
2621     if (s->flags & mask) {
2622         TCGv_i32 t = tcg_temp_new_i32();
2623         tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2624         tcg_gen_andi_i32(t, t, ~mask);
2625         tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags));
2626         tcg_temp_free_i32(t);
2627         s->flags &= ~mask;
2628     }
2629 }
2630 
2631 /* Clear BND registers during legacy branches.  */
2632 static void gen_bnd_jmp(DisasContext *s)
2633 {
2634     /* Clear the registers only if BND prefix is missing, MPX is enabled,
2635        and if the BNDREGs are known to be in use (non-zero) already.
2636        The helper itself will check BNDPRESERVE at runtime.  */
2637     if ((s->prefix & PREFIX_REPNZ) == 0
2638         && (s->flags & HF_MPX_EN_MASK) != 0
2639         && (s->flags & HF_MPX_IU_MASK) != 0) {
2640         gen_helper_bnd_jmp(cpu_env);
2641     }
2642 }
2643 
2644 /* Generate an end of block. Trace exception is also generated if needed.
2645    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.
2646    If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of
2647    S->TF.  This is used by the syscall/sysret insns.  */
2648 static void
2649 do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr)
2650 {
2651     gen_update_cc_op(s);
2652 
2653     /* If several instructions disable interrupts, only the first does it.  */
2654     if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) {
2655         gen_set_hflag(s, HF_INHIBIT_IRQ_MASK);
2656     } else {
2657         gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK);
2658     }
2659 
2660     if (s->base.tb->flags & HF_RF_MASK) {
2661         gen_helper_reset_rf(cpu_env);
2662     }
2663     if (recheck_tf) {
2664         gen_helper_rechecking_single_step(cpu_env);
2665         tcg_gen_exit_tb(NULL, 0);
2666     } else if (s->flags & HF_TF_MASK) {
2667         gen_helper_single_step(cpu_env);
2668     } else if (jr) {
2669         tcg_gen_lookup_and_goto_ptr();
2670     } else {
2671         tcg_gen_exit_tb(NULL, 0);
2672     }
2673     s->base.is_jmp = DISAS_NORETURN;
2674 }
2675 
2676 static inline void
2677 gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf)
2678 {
2679     do_gen_eob_worker(s, inhibit, recheck_tf, false);
2680 }
2681 
2682 /* End of block.
2683    If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set.  */
2684 static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit)
2685 {
2686     gen_eob_worker(s, inhibit, false);
2687 }
2688 
2689 /* End of block, resetting the inhibit irq flag.  */
2690 static void gen_eob(DisasContext *s)
2691 {
2692     gen_eob_worker(s, false, false);
2693 }
2694 
2695 /* Jump to register */
2696 static void gen_jr(DisasContext *s, TCGv dest)
2697 {
2698     do_gen_eob_worker(s, false, false, true);
2699 }
2700 
2701 /* generate a jump to eip. No segment change must happen before as a
2702    direct call to the next block may occur */
2703 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
2704 {
2705     gen_update_cc_op(s);
2706     set_cc_op(s, CC_OP_DYNAMIC);
2707     if (s->jmp_opt) {
2708         gen_goto_tb(s, tb_num, eip);
2709     } else {
2710         gen_jmp_im(s, eip);
2711         gen_eob(s);
2712     }
2713 }
2714 
2715 static void gen_jmp(DisasContext *s, target_ulong eip)
2716 {
2717     gen_jmp_tb(s, eip, 0);
2718 }
2719 
2720 static inline void gen_ldq_env_A0(DisasContext *s, int offset)
2721 {
2722     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2723     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset);
2724 }
2725 
2726 static inline void gen_stq_env_A0(DisasContext *s, int offset)
2727 {
2728     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset);
2729     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
2730 }
2731 
2732 static inline void gen_ldo_env_A0(DisasContext *s, int offset)
2733 {
2734     int mem_index = s->mem_index;
2735     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2736     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2737     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2738     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2739     tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2740 }
2741 
2742 static inline void gen_sto_env_A0(DisasContext *s, int offset)
2743 {
2744     int mem_index = s->mem_index;
2745     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0)));
2746     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEUQ);
2747     tcg_gen_addi_tl(s->tmp0, s->A0, 8);
2748     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1)));
2749     tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEUQ);
2750 }
2751 
2752 static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset)
2753 {
2754     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
2755     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
2756     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
2757     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
2758 }
2759 
2760 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
2761 {
2762     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
2763     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2764 }
2765 
2766 static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset)
2767 {
2768     tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset);
2769     tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset);
2770 }
2771 
2772 static inline void gen_op_movq_env_0(DisasContext *s, int d_offset)
2773 {
2774     tcg_gen_movi_i64(s->tmp1_i64, 0);
2775     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
2776 }
2777 
2778 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
2779 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
2780 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
2781 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
2782 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
2783 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2784                                TCGv_i32 val);
2785 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
2786 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
2787                                TCGv val);
2788 
/*
 * Sentinel values stored in the SSE op tables in place of a real helper
 * pointer.  NOTE(review): presumably the decoder tests for these before
 * calling through a table entry -- confirm at the use sites.
 */
#define SSE_SPECIAL ((void *)1)
#define SSE_DUMMY ((void *)2)

/* Table row holding the _mmx and _xmm helper for operation x.  */
#define MMX_OP2(x) \
    { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }

/* Table row holding the ps, pd, ss and sd helper for operation x.  */
#define SSE_FOP(x) \
    { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
2795 
2796 static const SSEFunc_0_epp sse_op_table1[256][4] = {
2797     /* 3DNow! extensions */
2798     [0x0e] = { SSE_DUMMY }, /* femms */
2799     [0x0f] = { SSE_DUMMY }, /* pf... */
2800     /* pure SSE operations */
2801     [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2802     [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
2803     [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */
2804     [0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
2805     [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
2806     [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
2807     [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, movshdup */
2808     [0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
2809 
2810     [0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2811     [0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
2812     [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
2813     [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */
2814     [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
2815     [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
2816     [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
2817     [0x2f] = { gen_helper_comiss, gen_helper_comisd },
2818     [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
2819     [0x51] = SSE_FOP(sqrt),
2820     [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
2821     [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
2822     [0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
2823     [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */
2824     [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
2825     [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */
2826     [0x58] = SSE_FOP(add),
2827     [0x59] = SSE_FOP(mul),
2828     [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps,
2829                gen_helper_cvtss2sd, gen_helper_cvtsd2ss },
2830     [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq },
2831     [0x5c] = SSE_FOP(sub),
2832     [0x5d] = SSE_FOP(min),
2833     [0x5e] = SSE_FOP(div),
2834     [0x5f] = SSE_FOP(max),
2835 
2836     [0xc2] = SSE_FOP(cmpeq),
2837     [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
2838                (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
2839 
2840     /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
2841     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2842     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2843 
2844     /* MMX ops and their SSE extensions */
2845     [0x60] = MMX_OP2(punpcklbw),
2846     [0x61] = MMX_OP2(punpcklwd),
2847     [0x62] = MMX_OP2(punpckldq),
2848     [0x63] = MMX_OP2(packsswb),
2849     [0x64] = MMX_OP2(pcmpgtb),
2850     [0x65] = MMX_OP2(pcmpgtw),
2851     [0x66] = MMX_OP2(pcmpgtl),
2852     [0x67] = MMX_OP2(packuswb),
2853     [0x68] = MMX_OP2(punpckhbw),
2854     [0x69] = MMX_OP2(punpckhwd),
2855     [0x6a] = MMX_OP2(punpckhdq),
2856     [0x6b] = MMX_OP2(packssdw),
2857     [0x6c] = { NULL, gen_helper_punpcklqdq_xmm },
2858     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
2859     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
2860     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
2861     [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
2862                (SSEFunc_0_epp)gen_helper_pshufd_xmm,
2863                (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
2864                (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
2865     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
2866     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
2867     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
2868     [0x74] = MMX_OP2(pcmpeqb),
2869     [0x75] = MMX_OP2(pcmpeqw),
2870     [0x76] = MMX_OP2(pcmpeql),
2871     [0x77] = { SSE_DUMMY }, /* emms */
2872     [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */
2873     [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r },
2874     [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps },
2875     [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps },
2876     [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
2877     [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */
2878     [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
2879     [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
2880     [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
2881     [0xd1] = MMX_OP2(psrlw),
2882     [0xd2] = MMX_OP2(psrld),
2883     [0xd3] = MMX_OP2(psrlq),
2884     [0xd4] = MMX_OP2(paddq),
2885     [0xd5] = MMX_OP2(pmullw),
2886     [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
2887     [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
2888     [0xd8] = MMX_OP2(psubusb),
2889     [0xd9] = MMX_OP2(psubusw),
2890     [0xda] = MMX_OP2(pminub),
2891     [0xdb] = MMX_OP2(pand),
2892     [0xdc] = MMX_OP2(paddusb),
2893     [0xdd] = MMX_OP2(paddusw),
2894     [0xde] = MMX_OP2(pmaxub),
2895     [0xdf] = MMX_OP2(pandn),
2896     [0xe0] = MMX_OP2(pavgb),
2897     [0xe1] = MMX_OP2(psraw),
2898     [0xe2] = MMX_OP2(psrad),
2899     [0xe3] = MMX_OP2(pavgw),
2900     [0xe4] = MMX_OP2(pmulhuw),
2901     [0xe5] = MMX_OP2(pmulhw),
2902     [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq },
2903     [0xe7] = { SSE_SPECIAL , SSE_SPECIAL },  /* movntq, movntq */
2904     [0xe8] = MMX_OP2(psubsb),
2905     [0xe9] = MMX_OP2(psubsw),
2906     [0xea] = MMX_OP2(pminsw),
2907     [0xeb] = MMX_OP2(por),
2908     [0xec] = MMX_OP2(paddsb),
2909     [0xed] = MMX_OP2(paddsw),
2910     [0xee] = MMX_OP2(pmaxsw),
2911     [0xef] = MMX_OP2(pxor),
2912     [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
2913     [0xf1] = MMX_OP2(psllw),
2914     [0xf2] = MMX_OP2(pslld),
2915     [0xf3] = MMX_OP2(psllq),
2916     [0xf4] = MMX_OP2(pmuludq),
2917     [0xf5] = MMX_OP2(pmaddwd),
2918     [0xf6] = MMX_OP2(psadbw),
2919     [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
2920                (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
2921     [0xf8] = MMX_OP2(psubb),
2922     [0xf9] = MMX_OP2(psubw),
2923     [0xfa] = MMX_OP2(psubl),
2924     [0xfb] = MMX_OP2(psubq),
2925     [0xfc] = MMX_OP2(paddb),
2926     [0xfd] = MMX_OP2(paddw),
2927     [0xfe] = MMX_OP2(paddl),
2928 };
2929 
2930 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
2931     [0 + 2] = MMX_OP2(psrlw),
2932     [0 + 4] = MMX_OP2(psraw),
2933     [0 + 6] = MMX_OP2(psllw),
2934     [8 + 2] = MMX_OP2(psrld),
2935     [8 + 4] = MMX_OP2(psrad),
2936     [8 + 6] = MMX_OP2(pslld),
2937     [16 + 2] = MMX_OP2(psrlq),
2938     [16 + 3] = { NULL, gen_helper_psrldq_xmm },
2939     [16 + 6] = MMX_OP2(psllq),
2940     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
2941 };
2942 
2943 static const SSEFunc_0_epi sse_op_table3ai[] = {
2944     gen_helper_cvtsi2ss,
2945     gen_helper_cvtsi2sd
2946 };
2947 
#ifdef TARGET_X86_64
/* 64-bit integer -> scalar float conversions: [0] single, [1] double. */
static const SSEFunc_0_epl sse_op_table3aq[] = {
    [0] = gen_helper_cvtsq2ss,
    [1] = gen_helper_cvtsq2sd,
};
#endif
2954 
2955 static const SSEFunc_i_ep sse_op_table3bi[] = {
2956     gen_helper_cvttss2si,
2957     gen_helper_cvtss2si,
2958     gen_helper_cvttsd2si,
2959     gen_helper_cvtsd2si
2960 };
2961 
#ifdef TARGET_X86_64
/*
 * Scalar float -> 64-bit integer conversions, laid out like
 * sse_op_table3bi: even slots truncate, slots 2-3 take a double source.
 */
static const SSEFunc_l_ep sse_op_table3bq[] = {
    [0] = gen_helper_cvttss2sq,
    [1] = gen_helper_cvtss2sq,
    [2] = gen_helper_cvttsd2sq,
    [3] = gen_helper_cvtsd2sq,
};
#endif
2970 
2971 static const SSEFunc_0_epp sse_op_table4[8][4] = {
2972     SSE_FOP(cmpeq),
2973     SSE_FOP(cmplt),
2974     SSE_FOP(cmple),
2975     SSE_FOP(cmpunord),
2976     SSE_FOP(cmpneq),
2977     SSE_FOP(cmpnlt),
2978     SSE_FOP(cmpnle),
2979     SSE_FOP(cmpord),
2980 };
2981 
2982 static const SSEFunc_0_epp sse_op_table5[256] = {
2983     [0x0c] = gen_helper_pi2fw,
2984     [0x0d] = gen_helper_pi2fd,
2985     [0x1c] = gen_helper_pf2iw,
2986     [0x1d] = gen_helper_pf2id,
2987     [0x8a] = gen_helper_pfnacc,
2988     [0x8e] = gen_helper_pfpnacc,
2989     [0x90] = gen_helper_pfcmpge,
2990     [0x94] = gen_helper_pfmin,
2991     [0x96] = gen_helper_pfrcp,
2992     [0x97] = gen_helper_pfrsqrt,
2993     [0x9a] = gen_helper_pfsub,
2994     [0x9e] = gen_helper_pfadd,
2995     [0xa0] = gen_helper_pfcmpgt,
2996     [0xa4] = gen_helper_pfmax,
2997     [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */
2998     [0xa7] = gen_helper_movq, /* pfrsqit1 */
2999     [0xaa] = gen_helper_pfsubr,
3000     [0xae] = gen_helper_pfacc,
3001     [0xb0] = gen_helper_pfcmpeq,
3002     [0xb4] = gen_helper_pfmul,
3003     [0xb6] = gen_helper_movq, /* pfrcpit2 */
3004     [0xb7] = gen_helper_pmulhrw_mmx,
3005     [0xbb] = gen_helper_pswapd,
3006     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
3007 };
3008 
3009 struct SSEOpHelper_epp {
3010     SSEFunc_0_epp op[2];
3011     uint32_t ext_mask;
3012 };
3013 
3014 struct SSEOpHelper_eppi {
3015     SSEFunc_0_eppi op[2];
3016     uint32_t ext_mask;
3017 };
3018 
/*
 * Initializers for SSEOpHelper_epp / SSEOpHelper_eppi entries.
 * Each expands to { { mmx helper or NULL, xmm helper }, CPUID_EXT_* bit }.
 * Only SSSE3_OP supplies an MMX variant; the others are XMM-only.
 */
#define SSSE3_OP(x) \
    { MMX_OP2(x), CPUID_EXT_SSSE3 }
#define SSE41_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
/* SSE4.1 instruction that gen_sse() decodes by hand. */
#define SSE41_SPECIAL \
    { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_PCLMULQDQ }
#define AESNI_OP(x) \
    { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
3026 
3027 static const struct SSEOpHelper_epp sse_op_table6[256] = {
3028     [0x00] = SSSE3_OP(pshufb),
3029     [0x01] = SSSE3_OP(phaddw),
3030     [0x02] = SSSE3_OP(phaddd),
3031     [0x03] = SSSE3_OP(phaddsw),
3032     [0x04] = SSSE3_OP(pmaddubsw),
3033     [0x05] = SSSE3_OP(phsubw),
3034     [0x06] = SSSE3_OP(phsubd),
3035     [0x07] = SSSE3_OP(phsubsw),
3036     [0x08] = SSSE3_OP(psignb),
3037     [0x09] = SSSE3_OP(psignw),
3038     [0x0a] = SSSE3_OP(psignd),
3039     [0x0b] = SSSE3_OP(pmulhrsw),
3040     [0x10] = SSE41_OP(pblendvb),
3041     [0x14] = SSE41_OP(blendvps),
3042     [0x15] = SSE41_OP(blendvpd),
3043     [0x17] = SSE41_OP(ptest),
3044     [0x1c] = SSSE3_OP(pabsb),
3045     [0x1d] = SSSE3_OP(pabsw),
3046     [0x1e] = SSSE3_OP(pabsd),
3047     [0x20] = SSE41_OP(pmovsxbw),
3048     [0x21] = SSE41_OP(pmovsxbd),
3049     [0x22] = SSE41_OP(pmovsxbq),
3050     [0x23] = SSE41_OP(pmovsxwd),
3051     [0x24] = SSE41_OP(pmovsxwq),
3052     [0x25] = SSE41_OP(pmovsxdq),
3053     [0x28] = SSE41_OP(pmuldq),
3054     [0x29] = SSE41_OP(pcmpeqq),
3055     [0x2a] = SSE41_SPECIAL, /* movntqda */
3056     [0x2b] = SSE41_OP(packusdw),
3057     [0x30] = SSE41_OP(pmovzxbw),
3058     [0x31] = SSE41_OP(pmovzxbd),
3059     [0x32] = SSE41_OP(pmovzxbq),
3060     [0x33] = SSE41_OP(pmovzxwd),
3061     [0x34] = SSE41_OP(pmovzxwq),
3062     [0x35] = SSE41_OP(pmovzxdq),
3063     [0x37] = SSE42_OP(pcmpgtq),
3064     [0x38] = SSE41_OP(pminsb),
3065     [0x39] = SSE41_OP(pminsd),
3066     [0x3a] = SSE41_OP(pminuw),
3067     [0x3b] = SSE41_OP(pminud),
3068     [0x3c] = SSE41_OP(pmaxsb),
3069     [0x3d] = SSE41_OP(pmaxsd),
3070     [0x3e] = SSE41_OP(pmaxuw),
3071     [0x3f] = SSE41_OP(pmaxud),
3072     [0x40] = SSE41_OP(pmulld),
3073     [0x41] = SSE41_OP(phminposuw),
3074     [0xdb] = AESNI_OP(aesimc),
3075     [0xdc] = AESNI_OP(aesenc),
3076     [0xdd] = AESNI_OP(aesenclast),
3077     [0xde] = AESNI_OP(aesdec),
3078     [0xdf] = AESNI_OP(aesdeclast),
3079 };
3080 
3081 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
3082     [0x08] = SSE41_OP(roundps),
3083     [0x09] = SSE41_OP(roundpd),
3084     [0x0a] = SSE41_OP(roundss),
3085     [0x0b] = SSE41_OP(roundsd),
3086     [0x0c] = SSE41_OP(blendps),
3087     [0x0d] = SSE41_OP(blendpd),
3088     [0x0e] = SSE41_OP(pblendw),
3089     [0x0f] = SSSE3_OP(palignr),
3090     [0x14] = SSE41_SPECIAL, /* pextrb */
3091     [0x15] = SSE41_SPECIAL, /* pextrw */
3092     [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */
3093     [0x17] = SSE41_SPECIAL, /* extractps */
3094     [0x20] = SSE41_SPECIAL, /* pinsrb */
3095     [0x21] = SSE41_SPECIAL, /* insertps */
3096     [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */
3097     [0x40] = SSE41_OP(dpps),
3098     [0x41] = SSE41_OP(dppd),
3099     [0x42] = SSE41_OP(mpsadbw),
3100     [0x44] = PCLMULQDQ_OP(pclmulqdq),
3101     [0x60] = SSE42_OP(pcmpestrm),
3102     [0x61] = SSE42_OP(pcmpestri),
3103     [0x62] = SSE42_OP(pcmpistrm),
3104     [0x63] = SSE42_OP(pcmpistri),
3105     [0xdf] = AESNI_OP(aeskeygenassist),
3106 };
3107 
3108 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
3109                     target_ulong pc_start)
3110 {
3111     int b1, op1_offset, op2_offset, is_xmm, val;
3112     int modrm, mod, rm, reg;
3113     SSEFunc_0_epp sse_fn_epp;
3114     SSEFunc_0_eppi sse_fn_eppi;
3115     SSEFunc_0_ppi sse_fn_ppi;
3116     SSEFunc_0_eppt sse_fn_eppt;
3117     MemOp ot;
3118 
3119     b &= 0xff;
3120     if (s->prefix & PREFIX_DATA)
3121         b1 = 1;
3122     else if (s->prefix & PREFIX_REPZ)
3123         b1 = 2;
3124     else if (s->prefix & PREFIX_REPNZ)
3125         b1 = 3;
3126     else
3127         b1 = 0;
3128     sse_fn_epp = sse_op_table1[b][b1];
3129     if (!sse_fn_epp) {
3130         goto unknown_op;
3131     }
3132     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
3133         is_xmm = 1;
3134     } else {
3135         if (b1 == 0) {
3136             /* MMX case */
3137             is_xmm = 0;
3138         } else {
3139             is_xmm = 1;
3140         }
3141     }
3142     /* simple MMX/SSE operation */
3143     if (s->flags & HF_TS_MASK) {
3144         gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
3145         return;
3146     }
3147     if (s->flags & HF_EM_MASK) {
3148     illegal_op:
3149         gen_illegal_opcode(s);
3150         return;
3151     }
3152     if (is_xmm
3153         && !(s->flags & HF_OSFXSR_MASK)
3154         && (b != 0x38 && b != 0x3a)) {
3155         goto unknown_op;
3156     }
3157     if (b == 0x0e) {
3158         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
3159             /* If we were fully decoding this we might use illegal_op.  */
3160             goto unknown_op;
3161         }
3162         /* femms */
3163         gen_helper_emms(cpu_env);
3164         return;
3165     }
3166     if (b == 0x77) {
3167         /* emms */
3168         gen_helper_emms(cpu_env);
3169         return;
3170     }
3171     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
3172        the static cpu state) */
3173     if (!is_xmm) {
3174         gen_helper_enter_mmx(cpu_env);
3175     }
3176 
3177     modrm = x86_ldub_code(env, s);
3178     reg = ((modrm >> 3) & 7);
3179     if (is_xmm) {
3180         reg |= REX_R(s);
3181     }
3182     mod = (modrm >> 6) & 3;
3183     if (sse_fn_epp == SSE_SPECIAL) {
3184         b |= (b1 << 8);
3185         switch(b) {
3186         case 0x0e7: /* movntq */
3187             if (mod == 3) {
3188                 goto illegal_op;
3189             }
3190             gen_lea_modrm(env, s, modrm);
3191             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3192             break;
3193         case 0x1e7: /* movntdq */
3194         case 0x02b: /* movntps */
3195         case 0x12b: /* movntps */
3196             if (mod == 3)
3197                 goto illegal_op;
3198             gen_lea_modrm(env, s, modrm);
3199             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3200             break;
3201         case 0x3f0: /* lddqu */
3202             if (mod == 3)
3203                 goto illegal_op;
3204             gen_lea_modrm(env, s, modrm);
3205             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3206             break;
3207         case 0x22b: /* movntss */
3208         case 0x32b: /* movntsd */
3209             if (mod == 3)
3210                 goto illegal_op;
3211             gen_lea_modrm(env, s, modrm);
3212             if (b1 & 1) {
3213                 gen_stq_env_A0(s, offsetof(CPUX86State,
3214                                            xmm_regs[reg].ZMM_Q(0)));
3215             } else {
3216                 tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
3217                     xmm_regs[reg].ZMM_L(0)));
3218                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3219             }
3220             break;
3221         case 0x6e: /* movd mm, ea */
3222 #ifdef TARGET_X86_64
3223             if (s->dflag == MO_64) {
3224                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3225                 tcg_gen_st_tl(s->T0, cpu_env,
3226                               offsetof(CPUX86State, fpregs[reg].mmx));
3227             } else
3228 #endif
3229             {
3230                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3231                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3232                                  offsetof(CPUX86State,fpregs[reg].mmx));
3233                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3234                 gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32);
3235             }
3236             break;
3237         case 0x16e: /* movd xmm, ea */
3238 #ifdef TARGET_X86_64
3239             if (s->dflag == MO_64) {
3240                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
3241                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3242                                  offsetof(CPUX86State,xmm_regs[reg]));
3243                 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
3244             } else
3245 #endif
3246             {
3247                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
3248                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3249                                  offsetof(CPUX86State,xmm_regs[reg]));
3250                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3251                 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
3252             }
3253             break;
3254         case 0x6f: /* movq mm, ea */
3255             if (mod != 3) {
3256                 gen_lea_modrm(env, s, modrm);
3257                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3258             } else {
3259                 rm = (modrm & 7);
3260                 tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
3261                                offsetof(CPUX86State,fpregs[rm].mmx));
3262                 tcg_gen_st_i64(s->tmp1_i64, cpu_env,
3263                                offsetof(CPUX86State,fpregs[reg].mmx));
3264             }
3265             break;
3266         case 0x010: /* movups */
3267         case 0x110: /* movupd */
3268         case 0x028: /* movaps */
3269         case 0x128: /* movapd */
3270         case 0x16f: /* movdqa xmm, ea */
3271         case 0x26f: /* movdqu xmm, ea */
3272             if (mod != 3) {
3273                 gen_lea_modrm(env, s, modrm);
3274                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3275             } else {
3276                 rm = (modrm & 7) | REX_B(s);
3277                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
3278                             offsetof(CPUX86State,xmm_regs[rm]));
3279             }
3280             break;
3281         case 0x210: /* movss xmm, ea */
3282             if (mod != 3) {
3283                 gen_lea_modrm(env, s, modrm);
3284                 gen_op_ld_v(s, MO_32, s->T0, s->A0);
3285                 tcg_gen_st32_tl(s->T0, cpu_env,
3286                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3287                 tcg_gen_movi_tl(s->T0, 0);
3288                 tcg_gen_st32_tl(s->T0, cpu_env,
3289                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)));
3290                 tcg_gen_st32_tl(s->T0, cpu_env,
3291                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3292                 tcg_gen_st32_tl(s->T0, cpu_env,
3293                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3294             } else {
3295                 rm = (modrm & 7) | REX_B(s);
3296                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3297                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3298             }
3299             break;
3300         case 0x310: /* movsd xmm, ea */
3301             if (mod != 3) {
3302                 gen_lea_modrm(env, s, modrm);
3303                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3304                                            xmm_regs[reg].ZMM_Q(0)));
3305                 tcg_gen_movi_tl(s->T0, 0);
3306                 tcg_gen_st32_tl(s->T0, cpu_env,
3307                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)));
3308                 tcg_gen_st32_tl(s->T0, cpu_env,
3309                                 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
3310             } else {
3311                 rm = (modrm & 7) | REX_B(s);
3312                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3313                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3314             }
3315             break;
3316         case 0x012: /* movlps */
3317         case 0x112: /* movlpd */
3318             if (mod != 3) {
3319                 gen_lea_modrm(env, s, modrm);
3320                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3321                                            xmm_regs[reg].ZMM_Q(0)));
3322             } else {
3323                 /* movhlps */
3324                 rm = (modrm & 7) | REX_B(s);
3325                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3326                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1)));
3327             }
3328             break;
3329         case 0x212: /* movsldup */
3330             if (mod != 3) {
3331                 gen_lea_modrm(env, s, modrm);
3332                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3333             } else {
3334                 rm = (modrm & 7) | REX_B(s);
3335                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3336                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
3337                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3338                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2)));
3339             }
3340             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3341                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3342             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3343                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2)));
3344             break;
3345         case 0x312: /* movddup */
3346             if (mod != 3) {
3347                 gen_lea_modrm(env, s, modrm);
3348                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3349                                            xmm_regs[reg].ZMM_Q(0)));
3350             } else {
3351                 rm = (modrm & 7) | REX_B(s);
3352                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3353                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3354             }
3355             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3356                         offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3357             break;
3358         case 0x016: /* movhps */
3359         case 0x116: /* movhpd */
3360             if (mod != 3) {
3361                 gen_lea_modrm(env, s, modrm);
3362                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3363                                            xmm_regs[reg].ZMM_Q(1)));
3364             } else {
3365                 /* movlhps */
3366                 rm = (modrm & 7) | REX_B(s);
3367                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)),
3368                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3369             }
3370             break;
3371         case 0x216: /* movshdup */
3372             if (mod != 3) {
3373                 gen_lea_modrm(env, s, modrm);
3374                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3375             } else {
3376                 rm = (modrm & 7) | REX_B(s);
3377                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
3378                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1)));
3379                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)),
3380                             offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3)));
3381             }
3382             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
3383                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1)));
3384             gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)),
3385                         offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3)));
3386             break;
3387         case 0x178:
3388         case 0x378:
3389             {
3390                 int bit_index, field_length;
3391 
3392                 if (b1 == 1 && reg != 0)
3393                     goto illegal_op;
3394                 field_length = x86_ldub_code(env, s) & 0x3F;
3395                 bit_index = x86_ldub_code(env, s) & 0x3F;
3396                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3397                     offsetof(CPUX86State,xmm_regs[reg]));
3398                 if (b1 == 1)
3399                     gen_helper_extrq_i(cpu_env, s->ptr0,
3400                                        tcg_const_i32(bit_index),
3401                                        tcg_const_i32(field_length));
3402                 else
3403                     gen_helper_insertq_i(cpu_env, s->ptr0,
3404                                          tcg_const_i32(bit_index),
3405                                          tcg_const_i32(field_length));
3406             }
3407             break;
3408         case 0x7e: /* movd ea, mm */
3409 #ifdef TARGET_X86_64
3410             if (s->dflag == MO_64) {
3411                 tcg_gen_ld_i64(s->T0, cpu_env,
3412                                offsetof(CPUX86State,fpregs[reg].mmx));
3413                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3414             } else
3415 #endif
3416             {
3417                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3418                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
3419                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3420             }
3421             break;
3422         case 0x17e: /* movd ea, xmm */
3423 #ifdef TARGET_X86_64
3424             if (s->dflag == MO_64) {
3425                 tcg_gen_ld_i64(s->T0, cpu_env,
3426                                offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3427                 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
3428             } else
3429 #endif
3430             {
3431                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3432                                  offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3433                 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
3434             }
3435             break;
3436         case 0x27e: /* movq xmm, ea */
3437             if (mod != 3) {
3438                 gen_lea_modrm(env, s, modrm);
3439                 gen_ldq_env_A0(s, offsetof(CPUX86State,
3440                                            xmm_regs[reg].ZMM_Q(0)));
3441             } else {
3442                 rm = (modrm & 7) | REX_B(s);
3443                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3444                             offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3445             }
3446             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3447             break;
3448         case 0x7f: /* movq ea, mm */
3449             if (mod != 3) {
3450                 gen_lea_modrm(env, s, modrm);
3451                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
3452             } else {
3453                 rm = (modrm & 7);
3454                 gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx),
3455                             offsetof(CPUX86State,fpregs[reg].mmx));
3456             }
3457             break;
3458         case 0x011: /* movups */
3459         case 0x111: /* movupd */
3460         case 0x029: /* movaps */
3461         case 0x129: /* movapd */
3462         case 0x17f: /* movdqa ea, xmm */
3463         case 0x27f: /* movdqu ea, xmm */
3464             if (mod != 3) {
3465                 gen_lea_modrm(env, s, modrm);
3466                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
3467             } else {
3468                 rm = (modrm & 7) | REX_B(s);
3469                 gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]),
3470                             offsetof(CPUX86State,xmm_regs[reg]));
3471             }
3472             break;
3473         case 0x211: /* movss ea, xmm */
3474             if (mod != 3) {
3475                 gen_lea_modrm(env, s, modrm);
3476                 tcg_gen_ld32u_tl(s->T0, cpu_env,
3477                                  offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
3478                 gen_op_st_v(s, MO_32, s->T0, s->A0);
3479             } else {
3480                 rm = (modrm & 7) | REX_B(s);
3481                 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)),
3482                             offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0)));
3483             }
3484             break;
3485         case 0x311: /* movsd ea, xmm */
3486             if (mod != 3) {
3487                 gen_lea_modrm(env, s, modrm);
3488                 gen_stq_env_A0(s, offsetof(CPUX86State,
3489                                            xmm_regs[reg].ZMM_Q(0)));
3490             } else {
3491                 rm = (modrm & 7) | REX_B(s);
3492                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3493                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3494             }
3495             break;
3496         case 0x013: /* movlps */
3497         case 0x113: /* movlpd */
3498             if (mod != 3) {
3499                 gen_lea_modrm(env, s, modrm);
3500                 gen_stq_env_A0(s, offsetof(CPUX86State,
3501                                            xmm_regs[reg].ZMM_Q(0)));
3502             } else {
3503                 goto illegal_op;
3504             }
3505             break;
3506         case 0x017: /* movhps */
3507         case 0x117: /* movhpd */
3508             if (mod != 3) {
3509                 gen_lea_modrm(env, s, modrm);
3510                 gen_stq_env_A0(s, offsetof(CPUX86State,
3511                                            xmm_regs[reg].ZMM_Q(1)));
3512             } else {
3513                 goto illegal_op;
3514             }
3515             break;
3516         case 0x71: /* shift mm, im */
3517         case 0x72:
3518         case 0x73:
3519         case 0x171: /* shift xmm, im */
3520         case 0x172:
3521         case 0x173:
3522             val = x86_ldub_code(env, s);
3523             if (is_xmm) {
3524                 tcg_gen_movi_tl(s->T0, val);
3525                 tcg_gen_st32_tl(s->T0, cpu_env,
3526                                 offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3527                 tcg_gen_movi_tl(s->T0, 0);
3528                 tcg_gen_st32_tl(s->T0, cpu_env,
3529                                 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
3530                 op1_offset = offsetof(CPUX86State,xmm_t0);
3531             } else {
3532                 tcg_gen_movi_tl(s->T0, val);
3533                 tcg_gen_st32_tl(s->T0, cpu_env,
3534                                 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
3535                 tcg_gen_movi_tl(s->T0, 0);
3536                 tcg_gen_st32_tl(s->T0, cpu_env,
3537                                 offsetof(CPUX86State, mmx_t0.MMX_L(1)));
3538                 op1_offset = offsetof(CPUX86State,mmx_t0);
3539             }
3540             assert(b1 < 2);
3541             sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
3542                                        (((modrm >> 3)) & 7)][b1];
3543             if (!sse_fn_epp) {
3544                 goto unknown_op;
3545             }
3546             if (is_xmm) {
3547                 rm = (modrm & 7) | REX_B(s);
3548                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3549             } else {
3550                 rm = (modrm & 7);
3551                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3552             }
3553             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3554             tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset);
3555             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3556             break;
3557         case 0x050: /* movmskps */
3558             rm = (modrm & 7) | REX_B(s);
3559             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3560                              offsetof(CPUX86State,xmm_regs[rm]));
3561             gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0);
3562             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3563             break;
3564         case 0x150: /* movmskpd */
3565             rm = (modrm & 7) | REX_B(s);
3566             tcg_gen_addi_ptr(s->ptr0, cpu_env,
3567                              offsetof(CPUX86State,xmm_regs[rm]));
3568             gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0);
3569             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3570             break;
3571         case 0x02a: /* cvtpi2ps */
3572         case 0x12a: /* cvtpi2pd */
3573             gen_helper_enter_mmx(cpu_env);
3574             if (mod != 3) {
3575                 gen_lea_modrm(env, s, modrm);
3576                 op2_offset = offsetof(CPUX86State,mmx_t0);
3577                 gen_ldq_env_A0(s, op2_offset);
3578             } else {
3579                 rm = (modrm & 7);
3580                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3581             }
3582             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3583             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3584             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3585             switch(b >> 8) {
3586             case 0x0:
3587                 gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1);
3588                 break;
3589             default:
3590             case 0x1:
3591                 gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1);
3592                 break;
3593             }
3594             break;
3595         case 0x22a: /* cvtsi2ss */
3596         case 0x32a: /* cvtsi2sd */
3597             ot = mo_64_32(s->dflag);
3598             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3599             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3600             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3601             if (ot == MO_32) {
3602                 SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
3603                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3604                 sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32);
3605             } else {
3606 #ifdef TARGET_X86_64
3607                 SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
3608                 sse_fn_epl(cpu_env, s->ptr0, s->T0);
3609 #else
3610                 goto illegal_op;
3611 #endif
3612             }
3613             break;
3614         case 0x02c: /* cvttps2pi */
3615         case 0x12c: /* cvttpd2pi */
3616         case 0x02d: /* cvtps2pi */
3617         case 0x12d: /* cvtpd2pi */
3618             gen_helper_enter_mmx(cpu_env);
3619             if (mod != 3) {
3620                 gen_lea_modrm(env, s, modrm);
3621                 op2_offset = offsetof(CPUX86State,xmm_t0);
3622                 gen_ldo_env_A0(s, op2_offset);
3623             } else {
3624                 rm = (modrm & 7) | REX_B(s);
3625                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3626             }
3627             op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx);
3628             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3629             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3630             switch(b) {
3631             case 0x02c:
3632                 gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1);
3633                 break;
3634             case 0x12c:
3635                 gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1);
3636                 break;
3637             case 0x02d:
3638                 gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1);
3639                 break;
3640             case 0x12d:
3641                 gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1);
3642                 break;
3643             }
3644             break;
3645         case 0x22c: /* cvttss2si */
3646         case 0x32c: /* cvttsd2si */
3647         case 0x22d: /* cvtss2si */
3648         case 0x32d: /* cvtsd2si */
3649             ot = mo_64_32(s->dflag);
3650             if (mod != 3) {
3651                 gen_lea_modrm(env, s, modrm);
3652                 if ((b >> 8) & 1) {
3653                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0)));
3654                 } else {
3655                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
3656                     tcg_gen_st32_tl(s->T0, cpu_env,
3657                                     offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
3658                 }
3659                 op2_offset = offsetof(CPUX86State,xmm_t0);
3660             } else {
3661                 rm = (modrm & 7) | REX_B(s);
3662                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
3663             }
3664             tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset);
3665             if (ot == MO_32) {
3666                 SSEFunc_i_ep sse_fn_i_ep =
3667                     sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
3668                 sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0);
3669                 tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
3670             } else {
3671 #ifdef TARGET_X86_64
3672                 SSEFunc_l_ep sse_fn_l_ep =
3673                     sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
3674                 sse_fn_l_ep(s->T0, cpu_env, s->ptr0);
3675 #else
3676                 goto illegal_op;
3677 #endif
3678             }
3679             gen_op_mov_reg_v(s, ot, reg, s->T0);
3680             break;
3681         case 0xc4: /* pinsrw */
3682         case 0x1c4:
3683             s->rip_offset = 1;
3684             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
3685             val = x86_ldub_code(env, s);
3686             if (b1) {
3687                 val &= 7;
3688                 tcg_gen_st16_tl(s->T0, cpu_env,
3689                                 offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
3690             } else {
3691                 val &= 3;
3692                 tcg_gen_st16_tl(s->T0, cpu_env,
3693                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
3694             }
3695             break;
3696         case 0xc5: /* pextrw */
3697         case 0x1c5:
3698             if (mod != 3)
3699                 goto illegal_op;
3700             ot = mo_64_32(s->dflag);
3701             val = x86_ldub_code(env, s);
3702             if (b1) {
3703                 val &= 7;
3704                 rm = (modrm & 7) | REX_B(s);
3705                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3706                                  offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val)));
3707             } else {
3708                 val &= 3;
3709                 rm = (modrm & 7);
3710                 tcg_gen_ld16u_tl(s->T0, cpu_env,
3711                                 offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val)));
3712             }
3713             reg = ((modrm >> 3) & 7) | REX_R(s);
3714             gen_op_mov_reg_v(s, ot, reg, s->T0);
3715             break;
3716         case 0x1d6: /* movq ea, xmm */
3717             if (mod != 3) {
3718                 gen_lea_modrm(env, s, modrm);
3719                 gen_stq_env_A0(s, offsetof(CPUX86State,
3720                                            xmm_regs[reg].ZMM_Q(0)));
3721             } else {
3722                 rm = (modrm & 7) | REX_B(s);
3723                 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)),
3724                             offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0)));
3725                 gen_op_movq_env_0(s,
3726                                   offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1)));
3727             }
3728             break;
3729         case 0x2d6: /* movq2dq */
3730             gen_helper_enter_mmx(cpu_env);
3731             rm = (modrm & 7);
3732             gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
3733                         offsetof(CPUX86State,fpregs[rm].mmx));
3734             gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)));
3735             break;
3736         case 0x3d6: /* movdq2q */
3737             gen_helper_enter_mmx(cpu_env);
3738             rm = (modrm & 7) | REX_B(s);
3739             gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx),
3740                         offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
3741             break;
3742         case 0xd7: /* pmovmskb */
3743         case 0x1d7:
3744             if (mod != 3)
3745                 goto illegal_op;
3746             if (b1) {
3747                 rm = (modrm & 7) | REX_B(s);
3748                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3749                                  offsetof(CPUX86State, xmm_regs[rm]));
3750                 gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0);
3751             } else {
3752                 rm = (modrm & 7);
3753                 tcg_gen_addi_ptr(s->ptr0, cpu_env,
3754                                  offsetof(CPUX86State, fpregs[rm].mmx));
3755                 gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0);
3756             }
3757             reg = ((modrm >> 3) & 7) | REX_R(s);
3758             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
3759             break;
3760 
3761         case 0x138:
3762         case 0x038:
3763             b = modrm;
3764             if ((b & 0xf0) == 0xf0) {
3765                 goto do_0f_38_fx;
3766             }
3767             modrm = x86_ldub_code(env, s);
3768             rm = modrm & 7;
3769             reg = ((modrm >> 3) & 7) | REX_R(s);
3770             mod = (modrm >> 6) & 3;
3771 
3772             assert(b1 < 2);
3773             sse_fn_epp = sse_op_table6[b].op[b1];
3774             if (!sse_fn_epp) {
3775                 goto unknown_op;
3776             }
3777             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
3778                 goto illegal_op;
3779 
3780             if (b1) {
3781                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
3782                 if (mod == 3) {
3783                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
3784                 } else {
3785                     op2_offset = offsetof(CPUX86State,xmm_t0);
3786                     gen_lea_modrm(env, s, modrm);
3787                     switch (b) {
3788                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
3789                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
3790                     case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
3791                         gen_ldq_env_A0(s, op2_offset +
3792                                         offsetof(ZMMReg, ZMM_Q(0)));
3793                         break;
3794                     case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
3795                     case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
3796                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
3797                                             s->mem_index, MO_LEUL);
3798                         tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset +
3799                                         offsetof(ZMMReg, ZMM_L(0)));
3800                         break;
3801                     case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
3802                         tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
3803                                            s->mem_index, MO_LEUW);
3804                         tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset +
3805                                         offsetof(ZMMReg, ZMM_W(0)));
3806                         break;
3807                     case 0x2a:            /* movntdqa */
3808                         gen_ldo_env_A0(s, op1_offset);
3809                         return;
3810                     default:
3811                         gen_ldo_env_A0(s, op2_offset);
3812                     }
3813                 }
3814             } else {
3815                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
3816                 if (mod == 3) {
3817                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
3818                 } else {
3819                     op2_offset = offsetof(CPUX86State,mmx_t0);
3820                     gen_lea_modrm(env, s, modrm);
3821                     gen_ldq_env_A0(s, op2_offset);
3822                 }
3823             }
3824             if (sse_fn_epp == SSE_SPECIAL) {
3825                 goto unknown_op;
3826             }
3827 
3828             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
3829             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
3830             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
3831 
3832             if (b == 0x17) {
3833                 set_cc_op(s, CC_OP_EFLAGS);
3834             }
3835             break;
3836 
3837         case 0x238:
3838         case 0x338:
3839         do_0f_38_fx:
3840             /* Various integer extensions at 0f 38 f[0-f].  */
3841             b = modrm | (b1 << 8);
3842             modrm = x86_ldub_code(env, s);
3843             reg = ((modrm >> 3) & 7) | REX_R(s);
3844 
3845             switch (b) {
3846             case 0x3f0: /* crc32 Gd,Eb */
3847             case 0x3f1: /* crc32 Gd,Ey */
3848             do_crc32:
3849                 if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) {
3850                     goto illegal_op;
3851                 }
3852                 if ((b & 0xff) == 0xf0) {
3853                     ot = MO_8;
3854                 } else if (s->dflag != MO_64) {
3855                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3856                 } else {
3857                     ot = MO_64;
3858                 }
3859 
3860                 tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]);
3861                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3862                 gen_helper_crc32(s->T0, s->tmp2_i32,
3863                                  s->T0, tcg_const_i32(8 << ot));
3864 
3865                 ot = mo_64_32(s->dflag);
3866                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3867                 break;
3868 
3869             case 0x1f0: /* crc32 or movbe */
3870             case 0x1f1:
3871                 /* For these insns, the f2 prefix is supposed to have priority
3872                    over the 66 prefix, but that's not what we implement above
3873                    setting b1.  */
3874                 if (s->prefix & PREFIX_REPNZ) {
3875                     goto do_crc32;
3876                 }
3877                 /* FALLTHRU */
3878             case 0x0f0: /* movbe Gy,My */
3879             case 0x0f1: /* movbe My,Gy */
3880                 if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) {
3881                     goto illegal_op;
3882                 }
3883                 if (s->dflag != MO_64) {
3884                     ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32);
3885                 } else {
3886                     ot = MO_64;
3887                 }
3888 
3889                 gen_lea_modrm(env, s, modrm);
3890                 if ((b & 1) == 0) {
3891                     tcg_gen_qemu_ld_tl(s->T0, s->A0,
3892                                        s->mem_index, ot | MO_BE);
3893                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3894                 } else {
3895                     tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0,
3896                                        s->mem_index, ot | MO_BE);
3897                 }
3898                 break;
3899 
3900             case 0x0f2: /* andn Gy, By, Ey */
3901                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3902                     || !(s->prefix & PREFIX_VEX)
3903                     || s->vex_l != 0) {
3904                     goto illegal_op;
3905                 }
3906                 ot = mo_64_32(s->dflag);
3907                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3908                 tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]);
3909                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3910                 gen_op_update1_cc(s);
3911                 set_cc_op(s, CC_OP_LOGICB + ot);
3912                 break;
3913 
3914             case 0x0f7: /* bextr Gy, Ey, By */
3915                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
3916                     || !(s->prefix & PREFIX_VEX)
3917                     || s->vex_l != 0) {
3918                     goto illegal_op;
3919                 }
3920                 ot = mo_64_32(s->dflag);
3921                 {
3922                     TCGv bound, zero;
3923 
3924                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3925                     /* Extract START, and shift the operand.
3926                        Shifts larger than operand size get zeros.  */
3927                     tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]);
3928                     tcg_gen_shr_tl(s->T0, s->T0, s->A0);
3929 
3930                     bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3931                     zero = tcg_const_tl(0);
3932                     tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound,
3933                                        s->T0, zero);
3934                     tcg_temp_free(zero);
3935 
3936                     /* Extract the LEN into a mask.  Lengths larger than
3937                        operand size get all ones.  */
3938                     tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8);
3939                     tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound,
3940                                        s->A0, bound);
3941                     tcg_temp_free(bound);
3942                     tcg_gen_movi_tl(s->T1, 1);
3943                     tcg_gen_shl_tl(s->T1, s->T1, s->A0);
3944                     tcg_gen_subi_tl(s->T1, s->T1, 1);
3945                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
3946 
3947                     gen_op_mov_reg_v(s, ot, reg, s->T0);
3948                     gen_op_update1_cc(s);
3949                     set_cc_op(s, CC_OP_LOGICB + ot);
3950                 }
3951                 break;
3952 
3953             case 0x0f5: /* bzhi Gy, Ey, By */
3954                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3955                     || !(s->prefix & PREFIX_VEX)
3956                     || s->vex_l != 0) {
3957                     goto illegal_op;
3958                 }
3959                 ot = mo_64_32(s->dflag);
3960                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3961                 tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
3962                 {
3963                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
3964                     /* Note that since we're using BMILG (in order to get O
3965                        cleared) we need to store the inverse into C.  */
3966                     tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src,
3967                                        s->T1, bound);
3968                     tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1,
3969                                        bound, bound, s->T1);
3970                     tcg_temp_free(bound);
3971                 }
3972                 tcg_gen_movi_tl(s->A0, -1);
3973                 tcg_gen_shl_tl(s->A0, s->A0, s->T1);
3974                 tcg_gen_andc_tl(s->T0, s->T0, s->A0);
3975                 gen_op_mov_reg_v(s, ot, reg, s->T0);
3976                 gen_op_update1_cc(s);
3977                 set_cc_op(s, CC_OP_BMILGB + ot);
3978                 break;
3979 
3980             case 0x3f6: /* mulx By, Gy, rdx, Ey */
3981                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
3982                     || !(s->prefix & PREFIX_VEX)
3983                     || s->vex_l != 0) {
3984                     goto illegal_op;
3985                 }
3986                 ot = mo_64_32(s->dflag);
3987                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
3988                 switch (ot) {
3989                 default:
3990                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
3991                     tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]);
3992                     tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
3993                                       s->tmp2_i32, s->tmp3_i32);
3994                     tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
3995                     tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32);
3996                     break;
3997 #ifdef TARGET_X86_64
3998                 case MO_64:
3999                     tcg_gen_mulu2_i64(s->T0, s->T1,
4000                                       s->T0, cpu_regs[R_EDX]);
4001                     tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0);
4002                     tcg_gen_mov_i64(cpu_regs[reg], s->T1);
4003                     break;
4004 #endif
4005                 }
4006                 break;
4007 
4008             case 0x3f5: /* pdep Gy, By, Ey */
4009                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4010                     || !(s->prefix & PREFIX_VEX)
4011                     || s->vex_l != 0) {
4012                     goto illegal_op;
4013                 }
4014                 ot = mo_64_32(s->dflag);
4015                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4016                 /* Note that by zero-extending the source operand, we
4017                    automatically handle zero-extending the result.  */
4018                 if (ot == MO_64) {
4019                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4020                 } else {
4021                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4022                 }
4023                 gen_helper_pdep(cpu_regs[reg], s->T1, s->T0);
4024                 break;
4025 
4026             case 0x2f5: /* pext Gy, By, Ey */
4027                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4028                     || !(s->prefix & PREFIX_VEX)
4029                     || s->vex_l != 0) {
4030                     goto illegal_op;
4031                 }
4032                 ot = mo_64_32(s->dflag);
4033                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4034                 /* Note that by zero-extending the source operand, we
4035                    automatically handle zero-extending the result.  */
4036                 if (ot == MO_64) {
4037                     tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]);
4038                 } else {
4039                     tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]);
4040                 }
4041                 gen_helper_pext(cpu_regs[reg], s->T1, s->T0);
4042                 break;
4043 
4044             case 0x1f6: /* adcx Gy, Ey */
4045             case 0x2f6: /* adox Gy, Ey */
4046                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
4047                     goto illegal_op;
4048                 } else {
4049                     TCGv carry_in, carry_out, zero;
4050                     int end_op;
4051 
4052                     ot = mo_64_32(s->dflag);
4053                     gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4054 
4055                     /* Re-use the carry-out from a previous round.  */
4056                     carry_in = NULL;
4057                     carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
4058                     switch (s->cc_op) {
4059                     case CC_OP_ADCX:
4060                         if (b == 0x1f6) {
4061                             carry_in = cpu_cc_dst;
4062                             end_op = CC_OP_ADCX;
4063                         } else {
4064                             end_op = CC_OP_ADCOX;
4065                         }
4066                         break;
4067                     case CC_OP_ADOX:
4068                         if (b == 0x1f6) {
4069                             end_op = CC_OP_ADCOX;
4070                         } else {
4071                             carry_in = cpu_cc_src2;
4072                             end_op = CC_OP_ADOX;
4073                         }
4074                         break;
4075                     case CC_OP_ADCOX:
4076                         end_op = CC_OP_ADCOX;
4077                         carry_in = carry_out;
4078                         break;
4079                     default:
4080                         end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
4081                         break;
4082                     }
4083                     /* If we can't reuse carry-out, get it out of EFLAGS.  */
4084                     if (!carry_in) {
4085                         if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
4086                             gen_compute_eflags(s);
4087                         }
4088                         carry_in = s->tmp0;
4089                         tcg_gen_extract_tl(carry_in, cpu_cc_src,
4090                                            ctz32(b == 0x1f6 ? CC_C : CC_O), 1);
4091                     }
4092 
4093                     switch (ot) {
4094 #ifdef TARGET_X86_64
4095                     case MO_32:
4096                         /* If we know TL is 64-bit, and we want a 32-bit
4097                            result, just do everything in 64-bit arithmetic.  */
4098                         tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
4099                         tcg_gen_ext32u_i64(s->T0, s->T0);
4100                         tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]);
4101                         tcg_gen_add_i64(s->T0, s->T0, carry_in);
4102                         tcg_gen_ext32u_i64(cpu_regs[reg], s->T0);
4103                         tcg_gen_shri_i64(carry_out, s->T0, 32);
4104                         break;
4105 #endif
4106                     default:
4107                         /* Otherwise compute the carry-out in two steps.  */
4108                         zero = tcg_const_tl(0);
4109                         tcg_gen_add2_tl(s->T0, carry_out,
4110                                         s->T0, zero,
4111                                         carry_in, zero);
4112                         tcg_gen_add2_tl(cpu_regs[reg], carry_out,
4113                                         cpu_regs[reg], carry_out,
4114                                         s->T0, zero);
4115                         tcg_temp_free(zero);
4116                         break;
4117                     }
4118                     set_cc_op(s, end_op);
4119                 }
4120                 break;
4121 
4122             case 0x1f7: /* shlx Gy, Ey, By */
4123             case 0x2f7: /* sarx Gy, Ey, By */
4124             case 0x3f7: /* shrx Gy, Ey, By */
4125                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4126                     || !(s->prefix & PREFIX_VEX)
4127                     || s->vex_l != 0) {
4128                     goto illegal_op;
4129                 }
4130                 ot = mo_64_32(s->dflag);
4131                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4132                 if (ot == MO_64) {
4133                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63);
4134                 } else {
4135                     tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31);
4136                 }
4137                 if (b == 0x1f7) {
4138                     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
4139                 } else if (b == 0x2f7) {
4140                     if (ot != MO_64) {
4141                         tcg_gen_ext32s_tl(s->T0, s->T0);
4142                     }
4143                     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
4144                 } else {
4145                     if (ot != MO_64) {
4146                         tcg_gen_ext32u_tl(s->T0, s->T0);
4147                     }
4148                     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
4149                 }
4150                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4151                 break;
4152 
4153             case 0x0f3:
4154             case 0x1f3:
4155             case 0x2f3:
4156             case 0x3f3: /* Group 17 */
4157                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)
4158                     || !(s->prefix & PREFIX_VEX)
4159                     || s->vex_l != 0) {
4160                     goto illegal_op;
4161                 }
4162                 ot = mo_64_32(s->dflag);
4163                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4164 
4165                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4166                 switch (reg & 7) {
4167                 case 1: /* blsr By,Ey */
4168                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4169                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4170                     break;
4171                 case 2: /* blsmsk By,Ey */
4172                     tcg_gen_subi_tl(s->T1, s->T0, 1);
4173                     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
4174                     break;
4175                 case 3: /* blsi By, Ey */
4176                     tcg_gen_neg_tl(s->T1, s->T0);
4177                     tcg_gen_and_tl(s->T0, s->T0, s->T1);
4178                     break;
4179                 default:
4180                     goto unknown_op;
4181                 }
4182                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4183                 gen_op_mov_reg_v(s, ot, s->vex_v, s->T0);
4184                 set_cc_op(s, CC_OP_BMILGB + ot);
4185                 break;
4186 
4187             default:
4188                 goto unknown_op;
4189             }
4190             break;
4191 
4192         case 0x03a:
4193         case 0x13a:
4194             b = modrm;
4195             modrm = x86_ldub_code(env, s);
4196             rm = modrm & 7;
4197             reg = ((modrm >> 3) & 7) | REX_R(s);
4198             mod = (modrm >> 6) & 3;
4199 
4200             assert(b1 < 2);
4201             sse_fn_eppi = sse_op_table7[b].op[b1];
4202             if (!sse_fn_eppi) {
4203                 goto unknown_op;
4204             }
4205             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
4206                 goto illegal_op;
4207 
4208             s->rip_offset = 1;
4209 
4210             if (sse_fn_eppi == SSE_SPECIAL) {
4211                 ot = mo_64_32(s->dflag);
4212                 rm = (modrm & 7) | REX_B(s);
4213                 if (mod != 3)
4214                     gen_lea_modrm(env, s, modrm);
4215                 reg = ((modrm >> 3) & 7) | REX_R(s);
4216                 val = x86_ldub_code(env, s);
4217                 switch (b) {
4218                 case 0x14: /* pextrb */
4219                     tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4220                                             xmm_regs[reg].ZMM_B(val & 15)));
4221                     if (mod == 3) {
4222                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4223                     } else {
4224                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4225                                            s->mem_index, MO_UB);
4226                     }
4227                     break;
4228                 case 0x15: /* pextrw */
4229                     tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4230                                             xmm_regs[reg].ZMM_W(val & 7)));
4231                     if (mod == 3) {
4232                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4233                     } else {
4234                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4235                                            s->mem_index, MO_LEUW);
4236                     }
4237                     break;
4238                 case 0x16:
4239                     if (ot == MO_32) { /* pextrd */
4240                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4241                                         offsetof(CPUX86State,
4242                                                 xmm_regs[reg].ZMM_L(val & 3)));
4243                         if (mod == 3) {
4244                             tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32);
4245                         } else {
4246                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
4247                                                 s->mem_index, MO_LEUL);
4248                         }
4249                     } else { /* pextrq */
4250 #ifdef TARGET_X86_64
4251                         tcg_gen_ld_i64(s->tmp1_i64, cpu_env,
4252                                         offsetof(CPUX86State,
4253                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4254                         if (mod == 3) {
4255                             tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64);
4256                         } else {
4257                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
4258                                                 s->mem_index, MO_LEUQ);
4259                         }
4260 #else
4261                         goto illegal_op;
4262 #endif
4263                     }
4264                     break;
4265                 case 0x17: /* extractps */
4266                     tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State,
4267                                             xmm_regs[reg].ZMM_L(val & 3)));
4268                     if (mod == 3) {
4269                         gen_op_mov_reg_v(s, ot, rm, s->T0);
4270                     } else {
4271                         tcg_gen_qemu_st_tl(s->T0, s->A0,
4272                                            s->mem_index, MO_LEUL);
4273                     }
4274                     break;
4275                 case 0x20: /* pinsrb */
4276                     if (mod == 3) {
4277                         gen_op_mov_v_reg(s, MO_32, s->T0, rm);
4278                     } else {
4279                         tcg_gen_qemu_ld_tl(s->T0, s->A0,
4280                                            s->mem_index, MO_UB);
4281                     }
4282                     tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State,
4283                                             xmm_regs[reg].ZMM_B(val & 15)));
4284                     break;
4285                 case 0x21: /* insertps */
4286                     if (mod == 3) {
4287                         tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
4288                                         offsetof(CPUX86State,xmm_regs[rm]
4289                                                 .ZMM_L((val >> 6) & 3)));
4290                     } else {
4291                         tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4292                                             s->mem_index, MO_LEUL);
4293                     }
4294                     tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4295                                     offsetof(CPUX86State,xmm_regs[reg]
4296                                             .ZMM_L((val >> 4) & 3)));
4297                     if ((val >> 0) & 1)
4298                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4299                                         cpu_env, offsetof(CPUX86State,
4300                                                 xmm_regs[reg].ZMM_L(0)));
4301                     if ((val >> 1) & 1)
4302                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4303                                         cpu_env, offsetof(CPUX86State,
4304                                                 xmm_regs[reg].ZMM_L(1)));
4305                     if ((val >> 2) & 1)
4306                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4307                                         cpu_env, offsetof(CPUX86State,
4308                                                 xmm_regs[reg].ZMM_L(2)));
4309                     if ((val >> 3) & 1)
4310                         tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
4311                                         cpu_env, offsetof(CPUX86State,
4312                                                 xmm_regs[reg].ZMM_L(3)));
4313                     break;
4314                 case 0x22:
4315                     if (ot == MO_32) { /* pinsrd */
4316                         if (mod == 3) {
4317                             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]);
4318                         } else {
4319                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
4320                                                 s->mem_index, MO_LEUL);
4321                         }
4322                         tcg_gen_st_i32(s->tmp2_i32, cpu_env,
4323                                         offsetof(CPUX86State,
4324                                                 xmm_regs[reg].ZMM_L(val & 3)));
4325                     } else { /* pinsrq */
4326 #ifdef TARGET_X86_64
4327                         if (mod == 3) {
4328                             gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm);
4329                         } else {
4330                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
4331                                                 s->mem_index, MO_LEUQ);
4332                         }
4333                         tcg_gen_st_i64(s->tmp1_i64, cpu_env,
4334                                         offsetof(CPUX86State,
4335                                                 xmm_regs[reg].ZMM_Q(val & 1)));
4336 #else
4337                         goto illegal_op;
4338 #endif
4339                     }
4340                     break;
4341                 }
4342                 return;
4343             }
4344 
4345             if (b1) {
4346                 op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4347                 if (mod == 3) {
4348                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
4349                 } else {
4350                     op2_offset = offsetof(CPUX86State,xmm_t0);
4351                     gen_lea_modrm(env, s, modrm);
4352                     gen_ldo_env_A0(s, op2_offset);
4353                 }
4354             } else {
4355                 op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4356                 if (mod == 3) {
4357                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4358                 } else {
4359                     op2_offset = offsetof(CPUX86State,mmx_t0);
4360                     gen_lea_modrm(env, s, modrm);
4361                     gen_ldq_env_A0(s, op2_offset);
4362                 }
4363             }
4364             val = x86_ldub_code(env, s);
4365 
4366             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
4367                 set_cc_op(s, CC_OP_EFLAGS);
4368 
4369                 if (s->dflag == MO_64) {
4370                     /* The helper must use entire 64-bit gp registers */
4371                     val |= 1 << 8;
4372                 }
4373             }
4374 
4375             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4376             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4377             sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val));
4378             break;
4379 
4380         case 0x33a:
4381             /* Various integer extensions at 0f 3a f[0-f].  */
4382             b = modrm | (b1 << 8);
4383             modrm = x86_ldub_code(env, s);
4384             reg = ((modrm >> 3) & 7) | REX_R(s);
4385 
4386             switch (b) {
4387             case 0x3f0: /* rorx Gy,Ey, Ib */
4388                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2)
4389                     || !(s->prefix & PREFIX_VEX)
4390                     || s->vex_l != 0) {
4391                     goto illegal_op;
4392                 }
4393                 ot = mo_64_32(s->dflag);
4394                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
4395                 b = x86_ldub_code(env, s);
4396                 if (ot == MO_64) {
4397                     tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
4398                 } else {
4399                     tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4400                     tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
4401                     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
4402                 }
4403                 gen_op_mov_reg_v(s, ot, reg, s->T0);
4404                 break;
4405 
4406             default:
4407                 goto unknown_op;
4408             }
4409             break;
4410 
4411         default:
4412         unknown_op:
4413             gen_unknown_opcode(env, s);
4414             return;
4415         }
4416     } else {
4417         /* generic MMX or SSE operation */
4418         switch(b) {
4419         case 0x70: /* pshufx insn */
4420         case 0xc6: /* pshufx insn */
4421         case 0xc2: /* compare insns */
4422             s->rip_offset = 1;
4423             break;
4424         default:
4425             break;
4426         }
4427         if (is_xmm) {
4428             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
4429             if (mod != 3) {
4430                 int sz = 4;
4431 
4432                 gen_lea_modrm(env, s, modrm);
4433                 op2_offset = offsetof(CPUX86State,xmm_t0);
4434 
4435                 switch (b) {
4436                 case 0x50 ... 0x5a:
4437                 case 0x5c ... 0x5f:
4438                 case 0xc2:
4439                     /* Most sse scalar operations.  */
4440                     if (b1 == 2) {
4441                         sz = 2;
4442                     } else if (b1 == 3) {
4443                         sz = 3;
4444                     }
4445                     break;
4446 
4447                 case 0x2e:  /* ucomis[sd] */
4448                 case 0x2f:  /* comis[sd] */
4449                     if (b1 == 0) {
4450                         sz = 2;
4451                     } else {
4452                         sz = 3;
4453                     }
4454                     break;
4455                 }
4456 
4457                 switch (sz) {
4458                 case 2:
4459                     /* 32 bit access */
4460                     gen_op_ld_v(s, MO_32, s->T0, s->A0);
4461                     tcg_gen_st32_tl(s->T0, cpu_env,
4462                                     offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
4463                     break;
4464                 case 3:
4465                     /* 64 bit access */
4466                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0)));
4467                     break;
4468                 default:
4469                     /* 128 bit access */
4470                     gen_ldo_env_A0(s, op2_offset);
4471                     break;
4472                 }
4473             } else {
4474                 rm = (modrm & 7) | REX_B(s);
4475                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
4476             }
4477         } else {
4478             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
4479             if (mod != 3) {
4480                 gen_lea_modrm(env, s, modrm);
4481                 op2_offset = offsetof(CPUX86State,mmx_t0);
4482                 gen_ldq_env_A0(s, op2_offset);
4483             } else {
4484                 rm = (modrm & 7);
4485                 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
4486             }
4487         }
4488         switch(b) {
4489         case 0x0f: /* 3DNow! data insns */
4490             val = x86_ldub_code(env, s);
4491             sse_fn_epp = sse_op_table5[val];
4492             if (!sse_fn_epp) {
4493                 goto unknown_op;
4494             }
4495             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
4496                 goto illegal_op;
4497             }
4498             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4499             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4500             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4501             break;
4502         case 0x70: /* pshufx insn */
4503         case 0xc6: /* pshufx insn */
4504             val = x86_ldub_code(env, s);
4505             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4506             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4507             /* XXX: introduce a new table? */
4508             sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
4509             sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val));
4510             break;
4511         case 0xc2:
4512             /* compare insns, bits 7:3 (7:5 for AVX) are ignored */
4513             val = x86_ldub_code(env, s) & 7;
4514             sse_fn_epp = sse_op_table4[val][b1];
4515 
4516             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4517             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4518             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4519             break;
4520         case 0xf7:
4521             /* maskmov : we must prepare A0 */
4522             if (mod != 3)
4523                 goto illegal_op;
4524             tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
4525             gen_extu(s->aflag, s->A0);
4526             gen_add_A0_ds_seg(s);
4527 
4528             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4529             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4530             /* XXX: introduce a new table? */
4531             sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
4532             sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0);
4533             break;
4534         default:
4535             tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
4536             tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
4537             sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
4538             break;
4539         }
4540         if (b == 0x2e || b == 0x2f) {
4541             set_cc_op(s, CC_OP_EFLAGS);
4542         }
4543     }
4544 }
4545 
4546 /* Convert one instruction.  s->base.is_jmp is set if the translation must
4547    be stopped.  Returns the next pc value.  */
4548 static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
4549 {
4550     CPUX86State *env = cpu->env_ptr;
4551     int b, prefixes;
4552     int shift;
4553     MemOp ot, aflag, dflag;
4554     int modrm, reg, rm, mod, op, opreg, val;
4555     target_ulong next_eip, tval;
4556     target_ulong pc_start = s->base.pc_next;
4557 
4558     s->pc_start = s->pc = pc_start;
4559     s->override = -1;
4560 #ifdef TARGET_X86_64
4561     s->rex_w = false;
4562     s->rex_r = 0;
4563     s->rex_x = 0;
4564     s->rex_b = 0;
4565 #endif
4566     s->rip_offset = 0; /* for relative ip address */
4567     s->vex_l = 0;
4568     s->vex_v = 0;
4569     if (sigsetjmp(s->jmpbuf, 0) != 0) {
4570         gen_exception_gpf(s);
4571         return s->pc;
4572     }
4573 
4574     prefixes = 0;
4575 
4576  next_byte:
4577     b = x86_ldub_code(env, s);
4578     /* Collect prefixes.  */
4579     switch (b) {
4580     case 0xf3:
4581         prefixes |= PREFIX_REPZ;
4582         goto next_byte;
4583     case 0xf2:
4584         prefixes |= PREFIX_REPNZ;
4585         goto next_byte;
4586     case 0xf0:
4587         prefixes |= PREFIX_LOCK;
4588         goto next_byte;
4589     case 0x2e:
4590         s->override = R_CS;
4591         goto next_byte;
4592     case 0x36:
4593         s->override = R_SS;
4594         goto next_byte;
4595     case 0x3e:
4596         s->override = R_DS;
4597         goto next_byte;
4598     case 0x26:
4599         s->override = R_ES;
4600         goto next_byte;
4601     case 0x64:
4602         s->override = R_FS;
4603         goto next_byte;
4604     case 0x65:
4605         s->override = R_GS;
4606         goto next_byte;
4607     case 0x66:
4608         prefixes |= PREFIX_DATA;
4609         goto next_byte;
4610     case 0x67:
4611         prefixes |= PREFIX_ADR;
4612         goto next_byte;
4613 #ifdef TARGET_X86_64
4614     case 0x40 ... 0x4f:
4615         if (CODE64(s)) {
4616             /* REX prefix */
4617             prefixes |= PREFIX_REX;
4618             s->rex_w = (b >> 3) & 1;
4619             s->rex_r = (b & 0x4) << 1;
4620             s->rex_x = (b & 0x2) << 2;
4621             s->rex_b = (b & 0x1) << 3;
4622             goto next_byte;
4623         }
4624         break;
4625 #endif
4626     case 0xc5: /* 2-byte VEX */
4627     case 0xc4: /* 3-byte VEX */
4628         /* VEX prefixes cannot be used except in 32-bit mode.
4629            Otherwise the instruction is LES or LDS.  */
4630         if (CODE32(s) && !VM86(s)) {
4631             static const int pp_prefix[4] = {
4632                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
4633             };
4634             int vex3, vex2 = x86_ldub_code(env, s);
4635 
4636             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
4637                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
4638                    otherwise the instruction is LES or LDS.  */
4639                 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
4640                 break;
4641             }
4642 
4643             /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
4644             if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
4645                             | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
4646                 goto illegal_op;
4647             }
4648 #ifdef TARGET_X86_64
4649             s->rex_r = (~vex2 >> 4) & 8;
4650 #endif
4651             if (b == 0xc5) {
4652                 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
4653                 vex3 = vex2;
4654                 b = x86_ldub_code(env, s) | 0x100;
4655             } else {
4656                 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
4657                 vex3 = x86_ldub_code(env, s);
4658 #ifdef TARGET_X86_64
4659                 s->rex_x = (~vex2 >> 3) & 8;
4660                 s->rex_b = (~vex2 >> 2) & 8;
4661                 s->rex_w = (vex3 >> 7) & 1;
4662 #endif
4663                 switch (vex2 & 0x1f) {
4664                 case 0x01: /* Implied 0f leading opcode bytes.  */
4665                     b = x86_ldub_code(env, s) | 0x100;
4666                     break;
4667                 case 0x02: /* Implied 0f 38 leading opcode bytes.  */
4668                     b = 0x138;
4669                     break;
4670                 case 0x03: /* Implied 0f 3a leading opcode bytes.  */
4671                     b = 0x13a;
4672                     break;
4673                 default:   /* Reserved for future use.  */
4674                     goto unknown_op;
4675                 }
4676             }
4677             s->vex_v = (~vex3 >> 3) & 0xf;
4678             s->vex_l = (vex3 >> 2) & 1;
4679             prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
4680         }
4681         break;
4682     }
4683 
4684     /* Post-process prefixes.  */
4685     if (CODE64(s)) {
4686         /* In 64-bit mode, the default data size is 32-bit.  Select 64-bit
4687            data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
4688            over 0x66 if both are present.  */
4689         dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32);
4690         /* In 64-bit mode, 0x67 selects 32-bit addressing.  */
4691         aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64);
4692     } else {
4693         /* In 16/32-bit mode, 0x66 selects the opposite data size.  */
4694         if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) {
4695             dflag = MO_32;
4696         } else {
4697             dflag = MO_16;
4698         }
4699         /* In 16/32-bit mode, 0x67 selects the opposite addressing.  */
4700         if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) {
4701             aflag = MO_32;
4702         }  else {
4703             aflag = MO_16;
4704         }
4705     }
4706 
4707     s->prefix = prefixes;
4708     s->aflag = aflag;
4709     s->dflag = dflag;
4710 
4711     /* now check op code */
4712  reswitch:
4713     switch(b) {
4714     case 0x0f:
4715         /**************************/
4716         /* extended op code */
4717         b = x86_ldub_code(env, s) | 0x100;
4718         goto reswitch;
4719 
4720         /**************************/
4721         /* arith & logic */
4722     case 0x00 ... 0x05:
4723     case 0x08 ... 0x0d:
4724     case 0x10 ... 0x15:
4725     case 0x18 ... 0x1d:
4726     case 0x20 ... 0x25:
4727     case 0x28 ... 0x2d:
4728     case 0x30 ... 0x35:
4729     case 0x38 ... 0x3d:
4730         {
4731             int op, f, val;
4732             op = (b >> 3) & 7;
4733             f = (b >> 1) & 3;
4734 
4735             ot = mo_b_d(b, dflag);
4736 
4737             switch(f) {
4738             case 0: /* OP Ev, Gv */
4739                 modrm = x86_ldub_code(env, s);
4740                 reg = ((modrm >> 3) & 7) | REX_R(s);
4741                 mod = (modrm >> 6) & 3;
4742                 rm = (modrm & 7) | REX_B(s);
4743                 if (mod != 3) {
4744                     gen_lea_modrm(env, s, modrm);
4745                     opreg = OR_TMP0;
4746                 } else if (op == OP_XORL && rm == reg) {
4747                 xor_zero:
4748                     /* xor reg, reg optimisation */
4749                     set_cc_op(s, CC_OP_CLR);
4750                     tcg_gen_movi_tl(s->T0, 0);
4751                     gen_op_mov_reg_v(s, ot, reg, s->T0);
4752                     break;
4753                 } else {
4754                     opreg = rm;
4755                 }
4756                 gen_op_mov_v_reg(s, ot, s->T1, reg);
4757                 gen_op(s, op, ot, opreg);
4758                 break;
4759             case 1: /* OP Gv, Ev */
4760                 modrm = x86_ldub_code(env, s);
4761                 mod = (modrm >> 6) & 3;
4762                 reg = ((modrm >> 3) & 7) | REX_R(s);
4763                 rm = (modrm & 7) | REX_B(s);
4764                 if (mod != 3) {
4765                     gen_lea_modrm(env, s, modrm);
4766                     gen_op_ld_v(s, ot, s->T1, s->A0);
4767                 } else if (op == OP_XORL && rm == reg) {
4768                     goto xor_zero;
4769                 } else {
4770                     gen_op_mov_v_reg(s, ot, s->T1, rm);
4771                 }
4772                 gen_op(s, op, ot, reg);
4773                 break;
4774             case 2: /* OP A, Iv */
4775                 val = insn_get(env, s, ot);
4776                 tcg_gen_movi_tl(s->T1, val);
4777                 gen_op(s, op, ot, OR_EAX);
4778                 break;
4779             }
4780         }
4781         break;
4782 
4783     case 0x82:
4784         if (CODE64(s))
4785             goto illegal_op;
4786         /* fall through */
4787     case 0x80: /* GRP1 */
4788     case 0x81:
4789     case 0x83:
4790         {
4791             int val;
4792 
4793             ot = mo_b_d(b, dflag);
4794 
4795             modrm = x86_ldub_code(env, s);
4796             mod = (modrm >> 6) & 3;
4797             rm = (modrm & 7) | REX_B(s);
4798             op = (modrm >> 3) & 7;
4799 
4800             if (mod != 3) {
4801                 if (b == 0x83)
4802                     s->rip_offset = 1;
4803                 else
4804                     s->rip_offset = insn_const_size(ot);
4805                 gen_lea_modrm(env, s, modrm);
4806                 opreg = OR_TMP0;
4807             } else {
4808                 opreg = rm;
4809             }
4810 
4811             switch(b) {
4812             default:
4813             case 0x80:
4814             case 0x81:
4815             case 0x82:
4816                 val = insn_get(env, s, ot);
4817                 break;
4818             case 0x83:
4819                 val = (int8_t)insn_get(env, s, MO_8);
4820                 break;
4821             }
4822             tcg_gen_movi_tl(s->T1, val);
4823             gen_op(s, op, ot, opreg);
4824         }
4825         break;
4826 
4827         /**************************/
4828         /* inc, dec, and other misc arith */
4829     case 0x40 ... 0x47: /* inc Gv */
4830         ot = dflag;
4831         gen_inc(s, ot, OR_EAX + (b & 7), 1);
4832         break;
4833     case 0x48 ... 0x4f: /* dec Gv */
4834         ot = dflag;
4835         gen_inc(s, ot, OR_EAX + (b & 7), -1);
4836         break;
4837     case 0xf6: /* GRP3 */
4838     case 0xf7:
4839         ot = mo_b_d(b, dflag);
4840 
4841         modrm = x86_ldub_code(env, s);
4842         mod = (modrm >> 6) & 3;
4843         rm = (modrm & 7) | REX_B(s);
4844         op = (modrm >> 3) & 7;
4845         if (mod != 3) {
4846             if (op == 0) {
4847                 s->rip_offset = insn_const_size(ot);
4848             }
4849             gen_lea_modrm(env, s, modrm);
4850             /* For those below that handle locked memory, don't load here.  */
4851             if (!(s->prefix & PREFIX_LOCK)
4852                 || op != 2) {
4853                 gen_op_ld_v(s, ot, s->T0, s->A0);
4854             }
4855         } else {
4856             gen_op_mov_v_reg(s, ot, s->T0, rm);
4857         }
4858 
4859         switch(op) {
4860         case 0: /* test */
4861             val = insn_get(env, s, ot);
4862             tcg_gen_movi_tl(s->T1, val);
4863             gen_op_testl_T0_T1_cc(s);
4864             set_cc_op(s, CC_OP_LOGICB + ot);
4865             break;
4866         case 2: /* not */
4867             if (s->prefix & PREFIX_LOCK) {
4868                 if (mod == 3) {
4869                     goto illegal_op;
4870                 }
4871                 tcg_gen_movi_tl(s->T0, ~0);
4872                 tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0,
4873                                             s->mem_index, ot | MO_LE);
4874             } else {
4875                 tcg_gen_not_tl(s->T0, s->T0);
4876                 if (mod != 3) {
4877                     gen_op_st_v(s, ot, s->T0, s->A0);
4878                 } else {
4879                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4880                 }
4881             }
4882             break;
4883         case 3: /* neg */
4884             if (s->prefix & PREFIX_LOCK) {
4885                 TCGLabel *label1;
4886                 TCGv a0, t0, t1, t2;
4887 
4888                 if (mod == 3) {
4889                     goto illegal_op;
4890                 }
4891                 a0 = tcg_temp_local_new();
4892                 t0 = tcg_temp_local_new();
4893                 label1 = gen_new_label();
4894 
4895                 tcg_gen_mov_tl(a0, s->A0);
4896                 tcg_gen_mov_tl(t0, s->T0);
4897 
4898                 gen_set_label(label1);
4899                 t1 = tcg_temp_new();
4900                 t2 = tcg_temp_new();
4901                 tcg_gen_mov_tl(t2, t0);
4902                 tcg_gen_neg_tl(t1, t0);
4903                 tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1,
4904                                           s->mem_index, ot | MO_LE);
4905                 tcg_temp_free(t1);
4906                 tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1);
4907 
4908                 tcg_temp_free(t2);
4909                 tcg_temp_free(a0);
4910                 tcg_gen_mov_tl(s->T0, t0);
4911                 tcg_temp_free(t0);
4912             } else {
4913                 tcg_gen_neg_tl(s->T0, s->T0);
4914                 if (mod != 3) {
4915                     gen_op_st_v(s, ot, s->T0, s->A0);
4916                 } else {
4917                     gen_op_mov_reg_v(s, ot, rm, s->T0);
4918                 }
4919             }
4920             gen_op_update_neg_cc(s);
4921             set_cc_op(s, CC_OP_SUBB + ot);
4922             break;
4923         case 4: /* mul */
4924             switch(ot) {
4925             case MO_8:
4926                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4927                 tcg_gen_ext8u_tl(s->T0, s->T0);
4928                 tcg_gen_ext8u_tl(s->T1, s->T1);
4929                 /* XXX: use 32 bit mul which could be faster */
4930                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4931                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4932                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4933                 tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00);
4934                 set_cc_op(s, CC_OP_MULB);
4935                 break;
4936             case MO_16:
4937                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4938                 tcg_gen_ext16u_tl(s->T0, s->T0);
4939                 tcg_gen_ext16u_tl(s->T1, s->T1);
4940                 /* XXX: use 32 bit mul which could be faster */
4941                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4942                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4943                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4944                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4945                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4946                 tcg_gen_mov_tl(cpu_cc_src, s->T0);
4947                 set_cc_op(s, CC_OP_MULW);
4948                 break;
4949             default:
4950             case MO_32:
4951                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
4952                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
4953                 tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
4954                                   s->tmp2_i32, s->tmp3_i32);
4955                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
4956                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
4957                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4958                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4959                 set_cc_op(s, CC_OP_MULL);
4960                 break;
4961 #ifdef TARGET_X86_64
4962             case MO_64:
4963                 tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
4964                                   s->T0, cpu_regs[R_EAX]);
4965                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
4966                 tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
4967                 set_cc_op(s, CC_OP_MULQ);
4968                 break;
4969 #endif
4970             }
4971             break;
4972         case 5: /* imul */
4973             switch(ot) {
4974             case MO_8:
4975                 gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX);
4976                 tcg_gen_ext8s_tl(s->T0, s->T0);
4977                 tcg_gen_ext8s_tl(s->T1, s->T1);
4978                 /* XXX: use 32 bit mul which could be faster */
4979                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4980                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4981                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4982                 tcg_gen_ext8s_tl(s->tmp0, s->T0);
4983                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4984                 set_cc_op(s, CC_OP_MULB);
4985                 break;
4986             case MO_16:
4987                 gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX);
4988                 tcg_gen_ext16s_tl(s->T0, s->T0);
4989                 tcg_gen_ext16s_tl(s->T1, s->T1);
4990                 /* XXX: use 32 bit mul which could be faster */
4991                 tcg_gen_mul_tl(s->T0, s->T0, s->T1);
4992                 gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
4993                 tcg_gen_mov_tl(cpu_cc_dst, s->T0);
4994                 tcg_gen_ext16s_tl(s->tmp0, s->T0);
4995                 tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
4996                 tcg_gen_shri_tl(s->T0, s->T0, 16);
4997                 gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
4998                 set_cc_op(s, CC_OP_MULW);
4999                 break;
5000             default:
5001             case MO_32:
5002                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5003                 tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]);
5004                 tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5005                                   s->tmp2_i32, s->tmp3_i32);
5006                 tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32);
5007                 tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32);
5008                 tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5009                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5010                 tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5011                 tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5012                 set_cc_op(s, CC_OP_MULL);
5013                 break;
5014 #ifdef TARGET_X86_64
5015             case MO_64:
5016                 tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
5017                                   s->T0, cpu_regs[R_EAX]);
5018                 tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
5019                 tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
5020                 tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
5021                 set_cc_op(s, CC_OP_MULQ);
5022                 break;
5023 #endif
5024             }
5025             break;
5026         case 6: /* div */
5027             switch(ot) {
5028             case MO_8:
5029                 gen_helper_divb_AL(cpu_env, s->T0);
5030                 break;
5031             case MO_16:
5032                 gen_helper_divw_AX(cpu_env, s->T0);
5033                 break;
5034             default:
5035             case MO_32:
5036                 gen_helper_divl_EAX(cpu_env, s->T0);
5037                 break;
5038 #ifdef TARGET_X86_64
5039             case MO_64:
5040                 gen_helper_divq_EAX(cpu_env, s->T0);
5041                 break;
5042 #endif
5043             }
5044             break;
5045         case 7: /* idiv */
5046             switch(ot) {
5047             case MO_8:
5048                 gen_helper_idivb_AL(cpu_env, s->T0);
5049                 break;
5050             case MO_16:
5051                 gen_helper_idivw_AX(cpu_env, s->T0);
5052                 break;
5053             default:
5054             case MO_32:
5055                 gen_helper_idivl_EAX(cpu_env, s->T0);
5056                 break;
5057 #ifdef TARGET_X86_64
5058             case MO_64:
5059                 gen_helper_idivq_EAX(cpu_env, s->T0);
5060                 break;
5061 #endif
5062             }
5063             break;
5064         default:
5065             goto unknown_op;
5066         }
5067         break;
5068 
5069     case 0xfe: /* GRP4 */
5070     case 0xff: /* GRP5 */
5071         ot = mo_b_d(b, dflag);
5072 
5073         modrm = x86_ldub_code(env, s);
5074         mod = (modrm >> 6) & 3;
5075         rm = (modrm & 7) | REX_B(s);
5076         op = (modrm >> 3) & 7;
5077         if (op >= 2 && b == 0xfe) {
5078             goto unknown_op;
5079         }
5080         if (CODE64(s)) {
5081             if (op == 2 || op == 4) {
5082                 /* operand size for jumps is 64 bit */
5083                 ot = MO_64;
5084             } else if (op == 3 || op == 5) {
5085                 ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16;
5086             } else if (op == 6) {
5087                 /* default push size is 64 bit */
5088                 ot = mo_pushpop(s, dflag);
5089             }
5090         }
5091         if (mod != 3) {
5092             gen_lea_modrm(env, s, modrm);
5093             if (op >= 2 && op != 3 && op != 5)
5094                 gen_op_ld_v(s, ot, s->T0, s->A0);
5095         } else {
5096             gen_op_mov_v_reg(s, ot, s->T0, rm);
5097         }
5098 
5099         switch(op) {
5100         case 0: /* inc Ev */
5101             if (mod != 3)
5102                 opreg = OR_TMP0;
5103             else
5104                 opreg = rm;
5105             gen_inc(s, ot, opreg, 1);
5106             break;
5107         case 1: /* dec Ev */
5108             if (mod != 3)
5109                 opreg = OR_TMP0;
5110             else
5111                 opreg = rm;
5112             gen_inc(s, ot, opreg, -1);
5113             break;
5114         case 2: /* call Ev */
5115             /* XXX: optimize if memory (no 'and' is necessary) */
5116             if (dflag == MO_16) {
5117                 tcg_gen_ext16u_tl(s->T0, s->T0);
5118             }
5119             next_eip = s->pc - s->cs_base;
5120             tcg_gen_movi_tl(s->T1, next_eip);
5121             gen_push_v(s, s->T1);
5122             gen_op_jmp_v(s->T0);
5123             gen_bnd_jmp(s);
5124             gen_jr(s, s->T0);
5125             break;
5126         case 3: /* lcall Ev */
5127             if (mod == 3) {
5128                 goto illegal_op;
5129             }
5130             gen_op_ld_v(s, ot, s->T1, s->A0);
5131             gen_add_A0_im(s, 1 << ot);
5132             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5133         do_lcall:
5134             if (PE(s) && !VM86(s)) {
5135                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5136                 gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1,
5137                                            tcg_const_i32(dflag - 1),
5138                                            tcg_const_tl(s->pc - s->cs_base));
5139             } else {
5140                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5141                 gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1,
5142                                       tcg_const_i32(dflag - 1),
5143                                       tcg_const_i32(s->pc - s->cs_base));
5144             }
5145             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5146             gen_jr(s, s->tmp4);
5147             break;
5148         case 4: /* jmp Ev */
5149             if (dflag == MO_16) {
5150                 tcg_gen_ext16u_tl(s->T0, s->T0);
5151             }
5152             gen_op_jmp_v(s->T0);
5153             gen_bnd_jmp(s);
5154             gen_jr(s, s->T0);
5155             break;
5156         case 5: /* ljmp Ev */
5157             if (mod == 3) {
5158                 goto illegal_op;
5159             }
5160             gen_op_ld_v(s, ot, s->T1, s->A0);
5161             gen_add_A0_im(s, 1 << ot);
5162             gen_op_ld_v(s, MO_16, s->T0, s->A0);
5163         do_ljmp:
5164             if (PE(s) && !VM86(s)) {
5165                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5166                 gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1,
5167                                           tcg_const_tl(s->pc - s->cs_base));
5168             } else {
5169                 gen_op_movl_seg_T0_vm(s, R_CS);
5170                 gen_op_jmp_v(s->T1);
5171             }
5172             tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip));
5173             gen_jr(s, s->tmp4);
5174             break;
5175         case 6: /* push Ev */
5176             gen_push_v(s, s->T0);
5177             break;
5178         default:
5179             goto unknown_op;
5180         }
5181         break;
5182 
5183     case 0x84: /* test Ev, Gv */
5184     case 0x85:
5185         ot = mo_b_d(b, dflag);
5186 
5187         modrm = x86_ldub_code(env, s);
5188         reg = ((modrm >> 3) & 7) | REX_R(s);
5189 
5190         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5191         gen_op_mov_v_reg(s, ot, s->T1, reg);
5192         gen_op_testl_T0_T1_cc(s);
5193         set_cc_op(s, CC_OP_LOGICB + ot);
5194         break;
5195 
5196     case 0xa8: /* test eAX, Iv */
5197     case 0xa9:
5198         ot = mo_b_d(b, dflag);
5199         val = insn_get(env, s, ot);
5200 
5201         gen_op_mov_v_reg(s, ot, s->T0, OR_EAX);
5202         tcg_gen_movi_tl(s->T1, val);
5203         gen_op_testl_T0_T1_cc(s);
5204         set_cc_op(s, CC_OP_LOGICB + ot);
5205         break;
5206 
5207     case 0x98: /* CDQE/CWDE/CBW: sign-extend the accumulator in place */
5208         switch (dflag) {
5209 #ifdef TARGET_X86_64
5210         case MO_64: /* CDQE: sign-extend EAX into RAX */
5211             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5212             tcg_gen_ext32s_tl(s->T0, s->T0);
5213             gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0);
5214             break;
5215 #endif
5216         case MO_32: /* CWDE: sign-extend AX into EAX */
5217             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5218             tcg_gen_ext16s_tl(s->T0, s->T0);
5219             gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0);
5220             break;
5221         case MO_16: /* CBW: sign-extend AL into AX */
5222             gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX);
5223             tcg_gen_ext8s_tl(s->T0, s->T0);
5224             gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
5225             break;
5226         default:
5227             tcg_abort();
5228         }
5229         break;
5230     case 0x99: /* CQO/CDQ/CWD: fill the data register with the accumulator's sign */
5231         switch (dflag) {
5232 #ifdef TARGET_X86_64
5233         case MO_64: /* CQO: RDX = RAX >> 63 (arithmetic) */
5234             gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX);
5235             tcg_gen_sari_tl(s->T0, s->T0, 63);
5236             gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0);
5237             break;
5238 #endif
5239         case MO_32: /* CDQ: EDX = sign of EAX replicated over 32 bits */
5240             gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX);
5241             tcg_gen_ext32s_tl(s->T0, s->T0);
5242             tcg_gen_sari_tl(s->T0, s->T0, 31);
5243             gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0);
5244             break;
5245         case MO_16: /* CWD: DX = sign of AX replicated over 16 bits */
5246             gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX);
5247             tcg_gen_ext16s_tl(s->T0, s->T0);
5248             tcg_gen_sari_tl(s->T0, s->T0, 15);
5249             gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0);
5250             break;
5251         default:
5252             tcg_abort();
5253         }
5254         break;
5255     case 0x1af: /* imul Gv, Ev */
5256     case 0x69: /* imul Gv, Ev, I */
5257     case 0x6b: /* imul Gv, Ev, Ib (immediate byte is sign-extended) */
5258         ot = dflag;
5259         modrm = x86_ldub_code(env, s);
5260         reg = ((modrm >> 3) & 7) | REX_R(s);
5261         if (b == 0x69)
5262             s->rip_offset = insn_const_size(ot);
5263         else if (b == 0x6b)
5264             s->rip_offset = 1;
5265         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5266         if (b == 0x69) {
5267             val = insn_get(env, s, ot);
5268             tcg_gen_movi_tl(s->T1, val);
5269         } else if (b == 0x6b) {
5270             val = (int8_t)insn_get(env, s, MO_8);
5271             tcg_gen_movi_tl(s->T1, val);
5272         } else {
5273             gen_op_mov_v_reg(s, ot, s->T1, reg);
5274         }
5275         switch (ot) {
5276 #ifdef TARGET_X86_64
5277         case MO_64:
5278             tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1);
5279             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5280             /* cc_src = (low >> 63) - high: non-zero iff high is not the sign extension of low */
5281             tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
5282             tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1);
5283             break;
5284 #endif
5285         case MO_32:
5286             tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
5287             tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
5288             tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32,
5289                               s->tmp2_i32, s->tmp3_i32);
5290             tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32);
5291             tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31);
5292             tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
5293             tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32);
5294             tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32);
5295             break;
5296         default: /* 16-bit operands: multiply the sign-extended values at target width */
5297             tcg_gen_ext16s_tl(s->T0, s->T0);
5298             tcg_gen_ext16s_tl(s->T1, s->T1);
5299             /* XXX: use 32 bit mul which could be faster */
5300             tcg_gen_mul_tl(s->T0, s->T0, s->T1);
5301             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
5302             tcg_gen_ext16s_tl(s->tmp0, s->T0);
5303             tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0);
5304             gen_op_mov_reg_v(s, ot, reg, s->T0);
5305             break;
5306         }
5307         set_cc_op(s, CC_OP_MULB + ot);
5308         break;
5308     case 0x1c0:
5309     case 0x1c1: /* xadd Ev, Gv */
5310         ot = mo_b_d(b, dflag);
5311         modrm = x86_ldub_code(env, s);
5312         reg = ((modrm >> 3) & 7) | REX_R(s);
5313         mod = (modrm >> 6) & 3;
5314         gen_op_mov_v_reg(s, ot, s->T0, reg);
5315         if (mod == 3) {
5316             rm = (modrm & 7) | REX_B(s);
5317             gen_op_mov_v_reg(s, ot, s->T1, rm);
5318             tcg_gen_add_tl(s->T0, s->T0, s->T1);
5319             gen_op_mov_reg_v(s, ot, reg, s->T1);
5320             gen_op_mov_reg_v(s, ot, rm, s->T0);
5321         } else {
5322             gen_lea_modrm(env, s, modrm);
5323             if (s->prefix & PREFIX_LOCK) {
5324                 tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
5325                                             s->mem_index, ot | MO_LE);
5326                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5327             } else {
5328                 gen_op_ld_v(s, ot, s->T1, s->A0);
5329                 tcg_gen_add_tl(s->T0, s->T0, s->T1);
5330                 gen_op_st_v(s, ot, s->T0, s->A0);
5331             }
5332             gen_op_mov_reg_v(s, ot, reg, s->T1);
5333         }
5334         gen_op_update2_cc(s);
5335         set_cc_op(s, CC_OP_ADDB + ot);
5336         break;
5337     case 0x1b0:
5338     case 0x1b1: /* cmpxchg Ev, Gv */
5339         {
5340             TCGv oldv, newv, cmpv;
5341 
5342             ot = mo_b_d(b, dflag);
5343             modrm = x86_ldub_code(env, s);
5344             reg = ((modrm >> 3) & 7) | REX_R(s);
5345             mod = (modrm >> 6) & 3;
5346             oldv = tcg_temp_new();
5347             newv = tcg_temp_new();
5348             cmpv = tcg_temp_new();
5349             gen_op_mov_v_reg(s, ot, newv, reg);
5350             tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
5351 
5352             if (s->prefix & PREFIX_LOCK) {
5353                 if (mod == 3) {
5354                     goto illegal_op;
5355                 }
5356                 gen_lea_modrm(env, s, modrm);
5357                 tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
5358                                           s->mem_index, ot | MO_LE);
5359                 gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5360             } else {
5361                 if (mod == 3) {
5362                     rm = (modrm & 7) | REX_B(s);
5363                     gen_op_mov_v_reg(s, ot, oldv, rm);
5364                 } else {
5365                     gen_lea_modrm(env, s, modrm);
5366                     gen_op_ld_v(s, ot, oldv, s->A0);
5367                     rm = 0; /* avoid warning */
5368                 }
5369                 gen_extu(ot, oldv);
5370                 gen_extu(ot, cmpv);
5371                 /* store value = (old == cmp ? new : old);  */
5372                 tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
5373                 if (mod == 3) {
5374                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5375                     gen_op_mov_reg_v(s, ot, rm, newv);
5376                 } else {
5377                     /* Perform an unconditional store cycle like physical cpu;
5378                        must be before changing accumulator to ensure
5379                        idempotency if the store faults and the instruction
5380                        is restarted */
5381                     gen_op_st_v(s, ot, newv, s->A0);
5382                     gen_op_mov_reg_v(s, ot, R_EAX, oldv);
5383                 }
5384             }
5385             tcg_gen_mov_tl(cpu_cc_src, oldv);
5386             tcg_gen_mov_tl(s->cc_srcT, cmpv);
5387             tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
5388             set_cc_op(s, CC_OP_SUBB + ot);
5389             tcg_temp_free(oldv);
5390             tcg_temp_free(newv);
5391             tcg_temp_free(cmpv);
5392         }
5393         break;
5394     case 0x1c7: /* cmpxchg8b/cmpxchg16b, rdrand, rdseed (selected by modrm reg field) */
5395         modrm = x86_ldub_code(env, s);
5396         mod = (modrm >> 6) & 3;
5397         switch ((modrm >> 3) & 7) {
5398         case 1: /* CMPXCHG8B, CMPXCHG16B */
5399             if (mod == 3) { /* only the memory form is accepted */
5400                 goto illegal_op;
5401             }
5402 #ifdef TARGET_X86_64
5403             if (dflag == MO_64) { /* 64-bit operand size selects the 16-byte variant */
5404                 if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) {
5405                     goto illegal_op;
5406                 }
5407                 gen_lea_modrm(env, s, modrm);
5408                 if ((s->prefix & PREFIX_LOCK) &&
5409                     (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5410                     gen_helper_cmpxchg16b(cpu_env, s->A0);
5411                 } else {
5412                     gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0);
5413                 }
5414                 set_cc_op(s, CC_OP_EFLAGS);
5415                 break;
5416             }
5417 #endif
5418             if (!(s->cpuid_features & CPUID_CX8)) {
5419                 goto illegal_op;
5420             }
5421             gen_lea_modrm(env, s, modrm);
5422             /* the non-"unlocked" helper is used only with a LOCK prefix in a CF_PARALLEL TB */
5423             if ((s->prefix & PREFIX_LOCK) &&
5424                 (tb_cflags(s->base.tb) & CF_PARALLEL)) {
5425                 gen_helper_cmpxchg8b(cpu_env, s->A0);
5426             } else {
5427                 gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0);
5428             }
5429             set_cc_op(s, CC_OP_EFLAGS);
5430             break;
5431 
5432         case 7: /* RDSEED; NOTE(review): shares RDRAND's CPUID check and helper below - confirm intended */
5433         case 6: /* RDRAND */
5434             if (mod != 3 ||
5435                 (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) ||
5436                 !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) {
5437                 goto illegal_op;
5438             }
5439             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5440                 gen_io_start();
5441             }
5442             gen_helper_rdrand(s->T0, cpu_env);
5443             rm = (modrm & 7) | REX_B(s);
5444             gen_op_mov_reg_v(s, dflag, rm, s->T0);
5445             set_cc_op(s, CC_OP_EFLAGS);
5446             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
5447                 gen_jmp(s, s->pc - s->cs_base);
5448             }
5449             break;
5450 
5451         default:
5452             goto illegal_op;
5453         }
5454         break;
5454 
5455         /**************************/
5456         /* push/pop */
5457     case 0x50 ... 0x57: /* push */
5458         gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s));
5459         gen_push_v(s, s->T0);
5460         break;
5461     case 0x58 ... 0x5f: /* pop */
5462         ot = gen_pop_T0(s);
5463         /* NOTE: order is important for pop %sp */
5464         gen_pop_update(s, ot);
5465         gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0);
5466         break;
5467     case 0x60: /* pusha */
5468         if (CODE64(s))
5469             goto illegal_op;
5470         gen_pusha(s);
5471         break;
5472     case 0x61: /* popa */
5473         if (CODE64(s))
5474             goto illegal_op;
5475         gen_popa(s);
5476         break;
5477     case 0x68: /* push Iv */
5478     case 0x6a:
5479         ot = mo_pushpop(s, dflag);
5480         if (b == 0x68)
5481             val = insn_get(env, s, ot);
5482         else
5483             val = (int8_t)insn_get(env, s, MO_8);
5484         tcg_gen_movi_tl(s->T0, val);
5485         gen_push_v(s, s->T0);
5486         break;
5487     case 0x8f: /* pop Ev */
5488         modrm = x86_ldub_code(env, s);
5489         mod = (modrm >> 6) & 3;
5490         ot = gen_pop_T0(s);
5491         if (mod == 3) {
5492             /* NOTE: order is important for pop %sp */
5493             gen_pop_update(s, ot);
5494             rm = (modrm & 7) | REX_B(s);
5495             gen_op_mov_reg_v(s, ot, rm, s->T0);
5496         } else {
5497             /* NOTE: order is important too for MMU exceptions */
5498             s->popl_esp_hack = 1 << ot;
5499             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5500             s->popl_esp_hack = 0;
5501             gen_pop_update(s, ot);
5502         }
5503         break;
5504     case 0xc8: /* enter */
5505         {
5506             int level;
5507             val = x86_lduw_code(env, s);
5508             level = x86_ldub_code(env, s);
5509             gen_enter(s, val, level);
5510         }
5511         break;
5512     case 0xc9: /* leave */
5513         gen_leave(s);
5514         break;
5515     case 0x06: /* push es */
5516     case 0x0e: /* push cs */
5517     case 0x16: /* push ss */
5518     case 0x1e: /* push ds */
5519         if (CODE64(s))
5520             goto illegal_op;
5521         gen_op_movl_T0_seg(s, b >> 3);
5522         gen_push_v(s, s->T0);
5523         break;
5524     case 0x1a0: /* push fs */
5525     case 0x1a8: /* push gs */
5526         gen_op_movl_T0_seg(s, (b >> 3) & 7);
5527         gen_push_v(s, s->T0);
5528         break;
5529     case 0x07: /* pop es */
5530     case 0x17: /* pop ss */
5531     case 0x1f: /* pop ds */
5532         if (CODE64(s))
5533             goto illegal_op;
5534         reg = b >> 3;
5535         ot = gen_pop_T0(s);
5536         gen_movl_seg_T0(s, reg);
5537         gen_pop_update(s, ot);
5538         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5539         if (s->base.is_jmp) {
5540             gen_jmp_im(s, s->pc - s->cs_base);
5541             if (reg == R_SS) {
5542                 s->flags &= ~HF_TF_MASK;
5543                 gen_eob_inhibit_irq(s, true);
5544             } else {
5545                 gen_eob(s);
5546             }
5547         }
5548         break;
5549     case 0x1a1: /* pop fs */
5550     case 0x1a9: /* pop gs */
5551         ot = gen_pop_T0(s);
5552         gen_movl_seg_T0(s, (b >> 3) & 7);
5553         gen_pop_update(s, ot);
5554         if (s->base.is_jmp) {
5555             gen_jmp_im(s, s->pc - s->cs_base);
5556             gen_eob(s);
5557         }
5558         break;
5559 
5560         /**************************/
5561         /* mov */
5562     case 0x88:
5563     case 0x89: /* mov Ev, Gv */
5564         ot = mo_b_d(b, dflag);
5565         modrm = x86_ldub_code(env, s);
5566         reg = ((modrm >> 3) & 7) | REX_R(s);
5567 
5568         /* generate a generic store of register 'reg' to the modrm operand */
5569         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
5570         break;
5571     case 0xc6:
5572     case 0xc7: /* mov Ev, Iv */
5573         ot = mo_b_d(b, dflag);
5574         modrm = x86_ldub_code(env, s);
5575         mod = (modrm >> 6) & 3;
5576         if (mod != 3) {
5577             s->rip_offset = insn_const_size(ot);
5578             gen_lea_modrm(env, s, modrm);
5579         }
5580         val = insn_get(env, s, ot);
5581         tcg_gen_movi_tl(s->T0, val);
5582         if (mod != 3) {
5583             gen_op_st_v(s, ot, s->T0, s->A0);
5584         } else {
5585             gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0);
5586         }
5587         break;
5588     case 0x8a:
5589     case 0x8b: /* mov Gv, Ev */
5590         ot = mo_b_d(b, dflag);
5591         modrm = x86_ldub_code(env, s);
5592         reg = ((modrm >> 3) & 7) | REX_R(s);
5593 
5594         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
5595         gen_op_mov_reg_v(s, ot, reg, s->T0);
5596         break;
5597     case 0x8e: /* mov seg, Gv */
5598         modrm = x86_ldub_code(env, s);
5599         reg = (modrm >> 3) & 7;
5600         if (reg >= 6 || reg == R_CS)
5601             goto illegal_op;
5602         gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
5603         gen_movl_seg_T0(s, reg);
5604         /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp.  */
5605         if (s->base.is_jmp) {
5606             gen_jmp_im(s, s->pc - s->cs_base);
5607             if (reg == R_SS) {
5608                 s->flags &= ~HF_TF_MASK;
5609                 gen_eob_inhibit_irq(s, true);
5610             } else {
5611                 gen_eob(s);
5612             }
5613         }
5614         break;
5615     case 0x8c: /* mov Gv, seg */
5616         modrm = x86_ldub_code(env, s);
5617         reg = (modrm >> 3) & 7;
5618         mod = (modrm >> 6) & 3;
5619         if (reg >= 6)
5620             goto illegal_op;
5621         gen_op_movl_T0_seg(s, reg);
5622         ot = mod == 3 ? dflag : MO_16;
5623         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
5624         break;
5625 
5626     case 0x1b6: /* movzbS Gv, Eb */
5627     case 0x1b7: /* movzwS Gv, Ew */
5628     case 0x1be: /* movsbS Gv, Eb */
5629     case 0x1bf: /* movswS Gv, Ew */
5630         {
5631             MemOp d_ot;
5632             MemOp s_ot;
5633 
5634             /* d_ot is the size of destination */
5635             d_ot = dflag;
5636             /* ot is the size of source: MO_8, or MO_16 when opcode bit 0 is set */
5637             ot = (b & 1) + MO_8;
5638             /* s_ot is the sign+size of source; opcode bit 3 selects sign extension */
5639             s_ot = b & 8 ? MO_SIGN | ot : ot;
5640 
5641             modrm = x86_ldub_code(env, s);
5642             reg = ((modrm >> 3) & 7) | REX_R(s);
5643             mod = (modrm >> 6) & 3;
5644             rm = (modrm & 7) | REX_B(s);
5645 
5646             if (mod == 3) {
5647                 if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) {
5648                     /* signed byte taken from bits 8..15 of register rm - 4 */
5649                     tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8);
5650                 } else {
5651                     gen_op_mov_v_reg(s, ot, s->T0, rm);
5652                     switch (s_ot) {
5653                     case MO_UB:
5654                         tcg_gen_ext8u_tl(s->T0, s->T0);
5655                         break;
5656                     case MO_SB:
5657                         tcg_gen_ext8s_tl(s->T0, s->T0);
5658                         break;
5659                     case MO_UW:
5660                         tcg_gen_ext16u_tl(s->T0, s->T0);
5661                         break;
5662                     default:
5663                     case MO_SW:
5664                         tcg_gen_ext16s_tl(s->T0, s->T0);
5665                         break;
5666                     }
5667                 }
5668                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5669             } else {
5670                 /* memory source: the load itself is issued with the sign+size s_ot */
5671                 gen_lea_modrm(env, s, modrm);
5672                 gen_op_ld_v(s, s_ot, s->T0, s->A0);
5673                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
5674             }
5675         }
5676         break;
5675 
5676     case 0x8d: /* lea */
5677         modrm = x86_ldub_code(env, s);
5678         mod = (modrm >> 6) & 3;
5679         if (mod == 3)
5680             goto illegal_op;
5681         reg = ((modrm >> 3) & 7) | REX_R(s);
5682         {
5683             AddressParts a = gen_lea_modrm_0(env, s, modrm);
5684             TCGv ea = gen_lea_modrm_1(s, a);
5685             gen_lea_v_seg(s, s->aflag, ea, -1, -1);
5686             gen_op_mov_reg_v(s, dflag, reg, s->A0);
5687         }
5688         break;
5689 
5690     case 0xa0: /* mov EAX, Ov */
5691     case 0xa1:
5692     case 0xa2: /* mov Ov, EAX */
5693     case 0xa3:
5694         {
5695             target_ulong offset_addr;
5696 
5697             ot = mo_b_d(b, dflag);
5698             switch (s->aflag) {
5699 #ifdef TARGET_X86_64
5700             case MO_64:
5701                 offset_addr = x86_ldq_code(env, s);
5702                 break;
5703 #endif
5704             default:
5705                 offset_addr = insn_get(env, s, s->aflag);
5706                 break;
5707             }
5708             tcg_gen_movi_tl(s->A0, offset_addr);
5709             gen_add_A0_ds_seg(s);
5710             if ((b & 2) == 0) {
5711                 gen_op_ld_v(s, ot, s->T0, s->A0);
5712                 gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
5713             } else {
5714                 gen_op_mov_v_reg(s, ot, s->T0, R_EAX);
5715                 gen_op_st_v(s, ot, s->T0, s->A0);
5716             }
5717         }
5718         break;
5719     case 0xd7: /* xlat */
5720         tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]);
5721         tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]);
5722         tcg_gen_add_tl(s->A0, s->A0, s->T0);
5723         gen_extu(s->aflag, s->A0);
5724         gen_add_A0_ds_seg(s);
5725         gen_op_ld_v(s, MO_8, s->T0, s->A0);
5726         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
5727         break;
5728     case 0xb0 ... 0xb7: /* mov R, Ib */
5729         val = insn_get(env, s, MO_8);
5730         tcg_gen_movi_tl(s->T0, val);
5731         gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0);
5732         break;
5733     case 0xb8 ... 0xbf: /* mov R, Iv */
5734 #ifdef TARGET_X86_64
5735         if (dflag == MO_64) {
5736             uint64_t tmp;
5737             /* 64 bit case */
5738             tmp = x86_ldq_code(env, s);
5739             reg = (b & 7) | REX_B(s);
5740             tcg_gen_movi_tl(s->T0, tmp);
5741             gen_op_mov_reg_v(s, MO_64, reg, s->T0);
5742         } else
5743 #endif
5744         {
5745             ot = dflag;
5746             val = insn_get(env, s, ot);
5747             reg = (b & 7) | REX_B(s);
5748             tcg_gen_movi_tl(s->T0, val);
5749             gen_op_mov_reg_v(s, ot, reg, s->T0);
5750         }
5751         break;
5752 
5753     case 0x91 ... 0x97: /* xchg R, EAX */
5754     do_xchg_reg_eax:
5755         ot = dflag;
5756         reg = (b & 7) | REX_B(s);
5757         rm = R_EAX;
5758         goto do_xchg_reg;
5759     case 0x86:
5760     case 0x87: /* xchg Ev, Gv */
5761         ot = mo_b_d(b, dflag);
5762         modrm = x86_ldub_code(env, s);
5763         reg = ((modrm >> 3) & 7) | REX_R(s);
5764         mod = (modrm >> 6) & 3;
5765         if (mod == 3) {
5766             rm = (modrm & 7) | REX_B(s);
5767         do_xchg_reg:
5768             gen_op_mov_v_reg(s, ot, s->T0, reg);
5769             gen_op_mov_v_reg(s, ot, s->T1, rm);
5770             gen_op_mov_reg_v(s, ot, rm, s->T0);
5771             gen_op_mov_reg_v(s, ot, reg, s->T1);
5772         } else {
5773             gen_lea_modrm(env, s, modrm);
5774             gen_op_mov_v_reg(s, ot, s->T0, reg);
5775             /* for xchg, lock is implicit */
5776             tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0,
5777                                    s->mem_index, ot | MO_LE);
5778             gen_op_mov_reg_v(s, ot, reg, s->T1);
5779         }
5780         break;
5781     case 0xc4: /* les Gv */
5782         /* In CODE64 this is VEX3; see above.  */
5783         op = R_ES;
5784         goto do_lxx;
5785     case 0xc5: /* lds Gv */
5786         /* In CODE64 this is VEX2; see above.  */
5787         op = R_DS;
5788         goto do_lxx;
5789     case 0x1b2: /* lss Gv */
5790         op = R_SS;
5791         goto do_lxx;
5792     case 0x1b4: /* lfs Gv */
5793         op = R_FS;
5794         goto do_lxx;
5795     case 0x1b5: /* lgs Gv */
5796         op = R_GS;
5797     do_lxx:
5798         ot = dflag != MO_16 ? MO_32 : MO_16;
5799         modrm = x86_ldub_code(env, s);
5800         reg = ((modrm >> 3) & 7) | REX_R(s);
5801         mod = (modrm >> 6) & 3;
5802         if (mod == 3)
5803             goto illegal_op;
5804         gen_lea_modrm(env, s, modrm);
5805         gen_op_ld_v(s, ot, s->T1, s->A0);
5806         gen_add_A0_im(s, 1 << ot);
5807         /* load the segment first to handle exceptions properly */
5808         gen_op_ld_v(s, MO_16, s->T0, s->A0);
5809         gen_movl_seg_T0(s, op);
5810         /* then put the data */
5811         gen_op_mov_reg_v(s, ot, reg, s->T1);
5812         if (s->base.is_jmp) {
5813             gen_jmp_im(s, s->pc - s->cs_base);
5814             gen_eob(s);
5815         }
5816         break;
5817 
5818         /************************/
5819         /* shifts */
5820     case 0xc0:
5821     case 0xc1:
5822         /* shift Ev,Ib */
5823         shift = 2;
5824     grp2:
5825         {
5826             ot = mo_b_d(b, dflag);
5827             modrm = x86_ldub_code(env, s);
5828             mod = (modrm >> 6) & 3;
5829             op = (modrm >> 3) & 7;
5830 
5831             if (mod != 3) {
5832                 if (shift == 2) {
5833                     s->rip_offset = 1;
5834                 }
5835                 gen_lea_modrm(env, s, modrm);
5836                 opreg = OR_TMP0;
5837             } else {
5838                 opreg = (modrm & 7) | REX_B(s);
5839             }
5840 
5841             /* simpler op */
5842             if (shift == 0) {
5843                 gen_shift(s, op, ot, opreg, OR_ECX);
5844             } else {
5845                 if (shift == 2) {
5846                     shift = x86_ldub_code(env, s);
5847                 }
5848                 gen_shifti(s, op, ot, opreg, shift);
5849             }
5850         }
5851         break;
5852     case 0xd0:
5853     case 0xd1:
5854         /* shift Ev,1 */
5855         shift = 1;
5856         goto grp2;
5857     case 0xd2:
5858     case 0xd3:
5859         /* shift Ev,cl */
5860         shift = 0;
5861         goto grp2;
5862 
5863     case 0x1a4: /* shld imm */
5864         op = 0;
5865         shift = 1;
5866         goto do_shiftd;
5867     case 0x1a5: /* shld cl */
5868         op = 0;
5869         shift = 0;
5870         goto do_shiftd;
5871     case 0x1ac: /* shrd imm */
5872         op = 1;
5873         shift = 1;
5874         goto do_shiftd;
5875     case 0x1ad: /* shrd cl */
5876         op = 1;
5877         shift = 0;
5878     do_shiftd:
5879         ot = dflag;
5880         modrm = x86_ldub_code(env, s);
5881         mod = (modrm >> 6) & 3;
5882         rm = (modrm & 7) | REX_B(s);
5883         reg = ((modrm >> 3) & 7) | REX_R(s);
5884         if (mod != 3) {
5885             gen_lea_modrm(env, s, modrm);
5886             opreg = OR_TMP0;
5887         } else {
5888             opreg = rm;
5889         }
5890         gen_op_mov_v_reg(s, ot, s->T1, reg);
5891 
5892         if (shift) {
5893             TCGv imm = tcg_const_tl(x86_ldub_code(env, s));
5894             gen_shiftd_rm_T1(s, ot, opreg, op, imm);
5895             tcg_temp_free(imm);
5896         } else {
5897             gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
5898         }
5899         break;
5900 
5901         /************************/
5902         /* floats */
5903     case 0xd8 ... 0xdf:
5904         {
5905             bool update_fip = true;
5906 
5907             if (s->flags & (HF_EM_MASK | HF_TS_MASK)) {
5908                 /* if CR0.EM or CR0.TS are set, generate an FPU exception */
5909                 /* XXX: what to do if illegal op ? */
5910                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
5911                 break;
5912             }
5913             modrm = x86_ldub_code(env, s);
5914             mod = (modrm >> 6) & 3;
5915             rm = modrm & 7;
5916             op = ((b & 7) << 3) | ((modrm >> 3) & 7);
5917             if (mod != 3) {
5918                 /* memory op */
5919                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
5920                 TCGv ea = gen_lea_modrm_1(s, a);
5921                 TCGv last_addr = tcg_temp_new();
5922                 bool update_fdp = true;
5923 
5924                 tcg_gen_mov_tl(last_addr, ea);
5925                 gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override);
5926 
5927                 switch (op) {
5928                 case 0x00 ... 0x07: /* fxxxs */
5929                 case 0x10 ... 0x17: /* fixxxl */
5930                 case 0x20 ... 0x27: /* fxxxl */
5931                 case 0x30 ... 0x37: /* fixxx */
5932                     {
5933                         int op1;
5934                         op1 = op & 7;
5935 
5936                         switch (op >> 4) {
5937                         case 0:
5938                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5939                                                 s->mem_index, MO_LEUL);
5940                             gen_helper_flds_FT0(cpu_env, s->tmp2_i32);
5941                             break;
5942                         case 1:
5943                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5944                                                 s->mem_index, MO_LEUL);
5945                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5946                             break;
5947                         case 2:
5948                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5949                                                 s->mem_index, MO_LEUQ);
5950                             gen_helper_fldl_FT0(cpu_env, s->tmp1_i64);
5951                             break;
5952                         case 3:
5953                         default:
5954                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5955                                                 s->mem_index, MO_LESW);
5956                             gen_helper_fildl_FT0(cpu_env, s->tmp2_i32);
5957                             break;
5958                         }
5959 
5960                         gen_helper_fp_arith_ST0_FT0(op1);
5961                         if (op1 == 3) {
5962                             /* fcomp needs pop */
5963                             gen_helper_fpop(cpu_env);
5964                         }
5965                     }
5966                     break;
5967                 case 0x08: /* flds */
5968                 case 0x0a: /* fsts */
5969                 case 0x0b: /* fstps */
5970                 case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */
5971                 case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */
5972                 case 0x38 ... 0x3b: /* filds, fisttps, fists, fistps */
5973                     switch (op & 7) {
5974                     case 0:
5975                         switch (op >> 4) {
5976                         case 0:
5977                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5978                                                 s->mem_index, MO_LEUL);
5979                             gen_helper_flds_ST0(cpu_env, s->tmp2_i32);
5980                             break;
5981                         case 1:
5982                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5983                                                 s->mem_index, MO_LEUL);
5984                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5985                             break;
5986                         case 2:
5987                             tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
5988                                                 s->mem_index, MO_LEUQ);
5989                             gen_helper_fldl_ST0(cpu_env, s->tmp1_i64);
5990                             break;
5991                         case 3:
5992                         default:
5993                             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
5994                                                 s->mem_index, MO_LESW);
5995                             gen_helper_fildl_ST0(cpu_env, s->tmp2_i32);
5996                             break;
5997                         }
5998                         break;
5999                     case 1:
6000                         /* XXX: the corresponding CPUID bit must be tested ! */
6001                         switch (op >> 4) {
6002                         case 1:
6003                             gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env);
6004                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6005                                                 s->mem_index, MO_LEUL);
6006                             break;
6007                         case 2:
6008                             gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env);
6009                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6010                                                 s->mem_index, MO_LEUQ);
6011                             break;
6012                         case 3:
6013                         default:
6014                             gen_helper_fistt_ST0(s->tmp2_i32, cpu_env);
6015                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6016                                                 s->mem_index, MO_LEUW);
6017                             break;
6018                         }
6019                         gen_helper_fpop(cpu_env);
6020                         break;
6021                     default:
6022                         switch (op >> 4) {
6023                         case 0:
6024                             gen_helper_fsts_ST0(s->tmp2_i32, cpu_env);
6025                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6026                                                 s->mem_index, MO_LEUL);
6027                             break;
6028                         case 1:
6029                             gen_helper_fistl_ST0(s->tmp2_i32, cpu_env);
6030                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6031                                                 s->mem_index, MO_LEUL);
6032                             break;
6033                         case 2:
6034                             gen_helper_fstl_ST0(s->tmp1_i64, cpu_env);
6035                             tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6036                                                 s->mem_index, MO_LEUQ);
6037                             break;
6038                         case 3:
6039                         default:
6040                             gen_helper_fist_ST0(s->tmp2_i32, cpu_env);
6041                             tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6042                                                 s->mem_index, MO_LEUW);
6043                             break;
6044                         }
6045                         if ((op & 7) == 3) {
6046                             gen_helper_fpop(cpu_env);
6047                         }
6048                         break;
6049                     }
6050                     break;
6051                 case 0x0c: /* fldenv mem */
6052                     gen_helper_fldenv(cpu_env, s->A0,
6053                                       tcg_const_i32(dflag - 1));
6054                     update_fip = update_fdp = false;
6055                     break;
6056                 case 0x0d: /* fldcw mem */
6057                     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0,
6058                                         s->mem_index, MO_LEUW);
6059                     gen_helper_fldcw(cpu_env, s->tmp2_i32);
6060                     update_fip = update_fdp = false;
6061                     break;
6062                 case 0x0e: /* fnstenv mem */
6063                     gen_helper_fstenv(cpu_env, s->A0,
6064                                       tcg_const_i32(dflag - 1));
6065                     update_fip = update_fdp = false;
6066                     break;
6067                 case 0x0f: /* fnstcw mem */
6068                     gen_helper_fnstcw(s->tmp2_i32, cpu_env);
6069                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6070                                         s->mem_index, MO_LEUW);
6071                     update_fip = update_fdp = false;
6072                     break;
6073                 case 0x1d: /* fldt mem */
6074                     gen_helper_fldt_ST0(cpu_env, s->A0);
6075                     break;
6076                 case 0x1f: /* fstpt mem */
6077                     gen_helper_fstt_ST0(cpu_env, s->A0);
6078                     gen_helper_fpop(cpu_env);
6079                     break;
6080                 case 0x2c: /* frstor mem */
6081                     gen_helper_frstor(cpu_env, s->A0,
6082                                       tcg_const_i32(dflag - 1));
6083                     update_fip = update_fdp = false;
6084                     break;
6085                 case 0x2e: /* fnsave mem */
6086                     gen_helper_fsave(cpu_env, s->A0,
6087                                      tcg_const_i32(dflag - 1));
6088                     update_fip = update_fdp = false;
6089                     break;
6090                 case 0x2f: /* fnstsw mem */
6091                     gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6092                     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0,
6093                                         s->mem_index, MO_LEUW);
6094                     update_fip = update_fdp = false;
6095                     break;
6096                 case 0x3c: /* fbld */
6097                     gen_helper_fbld_ST0(cpu_env, s->A0);
6098                     break;
6099                 case 0x3e: /* fbstp */
6100                     gen_helper_fbst_ST0(cpu_env, s->A0);
6101                     gen_helper_fpop(cpu_env);
6102                     break;
6103                 case 0x3d: /* fildll */
6104                     tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0,
6105                                         s->mem_index, MO_LEUQ);
6106                     gen_helper_fildll_ST0(cpu_env, s->tmp1_i64);
6107                     break;
6108                 case 0x3f: /* fistpll */
6109                     gen_helper_fistll_ST0(s->tmp1_i64, cpu_env);
6110                     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0,
6111                                         s->mem_index, MO_LEUQ);
6112                     gen_helper_fpop(cpu_env);
6113                     break;
6114                 default:
6115                     goto unknown_op;
6116                 }
6117 
6118                 if (update_fdp) {
6119                     int last_seg = s->override >= 0 ? s->override : a.def_seg;
6120 
6121                     tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6122                                    offsetof(CPUX86State,
6123                                             segs[last_seg].selector));
6124                     tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6125                                      offsetof(CPUX86State, fpds));
6126                     tcg_gen_st_tl(last_addr, cpu_env,
6127                                   offsetof(CPUX86State, fpdp));
6128                 }
6129                 tcg_temp_free(last_addr);
6130             } else {
6131                 /* register float ops */
6132                 opreg = rm;
6133 
6134                 switch (op) {
6135                 case 0x08: /* fld sti */
6136                     gen_helper_fpush(cpu_env);
6137                     gen_helper_fmov_ST0_STN(cpu_env,
6138                                             tcg_const_i32((opreg + 1) & 7));
6139                     break;
6140                 case 0x09: /* fxchg sti */
6141                 case 0x29: /* fxchg4 sti, undocumented op */
6142                 case 0x39: /* fxchg7 sti, undocumented op */
6143                     gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
6144                     break;
6145                 case 0x0a: /* grp d9/2 */
6146                     switch (rm) {
6147                     case 0: /* fnop */
6148                         /* check exceptions (FreeBSD FPU probe) */
6149                         gen_helper_fwait(cpu_env);
6150                         update_fip = false;
6151                         break;
6152                     default:
6153                         goto unknown_op;
6154                     }
6155                     break;
6156                 case 0x0c: /* grp d9/4 */
6157                     switch (rm) {
6158                     case 0: /* fchs */
6159                         gen_helper_fchs_ST0(cpu_env);
6160                         break;
6161                     case 1: /* fabs */
6162                         gen_helper_fabs_ST0(cpu_env);
6163                         break;
6164                     case 4: /* ftst */
6165                         gen_helper_fldz_FT0(cpu_env);
6166                         gen_helper_fcom_ST0_FT0(cpu_env);
6167                         break;
6168                     case 5: /* fxam */
6169                         gen_helper_fxam_ST0(cpu_env);
6170                         break;
6171                     default:
6172                         goto unknown_op;
6173                     }
6174                     break;
6175                 case 0x0d: /* grp d9/5 */
6176                     {
6177                         switch (rm) {
6178                         case 0:
6179                             gen_helper_fpush(cpu_env);
6180                             gen_helper_fld1_ST0(cpu_env);
6181                             break;
6182                         case 1:
6183                             gen_helper_fpush(cpu_env);
6184                             gen_helper_fldl2t_ST0(cpu_env);
6185                             break;
6186                         case 2:
6187                             gen_helper_fpush(cpu_env);
6188                             gen_helper_fldl2e_ST0(cpu_env);
6189                             break;
6190                         case 3:
6191                             gen_helper_fpush(cpu_env);
6192                             gen_helper_fldpi_ST0(cpu_env);
6193                             break;
6194                         case 4:
6195                             gen_helper_fpush(cpu_env);
6196                             gen_helper_fldlg2_ST0(cpu_env);
6197                             break;
6198                         case 5:
6199                             gen_helper_fpush(cpu_env);
6200                             gen_helper_fldln2_ST0(cpu_env);
6201                             break;
6202                         case 6:
6203                             gen_helper_fpush(cpu_env);
6204                             gen_helper_fldz_ST0(cpu_env);
6205                             break;
6206                         default:
6207                             goto unknown_op;
6208                         }
6209                     }
6210                     break;
6211                 case 0x0e: /* grp d9/6 */
6212                     switch (rm) {
6213                     case 0: /* f2xm1 */
6214                         gen_helper_f2xm1(cpu_env);
6215                         break;
6216                     case 1: /* fyl2x */
6217                         gen_helper_fyl2x(cpu_env);
6218                         break;
6219                     case 2: /* fptan */
6220                         gen_helper_fptan(cpu_env);
6221                         break;
6222                     case 3: /* fpatan */
6223                         gen_helper_fpatan(cpu_env);
6224                         break;
6225                     case 4: /* fxtract */
6226                         gen_helper_fxtract(cpu_env);
6227                         break;
6228                     case 5: /* fprem1 */
6229                         gen_helper_fprem1(cpu_env);
6230                         break;
6231                     case 6: /* fdecstp */
6232                         gen_helper_fdecstp(cpu_env);
6233                         break;
6234                     default:
6235                     case 7: /* fincstp */
6236                         gen_helper_fincstp(cpu_env);
6237                         break;
6238                     }
6239                     break;
6240                 case 0x0f: /* grp d9/7 */
6241                     switch (rm) {
6242                     case 0: /* fprem */
6243                         gen_helper_fprem(cpu_env);
6244                         break;
6245                     case 1: /* fyl2xp1 */
6246                         gen_helper_fyl2xp1(cpu_env);
6247                         break;
6248                     case 2: /* fsqrt */
6249                         gen_helper_fsqrt(cpu_env);
6250                         break;
6251                     case 3: /* fsincos */
6252                         gen_helper_fsincos(cpu_env);
6253                         break;
6254                     case 5: /* fscale */
6255                         gen_helper_fscale(cpu_env);
6256                         break;
6257                     case 4: /* frndint */
6258                         gen_helper_frndint(cpu_env);
6259                         break;
6260                     case 6: /* fsin */
6261                         gen_helper_fsin(cpu_env);
6262                         break;
6263                     default:
6264                     case 7: /* fcos */
6265                         gen_helper_fcos(cpu_env);
6266                         break;
6267                     }
6268                     break;
6269                 case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
6270                 case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
6271                 case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
6272                     {
6273                         int op1;
6274 
6275                         op1 = op & 7;
6276                         if (op >= 0x20) {
6277                             gen_helper_fp_arith_STN_ST0(op1, opreg);
6278                             if (op >= 0x30) {
6279                                 gen_helper_fpop(cpu_env);
6280                             }
6281                         } else {
6282                             gen_helper_fmov_FT0_STN(cpu_env,
6283                                                     tcg_const_i32(opreg));
6284                             gen_helper_fp_arith_ST0_FT0(op1);
6285                         }
6286                     }
6287                     break;
6288                 case 0x02: /* fcom */
6289                 case 0x22: /* fcom2, undocumented op */
6290                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6291                     gen_helper_fcom_ST0_FT0(cpu_env);
6292                     break;
6293                 case 0x03: /* fcomp */
6294                 case 0x23: /* fcomp3, undocumented op */
6295                 case 0x32: /* fcomp5, undocumented op */
6296                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6297                     gen_helper_fcom_ST0_FT0(cpu_env);
6298                     gen_helper_fpop(cpu_env);
6299                     break;
6300                 case 0x15: /* da/5 */
6301                     switch (rm) {
6302                     case 1: /* fucompp */
6303                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6304                         gen_helper_fucom_ST0_FT0(cpu_env);
6305                         gen_helper_fpop(cpu_env);
6306                         gen_helper_fpop(cpu_env);
6307                         break;
6308                     default:
6309                         goto unknown_op;
6310                     }
6311                     break;
6312                 case 0x1c:
6313                     switch (rm) {
6314                     case 0: /* feni (287 only, just do nop here) */
6315                         break;
6316                     case 1: /* fdisi (287 only, just do nop here) */
6317                         break;
6318                     case 2: /* fclex */
6319                         gen_helper_fclex(cpu_env);
6320                         update_fip = false;
6321                         break;
6322                     case 3: /* fninit */
6323                         gen_helper_fninit(cpu_env);
6324                         update_fip = false;
6325                         break;
6326                     case 4: /* fsetpm (287 only, just do nop here) */
6327                         break;
6328                     default:
6329                         goto unknown_op;
6330                     }
6331                     break;
6332                 case 0x1d: /* fucomi */
6333                     if (!(s->cpuid_features & CPUID_CMOV)) {
6334                         goto illegal_op;
6335                     }
6336                     gen_update_cc_op(s);
6337                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6338                     gen_helper_fucomi_ST0_FT0(cpu_env);
6339                     set_cc_op(s, CC_OP_EFLAGS);
6340                     break;
6341                 case 0x1e: /* fcomi */
6342                     if (!(s->cpuid_features & CPUID_CMOV)) {
6343                         goto illegal_op;
6344                     }
6345                     gen_update_cc_op(s);
6346                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6347                     gen_helper_fcomi_ST0_FT0(cpu_env);
6348                     set_cc_op(s, CC_OP_EFLAGS);
6349                     break;
6350                 case 0x28: /* ffree sti */
6351                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6352                     break;
6353                 case 0x2a: /* fst sti */
6354                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6355                     break;
6356                 case 0x2b: /* fstp sti */
6357                 case 0x0b: /* fstp1 sti, undocumented op */
6358                 case 0x3a: /* fstp8 sti, undocumented op */
6359                 case 0x3b: /* fstp9 sti, undocumented op */
6360                     gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
6361                     gen_helper_fpop(cpu_env);
6362                     break;
6363                 case 0x2c: /* fucom st(i) */
6364                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6365                     gen_helper_fucom_ST0_FT0(cpu_env);
6366                     break;
6367                 case 0x2d: /* fucomp st(i) */
6368                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6369                     gen_helper_fucom_ST0_FT0(cpu_env);
6370                     gen_helper_fpop(cpu_env);
6371                     break;
6372                 case 0x33: /* de/3 */
6373                     switch (rm) {
6374                     case 1: /* fcompp */
6375                         gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
6376                         gen_helper_fcom_ST0_FT0(cpu_env);
6377                         gen_helper_fpop(cpu_env);
6378                         gen_helper_fpop(cpu_env);
6379                         break;
6380                     default:
6381                         goto unknown_op;
6382                     }
6383                     break;
6384                 case 0x38: /* ffreep sti, undocumented op */
6385                     gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
6386                     gen_helper_fpop(cpu_env);
6387                     break;
6388                 case 0x3c: /* df/4 */
6389                     switch (rm) {
6390                     case 0:
6391                         gen_helper_fnstsw(s->tmp2_i32, cpu_env);
6392                         tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
6393                         gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0);
6394                         break;
6395                     default:
6396                         goto unknown_op;
6397                     }
6398                     break;
6399                 case 0x3d: /* fucomip */
6400                     if (!(s->cpuid_features & CPUID_CMOV)) {
6401                         goto illegal_op;
6402                     }
6403                     gen_update_cc_op(s);
6404                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6405                     gen_helper_fucomi_ST0_FT0(cpu_env);
6406                     gen_helper_fpop(cpu_env);
6407                     set_cc_op(s, CC_OP_EFLAGS);
6408                     break;
6409                 case 0x3e: /* fcomip */
6410                     if (!(s->cpuid_features & CPUID_CMOV)) {
6411                         goto illegal_op;
6412                     }
6413                     gen_update_cc_op(s);
6414                     gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
6415                     gen_helper_fcomi_ST0_FT0(cpu_env);
6416                     gen_helper_fpop(cpu_env);
6417                     set_cc_op(s, CC_OP_EFLAGS);
6418                     break;
6419                 case 0x10 ... 0x13: /* fcmovxx */
6420                 case 0x18 ... 0x1b:
6421                     {
6422                         int op1;
6423                         TCGLabel *l1;
6424                         static const uint8_t fcmov_cc[8] = {
6425                             (JCC_B << 1),
6426                             (JCC_Z << 1),
6427                             (JCC_BE << 1),
6428                             (JCC_P << 1),
6429                         };
6430 
6431                         if (!(s->cpuid_features & CPUID_CMOV)) {
6432                             goto illegal_op;
6433                         }
6434                         op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
6435                         l1 = gen_new_label();
6436                         gen_jcc1_noeob(s, op1, l1);
6437                         gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
6438                         gen_set_label(l1);
6439                     }
6440                     break;
6441                 default:
6442                     goto unknown_op;
6443                 }
6444             }
6445 
6446             if (update_fip) {
6447                 tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
6448                                offsetof(CPUX86State, segs[R_CS].selector));
6449                 tcg_gen_st16_i32(s->tmp2_i32, cpu_env,
6450                                  offsetof(CPUX86State, fpcs));
6451                 tcg_gen_st_tl(tcg_constant_tl(pc_start - s->cs_base),
6452                               cpu_env, offsetof(CPUX86State, fpip));
6453             }
6454         }
6455         break;
6456         /************************/
6457         /* string ops */
6458 
6459     case 0xa4: /* movsS */
6460     case 0xa5:
6461         ot = mo_b_d(b, dflag);
6462         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6463             gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6464         } else {
6465             gen_movs(s, ot);
6466         }
6467         break;
6468 
6469     case 0xaa: /* stosS */
6470     case 0xab:
6471         ot = mo_b_d(b, dflag);
6472         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6473             gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6474         } else {
6475             gen_stos(s, ot);
6476         }
6477         break;
6478     case 0xac: /* lodsS */
6479     case 0xad:
6480         ot = mo_b_d(b, dflag);
6481         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6482             gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6483         } else {
6484             gen_lods(s, ot);
6485         }
6486         break;
6487     case 0xae: /* scasS */
6488     case 0xaf:
6489         ot = mo_b_d(b, dflag);
6490         if (prefixes & PREFIX_REPNZ) {
6491             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6492         } else if (prefixes & PREFIX_REPZ) {
6493             gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6494         } else {
6495             gen_scas(s, ot);
6496         }
6497         break;
6498 
6499     case 0xa6: /* cmpsS */
6500     case 0xa7:
6501         ot = mo_b_d(b, dflag);
6502         if (prefixes & PREFIX_REPNZ) {
6503             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1);
6504         } else if (prefixes & PREFIX_REPZ) {
6505             gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0);
6506         } else {
6507             gen_cmps(s, ot);
6508         }
6509         break;
6510     case 0x6c: /* insS */
6511     case 0x6d:
6512         ot = mo_b_d32(b, dflag);
6513         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6514         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6515         if (!gen_check_io(s, ot, s->tmp2_i32,
6516                           SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) {
6517             break;
6518         }
6519         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6520             gen_io_start();
6521         }
6522         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6523             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6524             /* jump generated by gen_repz_ins */
6525         } else {
6526             gen_ins(s, ot);
6527             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6528                 gen_jmp(s, s->pc - s->cs_base);
6529             }
6530         }
6531         break;
6532     case 0x6e: /* outsS */
6533     case 0x6f:
6534         ot = mo_b_d32(b, dflag);
6535         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6536         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6537         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) {
6538             break;
6539         }
6540         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6541             gen_io_start();
6542         }
6543         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
6544             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
6545             /* jump generated by gen_repz_outs */
6546         } else {
6547             gen_outs(s, ot);
6548             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6549                 gen_jmp(s, s->pc - s->cs_base);
6550             }
6551         }
6552         break;
6553 
6554         /************************/
6555         /* port I/O */
6556 
6557     case 0xe4:
6558     case 0xe5:
6559         ot = mo_b_d32(b, dflag);
6560         val = x86_ldub_code(env, s);
6561         tcg_gen_movi_i32(s->tmp2_i32, val);
6562         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6563             break;
6564         }
6565         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6566             gen_io_start();
6567         }
6568         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6569         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6570         gen_bpt_io(s, s->tmp2_i32, ot);
6571         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6572             gen_jmp(s, s->pc - s->cs_base);
6573         }
6574         break;
6575     case 0xe6:
6576     case 0xe7:
6577         ot = mo_b_d32(b, dflag);
6578         val = x86_ldub_code(env, s);
6579         tcg_gen_movi_i32(s->tmp2_i32, val);
6580         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6581             break;
6582         }
6583         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6584             gen_io_start();
6585         }
6586         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6587         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6588         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6589         gen_bpt_io(s, s->tmp2_i32, ot);
6590         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6591             gen_jmp(s, s->pc - s->cs_base);
6592         }
6593         break;
6594     case 0xec:
6595     case 0xed:
6596         ot = mo_b_d32(b, dflag);
6597         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6598         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6599         if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) {
6600             break;
6601         }
6602         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6603             gen_io_start();
6604         }
6605         gen_helper_in_func(ot, s->T1, s->tmp2_i32);
6606         gen_op_mov_reg_v(s, ot, R_EAX, s->T1);
6607         gen_bpt_io(s, s->tmp2_i32, ot);
6608         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6609             gen_jmp(s, s->pc - s->cs_base);
6610         }
6611         break;
6612     case 0xee:
6613     case 0xef:
6614         ot = mo_b_d32(b, dflag);
6615         tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]);
6616         tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32);
6617         if (!gen_check_io(s, ot, s->tmp2_i32, 0)) {
6618             break;
6619         }
6620         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6621             gen_io_start();
6622         }
6623         gen_op_mov_v_reg(s, ot, s->T1, R_EAX);
6624         tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
6625         gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
6626         gen_bpt_io(s, s->tmp2_i32, ot);
6627         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
6628             gen_jmp(s, s->pc - s->cs_base);
6629         }
6630         break;
6631 
6632         /************************/
6633         /* control */
6634     case 0xc2: /* ret im */
6635         val = x86_ldsw_code(env, s);
6636         ot = gen_pop_T0(s);
6637         gen_stack_update(s, val + (1 << ot));
6638         /* Note that gen_pop_T0 uses a zero-extending load.  */
6639         gen_op_jmp_v(s->T0);
6640         gen_bnd_jmp(s);
6641         gen_jr(s, s->T0);
6642         break;
6643     case 0xc3: /* ret */
6644         ot = gen_pop_T0(s);
6645         gen_pop_update(s, ot);
6646         /* Note that gen_pop_T0 uses a zero-extending load.  */
6647         gen_op_jmp_v(s->T0);
6648         gen_bnd_jmp(s);
6649         gen_jr(s, s->T0);
6650         break;
6651     case 0xca: /* lret im */
6652         val = x86_ldsw_code(env, s);
6653     do_lret:
6654         if (PE(s) && !VM86(s)) {
6655             gen_update_cc_op(s);
6656             gen_jmp_im(s, pc_start - s->cs_base);
6657             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
6658                                       tcg_const_i32(val));
6659         } else {
6660             gen_stack_A0(s);
6661             /* pop offset */
6662             gen_op_ld_v(s, dflag, s->T0, s->A0);
6663             /* NOTE: keeping EIP updated is not a problem in case of
6664                exception */
6665             gen_op_jmp_v(s->T0);
6666             /* pop selector */
6667             gen_add_A0_im(s, 1 << dflag);
6668             gen_op_ld_v(s, dflag, s->T0, s->A0);
6669             gen_op_movl_seg_T0_vm(s, R_CS);
6670             /* add stack offset */
6671             gen_stack_update(s, val + (2 << dflag));
6672         }
6673         gen_eob(s);
6674         break;
6675     case 0xcb: /* lret */
6676         val = 0;
6677         goto do_lret;
6678     case 0xcf: /* iret */
6679         gen_svm_check_intercept(s, SVM_EXIT_IRET);
6680         if (!PE(s) || VM86(s)) {
6681             /* real mode or vm86 mode */
6682             if (!check_vm86_iopl(s)) {
6683                 break;
6684             }
6685             gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1));
6686         } else {
6687             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
6688                                       tcg_const_i32(s->pc - s->cs_base));
6689         }
6690         set_cc_op(s, CC_OP_EFLAGS);
6691         gen_eob(s);
6692         break;
6693     case 0xe8: /* call im */
6694         {
6695             if (dflag != MO_16) {
6696                 tval = (int32_t)insn_get(env, s, MO_32);
6697             } else {
6698                 tval = (int16_t)insn_get(env, s, MO_16);
6699             }
6700             next_eip = s->pc - s->cs_base;
6701             tval += next_eip;
6702             if (dflag == MO_16) {
6703                 tval &= 0xffff;
6704             } else if (!CODE64(s)) {
6705                 tval &= 0xffffffff;
6706             }
6707             tcg_gen_movi_tl(s->T0, next_eip);
6708             gen_push_v(s, s->T0);
6709             gen_bnd_jmp(s);
6710             gen_jmp(s, tval);
6711         }
6712         break;
6713     case 0x9a: /* lcall im */
6714         {
6715             unsigned int selector, offset;
6716 
6717             if (CODE64(s))
6718                 goto illegal_op;
6719             ot = dflag;
6720             offset = insn_get(env, s, ot);
6721             selector = insn_get(env, s, MO_16);
6722 
6723             tcg_gen_movi_tl(s->T0, selector);
6724             tcg_gen_movi_tl(s->T1, offset);
6725         }
6726         goto do_lcall;
6727     case 0xe9: /* jmp im */
6728         if (dflag != MO_16) {
6729             tval = (int32_t)insn_get(env, s, MO_32);
6730         } else {
6731             tval = (int16_t)insn_get(env, s, MO_16);
6732         }
6733         tval += s->pc - s->cs_base;
6734         if (dflag == MO_16) {
6735             tval &= 0xffff;
6736         } else if (!CODE64(s)) {
6737             tval &= 0xffffffff;
6738         }
6739         gen_bnd_jmp(s);
6740         gen_jmp(s, tval);
6741         break;
6742     case 0xea: /* ljmp im */
6743         {
6744             unsigned int selector, offset;
6745 
6746             if (CODE64(s))
6747                 goto illegal_op;
6748             ot = dflag;
6749             offset = insn_get(env, s, ot);
6750             selector = insn_get(env, s, MO_16);
6751 
6752             tcg_gen_movi_tl(s->T0, selector);
6753             tcg_gen_movi_tl(s->T1, offset);
6754         }
6755         goto do_ljmp;
6756     case 0xeb: /* jmp Jb */
6757         tval = (int8_t)insn_get(env, s, MO_8);
6758         tval += s->pc - s->cs_base;
6759         if (dflag == MO_16) {
6760             tval &= 0xffff;
6761         }
6762         gen_jmp(s, tval);
6763         break;
6764     case 0x70 ... 0x7f: /* jcc Jb */
6765         tval = (int8_t)insn_get(env, s, MO_8);
6766         goto do_jcc;
6767     case 0x180 ... 0x18f: /* jcc Jv */
6768         if (dflag != MO_16) {
6769             tval = (int32_t)insn_get(env, s, MO_32);
6770         } else {
6771             tval = (int16_t)insn_get(env, s, MO_16);
6772         }
6773     do_jcc:
6774         next_eip = s->pc - s->cs_base;
6775         tval += next_eip;
6776         if (dflag == MO_16) {
6777             tval &= 0xffff;
6778         }
6779         gen_bnd_jmp(s);
6780         gen_jcc(s, b, tval, next_eip);
6781         break;
6782 
6783     case 0x190 ... 0x19f: /* setcc Gv */
6784         modrm = x86_ldub_code(env, s);
6785         gen_setcc1(s, b, s->T0);
6786         gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
6787         break;
6788     case 0x140 ... 0x14f: /* cmov Gv, Ev */
6789         if (!(s->cpuid_features & CPUID_CMOV)) {
6790             goto illegal_op;
6791         }
6792         ot = dflag;
6793         modrm = x86_ldub_code(env, s);
6794         reg = ((modrm >> 3) & 7) | REX_R(s);
6795         gen_cmovcc1(env, s, ot, b, modrm, reg);
6796         break;
6797 
6798         /************************/
6799         /* flags */
6800     case 0x9c: /* pushf */
6801         gen_svm_check_intercept(s, SVM_EXIT_PUSHF);
6802         if (check_vm86_iopl(s)) {
6803             gen_update_cc_op(s);
6804             gen_helper_read_eflags(s->T0, cpu_env);
6805             gen_push_v(s, s->T0);
6806         }
6807         break;
6808     case 0x9d: /* popf */
6809         gen_svm_check_intercept(s, SVM_EXIT_POPF);
6810         if (check_vm86_iopl(s)) {
6811             ot = gen_pop_T0(s);
6812             if (CPL(s) == 0) {
6813                 if (dflag != MO_16) {
6814                     gen_helper_write_eflags(cpu_env, s->T0,
6815                                             tcg_const_i32((TF_MASK | AC_MASK |
6816                                                            ID_MASK | NT_MASK |
6817                                                            IF_MASK |
6818                                                            IOPL_MASK)));
6819                 } else {
6820                     gen_helper_write_eflags(cpu_env, s->T0,
6821                                             tcg_const_i32((TF_MASK | AC_MASK |
6822                                                            ID_MASK | NT_MASK |
6823                                                            IF_MASK | IOPL_MASK)
6824                                                           & 0xffff));
6825                 }
6826             } else {
6827                 if (CPL(s) <= IOPL(s)) {
6828                     if (dflag != MO_16) {
6829                         gen_helper_write_eflags(cpu_env, s->T0,
6830                                                 tcg_const_i32((TF_MASK |
6831                                                                AC_MASK |
6832                                                                ID_MASK |
6833                                                                NT_MASK |
6834                                                                IF_MASK)));
6835                     } else {
6836                         gen_helper_write_eflags(cpu_env, s->T0,
6837                                                 tcg_const_i32((TF_MASK |
6838                                                                AC_MASK |
6839                                                                ID_MASK |
6840                                                                NT_MASK |
6841                                                                IF_MASK)
6842                                                               & 0xffff));
6843                     }
6844                 } else {
6845                     if (dflag != MO_16) {
6846                         gen_helper_write_eflags(cpu_env, s->T0,
6847                                            tcg_const_i32((TF_MASK | AC_MASK |
6848                                                           ID_MASK | NT_MASK)));
6849                     } else {
6850                         gen_helper_write_eflags(cpu_env, s->T0,
6851                                            tcg_const_i32((TF_MASK | AC_MASK |
6852                                                           ID_MASK | NT_MASK)
6853                                                          & 0xffff));
6854                     }
6855                 }
6856             }
6857             gen_pop_update(s, ot);
6858             set_cc_op(s, CC_OP_EFLAGS);
6859             /* abort translation because TF/AC flag may change */
6860             gen_jmp_im(s, s->pc - s->cs_base);
6861             gen_eob(s);
6862         }
6863         break;
6864     case 0x9e: /* sahf */
6865         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6866             goto illegal_op;
6867         gen_op_mov_v_reg(s, MO_8, s->T0, R_AH);
6868         gen_compute_eflags(s);
6869         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O);
6870         tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C);
6871         tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
6872         break;
6873     case 0x9f: /* lahf */
6874         if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM))
6875             goto illegal_op;
6876         gen_compute_eflags(s);
6877         /* Note: gen_compute_eflags() only gives the condition codes */
6878         tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02);
6879         gen_op_mov_reg_v(s, MO_8, R_AH, s->T0);
6880         break;
6881     case 0xf5: /* cmc */
6882         gen_compute_eflags(s);
6883         tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6884         break;
6885     case 0xf8: /* clc */
6886         gen_compute_eflags(s);
6887         tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
6888         break;
6889     case 0xf9: /* stc */
6890         gen_compute_eflags(s);
6891         tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
6892         break;
6893     case 0xfc: /* cld */
6894         tcg_gen_movi_i32(s->tmp2_i32, 1);
6895         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6896         break;
6897     case 0xfd: /* std */
6898         tcg_gen_movi_i32(s->tmp2_i32, -1);
6899         tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df));
6900         break;
6901 
6902         /************************/
6903         /* bit operations */
6904     case 0x1ba: /* bt/bts/btr/btc Gv, im */
6905         ot = dflag;
6906         modrm = x86_ldub_code(env, s);
6907         op = (modrm >> 3) & 7;
6908         mod = (modrm >> 6) & 3;
6909         rm = (modrm & 7) | REX_B(s);
6910         if (mod != 3) {
6911             s->rip_offset = 1;
6912             gen_lea_modrm(env, s, modrm);
6913             if (!(s->prefix & PREFIX_LOCK)) {
6914                 gen_op_ld_v(s, ot, s->T0, s->A0);
6915             }
6916         } else {
6917             gen_op_mov_v_reg(s, ot, s->T0, rm);
6918         }
6919         /* load shift */
6920         val = x86_ldub_code(env, s);
6921         tcg_gen_movi_tl(s->T1, val);
6922         if (op < 4)
6923             goto unknown_op;
6924         op -= 4;
6925         goto bt_op;
6926     case 0x1a3: /* bt Gv, Ev */
6927         op = 0;
6928         goto do_btx;
6929     case 0x1ab: /* bts */
6930         op = 1;
6931         goto do_btx;
6932     case 0x1b3: /* btr */
6933         op = 2;
6934         goto do_btx;
6935     case 0x1bb: /* btc */
6936         op = 3;
6937     do_btx:
6938         ot = dflag;
6939         modrm = x86_ldub_code(env, s);
6940         reg = ((modrm >> 3) & 7) | REX_R(s);
6941         mod = (modrm >> 6) & 3;
6942         rm = (modrm & 7) | REX_B(s);
6943         gen_op_mov_v_reg(s, MO_32, s->T1, reg);
6944         if (mod != 3) {
6945             AddressParts a = gen_lea_modrm_0(env, s, modrm);
6946             /* specific case: we need to add a displacement */
6947             gen_exts(ot, s->T1);
6948             tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot);
6949             tcg_gen_shli_tl(s->tmp0, s->tmp0, ot);
6950             tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0);
6951             gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
6952             if (!(s->prefix & PREFIX_LOCK)) {
6953                 gen_op_ld_v(s, ot, s->T0, s->A0);
6954             }
6955         } else {
6956             gen_op_mov_v_reg(s, ot, s->T0, rm);
6957         }
6958     bt_op:
6959         tcg_gen_andi_tl(s->T1, s->T1, (1 << (3 + ot)) - 1);
6960         tcg_gen_movi_tl(s->tmp0, 1);
6961         tcg_gen_shl_tl(s->tmp0, s->tmp0, s->T1);
6962         if (s->prefix & PREFIX_LOCK) {
6963             switch (op) {
6964             case 0: /* bt */
6965                 /* Needs no atomic ops; we suppressed the normal
6966                    memory load for LOCK above so do it now.  */
6967                 gen_op_ld_v(s, ot, s->T0, s->A0);
6968                 break;
6969             case 1: /* bts */
6970                 tcg_gen_atomic_fetch_or_tl(s->T0, s->A0, s->tmp0,
6971                                            s->mem_index, ot | MO_LE);
6972                 break;
6973             case 2: /* btr */
6974                 tcg_gen_not_tl(s->tmp0, s->tmp0);
6975                 tcg_gen_atomic_fetch_and_tl(s->T0, s->A0, s->tmp0,
6976                                             s->mem_index, ot | MO_LE);
6977                 break;
6978             default:
6979             case 3: /* btc */
6980                 tcg_gen_atomic_fetch_xor_tl(s->T0, s->A0, s->tmp0,
6981                                             s->mem_index, ot | MO_LE);
6982                 break;
6983             }
6984             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6985         } else {
6986             tcg_gen_shr_tl(s->tmp4, s->T0, s->T1);
6987             switch (op) {
6988             case 0: /* bt */
6989                 /* Data already loaded; nothing to do.  */
6990                 break;
6991             case 1: /* bts */
6992                 tcg_gen_or_tl(s->T0, s->T0, s->tmp0);
6993                 break;
6994             case 2: /* btr */
6995                 tcg_gen_andc_tl(s->T0, s->T0, s->tmp0);
6996                 break;
6997             default:
6998             case 3: /* btc */
6999                 tcg_gen_xor_tl(s->T0, s->T0, s->tmp0);
7000                 break;
7001             }
7002             if (op != 0) {
7003                 if (mod != 3) {
7004                     gen_op_st_v(s, ot, s->T0, s->A0);
7005                 } else {
7006                     gen_op_mov_reg_v(s, ot, rm, s->T0);
7007                 }
7008             }
7009         }
7010 
7011         /* Delay all CC updates until after the store above.  Note that
7012            C is the result of the test, Z is unchanged, and the others
7013            are all undefined.  */
7014         switch (s->cc_op) {
7015         case CC_OP_MULB ... CC_OP_MULQ:
7016         case CC_OP_ADDB ... CC_OP_ADDQ:
7017         case CC_OP_ADCB ... CC_OP_ADCQ:
7018         case CC_OP_SUBB ... CC_OP_SUBQ:
7019         case CC_OP_SBBB ... CC_OP_SBBQ:
7020         case CC_OP_LOGICB ... CC_OP_LOGICQ:
7021         case CC_OP_INCB ... CC_OP_INCQ:
7022         case CC_OP_DECB ... CC_OP_DECQ:
7023         case CC_OP_SHLB ... CC_OP_SHLQ:
7024         case CC_OP_SARB ... CC_OP_SARQ:
7025         case CC_OP_BMILGB ... CC_OP_BMILGQ:
7026             /* Z was going to be computed from the non-zero status of CC_DST.
7027                We can get that same Z value (and the new C value) by leaving
7028                CC_DST alone, setting CC_SRC, and using a CC_OP_SAR of the
7029                same width.  */
7030             tcg_gen_mov_tl(cpu_cc_src, s->tmp4);
7031             set_cc_op(s, ((s->cc_op - CC_OP_MULB) & 3) + CC_OP_SARB);
7032             break;
7033         default:
7034             /* Otherwise, generate EFLAGS and replace the C bit.  */
7035             gen_compute_eflags(s);
7036             tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, s->tmp4,
7037                                ctz32(CC_C), 1);
7038             break;
7039         }
7040         break;
7041     case 0x1bc: /* bsf / tzcnt */
7042     case 0x1bd: /* bsr / lzcnt */
7043         ot = dflag;
7044         modrm = x86_ldub_code(env, s);
7045         reg = ((modrm >> 3) & 7) | REX_R(s);
7046         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
7047         gen_extu(ot, s->T0);
7048 
7049         /* Note that lzcnt and tzcnt are in different extensions.  */
7050         if ((prefixes & PREFIX_REPZ)
7051             && (b & 1
7052                 ? s->cpuid_ext3_features & CPUID_EXT3_ABM
7053                 : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
7054             int size = 8 << ot;
7055             /* For lzcnt/tzcnt, C bit is defined related to the input. */
7056             tcg_gen_mov_tl(cpu_cc_src, s->T0);
7057             if (b & 1) {
7058                 /* For lzcnt, reduce the target_ulong result by the
7059                    number of zeros that we expect to find at the top.  */
7060                 tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
7061                 tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
7062             } else {
7063                 /* For tzcnt, a zero input must return the operand size.  */
7064                 tcg_gen_ctzi_tl(s->T0, s->T0, size);
7065             }
7066             /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
7067             gen_op_update1_cc(s);
7068             set_cc_op(s, CC_OP_BMILGB + ot);
7069         } else {
7070             /* For bsr/bsf, only the Z bit is defined and it is related
7071                to the input and not the result.  */
7072             tcg_gen_mov_tl(cpu_cc_dst, s->T0);
7073             set_cc_op(s, CC_OP_LOGICB + ot);
7074 
7075             /* ??? The manual says that the output is undefined when the
7076                input is zero, but real hardware leaves it unchanged, and
7077                real programs appear to depend on that.  Accomplish this
7078                by passing the output as the value to return upon zero.  */
7079             if (b & 1) {
7080                 /* For bsr, return the bit index of the first 1 bit,
7081                    not the count of leading zeros.  */
7082                 tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
7083                 tcg_gen_clz_tl(s->T0, s->T0, s->T1);
7084                 tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
7085             } else {
7086                 tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
7087             }
7088         }
7089         gen_op_mov_reg_v(s, ot, reg, s->T0);
7090         break;
7091         /************************/
7092         /* bcd */
7093     case 0x27: /* daa */
7094         if (CODE64(s))
7095             goto illegal_op;
7096         gen_update_cc_op(s);
7097         gen_helper_daa(cpu_env);
7098         set_cc_op(s, CC_OP_EFLAGS);
7099         break;
7100     case 0x2f: /* das */
7101         if (CODE64(s))
7102             goto illegal_op;
7103         gen_update_cc_op(s);
7104         gen_helper_das(cpu_env);
7105         set_cc_op(s, CC_OP_EFLAGS);
7106         break;
7107     case 0x37: /* aaa */
7108         if (CODE64(s))
7109             goto illegal_op;
7110         gen_update_cc_op(s);
7111         gen_helper_aaa(cpu_env);
7112         set_cc_op(s, CC_OP_EFLAGS);
7113         break;
7114     case 0x3f: /* aas */
7115         if (CODE64(s))
7116             goto illegal_op;
7117         gen_update_cc_op(s);
7118         gen_helper_aas(cpu_env);
7119         set_cc_op(s, CC_OP_EFLAGS);
7120         break;
7121     case 0xd4: /* aam */
7122         if (CODE64(s))
7123             goto illegal_op;
7124         val = x86_ldub_code(env, s);
7125         if (val == 0) {
7126             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
7127         } else {
7128             gen_helper_aam(cpu_env, tcg_const_i32(val));
7129             set_cc_op(s, CC_OP_LOGICB);
7130         }
7131         break;
7132     case 0xd5: /* aad */
7133         if (CODE64(s))
7134             goto illegal_op;
7135         val = x86_ldub_code(env, s);
7136         gen_helper_aad(cpu_env, tcg_const_i32(val));
7137         set_cc_op(s, CC_OP_LOGICB);
7138         break;
7139         /************************/
7140         /* misc */
7141     case 0x90: /* nop */
7142         /* XXX: correct lock test for all insn */
7143         if (prefixes & PREFIX_LOCK) {
7144             goto illegal_op;
7145         }
7146         /* If REX_B is set, then this is xchg eax, r8d, not a nop.  */
7147         if (REX_B(s)) {
7148             goto do_xchg_reg_eax;
7149         }
7150         if (prefixes & PREFIX_REPZ) {
7151             gen_update_cc_op(s);
7152             gen_jmp_im(s, pc_start - s->cs_base);
7153             gen_helper_pause(cpu_env, tcg_const_i32(s->pc - pc_start));
7154             s->base.is_jmp = DISAS_NORETURN;
7155         }
7156         break;
7157     case 0x9b: /* fwait */
7158         if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) ==
7159             (HF_MP_MASK | HF_TS_MASK)) {
7160             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
7161         } else {
7162             gen_helper_fwait(cpu_env);
7163         }
7164         break;
7165     case 0xcc: /* int3 */
7166         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
7167         break;
7168     case 0xcd: /* int N */
7169         val = x86_ldub_code(env, s);
7170         if (check_vm86_iopl(s)) {
7171             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
7172         }
7173         break;
7174     case 0xce: /* into */
7175         if (CODE64(s))
7176             goto illegal_op;
7177         gen_update_cc_op(s);
7178         gen_jmp_im(s, pc_start - s->cs_base);
7179         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
7180         break;
7181 #ifdef WANT_ICEBP
7182     case 0xf1: /* icebp (undocumented, exits to external debugger) */
7183         gen_svm_check_intercept(s, SVM_EXIT_ICEBP);
7184         gen_debug(s);
7185         break;
7186 #endif
7187     case 0xfa: /* cli */
7188         if (check_iopl(s)) {
7189             gen_helper_cli(cpu_env);
7190         }
7191         break;
7192     case 0xfb: /* sti */
7193         if (check_iopl(s)) {
7194             gen_helper_sti(cpu_env);
7195             /* interruptions are enabled only the first insn after sti */
7196             gen_jmp_im(s, s->pc - s->cs_base);
7197             gen_eob_inhibit_irq(s, true);
7198         }
7199         break;
7200     case 0x62: /* bound */
7201         if (CODE64(s))
7202             goto illegal_op;
7203         ot = dflag;
7204         modrm = x86_ldub_code(env, s);
7205         reg = (modrm >> 3) & 7;
7206         mod = (modrm >> 6) & 3;
7207         if (mod == 3)
7208             goto illegal_op;
7209         gen_op_mov_v_reg(s, ot, s->T0, reg);
7210         gen_lea_modrm(env, s, modrm);
7211         tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7212         if (ot == MO_16) {
7213             gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32);
7214         } else {
7215             gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32);
7216         }
7217         break;
7218     case 0x1c8 ... 0x1cf: /* bswap reg */
7219         reg = (b & 7) | REX_B(s);
7220 #ifdef TARGET_X86_64
7221         if (dflag == MO_64) {
7222             tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]);
7223             break;
7224         }
7225 #endif
7226         tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ);
7227         break;
7228     case 0xd6: /* salc */
7229         if (CODE64(s))
7230             goto illegal_op;
7231         gen_compute_eflags_c(s, s->T0);
7232         tcg_gen_neg_tl(s->T0, s->T0);
7233         gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0);
7234         break;
7235     case 0xe0: /* loopnz */
7236     case 0xe1: /* loopz */
7237     case 0xe2: /* loop */
7238     case 0xe3: /* jecxz */
7239         {
7240             TCGLabel *l1, *l2, *l3;
7241 
7242             tval = (int8_t)insn_get(env, s, MO_8);
7243             next_eip = s->pc - s->cs_base;
7244             tval += next_eip;
7245             if (dflag == MO_16) {
7246                 tval &= 0xffff;
7247             }
7248 
7249             l1 = gen_new_label();
7250             l2 = gen_new_label();
7251             l3 = gen_new_label();
7252             gen_update_cc_op(s);
7253             b &= 3;
7254             switch(b) {
7255             case 0: /* loopnz */
7256             case 1: /* loopz */
7257                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7258                 gen_op_jz_ecx(s, s->aflag, l3);
7259                 gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1);
7260                 break;
7261             case 2: /* loop */
7262                 gen_op_add_reg_im(s, s->aflag, R_ECX, -1);
7263                 gen_op_jnz_ecx(s, s->aflag, l1);
7264                 break;
7265             default:
7266             case 3: /* jcxz */
7267                 gen_op_jz_ecx(s, s->aflag, l1);
7268                 break;
7269             }
7270 
7271             gen_set_label(l3);
7272             gen_jmp_im(s, next_eip);
7273             tcg_gen_br(l2);
7274 
7275             gen_set_label(l1);
7276             gen_jmp_im(s, tval);
7277             gen_set_label(l2);
7278             gen_eob(s);
7279         }
7280         break;
7281     case 0x130: /* wrmsr */
7282     case 0x132: /* rdmsr */
7283         if (check_cpl0(s)) {
7284             gen_update_cc_op(s);
7285             gen_jmp_im(s, pc_start - s->cs_base);
7286             if (b & 2) {
7287                 gen_helper_rdmsr(cpu_env);
7288             } else {
7289                 gen_helper_wrmsr(cpu_env);
7290                 gen_jmp_im(s, s->pc - s->cs_base);
7291                 gen_eob(s);
7292             }
7293         }
7294         break;
7295     case 0x131: /* rdtsc */
7296         gen_update_cc_op(s);
7297         gen_jmp_im(s, pc_start - s->cs_base);
7298         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7299             gen_io_start();
7300         }
7301         gen_helper_rdtsc(cpu_env);
7302         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7303             gen_jmp(s, s->pc - s->cs_base);
7304         }
7305         break;
7306     case 0x133: /* rdpmc */
7307         gen_update_cc_op(s);
7308         gen_jmp_im(s, pc_start - s->cs_base);
7309         gen_helper_rdpmc(cpu_env);
7310         s->base.is_jmp = DISAS_NORETURN;
7311         break;
7312     case 0x134: /* sysenter */
7313         /* For Intel SYSENTER is valid on 64-bit */
7314         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7315             goto illegal_op;
7316         if (!PE(s)) {
7317             gen_exception_gpf(s);
7318         } else {
7319             gen_helper_sysenter(cpu_env);
7320             gen_eob(s);
7321         }
7322         break;
7323     case 0x135: /* sysexit */
7324         /* For Intel SYSEXIT is valid on 64-bit */
7325         if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
7326             goto illegal_op;
7327         if (!PE(s)) {
7328             gen_exception_gpf(s);
7329         } else {
7330             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
7331             gen_eob(s);
7332         }
7333         break;
7334 #ifdef TARGET_X86_64
7335     case 0x105: /* syscall */
7336         /* XXX: is it usable in real mode ? */
7337         gen_update_cc_op(s);
7338         gen_jmp_im(s, pc_start - s->cs_base);
7339         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
7340         /* TF handling for the syscall insn is different. The TF bit is  checked
7341            after the syscall insn completes. This allows #DB to not be
7342            generated after one has entered CPL0 if TF is set in FMASK.  */
7343         gen_eob_worker(s, false, true);
7344         break;
7345     case 0x107: /* sysret */
7346         if (!PE(s)) {
7347             gen_exception_gpf(s);
7348         } else {
7349             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
7350             /* condition codes are modified only in long mode */
7351             if (LMA(s)) {
7352                 set_cc_op(s, CC_OP_EFLAGS);
7353             }
7354             /* TF handling for the sysret insn is different. The TF bit is
7355                checked after the sysret insn completes. This allows #DB to be
7356                generated "as if" the syscall insn in userspace has just
7357                completed.  */
7358             gen_eob_worker(s, false, true);
7359         }
7360         break;
7361 #endif
7362     case 0x1a2: /* cpuid */
7363         gen_update_cc_op(s);
7364         gen_jmp_im(s, pc_start - s->cs_base);
7365         gen_helper_cpuid(cpu_env);
7366         break;
7367     case 0xf4: /* hlt */
7368         if (check_cpl0(s)) {
7369             gen_update_cc_op(s);
7370             gen_jmp_im(s, pc_start - s->cs_base);
7371             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
7372             s->base.is_jmp = DISAS_NORETURN;
7373         }
7374         break;
7375     case 0x100:
7376         modrm = x86_ldub_code(env, s);
7377         mod = (modrm >> 6) & 3;
7378         op = (modrm >> 3) & 7;
7379         switch(op) {
7380         case 0: /* sldt */
7381             if (!PE(s) || VM86(s))
7382                 goto illegal_op;
7383             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7384                 break;
7385             }
7386             gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ);
7387             tcg_gen_ld32u_tl(s->T0, cpu_env,
7388                              offsetof(CPUX86State, ldt.selector));
7389             ot = mod == 3 ? dflag : MO_16;
7390             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7391             break;
7392         case 2: /* lldt */
7393             if (!PE(s) || VM86(s))
7394                 goto illegal_op;
7395             if (check_cpl0(s)) {
7396                 gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE);
7397                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7398                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7399                 gen_helper_lldt(cpu_env, s->tmp2_i32);
7400             }
7401             break;
7402         case 1: /* str */
7403             if (!PE(s) || VM86(s))
7404                 goto illegal_op;
7405             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7406                 break;
7407             }
7408             gen_svm_check_intercept(s, SVM_EXIT_TR_READ);
7409             tcg_gen_ld32u_tl(s->T0, cpu_env,
7410                              offsetof(CPUX86State, tr.selector));
7411             ot = mod == 3 ? dflag : MO_16;
7412             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7413             break;
7414         case 3: /* ltr */
7415             if (!PE(s) || VM86(s))
7416                 goto illegal_op;
7417             if (check_cpl0(s)) {
7418                 gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE);
7419                 gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7420                 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
7421                 gen_helper_ltr(cpu_env, s->tmp2_i32);
7422             }
7423             break;
7424         case 4: /* verr */
7425         case 5: /* verw */
7426             if (!PE(s) || VM86(s))
7427                 goto illegal_op;
7428             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7429             gen_update_cc_op(s);
7430             if (op == 4) {
7431                 gen_helper_verr(cpu_env, s->T0);
7432             } else {
7433                 gen_helper_verw(cpu_env, s->T0);
7434             }
7435             set_cc_op(s, CC_OP_EFLAGS);
7436             break;
7437         default:
7438             goto unknown_op;
7439         }
7440         break;
7441 
7442     case 0x101:
7443         modrm = x86_ldub_code(env, s);
7444         switch (modrm) {
7445         CASE_MODRM_MEM_OP(0): /* sgdt */
7446             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7447                 break;
7448             }
7449             gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ);
7450             gen_lea_modrm(env, s, modrm);
7451             tcg_gen_ld32u_tl(s->T0,
7452                              cpu_env, offsetof(CPUX86State, gdt.limit));
7453             gen_op_st_v(s, MO_16, s->T0, s->A0);
7454             gen_add_A0_im(s, 2);
7455             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7456             if (dflag == MO_16) {
7457                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7458             }
7459             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7460             break;
7461 
7462         case 0xc8: /* monitor */
7463             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7464                 goto illegal_op;
7465             }
7466             gen_update_cc_op(s);
7467             gen_jmp_im(s, pc_start - s->cs_base);
7468             tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7469             gen_extu(s->aflag, s->A0);
7470             gen_add_A0_ds_seg(s);
7471             gen_helper_monitor(cpu_env, s->A0);
7472             break;
7473 
7474         case 0xc9: /* mwait */
7475             if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) {
7476                 goto illegal_op;
7477             }
7478             gen_update_cc_op(s);
7479             gen_jmp_im(s, pc_start - s->cs_base);
7480             gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
7481             s->base.is_jmp = DISAS_NORETURN;
7482             break;
7483 
7484         case 0xca: /* clac */
7485             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7486                 || CPL(s) != 0) {
7487                 goto illegal_op;
7488             }
7489             gen_helper_clac(cpu_env);
7490             gen_jmp_im(s, s->pc - s->cs_base);
7491             gen_eob(s);
7492             break;
7493 
7494         case 0xcb: /* stac */
7495             if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)
7496                 || CPL(s) != 0) {
7497                 goto illegal_op;
7498             }
7499             gen_helper_stac(cpu_env);
7500             gen_jmp_im(s, s->pc - s->cs_base);
7501             gen_eob(s);
7502             break;
7503 
7504         CASE_MODRM_MEM_OP(1): /* sidt */
7505             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7506                 break;
7507             }
7508             gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ);
7509             gen_lea_modrm(env, s, modrm);
7510             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit));
7511             gen_op_st_v(s, MO_16, s->T0, s->A0);
7512             gen_add_A0_im(s, 2);
7513             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7514             if (dflag == MO_16) {
7515                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7516             }
7517             gen_op_st_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7518             break;
7519 
7520         case 0xd0: /* xgetbv */
7521             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7522                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7523                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7524                 goto illegal_op;
7525             }
7526             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7527             gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32);
7528             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7529             break;
7530 
7531         case 0xd1: /* xsetbv */
7532             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
7533                 || (s->prefix & (PREFIX_LOCK | PREFIX_DATA
7534                                  | PREFIX_REPZ | PREFIX_REPNZ))) {
7535                 goto illegal_op;
7536             }
7537             if (!check_cpl0(s)) {
7538                 break;
7539             }
7540             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7541                                   cpu_regs[R_EDX]);
7542             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7543             gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64);
7544             /* End TB because translation flags may change.  */
7545             gen_jmp_im(s, s->pc - s->cs_base);
7546             gen_eob(s);
7547             break;
7548 
7549         case 0xd8: /* VMRUN */
7550             if (!SVME(s) || !PE(s)) {
7551                 goto illegal_op;
7552             }
7553             if (!check_cpl0(s)) {
7554                 break;
7555             }
7556             gen_update_cc_op(s);
7557             gen_jmp_im(s, pc_start - s->cs_base);
7558             gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1),
7559                              tcg_const_i32(s->pc - pc_start));
7560             tcg_gen_exit_tb(NULL, 0);
7561             s->base.is_jmp = DISAS_NORETURN;
7562             break;
7563 
7564         case 0xd9: /* VMMCALL */
7565             if (!SVME(s)) {
7566                 goto illegal_op;
7567             }
7568             gen_update_cc_op(s);
7569             gen_jmp_im(s, pc_start - s->cs_base);
7570             gen_helper_vmmcall(cpu_env);
7571             break;
7572 
7573         case 0xda: /* VMLOAD */
7574             if (!SVME(s) || !PE(s)) {
7575                 goto illegal_op;
7576             }
7577             if (!check_cpl0(s)) {
7578                 break;
7579             }
7580             gen_update_cc_op(s);
7581             gen_jmp_im(s, pc_start - s->cs_base);
7582             gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1));
7583             break;
7584 
7585         case 0xdb: /* VMSAVE */
7586             if (!SVME(s) || !PE(s)) {
7587                 goto illegal_op;
7588             }
7589             if (!check_cpl0(s)) {
7590                 break;
7591             }
7592             gen_update_cc_op(s);
7593             gen_jmp_im(s, pc_start - s->cs_base);
7594             gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1));
7595             break;
7596 
7597         case 0xdc: /* STGI */
7598             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7599                 || !PE(s)) {
7600                 goto illegal_op;
7601             }
7602             if (!check_cpl0(s)) {
7603                 break;
7604             }
7605             gen_update_cc_op(s);
7606             gen_helper_stgi(cpu_env);
7607             gen_jmp_im(s, s->pc - s->cs_base);
7608             gen_eob(s);
7609             break;
7610 
7611         case 0xdd: /* CLGI */
7612             if (!SVME(s) || !PE(s)) {
7613                 goto illegal_op;
7614             }
7615             if (!check_cpl0(s)) {
7616                 break;
7617             }
7618             gen_update_cc_op(s);
7619             gen_jmp_im(s, pc_start - s->cs_base);
7620             gen_helper_clgi(cpu_env);
7621             break;
7622 
7623         case 0xde: /* SKINIT */
7624             if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT))
7625                 || !PE(s)) {
7626                 goto illegal_op;
7627             }
7628             gen_svm_check_intercept(s, SVM_EXIT_SKINIT);
7629             /* If not intercepted, not implemented -- raise #UD. */
7630             goto illegal_op;
7631 
7632         case 0xdf: /* INVLPGA */
7633             if (!SVME(s) || !PE(s)) {
7634                 goto illegal_op;
7635             }
7636             if (!check_cpl0(s)) {
7637                 break;
7638             }
7639             gen_svm_check_intercept(s, SVM_EXIT_INVLPGA);
7640             if (s->aflag == MO_64) {
7641                 tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]);
7642             } else {
7643                 tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]);
7644             }
7645             gen_helper_flush_page(cpu_env, s->A0);
7646             gen_jmp_im(s, s->pc - s->cs_base);
7647             gen_eob(s);
7648             break;
7649 
7650         CASE_MODRM_MEM_OP(2): /* lgdt */
7651             if (!check_cpl0(s)) {
7652                 break;
7653             }
7654             gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE);
7655             gen_lea_modrm(env, s, modrm);
7656             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7657             gen_add_A0_im(s, 2);
7658             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7659             if (dflag == MO_16) {
7660                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7661             }
7662             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base));
7663             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit));
7664             break;
7665 
7666         CASE_MODRM_MEM_OP(3): /* lidt */
7667             if (!check_cpl0(s)) {
7668                 break;
7669             }
7670             gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE);
7671             gen_lea_modrm(env, s, modrm);
7672             gen_op_ld_v(s, MO_16, s->T1, s->A0);
7673             gen_add_A0_im(s, 2);
7674             gen_op_ld_v(s, CODE64(s) + MO_32, s->T0, s->A0);
7675             if (dflag == MO_16) {
7676                 tcg_gen_andi_tl(s->T0, s->T0, 0xffffff);
7677             }
7678             tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base));
7679             tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, idt.limit));
7680             break;
7681 
7682         CASE_MODRM_OP(4): /* smsw */
7683             if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) {
7684                 break;
7685             }
7686             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0);
7687             tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0]));
7688             /*
7689              * In 32-bit mode, the higher 16 bits of the destination
7690              * register are undefined.  In practice CR0[31:0] is stored
7691              * just like in 64-bit mode.
7692              */
7693             mod = (modrm >> 6) & 3;
7694             ot = (mod != 3 ? MO_16 : s->dflag);
7695             gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
7696             break;
7697         case 0xee: /* rdpkru */
7698             if (prefixes & PREFIX_LOCK) {
7699                 goto illegal_op;
7700             }
7701             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7702             gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32);
7703             tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64);
7704             break;
7705         case 0xef: /* wrpkru */
7706             if (prefixes & PREFIX_LOCK) {
7707                 goto illegal_op;
7708             }
7709             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
7710                                   cpu_regs[R_EDX]);
7711             tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]);
7712             gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64);
7713             break;
7714 
7715         CASE_MODRM_OP(6): /* lmsw */
7716             if (!check_cpl0(s)) {
7717                 break;
7718             }
7719             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
7720             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7721             /*
7722              * Only the 4 lower bits of CR0 are modified.
7723              * PE cannot be set to zero if already set to one.
7724              */
7725             tcg_gen_ld_tl(s->T1, cpu_env, offsetof(CPUX86State, cr[0]));
7726             tcg_gen_andi_tl(s->T0, s->T0, 0xf);
7727             tcg_gen_andi_tl(s->T1, s->T1, ~0xe);
7728             tcg_gen_or_tl(s->T0, s->T0, s->T1);
7729             gen_helper_write_crN(cpu_env, tcg_constant_i32(0), s->T0);
7730             gen_jmp_im(s, s->pc - s->cs_base);
7731             gen_eob(s);
7732             break;
7733 
7734         CASE_MODRM_MEM_OP(7): /* invlpg */
7735             if (!check_cpl0(s)) {
7736                 break;
7737             }
7738             gen_svm_check_intercept(s, SVM_EXIT_INVLPG);
7739             gen_lea_modrm(env, s, modrm);
7740             gen_helper_flush_page(cpu_env, s->A0);
7741             gen_jmp_im(s, s->pc - s->cs_base);
7742             gen_eob(s);
7743             break;
7744 
7745         case 0xf8: /* swapgs */
7746 #ifdef TARGET_X86_64
7747             if (CODE64(s)) {
7748                 if (check_cpl0(s)) {
7749                     tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]);
7750                     tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env,
7751                                   offsetof(CPUX86State, kernelgsbase));
7752                     tcg_gen_st_tl(s->T0, cpu_env,
7753                                   offsetof(CPUX86State, kernelgsbase));
7754                 }
7755                 break;
7756             }
7757 #endif
7758             goto illegal_op;
7759 
7760         case 0xf9: /* rdtscp */
7761             if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP)) {
7762                 goto illegal_op;
7763             }
7764             gen_update_cc_op(s);
7765             gen_jmp_im(s, pc_start - s->cs_base);
7766             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7767                 gen_io_start();
7768             }
7769             gen_helper_rdtscp(cpu_env);
7770             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
7771                 gen_jmp(s, s->pc - s->cs_base);
7772             }
7773             break;
7774 
7775         default:
7776             goto unknown_op;
7777         }
7778         break;
7779 
7780     case 0x108: /* invd */
7781     case 0x109: /* wbinvd */
             /*
              * Gated by check_cpl0(); apart from the SVM intercept check no
              * code is emitted for either instruction.
              */
7782         if (check_cpl0(s)) {
                 /*
                  * 0x108 and 0x109 both have bit 1 clear, so the exit code is
                  * selected by comparing the opcode value rather than masking it.
                  */
7783             gen_svm_check_intercept(s, b == 0x108 ? SVM_EXIT_INVD : SVM_EXIT_WBINVD);
7784             /* nothing to do */
7785         }
7786         break;
7787     case 0x63: /* arpl or movslS (x86_64) */
7788 #ifdef TARGET_X86_64
7789         if (CODE64(s)) {
                 /* 64-bit code: opcode 0x63 is a move of a 32-bit source. */
7790             int d_ot;
7791             /* d_ot is the size of destination */
7792             d_ot = dflag;
7793
7794             modrm = x86_ldub_code(env, s);
7795             reg = ((modrm >> 3) & 7) | REX_R(s);
7796             mod = (modrm >> 6) & 3;
7797             rm = (modrm & 7) | REX_B(s);
7798
7799             if (mod == 3) {
                     /* Register source: read it as 32 bits. */
7800                 gen_op_mov_v_reg(s, MO_32, s->T0, rm);
7801                 /* sign extend */
7802                 if (d_ot == MO_64) {
7803                     tcg_gen_ext32s_tl(s->T0, s->T0);
7804                 }
7805                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7806             } else {
                     /* Memory source: the load itself is a signed 32-bit load. */
7807                 gen_lea_modrm(env, s, modrm);
7808                 gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0);
7809                 gen_op_mov_reg_v(s, d_ot, reg, s->T0);
7810             }
7811         } else
7812 #endif
7813         {
                 /*
                  * arpl: compare the low two bits (RPL) of the 16-bit destination
                  * operand with those of the source register.  If the
                  * destination's are smaller they are replaced by the source's
                  * and Z is set; otherwise Z is cleared.
                  *
                  * t0 = destination operand, t1 = source RPL,
                  * t2 = value merged into the Z flag (0 or CC_Z),
                  * a0 = saved copy of the effective address (memory form only).
                  */
7814             TCGLabel *label1;
7815             TCGv t0, t1, t2, a0;
7816
                 /* Rejected as an illegal opcode unless PE(s) && !VM86(s). */
7817             if (!PE(s) || VM86(s))
7818                 goto illegal_op;
7819             t0 = tcg_temp_local_new();
7820             t1 = tcg_temp_local_new();
7821             t2 = tcg_temp_local_new();
7822             ot = MO_16;
7823             modrm = x86_ldub_code(env, s);
7824             reg = (modrm >> 3) & 7;
7825             mod = (modrm >> 6) & 3;
7826             rm = modrm & 7;
7827             if (mod != 3) {
7828                 gen_lea_modrm(env, s, modrm);
7829                 gen_op_ld_v(s, ot, t0, s->A0);
                     /* Keep the address in its own temp for the store after label1. */
7830                 a0 = tcg_temp_local_new();
7831                 tcg_gen_mov_tl(a0, s->A0);
7832             } else {
7833                 gen_op_mov_v_reg(s, ot, t0, rm);
7834                 a0 = NULL;
7835             }
7836             gen_op_mov_v_reg(s, ot, t1, reg);
                 /* tmp0 = destination RPL, t1 = source RPL. */
7837             tcg_gen_andi_tl(s->tmp0, t0, 3);
7838             tcg_gen_andi_tl(t1, t1, 3);
                 /* Default outcome: no adjustment, Z contribution is 0. */
7839             tcg_gen_movi_tl(t2, 0);
7840             label1 = gen_new_label();
                 /* Skip the adjustment when destination RPL >= source RPL. */
7841             tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1);
7842             tcg_gen_andi_tl(t0, t0, ~3);
7843             tcg_gen_or_tl(t0, t0, t1);
7844             tcg_gen_movi_tl(t2, CC_Z);
7845             gen_set_label(label1);
                 /* The operand is written back on both paths, adjusted or not. */
7846             if (mod != 3) {
7847                 gen_op_st_v(s, ot, t0, a0);
7848                 tcg_temp_free(a0);
7849            } else {
7850                 gen_op_mov_reg_v(s, ot, rm, t0);
7851             }
                 /* Replace only the Z bit of the computed flags with t2. */
7852             gen_compute_eflags(s);
7853             tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z);
7854             tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2);
7855             tcg_temp_free(t0);
7856             tcg_temp_free(t1);
7857             tcg_temp_free(t2);
7858         }
7859         break;
7860     case 0x102: /* lar */
7861     case 0x103: /* lsl */
7862         {
                 /*
                  * Both instructions take a selector operand, hand it to a
                  * helper, and write the helper's result to the destination
                  * register only when CC_Z is set in cpu_cc_src afterwards.
                  */
7863             TCGLabel *label1;
7864             TCGv t0;
                 /* Rejected as an illegal opcode unless PE(s) && !VM86(s). */
7865             if (!PE(s) || VM86(s))
7866                 goto illegal_op;
                 /* Destination is written as 16 or 32 bits; never wider. */
7867             ot = dflag != MO_16 ? MO_32 : MO_16;
7868             modrm = x86_ldub_code(env, s);
7869             reg = ((modrm >> 3) & 7) | REX_R(s);
                 /* The selector operand is fetched with MO_16 into T0. */
7870             gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
7871             t0 = tcg_temp_local_new();
7872             gen_update_cc_op(s);
7873             if (b == 0x102) {
7874                 gen_helper_lar(t0, cpu_env, s->T0);
7875             } else {
7876                 gen_helper_lsl(t0, cpu_env, s->T0);
7877             }
                 /*
                  * NOTE(review): the helper presumably leaves its success
                  * indication in the Z bit of cpu_cc_src -- confirm in the
                  * helper.  With Z clear, branch over the register write so the
                  * destination is left unchanged.
                  */
7878             tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
7879             label1 = gen_new_label();
7880             tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
7881             gen_op_mov_reg_v(s, ot, reg, t0);
7882             gen_set_label(label1);
7883             set_cc_op(s, CC_OP_EFLAGS);
7884             tcg_temp_free(t0);
7885         }
7886         break;
7887     case 0x118: /* prefetch hints (/0../3), otherwise multi-byte nop */
7888         modrm = x86_ldub_code(env, s);
7889         mod = (modrm >> 6) & 3;
7890         op = (modrm >> 3) & 7;
7891         switch(op) {
7892         case 0: /* prefetchnta */
7893         case 1: /* prefetcht0 */
7894         case 2: /* prefetcht1 */
7895         case 3: /* prefetcht2 */
                 /* The prefetch encodings are rejected with a register operand. */
7896             if (mod == 3)
7897                 goto illegal_op;
                 /* Only the modrm operand bytes are consumed. */
7898             gen_nop_modrm(env, s, modrm);
7899             /* nothing more to do */
7900             break;
7901         default: /* nop (multi byte) */
7902             gen_nop_modrm(env, s, modrm);
7903             break;
7904         }
7905         break;
7906     case 0x11a:
7907         modrm = x86_ldub_code(env, s);
7908         if (s->flags & HF_MPX_EN_MASK) {
7909             mod = (modrm >> 6) & 3;
7910             reg = ((modrm >> 3) & 7) | REX_R(s);
7911             if (prefixes & PREFIX_REPZ) {
7912                 /* bndcl */
7913                 if (reg >= 4
7914                     || (prefixes & PREFIX_LOCK)
7915                     || s->aflag == MO_16) {
7916                     goto illegal_op;
7917                 }
7918                 gen_bndck(env, s, modrm, TCG_COND_LTU, cpu_bndl[reg]);
7919             } else if (prefixes & PREFIX_REPNZ) {
7920                 /* bndcu */
7921                 if (reg >= 4
7922                     || (prefixes & PREFIX_LOCK)
7923                     || s->aflag == MO_16) {
7924                     goto illegal_op;
7925                 }
7926                 TCGv_i64 notu = tcg_temp_new_i64();
7927                 tcg_gen_not_i64(notu, cpu_bndu[reg]);
7928                 gen_bndck(env, s, modrm, TCG_COND_GTU, notu);
7929                 tcg_temp_free_i64(notu);
7930             } else if (prefixes & PREFIX_DATA) {
7931                 /* bndmov -- from reg/mem */
7932                 if (reg >= 4 || s->aflag == MO_16) {
7933                     goto illegal_op;
7934                 }
7935                 if (mod == 3) {
7936                     int reg2 = (modrm & 7) | REX_B(s);
7937                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
7938                         goto illegal_op;
7939                     }
7940                     if (s->flags & HF_MPX_IU_MASK) {
7941                         tcg_gen_mov_i64(cpu_bndl[reg], cpu_bndl[reg2]);
7942                         tcg_gen_mov_i64(cpu_bndu[reg], cpu_bndu[reg2]);
7943                     }
7944                 } else {
7945                     gen_lea_modrm(env, s, modrm);
7946                     if (CODE64(s)) {
7947                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7948                                             s->mem_index, MO_LEUQ);
7949                         tcg_gen_addi_tl(s->A0, s->A0, 8);
7950                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7951                                             s->mem_index, MO_LEUQ);
7952                     } else {
7953                         tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0,
7954                                             s->mem_index, MO_LEUL);
7955                         tcg_gen_addi_tl(s->A0, s->A0, 4);
7956                         tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0,
7957                                             s->mem_index, MO_LEUL);
7958                     }
7959                     /* bnd registers are now in-use */
7960                     gen_set_hflag(s, HF_MPX_IU_MASK);
7961                 }
7962             } else if (mod != 3) {
7963                 /* bndldx */
7964                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
7965                 if (reg >= 4
7966                     || (prefixes & PREFIX_LOCK)
7967                     || s->aflag == MO_16
7968                     || a.base < -1) {
7969                     goto illegal_op;
7970                 }
7971                 if (a.base >= 0) {
7972                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
7973                 } else {
7974                     tcg_gen_movi_tl(s->A0, 0);
7975                 }
7976                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
7977                 if (a.index >= 0) {
7978                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
7979                 } else {
7980                     tcg_gen_movi_tl(s->T0, 0);
7981                 }
7982                 if (CODE64(s)) {
7983                     gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0);
7984                     tcg_gen_ld_i64(cpu_bndu[reg], cpu_env,
7985                                    offsetof(CPUX86State, mmx_t0.MMX_Q(0)));
7986                 } else {
7987                     gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0);
7988                     tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]);
7989                     tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32);
7990                 }
7991                 gen_set_hflag(s, HF_MPX_IU_MASK);
7992             }
7993         }
7994         gen_nop_modrm(env, s, modrm);
7995         break;
7996     case 0x11b:
7997         modrm = x86_ldub_code(env, s);
7998         if (s->flags & HF_MPX_EN_MASK) {
7999             mod = (modrm >> 6) & 3;
8000             reg = ((modrm >> 3) & 7) | REX_R(s);
8001             if (mod != 3 && (prefixes & PREFIX_REPZ)) {
8002                 /* bndmk */
8003                 if (reg >= 4
8004                     || (prefixes & PREFIX_LOCK)
8005                     || s->aflag == MO_16) {
8006                     goto illegal_op;
8007                 }
8008                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8009                 if (a.base >= 0) {
8010                     tcg_gen_extu_tl_i64(cpu_bndl[reg], cpu_regs[a.base]);
8011                     if (!CODE64(s)) {
8012                         tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndl[reg]);
8013                     }
8014                 } else if (a.base == -1) {
8015                     /* no base register has lower bound of 0 */
8016                     tcg_gen_movi_i64(cpu_bndl[reg], 0);
8017                 } else {
8018                     /* rip-relative generates #ud */
8019                     goto illegal_op;
8020                 }
8021                 tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a));
8022                 if (!CODE64(s)) {
8023                     tcg_gen_ext32u_tl(s->A0, s->A0);
8024                 }
8025                 tcg_gen_extu_tl_i64(cpu_bndu[reg], s->A0);
8026                 /* bnd registers are now in-use */
8027                 gen_set_hflag(s, HF_MPX_IU_MASK);
8028                 break;
8029             } else if (prefixes & PREFIX_REPNZ) {
8030                 /* bndcn */
8031                 if (reg >= 4
8032                     || (prefixes & PREFIX_LOCK)
8033                     || s->aflag == MO_16) {
8034                     goto illegal_op;
8035                 }
8036                 gen_bndck(env, s, modrm, TCG_COND_GTU, cpu_bndu[reg]);
8037             } else if (prefixes & PREFIX_DATA) {
8038                 /* bndmov -- to reg/mem */
8039                 if (reg >= 4 || s->aflag == MO_16) {
8040                     goto illegal_op;
8041                 }
8042                 if (mod == 3) {
8043                     int reg2 = (modrm & 7) | REX_B(s);
8044                     if (reg2 >= 4 || (prefixes & PREFIX_LOCK)) {
8045                         goto illegal_op;
8046                     }
8047                     if (s->flags & HF_MPX_IU_MASK) {
8048                         tcg_gen_mov_i64(cpu_bndl[reg2], cpu_bndl[reg]);
8049                         tcg_gen_mov_i64(cpu_bndu[reg2], cpu_bndu[reg]);
8050                     }
8051                 } else {
8052                     gen_lea_modrm(env, s, modrm);
8053                     if (CODE64(s)) {
8054                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8055                                             s->mem_index, MO_LEUQ);
8056                         tcg_gen_addi_tl(s->A0, s->A0, 8);
8057                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8058                                             s->mem_index, MO_LEUQ);
8059                     } else {
8060                         tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0,
8061                                             s->mem_index, MO_LEUL);
8062                         tcg_gen_addi_tl(s->A0, s->A0, 4);
8063                         tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0,
8064                                             s->mem_index, MO_LEUL);
8065                     }
8066                 }
8067             } else if (mod != 3) {
8068                 /* bndstx */
8069                 AddressParts a = gen_lea_modrm_0(env, s, modrm);
8070                 if (reg >= 4
8071                     || (prefixes & PREFIX_LOCK)
8072                     || s->aflag == MO_16
8073                     || a.base < -1) {
8074                     goto illegal_op;
8075                 }
8076                 if (a.base >= 0) {
8077                     tcg_gen_addi_tl(s->A0, cpu_regs[a.base], a.disp);
8078                 } else {
8079                     tcg_gen_movi_tl(s->A0, 0);
8080                 }
8081                 gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override);
8082                 if (a.index >= 0) {
8083                     tcg_gen_mov_tl(s->T0, cpu_regs[a.index]);
8084                 } else {
8085                     tcg_gen_movi_tl(s->T0, 0);
8086                 }
8087                 if (CODE64(s)) {
8088                     gen_helper_bndstx64(cpu_env, s->A0, s->T0,
8089                                         cpu_bndl[reg], cpu_bndu[reg]);
8090                 } else {
8091                     gen_helper_bndstx32(cpu_env, s->A0, s->T0,
8092                                         cpu_bndl[reg], cpu_bndu[reg]);
8093                 }
8094             }
8095         }
8096         gen_nop_modrm(env, s, modrm);
8097         break;
8098     case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */
8099         modrm = x86_ldub_code(env, s);
8100         gen_nop_modrm(env, s, modrm);
8101         break;
8102 
8103     case 0x120: /* mov reg, crN */
8104     case 0x122: /* mov crN, reg */
8105         if (!check_cpl0(s)) {
8106             break;
8107         }
8108         modrm = x86_ldub_code(env, s);
8109         /*
8110          * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8111          * AMD documentation (24594.pdf) and testing of Intel 386 and 486
8112          * processors all show that the mod bits are assumed to be 1's,
8113          * regardless of actual values.
8114          */
8115         rm = (modrm & 7) | REX_B(s);
8116         reg = ((modrm >> 3) & 7) | REX_R(s);
8117         switch (reg) {
8118         case 0:
8119             if ((prefixes & PREFIX_LOCK) &&
8120                 (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
8121                 reg = 8;
8122             }
8123             break;
8124         case 2:
8125         case 3:
8126         case 4:
8127         case 8:
8128             break;
8129         default:
8130             goto unknown_op;
8131         }
8132         ot  = (CODE64(s) ? MO_64 : MO_32);
8133 
8134         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8135             gen_io_start();
8136         }
8137         if (b & 2) {
8138             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
8139             gen_op_mov_v_reg(s, ot, s->T0, rm);
8140             gen_helper_write_crN(cpu_env, tcg_constant_i32(reg), s->T0);
8141             gen_jmp_im(s, s->pc - s->cs_base);
8142             gen_eob(s);
8143         } else {
8144             gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
8145             gen_helper_read_crN(s->T0, cpu_env, tcg_constant_i32(reg));
8146             gen_op_mov_reg_v(s, ot, rm, s->T0);
8147             if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
8148                 gen_jmp(s, s->pc - s->cs_base);
8149             }
8150         }
8151         break;
8152 
8153     case 0x121: /* mov reg, drN */
8154     case 0x123: /* mov drN, reg */
8155         if (check_cpl0(s)) {
8156             modrm = x86_ldub_code(env, s);
8157             /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
8158              * AMD documentation (24594.pdf) and testing of
8159              * intel 386 and 486 processors all show that the mod bits
8160              * are assumed to be 1's, regardless of actual values.
8161              */
8162             rm = (modrm & 7) | REX_B(s);
8163             reg = ((modrm >> 3) & 7) | REX_R(s);
8164             if (CODE64(s))
8165                 ot = MO_64;
8166             else
8167                 ot = MO_32;
8168             if (reg >= 8) {
8169                 goto illegal_op;
8170             }
8171             if (b & 2) {
8172                 gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
8173                 gen_op_mov_v_reg(s, ot, s->T0, rm);
8174                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8175                 gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0);
8176                 gen_jmp_im(s, s->pc - s->cs_base);
8177                 gen_eob(s);
8178             } else {
8179                 gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
8180                 tcg_gen_movi_i32(s->tmp2_i32, reg);
8181                 gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32);
8182                 gen_op_mov_reg_v(s, ot, rm, s->T0);
8183             }
8184         }
8185         break;
8186     case 0x106: /* clts */
8187         if (check_cpl0(s)) {
8188             gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
8189             gen_helper_clts(cpu_env);
8190             /* abort block because static cpu state changed */
8191             gen_jmp_im(s, s->pc - s->cs_base);
8192             gen_eob(s);
8193         }
8194         break;
8195     /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
8196     case 0x1c3: /* MOVNTI reg, mem */
8197         if (!(s->cpuid_features & CPUID_SSE2))
8198             goto illegal_op;
8199         ot = mo_64_32(dflag);
8200         modrm = x86_ldub_code(env, s);
8201         mod = (modrm >> 6) & 3;
8202         if (mod == 3)
8203             goto illegal_op;
8204         reg = ((modrm >> 3) & 7) | REX_R(s);
8205         /* generate a generic store */
8206         gen_ldst_modrm(env, s, modrm, ot, reg, 1);
8207         break;
8208     case 0x1ae:
8209         modrm = x86_ldub_code(env, s);
8210         switch (modrm) {
8211         CASE_MODRM_MEM_OP(0): /* fxsave */
8212             if (!(s->cpuid_features & CPUID_FXSR)
8213                 || (prefixes & PREFIX_LOCK)) {
8214                 goto illegal_op;
8215             }
8216             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8217                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8218                 break;
8219             }
8220             gen_lea_modrm(env, s, modrm);
8221             gen_helper_fxsave(cpu_env, s->A0);
8222             break;
8223 
8224         CASE_MODRM_MEM_OP(1): /* fxrstor */
8225             if (!(s->cpuid_features & CPUID_FXSR)
8226                 || (prefixes & PREFIX_LOCK)) {
8227                 goto illegal_op;
8228             }
8229             if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
8230                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8231                 break;
8232             }
8233             gen_lea_modrm(env, s, modrm);
8234             gen_helper_fxrstor(cpu_env, s->A0);
8235             break;
8236 
8237         CASE_MODRM_MEM_OP(2): /* ldmxcsr */
8238             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8239                 goto illegal_op;
8240             }
8241             if (s->flags & HF_TS_MASK) {
8242                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8243                 break;
8244             }
8245             gen_lea_modrm(env, s, modrm);
8246             tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
8247             gen_helper_ldmxcsr(cpu_env, s->tmp2_i32);
8248             break;
8249 
8250         CASE_MODRM_MEM_OP(3): /* stmxcsr */
8251             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
8252                 goto illegal_op;
8253             }
8254             if (s->flags & HF_TS_MASK) {
8255                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
8256                 break;
8257             }
8258             gen_helper_update_mxcsr(cpu_env);
8259             gen_lea_modrm(env, s, modrm);
8260             tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr));
8261             gen_op_st_v(s, MO_32, s->T0, s->A0);
8262             break;
8263 
8264         CASE_MODRM_MEM_OP(4): /* xsave */
8265             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8266                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8267                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8268                 goto illegal_op;
8269             }
8270             gen_lea_modrm(env, s, modrm);
8271             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8272                                   cpu_regs[R_EDX]);
8273             gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64);
8274             break;
8275 
8276         CASE_MODRM_MEM_OP(5): /* xrstor */
8277             if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8278                 || (prefixes & (PREFIX_LOCK | PREFIX_DATA
8279                                 | PREFIX_REPZ | PREFIX_REPNZ))) {
8280                 goto illegal_op;
8281             }
8282             gen_lea_modrm(env, s, modrm);
8283             tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8284                                   cpu_regs[R_EDX]);
8285             gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64);
8286             /* XRSTOR is how MPX is enabled, which changes how
8287                we translate.  Thus we need to end the TB.  */
8288             gen_update_cc_op(s);
8289             gen_jmp_im(s, s->pc - s->cs_base);
8290             gen_eob(s);
8291             break;
8292 
8293         CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
8294             if (prefixes & PREFIX_LOCK) {
8295                 goto illegal_op;
8296             }
8297             if (prefixes & PREFIX_DATA) {
8298                 /* clwb */
8299                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
8300                     goto illegal_op;
8301                 }
8302                 gen_nop_modrm(env, s, modrm);
8303             } else {
8304                 /* xsaveopt */
8305                 if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
8306                     || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
8307                     || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
8308                     goto illegal_op;
8309                 }
8310                 gen_lea_modrm(env, s, modrm);
8311                 tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
8312                                       cpu_regs[R_EDX]);
8313                 gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64);
8314             }
8315             break;
8316 
8317         CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
8318             if (prefixes & PREFIX_LOCK) {
8319                 goto illegal_op;
8320             }
8321             if (prefixes & PREFIX_DATA) {
8322                 /* clflushopt */
8323                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
8324                     goto illegal_op;
8325                 }
8326             } else {
8327                 /* clflush */
8328                 if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
8329                     || !(s->cpuid_features & CPUID_CLFLUSH)) {
8330                     goto illegal_op;
8331                 }
8332             }
8333             gen_nop_modrm(env, s, modrm);
8334             break;
8335 
8336         case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
8337         case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
8338         case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
8339         case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
8340             if (CODE64(s)
8341                 && (prefixes & PREFIX_REPZ)
8342                 && !(prefixes & PREFIX_LOCK)
8343                 && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
8344                 TCGv base, treg, src, dst;
8345 
8346                 /* Preserve hflags bits by testing CR4 at runtime.  */
8347                 tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
8348                 gen_helper_cr4_testbit(cpu_env, s->tmp2_i32);
8349 
8350                 base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
8351                 treg = cpu_regs[(modrm & 7) | REX_B(s)];
8352 
8353                 if (modrm & 0x10) {
8354                     /* wr*base */
8355                     dst = base, src = treg;
8356                 } else {
8357                     /* rd*base */
8358                     dst = treg, src = base;
8359                 }
8360 
8361                 if (s->dflag == MO_32) {
8362                     tcg_gen_ext32u_tl(dst, src);
8363                 } else {
8364                     tcg_gen_mov_tl(dst, src);
8365                 }
8366                 break;
8367             }
8368             goto unknown_op;
8369 
8370         case 0xf8: /* sfence / pcommit */
8371             if (prefixes & PREFIX_DATA) {
8372                 /* pcommit */
8373                 if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT)
8374                     || (prefixes & PREFIX_LOCK)) {
8375                     goto illegal_op;
8376                 }
8377                 break;
8378             }
8379             /* fallthru */
8380         case 0xf9 ... 0xff: /* sfence */
8381             if (!(s->cpuid_features & CPUID_SSE)
8382                 || (prefixes & PREFIX_LOCK)) {
8383                 goto illegal_op;
8384             }
8385             tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
8386             break;
8387         case 0xe8 ... 0xef: /* lfence */
8388             if (!(s->cpuid_features & CPUID_SSE)
8389                 || (prefixes & PREFIX_LOCK)) {
8390                 goto illegal_op;
8391             }
8392             tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
8393             break;
8394         case 0xf0 ... 0xf7: /* mfence */
8395             if (!(s->cpuid_features & CPUID_SSE2)
8396                 || (prefixes & PREFIX_LOCK)) {
8397                 goto illegal_op;
8398             }
8399             tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8400             break;
8401 
8402         default:
8403             goto unknown_op;
8404         }
8405         break;
8406 
8407     case 0x10d: /* 3DNow! prefetch(w) */
8408         modrm = x86_ldub_code(env, s);
8409         mod = (modrm >> 6) & 3;
8410         if (mod == 3)
8411             goto illegal_op;
8412         gen_nop_modrm(env, s, modrm);
8413         break;
8414     case 0x1aa: /* rsm */
8415         gen_svm_check_intercept(s, SVM_EXIT_RSM);
8416         if (!(s->flags & HF_SMM_MASK))
8417             goto illegal_op;
8418 #ifdef CONFIG_USER_ONLY
8419         /* we should not be in SMM mode */
8420         g_assert_not_reached();
8421 #else
8422         gen_update_cc_op(s);
8423         gen_jmp_im(s, s->pc - s->cs_base);
8424         gen_helper_rsm(cpu_env);
8425 #endif /* CONFIG_USER_ONLY */
8426         gen_eob(s);
8427         break;
8428     case 0x1b8: /* SSE4.2 popcnt */
8429         if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
8430              PREFIX_REPZ)
8431             goto illegal_op;
8432         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
8433             goto illegal_op;
8434 
8435         modrm = x86_ldub_code(env, s);
8436         reg = ((modrm >> 3) & 7) | REX_R(s);
8437 
8438         if (s->prefix & PREFIX_DATA) {
8439             ot = MO_16;
8440         } else {
8441             ot = mo_64_32(dflag);
8442         }
8443 
8444         gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
8445         gen_extu(ot, s->T0);
8446         tcg_gen_mov_tl(cpu_cc_src, s->T0);
8447         tcg_gen_ctpop_tl(s->T0, s->T0);
8448         gen_op_mov_reg_v(s, ot, reg, s->T0);
8449 
8450         set_cc_op(s, CC_OP_POPCNT);
8451         break;
8452     case 0x10e ... 0x10f:
8453         /* 3DNow! instructions, ignore prefixes */
8454         s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
8455         /* fall through */
8456     case 0x110 ... 0x117:
8457     case 0x128 ... 0x12f:
8458     case 0x138 ... 0x13a:
8459     case 0x150 ... 0x179:
8460     case 0x17c ... 0x17f:
8461     case 0x1c2:
8462     case 0x1c4 ... 0x1c6:
8463     case 0x1d0 ... 0x1fe:
8464         gen_sse(env, s, b, pc_start);
8465         break;
8466     default:
8467         goto unknown_op;
8468     }
8469     return s->pc;
8470  illegal_op:
8471     gen_illegal_opcode(s);
8472     return s->pc;
8473  unknown_op:
8474     gen_unknown_opcode(env, s);
8475     return s->pc;
8476 }
8477 
8478 void tcg_x86_init(void)
8479 {
8480     static const char reg_names[CPU_NB_REGS][4] = {
8481 #ifdef TARGET_X86_64
8482         [R_EAX] = "rax",
8483         [R_EBX] = "rbx",
8484         [R_ECX] = "rcx",
8485         [R_EDX] = "rdx",
8486         [R_ESI] = "rsi",
8487         [R_EDI] = "rdi",
8488         [R_EBP] = "rbp",
8489         [R_ESP] = "rsp",
8490         [8]  = "r8",
8491         [9]  = "r9",
8492         [10] = "r10",
8493         [11] = "r11",
8494         [12] = "r12",
8495         [13] = "r13",
8496         [14] = "r14",
8497         [15] = "r15",
8498 #else
8499         [R_EAX] = "eax",
8500         [R_EBX] = "ebx",
8501         [R_ECX] = "ecx",
8502         [R_EDX] = "edx",
8503         [R_ESI] = "esi",
8504         [R_EDI] = "edi",
8505         [R_EBP] = "ebp",
8506         [R_ESP] = "esp",
8507 #endif
8508     };
8509     static const char seg_base_names[6][8] = {
8510         [R_CS] = "cs_base",
8511         [R_DS] = "ds_base",
8512         [R_ES] = "es_base",
8513         [R_FS] = "fs_base",
8514         [R_GS] = "gs_base",
8515         [R_SS] = "ss_base",
8516     };
8517     static const char bnd_regl_names[4][8] = {
8518         "bnd0_lb", "bnd1_lb", "bnd2_lb", "bnd3_lb"
8519     };
8520     static const char bnd_regu_names[4][8] = {
8521         "bnd0_ub", "bnd1_ub", "bnd2_ub", "bnd3_ub"
8522     };
8523     int i;
8524 
8525     cpu_cc_op = tcg_global_mem_new_i32(cpu_env,
8526                                        offsetof(CPUX86State, cc_op), "cc_op");
8527     cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst),
8528                                     "cc_dst");
8529     cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src),
8530                                     "cc_src");
8531     cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2),
8532                                      "cc_src2");
8533 
8534     for (i = 0; i < CPU_NB_REGS; ++i) {
8535         cpu_regs[i] = tcg_global_mem_new(cpu_env,
8536                                          offsetof(CPUX86State, regs[i]),
8537                                          reg_names[i]);
8538     }
8539 
8540     for (i = 0; i < 6; ++i) {
8541         cpu_seg_base[i]
8542             = tcg_global_mem_new(cpu_env,
8543                                  offsetof(CPUX86State, segs[i].base),
8544                                  seg_base_names[i]);
8545     }
8546 
8547     for (i = 0; i < 4; ++i) {
8548         cpu_bndl[i]
8549             = tcg_global_mem_new_i64(cpu_env,
8550                                      offsetof(CPUX86State, bnd_regs[i].lb),
8551                                      bnd_regl_names[i]);
8552         cpu_bndu[i]
8553             = tcg_global_mem_new_i64(cpu_env,
8554                                      offsetof(CPUX86State, bnd_regs[i].ub),
8555                                      bnd_regu_names[i]);
8556     }
8557 }
8558 
8559 static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu)
8560 {
8561     DisasContext *dc = container_of(dcbase, DisasContext, base);
8562     CPUX86State *env = cpu->env_ptr;
8563     uint32_t flags = dc->base.tb->flags;
8564     uint32_t cflags = tb_cflags(dc->base.tb);
8565     int cpl = (flags >> HF_CPL_SHIFT) & 3;
8566     int iopl = (flags >> IOPL_SHIFT) & 3;
8567 
8568     dc->cs_base = dc->base.tb->cs_base;
8569     dc->flags = flags;
8570 #ifndef CONFIG_USER_ONLY
8571     dc->cpl = cpl;
8572     dc->iopl = iopl;
8573 #endif
8574 
8575     /* We make some simplifying assumptions; validate they're correct. */
8576     g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0));
8577     g_assert(CPL(dc) == cpl);
8578     g_assert(IOPL(dc) == iopl);
8579     g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0));
8580     g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0));
8581     g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0));
8582     g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0));
8583     g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0));
8584     g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0));
8585     g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0));
8586     g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0));
8587 
8588     dc->cc_op = CC_OP_DYNAMIC;
8589     dc->cc_op_dirty = false;
8590     dc->popl_esp_hack = 0;
8591     /* select memory access functions */
8592     dc->mem_index = 0;
8593 #ifdef CONFIG_SOFTMMU
8594     dc->mem_index = cpu_mmu_index(env, false);
8595 #endif
8596     dc->cpuid_features = env->features[FEAT_1_EDX];
8597     dc->cpuid_ext_features = env->features[FEAT_1_ECX];
8598     dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
8599     dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
8600     dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
8601     dc->cpuid_xsave_features = env->features[FEAT_XSAVE];
8602     dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) ||
8603                     (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)));
8604     /*
8605      * If jmp_opt, we want to handle each string instruction individually.
8606      * For icount also disable repz optimization so that each iteration
8607      * is accounted separately.
8608      */
8609     dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT);
8610 
8611     dc->T0 = tcg_temp_new();
8612     dc->T1 = tcg_temp_new();
8613     dc->A0 = tcg_temp_new();
8614 
8615     dc->tmp0 = tcg_temp_new();
8616     dc->tmp1_i64 = tcg_temp_new_i64();
8617     dc->tmp2_i32 = tcg_temp_new_i32();
8618     dc->tmp3_i32 = tcg_temp_new_i32();
8619     dc->tmp4 = tcg_temp_new();
8620     dc->ptr0 = tcg_temp_new_ptr();
8621     dc->ptr1 = tcg_temp_new_ptr();
8622     dc->cc_srcT = tcg_temp_local_new();
8623 }
8624 
8625 static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu)
8626 {
8627 }
8628 
8629 static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
8630 {
8631     DisasContext *dc = container_of(dcbase, DisasContext, base);
8632 
8633     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
8634 }
8635 
8636 static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
8637 {
8638     DisasContext *dc = container_of(dcbase, DisasContext, base);
8639     target_ulong pc_next;
8640 
8641 #ifdef TARGET_VSYSCALL_PAGE
8642     /*
8643      * Detect entry into the vsyscall page and invoke the syscall.
8644      */
8645     if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) {
8646         gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next);
8647         dc->base.pc_next = dc->pc + 1;
8648         return;
8649     }
8650 #endif
8651 
8652     pc_next = disas_insn(dc, cpu);
8653 
8654     if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
8655         /* if single step mode, we generate only one instruction and
8656            generate an exception */
8657         /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
8658            the flag and abort the translation to give the irqs a
8659            chance to happen */
8660         dc->base.is_jmp = DISAS_TOO_MANY;
8661     } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
8662                && ((pc_next & TARGET_PAGE_MASK)
8663                    != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
8664                        & TARGET_PAGE_MASK)
8665                    || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
8666         /* Do not cross the boundary of the pages in icount mode,
8667            it can cause an exception. Do it only when boundary is
8668            crossed by the first instruction in the block.
8669            If current instruction already crossed the bound - it's ok,
8670            because an exception hasn't stopped this code.
8671          */
8672         dc->base.is_jmp = DISAS_TOO_MANY;
8673     } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
8674         dc->base.is_jmp = DISAS_TOO_MANY;
8675     }
8676 
8677     dc->base.pc_next = pc_next;
8678 }
8679 
8680 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
8681 {
8682     DisasContext *dc = container_of(dcbase, DisasContext, base);
8683 
8684     if (dc->base.is_jmp == DISAS_TOO_MANY) {
8685         gen_jmp_im(dc, dc->base.pc_next - dc->cs_base);
8686         gen_eob(dc);
8687     }
8688 }
8689 
8690 static void i386_tr_disas_log(const DisasContextBase *dcbase,
8691                               CPUState *cpu)
8692 {
8693     DisasContext *dc = container_of(dcbase, DisasContext, base);
8694 
8695     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
8696     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
8697 }
8698 
8699 static const TranslatorOps i386_tr_ops = {
8700     .init_disas_context = i386_tr_init_disas_context,
8701     .tb_start           = i386_tr_tb_start,
8702     .insn_start         = i386_tr_insn_start,
8703     .translate_insn     = i386_tr_translate_insn,
8704     .tb_stop            = i386_tr_tb_stop,
8705     .disas_log          = i386_tr_disas_log,
8706 };
8707 
8708 /* generate intermediate code for basic block 'tb'.  */
8709 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
8710 {
8711     DisasContext dc;
8712 
8713     translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
8714 }
8715 
8716 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
8717                           target_ulong *data)
8718 {
8719     int cc_op = data[1];
8720     env->eip = data[0] - tb->cs_base;
8721     if (cc_op != CC_OP_DYNAMIC) {
8722         env->cc_op = cc_op;
8723     }
8724 }
8725